mirror of
https://github.com/hl-archive-node/nanoreth.git
synced 2025-12-06 10:59:55 +00:00
fix(db): upgrade to libmdbx 0.12.2; (#377)
This commit is contained in:
@ -34,7 +34,9 @@
|
||||
## The Future will (be) Positive. Всё будет хорошо.
|
||||
##
|
||||
|
||||
if(CMAKE_VERSION VERSION_LESS 3.12)
|
||||
if(CMAKE_VERSION VERSION_LESS 3.8.2)
|
||||
cmake_minimum_required(VERSION 3.0.2)
|
||||
elseif(CMAKE_VERSION VERSION_LESS 3.12)
|
||||
cmake_minimum_required(VERSION 3.8.2)
|
||||
else()
|
||||
cmake_minimum_required(VERSION 3.12)
|
||||
@ -241,34 +243,42 @@ else()
|
||||
option(BUILD_FOR_NATIVE_CPU "Generate code for the compiling machine CPU" OFF)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CONFIGURATION_TYPES OR NOT CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG")
|
||||
set(INTERPROCEDURAL_OPTIMIZATION_DEFAULT ON)
|
||||
else()
|
||||
set(INTERPROCEDURAL_OPTIMIZATION_DEFAULT OFF)
|
||||
endif()
|
||||
|
||||
if(CMAKE_INTERPROCEDURAL_OPTIMIZATION_AVAILABLE
|
||||
OR GCC_LTO_AVAILABLE OR MSVC_LTO_AVAILABLE OR
|
||||
(CLANG_LTO_AVAILABLE AND
|
||||
((DEFINED MDBX_ENABLE_TESTS AND NOT MDBX_ENABLE_TESTS)
|
||||
OR NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0)))
|
||||
option(INTERPROCEDURAL_OPTIMIZATION "Enable interprocedural/LTO optimization" ${INTERPROCEDURAL_OPTIMIZATION_DEFAULT})
|
||||
else()
|
||||
set(INTERPROCEDURAL_OPTIMIZATION OFF)
|
||||
OR GCC_LTO_AVAILABLE OR MSVC_LTO_AVAILABLE OR CLANG_LTO_AVAILABLE)
|
||||
if((CMAKE_CONFIGURATION_TYPES OR NOT CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG") AND
|
||||
((MSVC_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 19) OR
|
||||
(GCC_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 7) OR
|
||||
(CLANG_LTO_AVAILABLE AND NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 5)))
|
||||
set(INTERPROCEDURAL_OPTIMIZATION_DEFAULT ON)
|
||||
else()
|
||||
set(INTERPROCEDURAL_OPTIMIZATION_DEFAULT OFF)
|
||||
endif()
|
||||
option(INTERPROCEDURAL_OPTIMIZATION "Enable interprocedural/LTO optimization." ${INTERPROCEDURAL_OPTIMIZATION_DEFAULT})
|
||||
endif()
|
||||
|
||||
if(INTERPROCEDURAL_OPTIMIZATION)
|
||||
if(GCC_LTO_AVAILABLE)
|
||||
set(LTO_ENABLED TRUE)
|
||||
set(CMAKE_AR ${CMAKE_GCC_AR} CACHE PATH "Path to ar program with LTO-plugin" FORCE)
|
||||
set(CMAKE_C_COMPILER_AR ${CMAKE_AR} CACHE PATH "Path to ar program with LTO-plugin" FORCE)
|
||||
set(CMAKE_CXX_COMPILER_AR ${CMAKE_AR} CACHE PATH "Path to ar program with LTO-plugin" FORCE)
|
||||
set(CMAKE_NM ${CMAKE_GCC_NM} CACHE PATH "Path to nm program with LTO-plugin" FORCE)
|
||||
set(CMAKE_RANLIB ${CMAKE_GCC_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE)
|
||||
set(CMAKE_C_COMPILER_RANLIB ${CMAKE_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE)
|
||||
set(CMAKE_CXX_COMPILER_RANLIB ${CMAKE_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE)
|
||||
message(STATUS "MDBX indulge Link-Time Optimization by GCC")
|
||||
elseif(CLANG_LTO_AVAILABLE)
|
||||
set(LTO_ENABLED TRUE)
|
||||
if(CMAKE_CLANG_LD)
|
||||
set(CMAKE_LINKER ${CMAKE_CLANG_LD} CACHE PATH "Path to lld or ld program with LTO-plugin" FORCE)
|
||||
endif()
|
||||
set(CMAKE_AR ${CMAKE_CLANG_AR} CACHE PATH "Path to ar program with LTO-plugin" FORCE)
|
||||
set(CMAKE_C_COMPILER_AR ${CMAKE_AR} CACHE PATH "Path to ar program with LTO-plugin" FORCE)
|
||||
set(CMAKE_CXX_COMPILER_AR ${CMAKE_AR} CACHE PATH "Path to ar program with LTO-plugin" FORCE)
|
||||
set(CMAKE_NM ${CMAKE_CLANG_NM} CACHE PATH "Path to nm program with LTO-plugin" FORCE)
|
||||
set(CMAKE_RANLIB ${CMAKE_CLANG_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE)
|
||||
set(CMAKE_C_COMPILER_RANLIB ${CMAKE_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE)
|
||||
set(CMAKE_CXX_COMPILER_RANLIB ${CMAKE_RANLIB} CACHE PATH "Path to ranlib program with LTO-plugin" FORCE)
|
||||
message(STATUS "MDBX indulge Link-Time Optimization by CLANG")
|
||||
elseif(MSVC_LTO_AVAILABLE)
|
||||
set(LTO_ENABLED TRUE)
|
||||
@ -375,6 +385,8 @@ if(NOT DEFINED MDBX_CXX_STANDARD)
|
||||
set(MDBX_CXX_STANDARD 14)
|
||||
elseif(NOT HAS_CXX11 LESS 0)
|
||||
set(MDBX_CXX_STANDARD 11)
|
||||
elseif(CXX_FALLBACK_GNU11 OR CXX_FALLBACK_11)
|
||||
set(MDBX_CXX_STANDARD 11)
|
||||
else()
|
||||
set(MDBX_CXX_STANDARD 98)
|
||||
endif()
|
||||
@ -494,16 +506,29 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
if(MDBX_NTDLL_EXTRA_IMPLIB)
|
||||
add_mdbx_option(MDBX_WITHOUT_MSVC_CRT "Avoid dependence from MSVC CRT and use ntdll.dll instead" OFF)
|
||||
endif()
|
||||
set(MDBX_AVOID_MSYNC_DEFAULT ON)
|
||||
else()
|
||||
add_mdbx_option(MDBX_USE_OFDLOCKS "Use Open file description locks (aka OFD locks, non-POSIX)" AUTO)
|
||||
mark_as_advanced(MDBX_USE_OFDLOCKS)
|
||||
set(MDBX_AVOID_MSYNC_DEFAULT OFF)
|
||||
endif()
|
||||
add_mdbx_option(MDBX_LOCKING "Locking method (Win32=-1, SysV=5, POSIX=1988, POSIX=2001, POSIX=2008, Futexes=1995)" AUTO)
|
||||
option(MDBX_AVOID_MSYNC "Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP mode" ${MDBX_AVOID_MSYNC_DEFAULT})
|
||||
add_mdbx_option(MDBX_LOCKING "Locking method (Windows=-1, SysV=5, POSIX=1988, POSIX=2001, POSIX=2008, Futexes=1995)" AUTO)
|
||||
mark_as_advanced(MDBX_LOCKING)
|
||||
add_mdbx_option(MDBX_TRUST_RTC "Does a system have battery-backed Real-Time Clock or just a fake" AUTO)
|
||||
mark_as_advanced(MDBX_TRUST_RTC)
|
||||
option(MDBX_FORCE_ASSERTIONS "Force enable assertion checking" OFF)
|
||||
option(MDBX_DISABLE_VALIDATION "Disable some checks to reduce an overhead and detection probability of database corruption to a values closer to the LMDB" OFF)
|
||||
option(MDBX_ENABLE_REFUND "Zerocost auto-compactification during write-transactions" ON)
|
||||
option(MDBX_ENABLE_MADVISE "Using POSIX' madvise() and/or similar hints" ON)
|
||||
if (CMAKE_TARGET_BITNESS GREATER 32)
|
||||
set(MDBX_BIGFOOT_DEFAULT ON)
|
||||
else()
|
||||
set(MDBX_BIGFOOT_DEFAULT OFF)
|
||||
endif()
|
||||
option(MDBX_ENABLE_BIGFOOT "Chunking long list of retired pages during huge transactions commit to avoid use sequences of pages" ${MDBX_BIGFOOT_DEFAULT})
|
||||
option(MDBX_ENABLE_PGOP_STAT "Gathering statistics for page operations" ON)
|
||||
option(MDBX_ENABLE_PROFGC "Profiling of GC search and updates" OFF)
|
||||
|
||||
if(NOT MDBX_AMALGAMATED_SOURCE)
|
||||
if(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG")
|
||||
@ -520,7 +545,7 @@ else()
|
||||
unset(MDBX_LINK_TOOLS_NONSTATIC CACHE)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_LOADED AND MDBX_CXX_STANDARD GREATER_EQUAL 11 AND MDBX_CXX_STANDARD LESS 83)
|
||||
if(CMAKE_CXX_COMPILER_LOADED AND MDBX_CXX_STANDARD LESS 83 AND NOT MDBX_CXX_STANDARD LESS 11)
|
||||
if(NOT MDBX_AMALGAMATED_SOURCE)
|
||||
option(MDBX_ENABLE_TESTS "Build MDBX tests" ${BUILD_TESTING})
|
||||
endif()
|
||||
@ -626,9 +651,13 @@ macro(target_setup_options TARGET)
|
||||
endmacro()
|
||||
|
||||
macro(libmdbx_setup_libs TARGET MODE)
|
||||
target_link_libraries(${TARGET} ${MODE} Threads::Threads)
|
||||
if(CMAKE_VERSION VERSION_LESS 3.1)
|
||||
target_link_libraries(${TARGET} ${MODE} ${CMAKE_THREAD_LIBS_INIT})
|
||||
else()
|
||||
target_link_libraries(${TARGET} ${MODE} Threads::Threads)
|
||||
endif()
|
||||
if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
target_link_libraries(${TARGET} ${MODE} ntdll advapi32)
|
||||
target_link_libraries(${TARGET} ${MODE} ntdll user32 kernel32 advapi32)
|
||||
if(MDBX_NTDLL_EXTRA_IMPLIB AND MDBX_WITHOUT_MSVC_CRT)
|
||||
target_link_libraries(${TARGET} ${MODE} ntdll_extra)
|
||||
endif()
|
||||
@ -749,7 +778,6 @@ if(MDBX_BUILD_SHARED_LIBRARY)
|
||||
if(CMAKE_VERSION VERSION_LESS 3.12)
|
||||
install(TARGETS mdbx EXPORT libmdbx
|
||||
LIBRARY DESTINATION ${MDBX_DLL_INSTALL_DESTINATION} COMPONENT runtime
|
||||
OBJECTS DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel
|
||||
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT devel
|
||||
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT devel)
|
||||
@ -894,16 +922,16 @@ else()
|
||||
endif()
|
||||
if(CMAKE_C_COMPILER_ABI
|
||||
AND NOT (CMAKE_C_COMPILER_ABI MATCHES ".*${MDBX_BUILD_TARGET}.*" OR MDBX_BUILD_TARGET MATCHES ".*${CMAKE_C_COMPILER_ABI}.*"))
|
||||
string(APPEND MDBX_BUILD_TARGET "-${CMAKE_C_COMPILER_ABI}")
|
||||
string(CONCAT MDBX_BUILD_TARGET "${MDBX_BUILD_TARGET}-${CMAKE_C_COMPILER_ABI}")
|
||||
endif()
|
||||
if(CMAKE_C_PLATFORM_ID
|
||||
AND NOT (CMAKE_SYSTEM_NAME
|
||||
AND (CMAKE_C_PLATFORM_ID MATCHES ".*${CMAKE_SYSTEM_NAME}.*" OR CMAKE_SYSTEM_NAME MATCHES ".*${CMAKE_C_PLATFORM_ID}.*"))
|
||||
AND NOT (CMAKE_C_PLATFORM_ID MATCHES ".*${CMAKE_C_PLATFORM_ID}.*" OR MDBX_BUILD_TARGET MATCHES ".*${CMAKE_C_PLATFORM_ID}.*"))
|
||||
string(APPEND MDBX_BUILD_TARGET "-${CMAKE_C_COMPILER_ABI}")
|
||||
string(CONCAT MDBX_BUILD_TARGET "${MDBX_BUILD_TARGET}-${CMAKE_C_COMPILER_ABI}")
|
||||
endif()
|
||||
if(CMAKE_SYSTEM_NAME)
|
||||
string(APPEND MDBX_BUILD_TARGET "-${CMAKE_SYSTEM_NAME}")
|
||||
string(CONCAT MDBX_BUILD_TARGET "${MDBX_BUILD_TARGET}-${CMAKE_SYSTEM_NAME}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -957,6 +985,7 @@ if (NOT SUBPROJECT)
|
||||
set(CPACK_PACKAGE_VERSION_COMMIT ${MDBX_VERSION_REVISION})
|
||||
set(PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${CPACK_PACKAGE_VERSION_COMMIT}")
|
||||
message(STATUS "libmdbx package version is ${PACKAGE_VERSION}")
|
||||
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/VERSION.txt" "${MDBX_VERSION_MAJOR}.${MDBX_VERSION_MINOR}.${MDBX_VERSION_RELEASE}.${MDBX_VERSION_REVISION}")
|
||||
endif()
|
||||
|
||||
cmake_policy(POP)
|
||||
|
||||
@ -1,6 +1,179 @@
|
||||
ChangeLog
|
||||
---------
|
||||
|
||||
English version [by Google](https://gitflic-ru.translate.goog/project/erthink/libmdbx/blob?file=ChangeLog.md&_x_tr_sl=ru&_x_tr_tl=en)
|
||||
and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md).
|
||||
|
||||
## v0.12.2 (Иван Ярыгин) от 2022-11-11
|
||||
|
||||
Выпуск с существенными доработками и новой функциональностью
|
||||
в память о российском борце [Иване Сергеевиче Ярыгине](https://ru.wikipedia.org/wiki/Ярыгин,_Иван_Сергеевич).
|
||||
|
||||
На Олимпийских играх в Мюнхене в 1972 году Иван Ярыгин уложил всех соперников на лопатки,
|
||||
суммарно затратив менее 9 минут. Этот рекорд никем не побит до сих пор.
|
||||
|
||||
```
|
||||
64 files changed, 5573 insertions(+), 2510 deletions(-)
|
||||
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
|
||||
```
|
||||
|
||||
Новое:
|
||||
|
||||
- Поддержка всех основных опций при сборке посредством CMake.
|
||||
|
||||
- Требования к CMake понижены до версии 3.0.2 для возможности сборки для устаревших платформ.
|
||||
|
||||
- Добавлена возможность профилирования работы GC в сложных и/или нагруженных
|
||||
сценариях (например Ethereum/Erigon). По-умолчанию соответствующий код отключен,
|
||||
а для его активации необходимо указать опцию сборки `MDBX_ENABLE_PROFGC=1`.
|
||||
|
||||
- Добавлена функция `mdbx_env_warmup()` для "прогрева" БД с возможностью
|
||||
закрепления страниц в памяти.
|
||||
В утилиты `mdbx_chk`, `mdbx_copy` и `mdbx_dump` добавлены опции `-u` и `-U`
|
||||
для активации соответствующего функционала.
|
||||
|
||||
- Отключение учета «грязных» страниц в не требующих этого режимах
|
||||
(`MDBX_WRITEMAP` при `MDBX_AVOID_MSYNC=0`). Доработка позволяет снизить
|
||||
накладные расходы и была запланирована давно, но откладывалась так как
|
||||
требовала других изменений.
|
||||
|
||||
- Вытеснение из памяти (спиллинг) «грязных» страниц с учетом размера
|
||||
large/overflow-страниц. Доработка позволяет корректно соблюдать политику
|
||||
задаваемую опциями `MDBX_opt_txn_dp_limit`,
|
||||
`MDBX_opt_spill_max_denominator`, `MDBX_opt_spill_min_denominator` и
|
||||
была запланирована давно, но откладывалась так как требовала других
|
||||
изменений.
|
||||
|
||||
- Для Windows в API добавлены UNICODE-зависимые определения макросов
|
||||
`MDBX_DATANAME`, `MDBX_LOCKNAME` и `MDBX_LOCK_SUFFIX`.
|
||||
|
||||
- Переход на преимущественное использование типа `size_t` для
|
||||
уменьшения накладных расходов на платформе Эльбрус.
|
||||
|
||||
- В API добавлены функции `mdbx_limits_valsize4page_max()` и
|
||||
`mdbx_env_get_valsize4page_max()` возвращающие максимальный размер в
|
||||
байтах значения, которое может быть размещено в одной
|
||||
large/overflow-странице, а не последовательности из двух или более таких
|
||||
страниц. Для таблиц с поддержкой дубликатов вынос значений на
|
||||
large/overflow-страницы не поддерживается, поэтому результат совпадает с
|
||||
`mdbx_limits_valsize_max()`.
|
||||
|
||||
- В API добавлены функции `mdbx_limits_pairsize4page_max()` и
|
||||
`mdbx_env_get_pairsize4page_max()` возвращающие в байтах максимальный
|
||||
суммарный размер пары ключ-значение для их размещения на одной листовой
|
||||
странице, без выноса значения на отдельную large/overflow-страницу. Для
|
||||
таблиц с поддержкой дубликатов вынос значений на large/overflow-страницы
|
||||
не поддерживается, поэтому результат определяет максимальный/допустимый
|
||||
суммарный размер пары ключ-значение.
|
||||
|
||||
- Реализовано использование асинхронной (overlapped) записи в Windows,
|
||||
включая использование небуферизированного ввода-вывода и `WriteGather()`.
|
||||
Это позволяет сократить накладные расходы и частично обойти проблемы
|
||||
Windows с низкой производительностью ввода-вывода, включая большие
|
||||
задержки `FlushFileBuffers()`. Новый код также обеспечивает консолидацию
|
||||
записываемых регионов на всех платформах, а на Windows использование
|
||||
событий (events) сведено к минимуму, одновременно с автоматическим
|
||||
использованием `WriteGather()`. Поэтому ожидается существенное снижение
|
||||
накладных расходов взаимодействия с ОС, а в Windows это ускорение, в
|
||||
некоторых сценариях, может быть кратным в сравнении с LMDB.
|
||||
|
||||
- Добавлена опция сборки `MDBX_AVOID_MSYNC`, которая определяет
|
||||
поведение libmdbx в режиме `MDBX_WRITEMAP` (когда данные изменяются
|
||||
непосредственно в отображенных в ОЗУ страницах БД):
|
||||
|
||||
* Если `MDBX_AVOID_MSYNC=0` (по умолчанию на всех системах кроме Windows),
|
||||
то (как прежде) сохранение данных выполняется посредством `msync()`,
|
||||
либо `FlushViewOfFile()` на Windows. На платформах с полноценной
|
||||
подсистемой виртуальной памяти и адекватным файловым вводом-выводом
|
||||
это обеспечивает минимум накладных расходов (один системный вызов)
|
||||
и максимальную производительность. Однако, на Windows приводит
|
||||
к значительной деградации, в том числе из-за того что после
|
||||
`FlushViewOfFile()` требуется также вызов `FlushFileBuffers()`
|
||||
с массой проблем и суеты внутри ядра ОС.
|
||||
|
||||
* Если `MDBX_AVOID_MSYNC=1` (по умолчанию только на Windows), то
|
||||
сохранение данных выполняется явной записью в файл каждой измененной
|
||||
страницы БД. Это требует дополнительных накладных расходов, как
|
||||
на отслеживание измененных страниц (ведение списков "грязных"
|
||||
страниц), так и на системные вызовы для их записи.
|
||||
Кроме этого, с точки зрения подсистемы виртуальной памяти ядра ОС,
|
||||
страницы БД измененные в ОЗУ и явно записанные в файл, могут либо
|
||||
оставаться "грязными" и быть повторно записаны ядром ОС позже,
|
||||
либо требовать дополнительных накладных расходов для отслеживания
|
||||
PTE (Page Table Entries), их модификации и дополнительного копирования
|
||||
данных. Тем не менее, по имеющейся информации, на Windows такой путь
|
||||
записи данных в целом обеспечивает более высокую производительность.
|
||||
|
||||
- Улучшение эвристики включения авто-слияния записей GC.
|
||||
|
||||
- Изменение формата LCK и семантики некоторых внутренних полей. Версии
|
||||
libmdbx использующие разный формат не смогут работать с одной БД
|
||||
одновременно, а только поочередно (LCK-файл переписывается при открытии
|
||||
первым открывающим БД процессом).
|
||||
|
||||
- В `C++` API добавлены методы фиксации транзакции с получением информации
|
||||
о задержках.
|
||||
|
||||
- Added `MDBX_HAVE_BUILTIN_CPU_SUPPORTS` build option to control use of GCC's
|
||||
`__builtin_cpu_supports()` function, which could be unavailable on a fake
|
||||
OSes (macos, ios, android, etc).
|
||||
|
||||
Исправления (без корректировок вышеперечисленных новых функций):
|
||||
|
||||
- Устранение ряда предупреждений при сборке посредством MinGW.
|
||||
- Устранение ложно-положительных сообщений от Valgrind об использовании
|
||||
не инициализированных данных из-за выравнивающих зазоров в `struct troika`.
|
||||
- Исправлен возврат неожиданной ошибки `MDBX_BUSY` из функций `mdbx_env_set_option()`,
|
||||
`mdbx_env_set_syncbytes()` и `mdbx_env_set_syncperiod()`.
|
||||
- Небольшие исправления для совместимости с CMake 3.8
|
||||
- Больше контроля и осторожности (паранойи) для страховки от дефектов `mremap()`.
|
||||
- Костыль для починки сборки со старыми версиями `stdatomic.h` из GNU Lib C,
|
||||
где макросы `ATOMIC_*_LOCK_FREE` ошибочно переопределяются через функции.
|
||||
- Использование `fcntl64(F_GETLK64/F_SETLK64/F_SETLKW64)` при наличии.
|
||||
Это решает проблему срабатывания проверочного утверждения при сборке для
|
||||
платформ где тип `off_t` шире соответствующих полей структуры `struct flock`,
|
||||
используемой для блокировки файлов.
|
||||
- Доработан сбор информации о задержках при фиксации транзакций:
|
||||
* Устранено искажение замеров длительности обновления GC
|
||||
при включении отладочного внутреннего аудита;
|
||||
* Защита от underflow-нуля только общей задержки в метриках,
|
||||
чтобы исключить ситуации, когда сумма отдельных стадий
|
||||
больше общей длительности.
|
||||
- Ряд исправлений для устранения срабатываний проверочных утверждений в
|
||||
отладочных сборках.
|
||||
- Более осторожное преобразование к типу `mdbx_tid_t` для устранения
|
||||
предупреждений.
|
||||
- Исправление лишнего сброса данных на диск в режиме `MDBX_SAFE_NOSYNC`
|
||||
при обновлении GC.
|
||||
- Fixed an extra check for `MDBX_APPENDDUP` inside `mdbx_cursor_put()`
|
||||
which could result in returning `MDBX_EKEYMISMATCH` for valid cases.
|
||||
- Fixed nasty `clz()` bug (by using `_BitScanReverse()`, only MSVC builds affected).
|
||||
|
||||
Мелочи:
|
||||
|
||||
- Исторические ссылки cвязанные с удалённым на ~~github~~ проектом перенаправлены на [web.archive.org](https://web.archive.org/web/https://github.com/erthink/libmdbx).
|
||||
- Синхронизированы конструкции CMake между проектами.
|
||||
- Добавлено предупреждение о небезопасности RISC-V.
|
||||
- Добавлено описание параметров `MDBX_debug_func` и `MDBX_debug_func`.
|
||||
- Добавлено обходное решение для минимизации ложно-положительных
|
||||
конфликтов при использовании файловых блокировок в Windows.
|
||||
- Проверка атомарности C11-операций c 32/64-битными данными.
|
||||
- Уменьшение в 42 раза значения по-умолчанию для `me_options.dp_limit`
|
||||
в отладочных сборках.
|
||||
- Добавление платформы `gcc-riscv64-linux-gnu` в список для цели `cross-gcc`.
|
||||
- Небольшие правки скрипта `long_stochastic.sh` для работы в Windows.
|
||||
- Удаление ненужного вызова `LockFileEx()` внутри `mdbx_env_copy()`.
|
||||
- Добавлено описание использования файловых дескрипторов в различных режимах.
|
||||
- Добавлено использование `_CrtDbgReport()` в отладочных сборках.
|
||||
- Fixed an extra ensure/assertion check of `oldest_reader` inside `txn_end()`.
|
||||
- Removed description of deprecated usage of `MDBX_NODUPDATA`.
|
||||
- Fixed regression in ASAN/Valgrind-enabled builds.
|
||||
- Fixed minor MinGW warning.
|
||||
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
|
||||
## v0.12.1 (Positive Proxima) at 2022-08-24
|
||||
|
||||
The planned frontward release with new superior features on the day of the 20th anniversary of [Positive Technologies](https://ptsecurity.com).
|
||||
@ -46,10 +219,75 @@ Not a release but preparation for changing feature set and API.
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
|
||||
## v0.11.13 (Swashplate) at 2022-11-10
|
||||
|
||||
The stable bugfix release in memory of [Boris Yuryev](https://ru.wikipedia.org/wiki/Юрьев,_Борис_Николаевич) on his 133rd birthday.
|
||||
|
||||
```
|
||||
30 files changed, 405 insertions(+), 136 deletions(-)
|
||||
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
|
||||
```
|
||||
|
||||
Fixes:
|
||||
|
||||
- Fixed builds with older libc versions after using `fcntl64()` (backport).
|
||||
- Fixed builds with older `stdatomic.h` versions,
|
||||
where the `ATOMIC_*_LOCK_FREE` macros are mistakenly redefined using functions (backport).
|
||||
- Added workaround for `mremap()` defect to avoid assertion failure (backport).
|
||||
- Workaround for `ecryptfs` bug(s) in the `copy_file_range` implementation (backport).
|
||||
- Fixed unexpected `MDBX_BUSY` from `mdbx_env_set_option()`, `mdbx_env_set_syncbytes()`
|
||||
and `mdbx_env_set_syncperiod()` (backport).
|
||||
- CMake requirements lowered to version 3.0.2 (backport).
|
||||
|
||||
Minors:
|
||||
|
||||
- Minor clarification output of `--help` for `mdbx_test` (backport).
|
||||
- Added a warning about the insecurity of RISC-V (backport).
|
||||
- Stochastic scripts and CMake files synchronized with the `devel` branch.
|
||||
- Use `--dont-check-ram-size` for small-tests make-targets (backport).
|
||||
|
||||
|
||||
## v0.11.12 (Эребуни) at 2022-10-12
|
||||
|
||||
The stable bugfix release.
|
||||
|
||||
```
|
||||
11 files changed, 96 insertions(+), 49 deletions(-)
|
||||
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
|
||||
```
|
||||
|
||||
Fixes:
|
||||
|
||||
- Fixed static assertion failure on platforms where the `off_t` type is wider
|
||||
than corresponding fields of `struct flock` used for file locking (backport).
|
||||
Now _libmdbx_ will use `fcntl64(F_GETLK64/F_SETLK64/F_SETLKW64)` if available.
|
||||
- Fixed assertion check inside `page_retire_ex()` (backport).
|
||||
|
||||
Minors:
|
||||
|
||||
- Fixed `-Wint-to-pointer-cast` warnings while casting to `mdbx_tid_t` (backport).
|
||||
- Removed needless `LockFileEx()` inside `mdbx_env_copy()` (backport).
|
||||
|
||||
|
||||
## v0.11.11 (Тендра-1790) at 2022-09-11
|
||||
|
||||
The stable bugfix release.
|
||||
|
||||
```
|
||||
10 files changed, 38 insertions(+), 21 deletions(-)
|
||||
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
|
||||
```
|
||||
|
||||
Fixes:
|
||||
|
||||
- Fixed an extra check for `MDBX_APPENDDUP` inside `mdbx_cursor_put()` which could result in returning `MDBX_EKEYMISMATCH` for valid cases.
|
||||
- Fixed an extra ensure/assertion check of `oldest_reader` inside `mdbx_txn_end()`.
|
||||
- Fixed derived C++ builds by removing `MDBX_INTERNAL_FUNC` for `mdbx_w2mb()` and `mdbx_mb2w()`.
|
||||
|
||||
|
||||
## v0.11.10 (the TriColor) at 2022-08-22
|
||||
|
||||
The stable bugfix release.
|
||||
It is planned that this will be the last release of the v0.11 branch.
|
||||
|
||||
```
|
||||
14 files changed, 263 insertions(+), 252 deletions(-)
|
||||
@ -76,7 +314,6 @@ Minors:
|
||||
- Use current transaction geometry for untouched parameters when `env_set_geometry()` called within a write transaction.
|
||||
- Minor clarified `iov_page()` failure case.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
|
||||
## v0.11.9 (Чирчик-1992) at 2022-08-02
|
||||
@ -194,7 +431,7 @@ New:
|
||||
- Support build by MinGW' make from command line without CMake.
|
||||
- Added `mdbx::filesystem` C++ API namespace that corresponds to `std::filesystem` or `std::experimental::filesystem`.
|
||||
- Created [website](https://libmdbx.dqdkfa.ru/) for online auto-generated documentation.
|
||||
- Used `https://web.archive.org/web/20220414235959/https://github.com/erthink/` for dead (or temporarily lost) resources deleted by ~~Github~~.
|
||||
- Used `https://web.archive.org/web/https://github.com/erthink/libmdbx` for dead (or temporarily lost) resources deleted by ~~Github~~.
|
||||
- Added `--loglevel=` command-line option to the `mdbx_test` tool.
|
||||
- Added few fast smoke-like tests into CMake builds.
|
||||
|
||||
@ -234,7 +471,7 @@ Minors:
|
||||
|
||||
The stable release with the complete workaround for an incoherence flaw of Linux unified page/buffer cache.
|
||||
Nonetheless the cause for this trouble may be an issue of Intel CPU cache/MESI.
|
||||
See [issue#269](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/269) for more information.
|
||||
See [issue#269](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269) for more information.
|
||||
|
||||
Acknowledgements:
|
||||
|
||||
@ -243,8 +480,8 @@ Acknowledgements:
|
||||
|
||||
Fixes:
|
||||
|
||||
- [Added complete workaround](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/269) for an incoherence flaw of Linux unified page/buffer cache.
|
||||
- [Fixed](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/272) cursor reusing for read-only transactions.
|
||||
- [Added complete workaround](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269) for an incoherence flaw of Linux unified page/buffer cache.
|
||||
- [Fixed](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/272) cursor reusing for read-only transactions.
|
||||
- Fixed copy&paste typo inside `mdbx::cursor::find_multivalue()`.
|
||||
|
||||
Minors:
|
||||
@ -259,7 +496,7 @@ Minors:
|
||||
## v0.11.5 at 2022-02-23
|
||||
|
||||
The release with the temporary hotfix for a flaw of Linux unified page/buffer cache.
|
||||
See [issue#269](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/269) for more information.
|
||||
See [issue#269](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269) for more information.
|
||||
|
||||
Acknowledgements:
|
||||
|
||||
@ -269,10 +506,10 @@ Acknowledgements:
|
||||
|
||||
Fixes:
|
||||
|
||||
- [Added hotfix](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/269) for a flaw of Linux unified page/buffer cache.
|
||||
- [Fixed/Reworked](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/pull/270) move-assignment operators for "managed" classes of C++ API.
|
||||
- [Added hotfix](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269) for a flaw of Linux unified page/buffer cache.
|
||||
- [Fixed/Reworked](https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/270) move-assignment operators for "managed" classes of C++ API.
|
||||
- Fixed potential `SIGSEGV` while opening a DB with an overridden non-default page size.
|
||||
- [Made](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/267) `mdbx_env_open()` idempotence in failure cases.
|
||||
- [Made](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/267) `mdbx_env_open()` idempotence in failure cases.
|
||||
- Refined/Fixed pages reservation inside `mdbx_update_gc()` to avoid non-reclamation in rare cases.
|
||||
- Fixed typo in a retained space calculation for the hsr-callback.
|
||||
|
||||
@ -305,15 +542,15 @@ New features, extensions and improvements:
|
||||
Fixes:
|
||||
|
||||
- Fixed handling `MDBX_opt_rp_augment_limit` for GC's records from huge transactions (Erigon/Akula/Ethereum).
|
||||
- [Fixed](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/258) build on Android (avoid including `sys/sem.h`).
|
||||
- [Fixed](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/pull/261) missing copy assignment operator for `mdbx::move_result`.
|
||||
- [Fixed](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/258) build on Android (avoid including `sys/sem.h`).
|
||||
- [Fixed](https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/261) missing copy assignment operator for `mdbx::move_result`.
|
||||
- Fixed missing `&` for `std::ostream &operator<<()` overloads.
|
||||
- Fixed unexpected `EXDEV` (Cross-device link) error from `mdbx_env_copy()`.
|
||||
- Fixed base64 encoding/decoding bugs in auxiliary C++ API.
|
||||
- Fixed overflow of `pgno_t` during checking PNL on 64-bit platforms.
|
||||
- [Fixed](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/260) excessive PNL checking after sort for spilling.
|
||||
- [Fixed](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/260) excessive PNL checking after sort for spilling.
|
||||
- Reworked checking `MAX_PAGENO` and DB upper-size geometry limit.
|
||||
- [Fixed](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/265) build for some combinations of versions of MSVC and Windows SDK.
|
||||
- [Fixed](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/265) build for some combinations of versions of MSVC and Windows SDK.
|
||||
|
||||
Minors:
|
||||
|
||||
@ -340,10 +577,10 @@ Acknowledgements:
|
||||
|
||||
New features, extensions and improvements:
|
||||
|
||||
- [Added](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/236) `mdbx_cursor_get_batch()`.
|
||||
- [Added](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/250) `MDBX_SET_UPPERBOUND`.
|
||||
- [Added](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/236) `mdbx_cursor_get_batch()`.
|
||||
- [Added](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/250) `MDBX_SET_UPPERBOUND`.
|
||||
- C++ API is finalized now.
|
||||
- The GC update stage has been [significantly speeded](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/254) when fixing huge Erigon's transactions (Ethereum ecosystem).
|
||||
- The GC update stage has been [significantly speeded](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/254) when fixing huge Erigon's transactions (Ethereum ecosystem).
|
||||
|
||||
Fixes:
|
||||
|
||||
@ -354,12 +591,12 @@ Minors:
|
||||
|
||||
- Fixed returning `MDBX_RESULT_TRUE` (unexpected -1) from `mdbx_env_set_option()`.
|
||||
- Added `mdbx_env_get_syncbytes()` and `mdbx_env_get_syncperiod()`.
|
||||
- [Clarified](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/pull/249) description of `MDBX_INTEGERKEY`.
|
||||
- [Clarified](https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/249) description of `MDBX_INTEGERKEY`.
|
||||
- Reworked/simplified `mdbx_env_sync_internal()`.
|
||||
- [Fixed](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/248) extra assertion inside `mdbx_cursor_put()` for `MDBX_DUPFIXED` cases.
|
||||
- [Fixed](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/248) extra assertion inside `mdbx_cursor_put()` for `MDBX_DUPFIXED` cases.
|
||||
- Avoiding extra looping inside `mdbx_env_info_ex()`.
|
||||
- Explicitly enabled core dumps from stochastic tests scripts on Linux.
|
||||
- [Fixed](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/253) `mdbx_override_meta()` to avoid false-positive assertions.
|
||||
- [Fixed](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/253) `mdbx_override_meta()` to avoid false-positive assertions.
|
||||
- For compatibility reverted returning `MDBX_ENODATA` for some cases.
|
||||
|
||||
|
||||
@ -375,10 +612,10 @@ Acknowledgements:
|
||||
|
||||
Fixes:
|
||||
|
||||
- [Fixed compilation](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/pull/239) with `devtoolset-9` on CentOS/RHEL 7.
|
||||
- [Fixed unexpected `MDBX_PROBLEM` error](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/242) because of update an obsolete meta-page.
|
||||
- [Fixed returning `MDBX_NOTFOUND` error](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/243) in case an inexact value found for `MDBX_GET_BOTH` operation.
|
||||
- [Fixed compilation](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/245) without kernel/libc-devel headers.
|
||||
- [Fixed compilation](https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/239) with `devtoolset-9` on CentOS/RHEL 7.
|
||||
- [Fixed unexpected `MDBX_PROBLEM` error](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/242) because of update an obsolete meta-page.
|
||||
- [Fixed returning `MDBX_NOTFOUND` error](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/243) in case an inexact value found for `MDBX_GET_BOTH` operation.
|
||||
- [Fixed compilation](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/245) without kernel/libc-devel headers.
|
||||
|
||||
Minors:
|
||||
|
||||
@ -395,7 +632,7 @@ Minors:
|
||||
|
||||
The database format signature has been changed to prevent
|
||||
forward-interoperability with previous releases, which may lead to a
|
||||
[false positive diagnosis of database corruption](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/238)
|
||||
[false positive diagnosis of database corruption](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/238)
|
||||
due to flaws in old library versions.
|
||||
|
||||
This change is mostly invisible:
|
||||
@ -447,7 +684,7 @@ Acknowledgements:
|
||||
Fixes:
|
||||
|
||||
- Fixed possibility of looping update GC during transaction commit (no public issue since the problem was discovered inside [Positive Technologies](https://www.ptsecurity.ru)).
|
||||
- Fixed `#pragma pack` to avoid provoking some compilers to generate code with [unaligned access](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/235).
|
||||
- Fixed `#pragma pack` to avoid provoking some compilers to generate code with [unaligned access](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/235).
|
||||
- Fixed `noexcept` for potentially throwing `txn::put()` of C++ API.
|
||||
|
||||
Minors:
|
||||
@ -473,7 +710,7 @@ Extensions and improvements:
|
||||
|
||||
Fixes:
|
||||
|
||||
- Always setup `madvise` while opening DB (fixes https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/231).
|
||||
- Always setup `madvise` while opening DB (fixes https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/231).
|
||||
- Fixed checking legacy `P_DIRTY` flag (`0x10`) for nested/sub-pages.
|
||||
|
||||
Minors:
|
||||
@ -494,11 +731,11 @@ Acknowledgements:
|
||||
- [Lionel Debroux](https://github.com/debrouxl) for fuzzing tests and reporting bugs.
|
||||
- [Sergey Fedotov](https://github.com/SergeyFromHell/) for [`node-mdbx` NodeJS bindings](https://www.npmjs.com/package/node-mdbx).
|
||||
- [Kris Zyp](https://github.com/kriszyp) for [`lmdbx-store` NodeJS bindings](https://github.com/kriszyp/lmdbx-store).
|
||||
- [Noel Kuntze](https://github.com/Thermi) for [draft Python bindings](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/commits/python-bindings).
|
||||
- [Noel Kuntze](https://github.com/Thermi) for [draft Python bindings](https://web.archive.org/web/https://github.com/erthink/libmdbx/commits/python-bindings).
|
||||
|
||||
New features, extensions and improvements:
|
||||
|
||||
- [Allow to predefine/override `MDBX_BUILD_TIMESTAMP` for builds reproducibility](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/201).
|
||||
- [Allow to predefine/override `MDBX_BUILD_TIMESTAMP` for builds reproducibility](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/201).
|
||||
- Added options support for `long-stochastic` script.
|
||||
- Avoided `MDBX_TXN_FULL` error for large transactions when possible.
|
||||
- The `MDBX_READERS_LIMIT` increased to `32767`.
|
||||
@ -506,7 +743,7 @@ New features, extensions and improvements:
|
||||
- Minimized the size of poisoned/unpoisoned regions to avoid Valgrind/ASAN stuck.
|
||||
- Added more workarounds for QEMU for testing builds for 32-bit platforms, Alpha and Sparc architectures.
|
||||
- `mdbx_chk` now skips iteration & checking of DB' records if corresponding page-tree is corrupted (to avoid `SIGSEGV`, ASAN failures, etc).
|
||||
- Added more checks for [rare/fuzzing corruption cases](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/217).
|
||||
- Added more checks for [rare/fuzzing corruption cases](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/217).
|
||||
|
||||
Backward compatibility break:
|
||||
|
||||
@ -518,18 +755,18 @@ Backward compatibility break:
|
||||
Fixes:
|
||||
|
||||
- Fixed excess meta-pages checks in case `mdbx_chk` is called to check the DB for a specific meta page and thus could prevent switching to the selected meta page, even if the check passed without errors.
|
||||
- Fixed [recursive use of SRW-lock on Windows caused by `MDBX_NOTLS` option](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/203).
|
||||
- Fixed [log a warning during a new DB creation](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/205).
|
||||
- Fixed [false-negative `mdbx_cursor_eof()` result](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/207).
|
||||
- Fixed [`make install` with non-GNU `install` utility (OSX, BSD)](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/208).
|
||||
- Fixed [installation by `CMake` in special cases by complete use `GNUInstallDirs`'s variables](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/209).
|
||||
- Fixed [C++ Buffer issue with `std::string` and alignment](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/191).
|
||||
- Fixed [recursive use of SRW-lock on Windows caused by `MDBX_NOTLS` option](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/203).
|
||||
- Fixed [log a warning during a new DB creation](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/205).
|
||||
- Fixed [false-negative `mdbx_cursor_eof()` result](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/207).
|
||||
- Fixed [`make install` with non-GNU `install` utility (OSX, BSD)](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/208).
|
||||
- Fixed [installation by `CMake` in special cases by complete use `GNUInstallDirs`'s variables](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/209).
|
||||
- Fixed [C++ Buffer issue with `std::string` and alignment](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/191).
|
||||
- Fixed `safe64_reset()` for platforms without atomic 64-bit compare-and-swap.
|
||||
- Fixed hang/shutdown on big-endian platforms without `__cxa_thread_atexit()`.
|
||||
- Fixed [using bad meta-pages if DB was partially/recoverable corrupted](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/217).
|
||||
- Fixed [using bad meta-pages if DB was partially/recoverable corrupted](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/217).
|
||||
- Fixed extra `noexcept` for `buffer::&assign_reference()`.
|
||||
- Fixed `bootid` generation on Windows for the case of a system time change.
|
||||
- Fixed [test framework keygen-related issue](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/127).
|
||||
- Fixed [test framework keygen-related issue](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/127).
|
||||
|
||||
|
||||
## v0.10.1 at 2021-06-01
|
||||
@ -550,10 +787,10 @@ New features:
|
||||
Fixes:
|
||||
|
||||
- Fixed minor "foo not used" warnings from modern C++ compilers when building the C++ part of the library.
|
||||
- Fixed confusing/messy errors when build library from unfit github's archives (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/197).
|
||||
- Fixed confusing/messy errors when build library from unfit github's archives (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/197).
|
||||
- Fixed `#elsif` typo.
|
||||
- Fixed rare unexpected `MDBX_PROBLEM` error during altering data in huge transactions due to wrong spilling/oust of dirty pages (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/195).
|
||||
- Re-Fixed WSL1/WSL2 detection with distinguishing (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/97).
|
||||
- Fixed rare unexpected `MDBX_PROBLEM` error during altering data in huge transactions due to wrong spilling/oust of dirty pages (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/195).
|
||||
- Re-Fixed WSL1/WSL2 detection with distinguishing (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/97).
|
||||
|
||||
|
||||
## v0.10.0 at 2021-05-09
|
||||
@ -576,7 +813,7 @@ New features:
|
||||
and conjointly with the `MDBX_ENV_CHECKPID=0` and `MDBX_TXN_CHECKOWNER=0` options can yield
|
||||
up to 30% more performance compared to LMDB.
|
||||
- Using float point (exponential quantized) representation for internal 16-bit values
|
||||
of grow step and shrink threshold when huge ones (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/166).
|
||||
of grow step and shrink threshold when huge ones (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/166).
|
||||
To minimize the impact on compatibility, only the odd values inside the upper half
|
||||
of the range (i.e. 32769..65533) are used for the new representation.
|
||||
- Added the `mdbx_drop` similar to LMDB command-line tool to purge or delete (sub)database(s).
|
||||
@ -585,7 +822,7 @@ New features:
|
||||
- The internal node sizes were refined, resulting in a reduction in large/overflow pages in some use cases
|
||||
and a slight increase in limits for a keys size to ≈½ of page size.
|
||||
- Added to `mdbx_chk` output number of keys/items on pages.
|
||||
- Added explicit `install-strip` and `install-no-strip` targets to the `Makefile` (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/pull/180).
|
||||
- Added explicit `install-strip` and `install-no-strip` targets to the `Makefile` (https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/180).
|
||||
- Major rework page splitting (af9b7b560505684249b76730997f9e00614b8113) for
|
||||
- An "auto-appending" feature upon insertion for both ascending and
|
||||
descending key sequences. As a result, the optimality of page filling
|
||||
@ -593,7 +830,7 @@ New features:
|
||||
inserting ordered sequences of keys,
|
||||
- A "splitting at middle" to make page tree more balanced on average.
|
||||
- Added `mdbx_get_sysraminfo()` to the API.
|
||||
- Added guessing a reasonable maximum DB size for the default upper limit of geometry (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/183).
|
||||
- Added guessing a reasonable maximum DB size for the default upper limit of geometry (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/183).
|
||||
- Major rework internal labeling of a dirty pages (958fd5b9479f52f2124ab7e83c6b18b04b0e7dda) for
|
||||
a "transparent spilling" feature with the gist to make a dirty pages
|
||||
be ready to spilling (writing to a disk) without further altering ones.
|
||||
@ -609,7 +846,7 @@ New features:
|
||||
- Support `make help` to list available make targets.
|
||||
- Silently `make`'s build by default.
|
||||
- Preliminary [Python bindings](https://github.com/Thermi/libmdbx/tree/python-bindings) is available now
|
||||
by [Noel Kuntze](https://github.com/Thermi) (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/147).
|
||||
by [Noel Kuntze](https://github.com/Thermi) (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/147).
|
||||
|
||||
Backward compatibility break:
|
||||
|
||||
@ -624,22 +861,22 @@ Backward compatibility break:
|
||||
|
||||
Fixes:
|
||||
|
||||
- Fixed performance regression due non-optimal C11 atomics usage (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/160).
|
||||
- Fixed "reincarnation" of subDB after its deletion (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/168).
|
||||
- Fixed performance regression due non-optimal C11 atomics usage (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/160).
|
||||
- Fixed "reincarnation" of subDB after its deletion (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/168).
|
||||
- Fixed (disallowing) implicit subDB deletion via operations on `@MAIN`'s DBI-handle.
|
||||
- Fixed a crash of `mdbx_env_info_ex()` in case of a call for a non-open environment (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/171).
|
||||
- Fixed the selecting/adjustment values inside `mdbx_env_set_geometry()` for implicit out-of-range cases (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/170).
|
||||
- Fixed `mdbx_env_set_option()` for set initial and limit size of dirty page list (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/179).
|
||||
- Fixed an unreasonably huge default upper limit for DB geometry (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/183).
|
||||
- Fixed a crash of `mdbx_env_info_ex()` in case of a call for a non-open environment (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/171).
|
||||
- Fixed the selecting/adjustment values inside `mdbx_env_set_geometry()` for implicit out-of-range cases (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/170).
|
||||
- Fixed `mdbx_env_set_option()` for set initial and limit size of dirty page list (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/179).
|
||||
- Fixed an unreasonably huge default upper limit for DB geometry (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/183).
|
||||
- Fixed `constexpr` specifier for the `slice::invalid()`.
|
||||
- Fixed (no)readahead auto-handling (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/164).
|
||||
- Fixed (no)readahead auto-handling (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/164).
|
||||
- Fixed non-alloy build for Windows.
|
||||
- Switched to using Heap-functions instead of LocalAlloc/LocalFree on Windows.
|
||||
- Fixed `mdbx_env_stat_ex()` to returning statistics of the whole environment instead of MainDB only (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/190).
|
||||
- Fixed `mdbx_env_stat_ex()` to returning statistics of the whole environment instead of MainDB only (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/190).
|
||||
- Fixed building by GCC 4.8.5 (added workaround for a preprocessor's bug).
|
||||
- Fixed building C++ part for iOS <= 13.0 (unavailability of `std::filesystem::path`).
|
||||
- Fixed building for Windows target versions prior to Windows Vista (`WIN32_WINNT < 0x0600`).
|
||||
- Fixed building by MinGW for Windows (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/155).
|
||||
- Fixed building by MinGW for Windows (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/155).
|
||||
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
@ -662,7 +899,7 @@ Removed options and features:
|
||||
New features:
|
||||
|
||||
- Package for FreeBSD is available now by Mahlon E. Smith.
|
||||
- New API functions to get/set various options (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/128):
|
||||
- New API functions to get/set various options (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/128):
|
||||
- the maximum number of named databases for the environment;
|
||||
- the maximum number of threads/reader slots;
|
||||
- threshold (since the last unsteady commit) to force flush the data buffers to disk;
|
||||
@ -675,7 +912,7 @@ New features:
|
||||
- maximal part of the dirty pages may be spilled when necessary;
|
||||
- minimal part of the dirty pages should be spilled when necessary;
|
||||
- how much of the parent transaction dirty pages will be spilled while start each child transaction;
|
||||
- Unlimited/Dynamic size of retired and dirty page lists (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/123).
|
||||
- Unlimited/Dynamic size of retired and dirty page lists (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/123).
|
||||
- Added `-p` option (purge subDB before loading) to `mdbx_load` tool.
|
||||
- Reworked spilling of large transaction and committing of nested transactions:
|
||||
- page spilling code reworked to avoid the flaws and bugs inherited from LMDB;
|
||||
@ -685,22 +922,22 @@ New features:
|
||||
- Added `MDBX_ENABLE_REFUND` and `MDBX_PNL_ASCENDING` internal/advanced build options.
|
||||
- Added `mdbx_default_pagesize()` function.
|
||||
- Better support architectures with a weak/relaxed memory consistency model (ARM, AARCH64, PPC, MIPS, RISC-V, etc) by means [C11 atomics](https://en.cppreference.com/w/c/atomic).
|
||||
- Speed up page number lists and dirty page lists (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/132).
|
||||
- Speed up page number lists and dirty page lists (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/132).
|
||||
- Added `LIBMDBX_NO_EXPORTS_LEGACY_API` build option.
|
||||
|
||||
Fixes:
|
||||
|
||||
- Fixed missing cleanup (null assigned) in the C++ commit/abort (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/pull/143).
|
||||
- Fixed missing cleanup (null assigned) in the C++ commit/abort (https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/143).
|
||||
- Fixed `mdbx_realloc()` for case of nullptr and `MDBX_WITHOUT_MSVC_CRT=ON` for Windows.
|
||||
- Fixed the possibility to use invalid and renewed (closed & re-opened, dropped & re-created) DBI-handles (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/146).
|
||||
- Fixed 4-byte aligned access to 64-bit integers, including access to the `bootid` meta-page's field (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/153).
|
||||
- Fixed the possibility to use invalid and renewed (closed & re-opened, dropped & re-created) DBI-handles (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/146).
|
||||
- Fixed 4-byte aligned access to 64-bit integers, including access to the `bootid` meta-page's field (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/153).
|
||||
- Fixed minor/potential memory leak during page flushing and unspilling.
|
||||
- Fixed handling states of cursors and subDBs for nested transactions.
|
||||
- Fixed page leak in extra rare case the list of retired pages changed during update GC on transaction commit.
|
||||
- Fixed assertions to avoid false-positive UB detection by CLANG/LLVM (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/153).
|
||||
- Fixed `MDBX_TXN_FULL` and regressive `MDBX_KEYEXIST` during large transaction commit with `MDBX_LIFORECLAIM` (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/123).
|
||||
- Fixed assertions to avoid false-positive UB detection by CLANG/LLVM (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/153).
|
||||
- Fixed `MDBX_TXN_FULL` and regressive `MDBX_KEYEXIST` during large transaction commit with `MDBX_LIFORECLAIM` (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/123).
|
||||
- Fixed auto-recovery (`weak->steady` with the same boot-id) when Database size at last weak checkpoint is larger than at last steady checkpoint.
|
||||
- Fixed operation on systems with unusual small/large page size, including PowerPC (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/157).
|
||||
- Fixed operation on systems with unusual small/large page size, including PowerPC (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/157).
|
||||
|
||||
|
||||
## v0.9.2 at 2020-11-27
|
||||
@ -738,11 +975,11 @@ Fixes:
|
||||
- Fixed copy&paste typos.
|
||||
- Fixed minor false-positive GCC warning.
|
||||
- Added workaround for broken `DEFINE_ENUM_FLAG_OPERATORS` from Windows SDK.
|
||||
- Fixed cursor state after multimap/dupsort repeated deletes (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/121).
|
||||
- Fixed cursor state after multimap/dupsort repeated deletes (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/121).
|
||||
- Added `SIGPIPE` suppression for internal thread during `mdbx_env_copy()`.
|
||||
- Fixed extra-rare `MDBX_KEY_EXIST` error during `mdbx_commit()` (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/131).
|
||||
- Fixed spilled pages checking (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/126).
|
||||
- Fixed `mdbx_load` for 'plain text' and without `-s name` cases (https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/136).
|
||||
- Fixed extra-rare `MDBX_KEY_EXIST` error during `mdbx_commit()` (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/131).
|
||||
- Fixed spilled pages checking (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/126).
|
||||
- Fixed `mdbx_load` for 'plain text' and without `-s name` cases (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/136).
|
||||
- Fixed save/restore/commit of cursors for nested transactions.
|
||||
- Fixed cursors state in rare/special cases (move next beyond end-of-data, after deletion and so on).
|
||||
- Added workaround for MSVC 19.28 (Visual Studio 16.8) (but may still hang during compilation).
|
||||
|
||||
@ -353,7 +353,7 @@ named mutexes are used.
|
||||
Historically, _libmdbx_ is a deeply revised and extended descendant of the
|
||||
[Lightning Memory-Mapped Database](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database).
|
||||
At first the development was carried out within the
|
||||
[ReOpenLDAP](https://web.archive.org/web/20220414235959/https://github.com/erthink/ReOpenLDAP) project. About a
|
||||
[ReOpenLDAP](https://web.archive.org/web/https://github.com/erthink/ReOpenLDAP) project. About a
|
||||
year later _libmdbx_ was separated into a standalone project, which was
|
||||
[presented at Highload++ 2015
|
||||
conference](http://www.highload.ru/2015/abstracts/1831.html).
|
||||
@ -435,7 +435,7 @@ unexpected or broken down.
|
||||
|
||||
### Testing
|
||||
The amalgamated source code does not contain any tests for several reasons.
|
||||
Please read [the explanation](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/issues/214#issuecomment-870717981) and don't ask to alter this.
|
||||
Please read [the explanation](https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/214#issuecomment-870717981) and don't ask to alter this.
|
||||
So for testing _libmdbx_ itself you need a full source code, i.e. the clone of a git repository, there is no option.
|
||||
|
||||
The full source code of _libmdbx_ has a [`test` subdirectory](https://gitflic.ru/project/erthink/libmdbx/tree/master/test) with minimalistic test "framework".
|
||||
@ -618,7 +618,7 @@ Bindings
|
||||
| Rust | [libmdbx-rs](https://github.com/vorot93/libmdbx-rs) | [Artem Vorotnikov](https://github.com/vorot93) |
|
||||
| Rust | [mdbx](https://crates.io/crates/mdbx) | [gcxfd](https://github.com/gcxfd) |
|
||||
| Java | [mdbxjni](https://github.com/castortech/mdbxjni) | [Castor Technologies](https://castortech.com/) |
|
||||
| Python (draft) | [python-bindings](https://web.archive.org/web/20220414235959/https://github.com/erthink/libmdbx/commits/python-bindings) branch | [Noel Kuntze](https://github.com/Thermi) |
|
||||
| Python (draft) | [python-bindings](https://web.archive.org/web/https://github.com/erthink/libmdbx/commits/python-bindings) branch | [Noel Kuntze](https://github.com/Thermi) |
|
||||
| .NET (obsolete) | [mdbx.NET](https://github.com/wangjia184/mdbx.NET) | [Jerry Wang](https://github.com/wangjia184) |
|
||||
|
||||
<!-- section-end -->
|
||||
|
||||
@ -1 +1 @@
|
||||
0.12.1.0
|
||||
0.12.2.0
|
||||
|
||||
@ -13,7 +13,9 @@
|
||||
## limitations under the License.
|
||||
##
|
||||
|
||||
if(CMAKE_VERSION VERSION_LESS 3.12)
|
||||
if(CMAKE_VERSION VERSION_LESS 3.8.2)
|
||||
cmake_minimum_required(VERSION 3.0.2)
|
||||
elseif(CMAKE_VERSION VERSION_LESS 3.12)
|
||||
cmake_minimum_required(VERSION 3.8.2)
|
||||
else()
|
||||
cmake_minimum_required(VERSION 3.12)
|
||||
@ -203,9 +205,38 @@ endif()
|
||||
|
||||
if(NOT CMAKE_SYSTEM_ARCH)
|
||||
if(CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ARCHITECTURE_ID)
|
||||
set(CMAKE_SYSTEM_ARCH "${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ARCHITECTURE_ID}")
|
||||
string(TOLOWER "${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ARCHITECTURE_ID}" CMAKE_SYSTEM_ARCH)
|
||||
if(CMAKE_SYSTEM_ARCH STREQUAL "x86")
|
||||
set(X86_32 TRUE)
|
||||
elseif(CMAKE_SYSTEM_ARCH STREQUAL "x86_64" OR CMAKE_SYSTEM_ARCH STREQUAL "x64")
|
||||
set(X86_64 TRUE)
|
||||
set(CMAKE_SYSTEM_ARCH "x86_64")
|
||||
elseif(CMAKE_SYSTEM_ARCH MATCHES "^(aarch.*|arm.*)")
|
||||
if(CMAKE_TARGET_BITNESS EQUAL 64)
|
||||
set(AARCH64 TRUE)
|
||||
else()
|
||||
set(ARM32 TRUE)
|
||||
endif()
|
||||
endif()
|
||||
elseif(CMAKE_ANDROID_ARCH_ABI)
|
||||
set(CMAKE_SYSTEM_ARCH "${CMAKE_ANDROID_ARCH_ABI}")
|
||||
if(CMAKE_SYSTEM_ARCH STREQUAL "x86")
|
||||
set(X86_32 TRUE)
|
||||
elseif(CMAKE_SYSTEM_ARCH STREQUAL "x86_64")
|
||||
set(X86_64 TRUE)
|
||||
elseif(CMAKE_SYSTEM_ARCH MATCHES "^(aarch.*|AARCH.*|arm.*|ARM.*)")
|
||||
if(CMAKE_TARGET_BITNESS EQUAL 64)
|
||||
set(AARCH64 TRUE)
|
||||
else()
|
||||
set(ARM32 TRUE)
|
||||
endif()
|
||||
elseif(CMAKE_SYSTEM_ARCH MATCHES "^(mips|MIPS).*")
|
||||
if(CMAKE_TARGET_BITNESS EQUAL 64)
|
||||
set(MIPS64 TRUE)
|
||||
else()
|
||||
set(MIPS32 TRUE)
|
||||
endif()
|
||||
endif()
|
||||
elseif(CMAKE_COMPILER_IS_ELBRUSC OR CMAKE_COMPILER_IS_ELBRUSCXX
|
||||
OR CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_ID STREQUAL "LCC"
|
||||
OR CMAKE_SYSTEM_PROCESSOR MATCHES "e2k.*|E2K.*|elbrus.*|ELBRUS.*")
|
||||
@ -929,12 +960,13 @@ endmacro(setup_compile_flags)
|
||||
macro(probe_libcxx_filesystem)
|
||||
if(CMAKE_CXX_COMPILER_LOADED AND NOT DEFINED LIBCXX_FILESYSTEM)
|
||||
list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_11 HAS_CXX11)
|
||||
if(NOT HAS_CXX11 LESS 0)
|
||||
if(NOT HAS_CXX11 LESS 0 OR CXX_FALLBACK_GNU11 OR CXX_FALLBACK_11)
|
||||
include(CMakePushCheckState)
|
||||
include(CheckCXXSourceCompiles)
|
||||
cmake_push_check_state()
|
||||
set(stdfs_probe_save_libraries ${CMAKE_REQUIRED_LIBRARIES})
|
||||
set(stdfs_probe_save_flags ${CMAKE_REQUIRED_FLAGS})
|
||||
set(stdfs_probe_flags ${CMAKE_REQUIRED_FLAGS})
|
||||
set(stdfs_probe_save_link_options ${CMAKE_REQUIRED_LINK_OPTIONS})
|
||||
unset(stdfs_probe_clear_cxx_standard)
|
||||
if(NOT DEFINED CMAKE_CXX_STANDARD)
|
||||
@ -945,18 +977,23 @@ macro(probe_libcxx_filesystem)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
elseif(NOT HAS_CXX14 LESS 0)
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
else()
|
||||
elseif(NOT HAS_CXX11 LESS 0)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
elseif(CXX_FALLBACK_GNU11)
|
||||
set(stdfs_probe_flags ${stdfs_probe_flags} "-std=gnu++11")
|
||||
else()
|
||||
set(stdfs_probe_flags ${stdfs_probe_flags} "-std=c++11")
|
||||
endif()
|
||||
set(stdfs_probe_clear_cxx_standard ON)
|
||||
endif()
|
||||
if(CMAKE_COMPILER_IS_ELBRUSCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1.25.23)
|
||||
if(CMAKE_VERSION VERSION_LESS 3.14)
|
||||
set(CMAKE_REQUIRED_FLAGS ${stdfs_probe_save_flags} "-Wl,--allow-multiple-definition")
|
||||
set(stdfs_probe_flags ${stdfs_probe_flags} "-Wl,--allow-multiple-definition")
|
||||
else()
|
||||
set(CMAKE_REQUIRED_LINK_OPTIONS ${stdfs_probe_save_link_options} "-Wl,--allow-multiple-definition")
|
||||
endif()
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_FLAGS ${stdfs_probe_flags})
|
||||
|
||||
set(stdfs_probe_code [[
|
||||
#if defined(__SIZEOF_INT128__) && !defined(__GLIBCXX_TYPE_INT_N_0) && defined(__clang__) && __clang_major__ < 4
|
||||
@ -1037,6 +1074,7 @@ macro(probe_libcxx_filesystem)
|
||||
unset(stdfs_probe_clear_cxx_standard)
|
||||
unset(stdfs_probe_save_link_options)
|
||||
unset(stdfs_probe_save_flags)
|
||||
unset(stdfs_probe_flags)
|
||||
unset(stdfs_probe_save_libraries)
|
||||
unset(stdfs_probe_code)
|
||||
unset(stdfs_probe_rc)
|
||||
|
||||
@ -13,7 +13,9 @@
|
||||
## limitations under the License.
|
||||
##
|
||||
|
||||
if(CMAKE_VERSION VERSION_LESS 3.12)
|
||||
if(CMAKE_VERSION VERSION_LESS 3.8.2)
|
||||
cmake_minimum_required(VERSION 3.0.2)
|
||||
elseif(CMAKE_VERSION VERSION_LESS 3.12)
|
||||
cmake_minimum_required(VERSION 3.8.2)
|
||||
else()
|
||||
cmake_minimum_required(VERSION 3.12)
|
||||
|
||||
@ -13,7 +13,9 @@
|
||||
## limitations under the License.
|
||||
##
|
||||
|
||||
if(CMAKE_VERSION VERSION_LESS 3.12)
|
||||
if(CMAKE_VERSION VERSION_LESS 3.8.2)
|
||||
cmake_minimum_required(VERSION 3.0.2)
|
||||
elseif(CMAKE_VERSION VERSION_LESS 3.12)
|
||||
cmake_minimum_required(VERSION 3.8.2)
|
||||
else()
|
||||
cmake_minimum_required(VERSION 3.12)
|
||||
|
||||
@ -27,6 +27,12 @@
|
||||
#cmakedefine01 MDBX_TRUST_RTC
|
||||
#endif
|
||||
#cmakedefine01 MDBX_DISABLE_VALIDATION
|
||||
#cmakedefine01 MDBX_AVOID_MSYNC
|
||||
#cmakedefine01 MDBX_ENABLE_REFUND
|
||||
#cmakedefine01 MDBX_ENABLE_MADVISE
|
||||
#cmakedefine01 MDBX_ENABLE_BIGFOOT
|
||||
#cmakedefine01 MDBX_ENABLE_PGOP_STAT
|
||||
#cmakedefine01 MDBX_ENABLE_PROFGC
|
||||
|
||||
/* Windows */
|
||||
#cmakedefine01 MDBX_WITHOUT_MSVC_CRT
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
.\" Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>.
|
||||
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
|
||||
.TH MDBX_CHK 1 "2022-08-24" "MDBX 0.12.1"
|
||||
.TH MDBX_CHK 1 "2022-11-11" "MDBX 0.12.2"
|
||||
.SH NAME
|
||||
mdbx_chk \- MDBX checking tool
|
||||
.SH SYNOPSIS
|
||||
@ -81,6 +81,13 @@ Turn to a specified meta-page on successful check.
|
||||
.BR \-T
|
||||
Turn to a specified meta-page EVEN ON UNSUCCESSFUL CHECK!
|
||||
.TP
|
||||
.BR \-u
|
||||
Warms up the DB before checking via notifying OS kernel of subsequent access to the database pages.
|
||||
.TP
|
||||
.BR \-U
|
||||
Warms up the DB before checking, notifying the OS kernel of subsequent access to the database pages,
|
||||
then forcibly loads ones by sequential access and tries to lock database pages in memory.
|
||||
.TP
|
||||
.BR \-n
|
||||
Open MDBX environment(s) which do not use subdirectories.
|
||||
This is a legacy option. For now MDBX handles this automatically.
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
.\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved.
|
||||
.\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>.
|
||||
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
|
||||
.TH MDBX_COPY 1 "2022-08-24" "MDBX 0.12.1"
|
||||
.TH MDBX_COPY 1 "2022-11-11" "MDBX 0.12.2"
|
||||
.SH NAME
|
||||
mdbx_copy \- MDBX environment copy tool
|
||||
.SH SYNOPSIS
|
||||
@ -45,6 +45,13 @@ or unused pages will be omitted from the copy. This option will
|
||||
slow down the backup process as it is more CPU-intensive.
|
||||
Currently it fails if the environment has suffered a page leak.
|
||||
.TP
|
||||
.BR \-u
|
||||
Warms up the DB before copying via notifying OS kernel of subsequent access to the database pages.
|
||||
.TP
|
||||
.BR \-U
|
||||
Warms up the DB before copying, notifying the OS kernel of subsequent access to the database pages,
|
||||
then forcibly loads ones by sequential access and tries to lock database pages in memory.
|
||||
.TP
|
||||
.BR \-n
|
||||
Open MDBX environment(s) which do not use subdirectories.
|
||||
This is a legacy option. For now MDBX handles this automatically.
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
.\" Copyright 2021-2022 Leonid Yuriev <leo@yuriev.ru>.
|
||||
.\" Copyright 2014-2021 Howard Chu, Symas Corp. All Rights Reserved.
|
||||
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
|
||||
.TH MDBX_DROP 1 "2022-08-24" "MDBX 0.12.1"
|
||||
.TH MDBX_DROP 1 "2022-11-11" "MDBX 0.12.2"
|
||||
.SH NAME
|
||||
mdbx_drop \- MDBX database delete tool
|
||||
.SH SYNOPSIS
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
.\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved.
|
||||
.\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>.
|
||||
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
|
||||
.TH MDBX_DUMP 1 "2022-08-24" "MDBX 0.12.1"
|
||||
.TH MDBX_DUMP 1 "2022-11-11" "MDBX 0.12.2"
|
||||
.SH NAME
|
||||
mdbx_dump \- MDBX environment export tool
|
||||
.SH SYNOPSIS
|
||||
@ -66,6 +66,13 @@ Dump a specific subdatabase. If no database is specified, only the main database
|
||||
.BR \-r
|
||||
Rescue mode. Ignore some errors to dump corrupted DB.
|
||||
.TP
|
||||
.BR \-u
|
||||
Warms up the DB before dumping via notifying OS kernel of subsequent access to the database pages.
|
||||
.TP
|
||||
.BR \-U
|
||||
Warms up the DB before dumping, notifying the OS kernel of subsequent access to the database pages,
|
||||
then forcibly loads ones by sequential access and tries to lock database pages in memory.
|
||||
.TP
|
||||
.BR \-n
|
||||
Dump an MDBX database which does not use subdirectories.
|
||||
This is a legacy option. For now MDBX handles this automatically.
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
.\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved.
|
||||
.\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>.
|
||||
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
|
||||
.TH MDBX_LOAD 1 "2022-08-24" "MDBX 0.12.1"
|
||||
.TH MDBX_LOAD 1 "2022-11-11" "MDBX 0.12.2"
|
||||
.SH NAME
|
||||
mdbx_load \- MDBX environment import tool
|
||||
.SH SYNOPSIS
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
.\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved.
|
||||
.\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>.
|
||||
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
|
||||
.TH MDBX_STAT 1 "2022-08-24" "MDBX 0.12.1"
|
||||
.TH MDBX_STAT 1 "2022-11-11" "MDBX 0.12.2"
|
||||
.SH NAME
|
||||
mdbx_stat \- MDBX environment status tool
|
||||
.SH SYNOPSIS
|
||||
|
||||
6532
crates/storage/libmdbx-rs/mdbx-sys/libmdbx/mdbx.c
vendored
6532
crates/storage/libmdbx-rs/mdbx-sys/libmdbx/mdbx.c
vendored
File diff suppressed because it is too large
Load Diff
537
crates/storage/libmdbx-rs/mdbx-sys/libmdbx/mdbx.c++
vendored
537
crates/storage/libmdbx-rs/mdbx-sys/libmdbx/mdbx.c++
vendored
@ -12,7 +12,7 @@
|
||||
* <http://www.OpenLDAP.org/license.html>. */
|
||||
|
||||
#define xMDBX_ALLOY 1
|
||||
#define MDBX_BUILD_SOURCERY 86a8d6c403a2023fc2df0ab38f71339b78e82f0aa786f480a1cb166c05497134_v0_12_1_0_gb36a07a5
|
||||
#define MDBX_BUILD_SOURCERY e17be563de6f6f85e208ded5aacc1387bc0addf6ce5540c99d0d15db2c3e8edd_v0_12_2_0_g9b062cf0
|
||||
#ifdef MDBX_CONFIG_H
|
||||
#include MDBX_CONFIG_H
|
||||
#endif
|
||||
@ -127,7 +127,11 @@
|
||||
#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* __USE_MINGW_ANSI_STDIO */
|
||||
#endif /* MinGW */
|
||||
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && !defined(UNICODE)
|
||||
#define UNICODE
|
||||
#endif /* UNICODE */
|
||||
|
||||
#include "mdbx.h++"
|
||||
/*
|
||||
@ -194,7 +198,7 @@
|
||||
#define SSIZE_MAX INTPTR_MAX
|
||||
#endif
|
||||
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul || defined(_WIN64)
|
||||
#define MDBX_WORDBITS 64
|
||||
#else
|
||||
#define MDBX_WORDBITS 32
|
||||
@ -367,10 +371,6 @@ __extern_C key_t ftok(const char *, int);
|
||||
#elif _WIN32_WINNT < 0x0500
|
||||
#error At least 'Windows 2000' API is required for libmdbx.
|
||||
#endif /* _WIN32_WINNT */
|
||||
#if (defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* MinGW */
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif /* WIN32_LEAN_AND_MEAN */
|
||||
@ -394,8 +394,10 @@ __extern_C key_t ftok(const char *, int);
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#endif /*---------------------------------------------------------------------*/
|
||||
@ -1147,9 +1149,6 @@ static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
|
||||
#define vsnprintf _vsnprintf /* ntdll */
|
||||
#endif
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#else /*----------------------------------------------------------------------*/
|
||||
|
||||
typedef pthread_t osal_thread_t;
|
||||
@ -1180,18 +1179,16 @@ typedef pthread_mutex_t osal_fastmutex_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* OS abstraction layer stuff */
|
||||
|
||||
MDBX_INTERNAL_VAR unsigned sys_pagesize;
|
||||
MDBX_MAYBE_UNUSED MDBX_INTERNAL_VAR unsigned sys_allocation_granularity;
|
||||
|
||||
/* Get the size of a memory page for the system.
|
||||
* This is the basic size that the platform's memory manager uses, and is
|
||||
* fundamental to the use of memory-mapped files. */
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t
|
||||
osal_syspagesize(void) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
SYSTEM_INFO si;
|
||||
GetSystemInfo(&si);
|
||||
return si.dwPageSize;
|
||||
#else
|
||||
return sysconf(_SC_PAGE_SIZE);
|
||||
#endif
|
||||
assert(sys_pagesize > 0 && (sys_pagesize & (sys_pagesize - 1)) == 0);
|
||||
return sys_pagesize;
|
||||
}
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@ -1230,8 +1227,140 @@ typedef union osal_srwlock {
|
||||
} osal_srwlock_t;
|
||||
#endif /* Windows */
|
||||
|
||||
#ifndef MDBX_HAVE_PWRITEV
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
|
||||
#elif defined(__ANDROID_API__)
|
||||
|
||||
#if __ANDROID_API__ < 24
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#endif
|
||||
|
||||
#elif defined(__APPLE__) || defined(__MACH__) || defined(_DARWIN_C_SOURCE)
|
||||
|
||||
#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_VERSION_11_0) && \
|
||||
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
|
||||
/* FIXME: add checks for IOS versions, etc */
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
|
||||
#elif defined(_SC_IOV_MAX) || (defined(IOV_MAX) && IOV_MAX > 1)
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
|
||||
typedef struct ior_item {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
OVERLAPPED ov;
|
||||
#define ior_svg_gap4terminator 1
|
||||
#define ior_sgv_element FILE_SEGMENT_ELEMENT
|
||||
#else
|
||||
size_t offset;
|
||||
#if MDBX_HAVE_PWRITEV
|
||||
size_t sgvcnt;
|
||||
#define ior_svg_gap4terminator 0
|
||||
#define ior_sgv_element struct iovec
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
#endif /* !Windows */
|
||||
union {
|
||||
MDBX_val single;
|
||||
#if defined(ior_sgv_element)
|
||||
ior_sgv_element sgv[1 + ior_svg_gap4terminator];
|
||||
#endif /* ior_sgv_element */
|
||||
};
|
||||
} ior_item_t;
|
||||
|
||||
typedef struct osal_ioring {
|
||||
unsigned slots_left;
|
||||
unsigned allocated;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define IOR_DIRECT 1
|
||||
#define IOR_OVERLAPPED 2
|
||||
#define IOR_STATE_LOCKED 1
|
||||
unsigned pagesize;
|
||||
unsigned last_sgvcnt;
|
||||
size_t last_bytes;
|
||||
uint8_t flags, state, pagesize_ln2;
|
||||
unsigned event_stack;
|
||||
HANDLE *event_pool;
|
||||
volatile LONG async_waiting;
|
||||
volatile LONG async_completed;
|
||||
HANDLE async_done;
|
||||
|
||||
#define ior_last_sgvcnt(ior, item) (ior)->last_sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#elif MDBX_HAVE_PWRITEV
|
||||
unsigned last_bytes;
|
||||
#define ior_last_sgvcnt(ior, item) (item)->sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#else
|
||||
#define ior_last_sgvcnt(ior, item) (1)
|
||||
#define ior_last_bytes(ior, item) (item)->single.iov_len
|
||||
#endif /* !Windows */
|
||||
mdbx_filehandle_t fd;
|
||||
ior_item_t *last;
|
||||
ior_item_t *pool;
|
||||
char *boundary;
|
||||
} osal_ioring_t;
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
/* Actually this is not ioring for now, but on the way. */
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
uint8_t flags,
|
||||
#endif /* Windows */
|
||||
mdbx_filehandle_t fd);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *, size_t items);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_destroy(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_reset(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ctx, const size_t offset,
|
||||
void *data, const size_t bytes);
|
||||
typedef struct osal_ioring_write_result {
|
||||
int err;
|
||||
unsigned wops;
|
||||
} osal_ioring_write_result_t;
|
||||
MDBX_INTERNAL_FUNC osal_ioring_write_result_t
|
||||
osal_ioring_write(osal_ioring_t *ior);
|
||||
|
||||
typedef struct iov_ctx iov_ctx_t;
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_walk(
|
||||
osal_ioring_t *ior, iov_ctx_t *ctx,
|
||||
void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes));
|
||||
|
||||
/* Returns the ring's current `slots_left` counter. */
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_left(const osal_ioring_t *ior) {
|
||||
return ior->slots_left;
|
||||
}
|
||||
|
||||
/* Returns the number of slots in use, computed as `allocated - slots_left`. */
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_used(const osal_ioring_t *ior) {
|
||||
return ior->allocated - ior->slots_left;
|
||||
}
|
||||
|
||||
/* Makes sure the ring has at least the requested number of item slots.
 * The requested `items` count is raised to a minimum of 32, on Windows
 * additionally raised to `bytes >> ior->pagesize_ln2`, and then capped at
 * 65536. Returns MDBX_SUCCESS when `ior->allocated` already covers that
 * count, otherwise returns the result of osal_ioring_resize(). */
MDBX_MAYBE_UNUSED static inline int
|
||||
osal_ioring_reserve(osal_ioring_t *ior, size_t items, size_t bytes) {
|
||||
items = (items > 32) ? items : 32;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
const size_t npages = bytes >> ior->pagesize_ln2;
|
||||
items = (items > npages) ? items : npages;
|
||||
#else
|
||||
(void)bytes;
|
||||
#endif
|
||||
items = (items < 65536) ? items : 65536;
|
||||
if (likely(ior->allocated >= items))
|
||||
return MDBX_SUCCESS;
|
||||
return osal_ioring_resize(ior, items);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* libc compatibility stuff */
|
||||
|
||||
@ -1257,10 +1386,53 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny);
|
||||
MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny);
|
||||
|
||||
/* max bytes to write in one call */
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x01000000)
|
||||
#if defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x10000000)
|
||||
#elif defined(_WIN32)
|
||||
#define MAX_WRITE UINT32_C(0x04000000)
|
||||
#else
|
||||
#define MAX_WRITE UINT32_C(0x3fff0000)
|
||||
#define MAX_WRITE UINT32_C(0x3f000000)
|
||||
|
||||
#if defined(F_GETLK64) && defined(F_SETLK64) && defined(F_SETLKW64) && \
|
||||
!defined(__ANDROID_API__)
|
||||
#define MDBX_F_SETLK F_SETLK64
|
||||
#define MDBX_F_SETLKW F_SETLKW64
|
||||
#define MDBX_F_GETLK F_GETLK64
|
||||
#if (__GLIBC_PREREQ(2, 28) && \
|
||||
(defined(__USE_LARGEFILE64) || defined(__LARGEFILE64_SOURCE) || \
|
||||
defined(_USE_LARGEFILE64) || defined(_LARGEFILE64_SOURCE))) || \
|
||||
defined(fcntl64)
|
||||
#define MDBX_FCNTL fcntl64
|
||||
#else
|
||||
#define MDBX_FCNTL fcntl
|
||||
#endif
|
||||
#define MDBX_STRUCT_FLOCK struct flock64
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX UINT64_C(0x7fffFFFFfff00000)
|
||||
#endif /* OFF_T_MAX */
|
||||
#else
|
||||
#define MDBX_F_SETLK F_SETLK
|
||||
#define MDBX_F_SETLKW F_SETLKW
|
||||
#define MDBX_F_GETLK F_GETLK
|
||||
#define MDBX_FCNTL fcntl
|
||||
#define MDBX_STRUCT_FLOCK struct flock
|
||||
#endif /* MDBX_F_SETLK, MDBX_F_SETLKW, MDBX_F_GETLK */
|
||||
|
||||
#if defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64) && !defined(__ANDROID_API__)
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK64
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW64
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK64
|
||||
#else
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX \
|
||||
(((sizeof(off_t) > 4) ? INT64_MAX : INT32_MAX) & ~(size_t)0xFffff)
|
||||
#endif /* OFF_T_MAX */
|
||||
#endif /* MDBX_F_OFD_SETLK64, MDBX_F_OFD_SETLKW64, MDBX_F_OFD_GETLK64 */
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
@ -1303,8 +1475,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex);
|
||||
MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex);
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov,
|
||||
int iovcnt, uint64_t offset,
|
||||
size_t expected_written);
|
||||
size_t sgvcnt, uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count,
|
||||
uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf,
|
||||
@ -1332,12 +1503,16 @@ MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
|
||||
MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
|
||||
|
||||
enum osal_openfile_purpose {
|
||||
MDBX_OPEN_DXB_READ = 0,
|
||||
MDBX_OPEN_DXB_LAZY = 1,
|
||||
MDBX_OPEN_DXB_DSYNC = 2,
|
||||
MDBX_OPEN_LCK = 3,
|
||||
MDBX_OPEN_COPY = 4,
|
||||
MDBX_OPEN_DELETE = 5
|
||||
MDBX_OPEN_DXB_READ,
|
||||
MDBX_OPEN_DXB_LAZY,
|
||||
MDBX_OPEN_DXB_DSYNC,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
MDBX_OPEN_DXB_OVERLAPPED,
|
||||
MDBX_OPEN_DXB_OVERLAPPED_DIRECT,
|
||||
#endif /* Windows */
|
||||
MDBX_OPEN_LCK,
|
||||
MDBX_OPEN_COPY,
|
||||
MDBX_OPEN_DELETE
|
||||
};
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
|
||||
@ -1371,7 +1546,7 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
|
||||
MDBX_INTERNAL_FUNC int
|
||||
osal_resume_threads_after_remap(mdbx_handle_array_t *array);
|
||||
#endif /* Windows */
|
||||
MDBX_INTERNAL_FUNC int osal_msync(osal_mmap_t *map, size_t offset,
|
||||
MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
||||
size_t length,
|
||||
enum osal_syncmode_bits mode_bits);
|
||||
MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle,
|
||||
@ -1414,9 +1589,16 @@ osal_pthread_mutex_lock(pthread_mutex_t *mutex) {
|
||||
#endif /* !Windows */
|
||||
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_monotime(void);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_cputime(size_t *optional_page_faults);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16);
|
||||
MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime);
|
||||
|
||||
/* Converts `monotime` with osal_monotime_to_16dot16(), but never lets a
 * non-zero input collapse to zero: when the conversion yields 0 and
 * `monotime` is greater than 0, the result is 1 instead. */
MDBX_MAYBE_UNUSED static inline uint32_t
|
||||
osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) {
|
||||
uint32_t seconds_16dot16 = osal_monotime_to_16dot16(monotime);
|
||||
return seconds_16dot16 ? seconds_16dot16 : /* fix underflow */ (monotime > 0);
|
||||
}
|
||||
|
||||
MDBX_INTERNAL_FUNC bin128_t osal_bootid(void);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* lck stuff */
|
||||
@ -1526,6 +1708,9 @@ MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid);
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#define OSAL_MB2WIDE(FROM, TO) \
|
||||
do { \
|
||||
const char *const from_tmp = (FROM); \
|
||||
@ -1659,6 +1844,11 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
|
||||
|
||||
NTSYSAPI ULONG RtlRandomEx(PULONG Seed);
|
||||
|
||||
typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle,
|
||||
PUCHAR OverlappedRangeStart,
|
||||
ULONG Length);
|
||||
MDBX_INTERNAL_VAR MDBX_SetFileIoOverlappedRange mdbx_SetFileIoOverlappedRange;
|
||||
|
||||
#endif /* Windows */
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -1773,6 +1963,13 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_REFUND must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
|
||||
/** Controls profiling of GC search and updates. */
|
||||
#ifndef MDBX_ENABLE_PROFGC
|
||||
#define MDBX_ENABLE_PROFGC 0
|
||||
#elif !(MDBX_ENABLE_PROFGC == 0 || MDBX_ENABLE_PROFGC == 1)
|
||||
#error MDBX_ENABLE_PROFGC must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_PROFGC */
|
||||
|
||||
/** Controls gathering statistics for page operations. */
|
||||
#ifndef MDBX_ENABLE_PGOP_STAT
|
||||
#define MDBX_ENABLE_PGOP_STAT 1
|
||||
@ -1792,7 +1989,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_BIGFOOT */
|
||||
|
||||
/** Controls use of POSIX madvise() hints and friends. */
|
||||
/** Controls the use of POSIX madvise() and/or similar hints. */
|
||||
#ifndef MDBX_ENABLE_MADVISE
|
||||
#define MDBX_ENABLE_MADVISE 1
|
||||
#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1)
|
||||
@ -1821,23 +2018,22 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
|
||||
#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
|
||||
|
||||
/** Basically, this build-option is for TODO. Guess it should be replaced
|
||||
* with MDBX_ENABLE_WRITEMAP_SPILLING with the three variants:
|
||||
* 0/OFF = Don't track dirty pages at all and don't spilling ones.
|
||||
* This should be by-default on Linux and may-be other systems
|
||||
* (not sure: Darwin/OSX, FreeBSD, Windows 10) where kernel provides
|
||||
* properly LRU tracking and async writing on-demand.
|
||||
* 1/ON = Lite tracking of dirty pages but with LRU labels and explicit
|
||||
* spilling with msync(MS_ASYNC). */
|
||||
#ifndef MDBX_FAKE_SPILL_WRITEMAP
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 1 /* msync(MS_ASYNC) is no-op on Linux */
|
||||
/** Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP
|
||||
* mode. 0/OFF = Don't track dirty pages at all, don't spill them, and use
|
||||
* msync() to persist data. This is the default on Linux and other systems where
|
||||
* the kernel provides proper LRU tracking and effective flushing on-demand. 1/ON
|
||||
* = Track dirty pages with LRU labels for spilling and explicitly persist
|
||||
* them by write(). This may be reasonable for systems with low
|
||||
* performance of msync() and/or LRU tracking. */
|
||||
#ifndef MDBX_AVOID_MSYNC
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MDBX_AVOID_MSYNC 1
|
||||
#else
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 0
|
||||
#define MDBX_AVOID_MSYNC 0
|
||||
#endif
|
||||
#elif !(MDBX_FAKE_SPILL_WRITEMAP == 0 || MDBX_FAKE_SPILL_WRITEMAP == 1)
|
||||
#error MDBX_FAKE_SPILL_WRITEMAP must be defined as 0 or 1
|
||||
#endif /* MDBX_FAKE_SPILL_WRITEMAP */
|
||||
#elif !(MDBX_AVOID_MSYNC == 0 || MDBX_AVOID_MSYNC == 1)
|
||||
#error MDBX_AVOID_MSYNC must be defined as 0 or 1
|
||||
#endif /* MDBX_AVOID_MSYNC */
|
||||
|
||||
/** Controls sort order of internal page number lists.
|
||||
* This is a mostly experimental/advanced option, not intended for regular MDBX users.
|
||||
@ -1894,6 +2090,27 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#ifndef MDBX_HAVE_C11ATOMICS
|
||||
#endif /* MDBX_HAVE_C11ATOMICS */
|
||||
|
||||
/** If defined, enables the use of GCC's `__builtin_cpu_supports()`
|
||||
* for runtime dispatching depending on the CPU's capabilities. */
|
||||
#ifndef MDBX_HAVE_BUILTIN_CPU_SUPPORTS
|
||||
#if defined(__APPLE__) || defined(BIONIC)
|
||||
/* Never use any modern features on Apple's or Google's OSes
|
||||
* because of numerous compatibility and/or performance problems */
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif defined(__e2k__)
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif __has_builtin(__builtin_cpu_supports) || \
|
||||
defined(__BUILTIN_CPU_SUPPORTS__) || \
|
||||
(defined(__ia32__) && __GNUC_PREREQ(4, 8) && __GLIBC_PREREQ(2, 23))
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 1
|
||||
#else
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#endif
|
||||
#elif !(MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 0 || \
|
||||
MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 1)
|
||||
#error MDBX_HAVE_BUILTIN_CPU_SUPPORTS must be defined as 0 or 1
|
||||
#endif /* MDBX_HAVE_BUILTIN_CPU_SUPPORTS */
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/** Win32 File Locking API for \ref MDBX_LOCKING */
|
||||
@ -1949,7 +2166,10 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
|
||||
/** Advanced: Using POSIX OFD-locks (autodetection by default). */
|
||||
#ifndef MDBX_USE_OFDLOCKS
|
||||
#if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK) && \
|
||||
#if ((defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && \
|
||||
defined(F_OFD_GETLK)) || \
|
||||
(defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64))) && \
|
||||
!defined(MDBX_SAFE4QEMU) && \
|
||||
!defined(__sun) /* OFD-lock are broken on Solaris */
|
||||
#define MDBX_USE_OFDLOCKS 1
|
||||
@ -2035,13 +2255,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#endif /* MDBX_64BIT_ATOMIC */
|
||||
|
||||
#ifndef MDBX_64BIT_CAS
|
||||
#if defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if __GCC_ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2053,6 +2267,12 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN)
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2287,7 +2507,7 @@ MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(
|
||||
/* FROZEN: The version number for a database's datafile format. */
|
||||
#define MDBX_DATA_VERSION 3
|
||||
/* The version number for a database's lockfile format. */
|
||||
#define MDBX_LOCK_VERSION 4
|
||||
#define MDBX_LOCK_VERSION 5
|
||||
|
||||
/* handle for the DB used to track free pages. */
|
||||
#define FREE_DBI 0
|
||||
@ -2491,14 +2711,34 @@ typedef struct MDBX_page {
|
||||
: PAGETYPE_WHOLE(p))
|
||||
|
||||
/* Size of the page header, excluding dynamic data at the end */
|
||||
#define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_ptrs))
|
||||
#define PAGEHDRSZ offsetof(MDBX_page, mp_ptrs)
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* Per-category counters and timings used for GC profiling. */
typedef struct profgc_stat {
|
||||
/* Monotonic "wall clock" time
|
||||
* spent on reading and searching within GC */
|
||||
uint64_t rtime_monotonic;
|
||||
/* Monotonic "wall clock" time spent
|
||||
* on preparing pages retrieved from GC, including paging-in from disk. */
|
||||
uint64_t xtime_monotonic;
|
||||
/* User-mode CPU time
|
||||
* spent on reading and searching within GC */
|
||||
uint64_t rtime_cpu;
|
||||
/* Number of read/search iterations within GC during page allocation */
|
||||
uint32_t rsteps;
|
||||
/* Number of requests to allocate page sequences,
|
||||
* i.e. when allocation of more than one page is requested */
|
||||
uint32_t xpages;
|
||||
/* Slow path execution count */
|
||||
uint32_t spe_counter;
|
||||
/* page faults (hard page faults) */
|
||||
uint32_t majflt;
|
||||
} profgc_stat_t;
|
||||
|
||||
/* Statistics of page operations overall of all (running, completed and aborted)
|
||||
* transactions */
|
||||
typedef struct {
|
||||
typedef struct pgop_stat {
|
||||
MDBX_atomic_uint64_t newly; /* Quantity of a new pages added */
|
||||
MDBX_atomic_uint64_t cow; /* Quantity of pages copied for update */
|
||||
MDBX_atomic_uint64_t clone; /* Quantity of parent's dirty pages clones
|
||||
@ -2510,10 +2750,31 @@ typedef struct {
|
||||
MDBX_atomic_uint64_t
|
||||
wops; /* Number of explicit write operations (not a pages) to a disk */
|
||||
MDBX_atomic_uint64_t
|
||||
gcrtime; /* Time spending for reading/searching GC (aka FreeDB). The
|
||||
unit/scale is platform-depended, see osal_monotime(). */
|
||||
} MDBX_pgop_stat_t;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||
msync; /* Number of explicit msync/flush-to-disk operations */
|
||||
MDBX_atomic_uint64_t
|
||||
fsync; /* Number of explicit fsync/flush-to-disk operations */
|
||||
|
||||
/* Statistics for GC profiling.
|
||||
* Logically this data might be better moved into a separate structure,
|
||||
* but the difference would be purely cosmetic. */
|
||||
struct {
|
||||
/* Costs of maintaining user data */
|
||||
profgc_stat_t work;
|
||||
/* Costs of maintaining and updating the GC itself */
|
||||
profgc_stat_t self;
|
||||
/* Number of GC update iterations,
|
||||
* greater than 1 if there were retries/restarts */
|
||||
uint32_t wloops;
|
||||
/* Iterations of coalescing GC records */
|
||||
uint32_t coalescences;
|
||||
/* Wipes of steady commit points in MDBX_UTTERLY_NOSYNC mode */
|
||||
uint32_t wipes;
|
||||
/* Flushes of data to disk outside of MDBX_UTTERLY_NOSYNC mode */
|
||||
uint32_t flushes;
|
||||
/* Attempts to kick lagging readers */
|
||||
uint32_t kicks;
|
||||
} gc_prof;
|
||||
} pgop_stat_t;
|
||||
|
||||
#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES
|
||||
#define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
|
||||
@ -2644,13 +2905,16 @@ typedef struct MDBX_lockinfo {
|
||||
/* Marker to distinguish uniqueness of DB/CLK. */
|
||||
MDBX_atomic_uint64_t mti_bait_uniqueness;
|
||||
|
||||
/* Paired counter of processes that have mlock()ed part of mmapped DB.
|
||||
* The (mti_mlcnt[0] - mti_mlcnt[1]) > 0 means at least one process
|
||||
* has locked at least one page, so madvise() could return EINVAL. */
|
||||
MDBX_atomic_uint32_t mti_mlcnt[2];
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* Statistics of costly ops of all (running, completed and aborted)
|
||||
* transactions */
|
||||
MDBX_pgop_stat_t mti_pgop_stat;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT*/
|
||||
pgop_stat_t mti_pgop_stat;
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
@ -2661,20 +2925,20 @@ typedef struct MDBX_lockinfo {
|
||||
|
||||
atomic_txnid_t mti_oldest_reader;
|
||||
|
||||
/* Timestamp of the last steady sync. Value is represented in a suitable
|
||||
* system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) or
|
||||
* clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_sync_timestamp;
|
||||
/* Timestamp of entering an out-of-sync state. Value is represented in a
|
||||
* suitable system-dependent form, for example clock_gettime(CLOCK_BOOTTIME)
|
||||
* or clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_eoos_timestamp;
|
||||
|
||||
/* Number of un-synced-with-disk pages for auto-sync feature. */
|
||||
atomic_pgno_t mti_unsynced_pages;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(MADV_FREE). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
MDBX_atomic_uint64_t mti_unsynced_pages;
|
||||
|
||||
/* Timestamp of the last readers check. */
|
||||
MDBX_atomic_uint64_t mti_reader_check_timestamp;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(DONTNEED). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
|
||||
/* Shared anchor for tracking readahead edge and enabled/disabled status. */
|
||||
pgno_t mti_readahead_anchor;
|
||||
|
||||
@ -2777,7 +3041,7 @@ typedef struct MDBX_dp {
|
||||
MDBX_page *ptr;
|
||||
pgno_t pgno;
|
||||
union {
|
||||
unsigned extra;
|
||||
uint32_t extra;
|
||||
__anonymous_struct_extension__ struct {
|
||||
unsigned multi : 1;
|
||||
unsigned lru : 31;
|
||||
@ -2787,10 +3051,10 @@ typedef struct MDBX_dp {
|
||||
|
||||
/* A DPL (dirty-page list) is a sorted array of MDBX_DPs. */
|
||||
typedef struct MDBX_dpl {
|
||||
unsigned sorted;
|
||||
unsigned length;
|
||||
unsigned pages_including_loose; /* number of pages, but not an entries. */
|
||||
unsigned detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
size_t sorted;
|
||||
size_t length;
|
||||
size_t pages_including_loose; /* number of pages, but not an entries. */
|
||||
size_t detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
||||
(!defined(__cplusplus) && defined(_MSC_VER))
|
||||
MDBX_dp items[] /* dynamic size with holes at zero and after the last */;
|
||||
@ -2809,11 +3073,17 @@ typedef struct MDBX_dpl {
|
||||
((1u << 17) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
|
||||
|
||||
#define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1])
|
||||
#define MDBX_PNL_SIZE(pl) ((pl)[0])
|
||||
#define MDBX_PNL_GETSIZE(pl) ((size_t)((pl)[0]))
|
||||
#define MDBX_PNL_SETSIZE(pl, size) \
|
||||
do { \
|
||||
const size_t __size = size; \
|
||||
assert(__size < INT_MAX); \
|
||||
(pl)[0] = (pgno_t)__size; \
|
||||
} while (0)
|
||||
#define MDBX_PNL_FIRST(pl) ((pl)[1])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_SIZE(pl)])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)])
|
||||
#define MDBX_PNL_BEGIN(pl) (&(pl)[1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_SIZE(pl) + 1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_GETSIZE(pl) + 1])
|
||||
|
||||
#if MDBX_PNL_ASCENDING
|
||||
#define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl)
|
||||
@ -2823,8 +3093,8 @@ typedef struct MDBX_dpl {
|
||||
#define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl)
|
||||
#endif
|
||||
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_SIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_SIZE(pl) == 0)
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0)
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Internal structures */
|
||||
@ -2843,6 +3113,9 @@ typedef struct MDBX_dbx {
|
||||
|
||||
typedef struct troika {
|
||||
uint8_t fsm, recent, prefer_steady, tail_and_flags;
|
||||
#if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */
|
||||
uint32_t unused_pad;
|
||||
#endif
|
||||
#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7)
|
||||
#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64)
|
||||
#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128)
|
||||
@ -2864,9 +3137,13 @@ struct MDBX_txn {
|
||||
/* Additional flag for sync_locked() */
|
||||
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
|
||||
|
||||
#define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
|
||||
#define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not altered */
|
||||
|
||||
#define TXN_FLAGS \
|
||||
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_UPDATE_GC | \
|
||||
MDBX_TXN_FROZEN_RE)
|
||||
|
||||
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
|
||||
((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
|
||||
@ -2925,18 +3202,18 @@ struct MDBX_txn {
|
||||
struct {
|
||||
meta_troika_t troika;
|
||||
/* In write txns, array of cursors for each DB */
|
||||
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
pgno_t *relist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
#if MDBX_ENABLE_REFUND
|
||||
pgno_t loose_refund_wl /* FIXME: describe */;
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
/* a sequence to spilling dirty page with LRU policy */
|
||||
unsigned dirtylru;
|
||||
/* dirtylist room: Dirty array size - dirty pages visible to this txn.
|
||||
* Includes ancestor txns' dirty pages not hidden by other txns'
|
||||
* dirty/spilled pages. Thus commit(nested txn) has room to merge
|
||||
* dirtylist into mt_parent after freeing hidden mt_parent pages. */
|
||||
unsigned dirtyroom;
|
||||
/* a sequence to spilling dirty page with LRU policy */
|
||||
unsigned dirtylru;
|
||||
size_t dirtyroom;
|
||||
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
|
||||
MDBX_dpl *dirtylist;
|
||||
/* The list of reclaimed txns from GC */
|
||||
@ -2947,8 +3224,8 @@ struct MDBX_txn {
|
||||
* in this transaction, linked through `mp_next`. */
|
||||
MDBX_page *loose_pages;
|
||||
/* Number of loose pages (tw.loose_pages) */
|
||||
unsigned loose_count;
|
||||
unsigned spill_least_removed;
|
||||
size_t loose_count;
|
||||
size_t spill_least_removed;
|
||||
/* The sorted list of dirty pages we temporarily wrote to disk
|
||||
* because the dirty list was full. page numbers in here are
|
||||
* shifted left by 1, deleted slots have the LSB set. */
|
||||
@ -3002,9 +3279,7 @@ struct MDBX_cursor {
|
||||
#define C_SUB 0x04 /* Cursor is a sub-cursor */
|
||||
#define C_DEL 0x08 /* last op was a cursor_del */
|
||||
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
|
||||
#define C_RECLAIMING 0x20 /* GC lookup is prohibited */
|
||||
#define C_GCFREEZE 0x40 /* reclaimed_pglist must not be updated */
|
||||
uint8_t mc_flags; /* see mdbx_cursor */
|
||||
uint8_t mc_flags;
|
||||
|
||||
/* Cursor checking flags. */
|
||||
#define CC_BRANCH 0x01 /* same as P_BRANCH for CHECK_LEAF_TYPE() */
|
||||
@ -3015,7 +3290,7 @@ struct MDBX_cursor {
|
||||
#define CC_LEAF2 0x20 /* same as P_LEAF2 for CHECK_LEAF_TYPE() */
|
||||
#define CC_RETIRING 0x40 /* refs to child pages may be invalid */
|
||||
#define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */
|
||||
uint8_t mc_checking; /* page checking level */
|
||||
uint8_t mc_checking;
|
||||
|
||||
MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */
|
||||
indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */
|
||||
@ -3064,14 +3339,20 @@ struct MDBX_env {
|
||||
osal_mmap_t me_dxb_mmap; /* The main data file */
|
||||
#define me_map me_dxb_mmap.dxb
|
||||
#define me_lazy_fd me_dxb_mmap.fd
|
||||
mdbx_filehandle_t me_dsync_fd;
|
||||
#define me_fd4data me_ioring.fd
|
||||
mdbx_filehandle_t me_dsync_fd, me_fd4meta;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
HANDLE me_overlapped_fd, me_data_lock_event;
|
||||
#endif /* Windows */
|
||||
osal_mmap_t me_lck_mmap; /* The lock file */
|
||||
#define me_lfd me_lck_mmap.fd
|
||||
struct MDBX_lockinfo *me_lck;
|
||||
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
unsigned me_branch_nodemax; /* max size of a branch-node */
|
||||
atomic_pgno_t me_mlocked_pgno;
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
|
||||
uint16_t me_merge_threshold,
|
||||
me_merge_threshold_gc; /* pages emptier than this are candidates for
|
||||
@ -3143,6 +3424,7 @@ struct MDBX_env {
|
||||
unsigned me_dp_reserve_len;
|
||||
/* PNL of pages that became unused in a write txn */
|
||||
MDBX_PNL me_retired_pages;
|
||||
osal_ioring_t me_ioring;
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
osal_srwlock_t me_remap_guard;
|
||||
@ -3168,7 +3450,7 @@ struct MDBX_env {
|
||||
#define xMDBX_DEBUG_SPILLING 0
|
||||
#endif
|
||||
#if xMDBX_DEBUG_SPILLING == 2
|
||||
unsigned debug_dirtied_est, debug_dirtied_act;
|
||||
size_t debug_dirtied_est, debug_dirtied_act;
|
||||
#endif /* xMDBX_DEBUG_SPILLING */
|
||||
|
||||
/* ------------------------------------------------- stub for lck-less mode */
|
||||
@ -3273,10 +3555,22 @@ MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line,
|
||||
#define FATAL(fmt, ...) \
|
||||
debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__);
|
||||
|
||||
#if MDBX_DEBUG
|
||||
#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line)
|
||||
#else /* MDBX_DEBUG */
|
||||
MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func,
|
||||
unsigned line);
|
||||
#define ASSERT_FAIL(env, msg, func, line) \
|
||||
do { \
|
||||
(void)(env); \
|
||||
assert_fail(msg, func, line); \
|
||||
} while (0)
|
||||
#endif /* MDBX_DEBUG */
|
||||
|
||||
#define ENSURE_MSG(env, expr, msg) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) \
|
||||
mdbx_assert_fail(env, msg, __func__, __LINE__); \
|
||||
ASSERT_FAIL(env, msg, __func__, __LINE__); \
|
||||
} while (0)
|
||||
|
||||
#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr)
|
||||
@ -3347,7 +3641,9 @@ MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin,
|
||||
MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key);
|
||||
|
||||
MDBX_INTERNAL_FUNC void global_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void global_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void thread_dtor(void *ptr);
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -3468,12 +3764,12 @@ typedef struct MDBX_node {
|
||||
#error "Oops, some flags overlapped or wrong"
|
||||
#endif
|
||||
|
||||
/* max number of pages to commit in one writev() call */
|
||||
#define MDBX_COMMIT_PAGES 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_COMMIT_PAGES /* sysconf(_SC_IOV_MAX) */
|
||||
#undef MDBX_COMMIT_PAGES
|
||||
#define MDBX_COMMIT_PAGES IOV_MAX
|
||||
#endif
|
||||
/* Max length of iov-vector passed to writev() call, used for auxiliary writes */
|
||||
#define MDBX_AUXILARY_IOV_MAX 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX
|
||||
#undef MDBX_AUXILARY_IOV_MAX
|
||||
#define MDBX_AUXILARY_IOV_MAX IOV_MAX
|
||||
#endif /* MDBX_AUXILARY_IOV_MAX */
|
||||
|
||||
/*
|
||||
* /
|
||||
@ -3530,20 +3826,24 @@ ceil_powerof2(size_t value, size_t granularity) {
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned
|
||||
log2n_powerof2(size_t value) {
|
||||
assert(value > 0 && value < INT32_MAX && is_powerof2(value));
|
||||
assert((value & -(int32_t)value) == value);
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl)
|
||||
return __builtin_ctzl(value);
|
||||
log2n_powerof2(size_t value_uintptr) {
|
||||
assert(value_uintptr > 0 && value_uintptr < INT32_MAX &&
|
||||
is_powerof2(value_uintptr));
|
||||
assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr);
|
||||
const uint32_t value_uint32 = (uint32_t)value_uintptr;
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctz)
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(unsigned));
|
||||
return __builtin_ctz(value_uint32);
|
||||
#elif defined(_MSC_VER)
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, (unsigned long)value);
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(long));
|
||||
_BitScanForward(&index, value_uint32);
|
||||
return index;
|
||||
#else
|
||||
static const uint8_t debruijn_ctz32[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
|
||||
return debruijn_ctz32[(uint32_t)(value * 0x077CB531u) >> 27];
|
||||
return debruijn_ctz32[(uint32_t)(value_uint32 * 0x077CB531ul) >> 27];
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -3603,7 +3903,7 @@ MDBX_MAYBE_UNUSED static void static_checks(void) {
|
||||
#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* __USE_MINGW_ANSI_STDIO */
|
||||
#endif /* MinGW */
|
||||
|
||||
|
||||
|
||||
@ -5013,6 +5313,15 @@ void txn_managed::commit() {
|
||||
MDBX_CXX20_UNLIKELY err.throw_exception();
|
||||
}
|
||||
|
||||
void txn_managed::commit(commit_latency *latency) {
|
||||
const error err =
|
||||
static_cast<MDBX_error_t>(::mdbx_txn_commit_ex(handle_, latency));
|
||||
if (MDBX_LIKELY(err.code() != MDBX_THREAD_MISMATCH))
|
||||
MDBX_CXX20_LIKELY handle_ = nullptr;
|
||||
if (MDBX_UNLIKELY(err.code() != MDBX_SUCCESS))
|
||||
MDBX_CXX20_UNLIKELY err.throw_exception();
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
bool txn::drop_map(const char *name, bool throw_if_absent) {
|
||||
|
||||
290
crates/storage/libmdbx-rs/mdbx-sys/libmdbx/mdbx.h
vendored
290
crates/storage/libmdbx-rs/mdbx-sys/libmdbx/mdbx.h
vendored
@ -75,6 +75,14 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#ifndef LIBMDBX_H
|
||||
#define LIBMDBX_H
|
||||
|
||||
#if defined(__riscv) || defined(__riscv__) || defined(__RISCV) || \
|
||||
defined(__RISCV__)
|
||||
#warning The RISC-V architecture is intentionally insecure by design. \
|
||||
Please delete this admonition at your own risk, \
|
||||
if you make such decision informed and consciously. \
|
||||
Refer to https://clck.ru/32d9xH for more information.
|
||||
#endif /* RISC-V */
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push, 1)
|
||||
#pragma warning(disable : 4548) /* expression before comma has no effect; \
|
||||
@ -830,8 +838,14 @@ enum MDBX_constants {
|
||||
#if !(defined(_WIN32) || defined(_WIN64))
|
||||
#define MDBX_LOCKNAME "/mdbx.lck"
|
||||
#else
|
||||
#define MDBX_LOCKNAME L"\\mdbx.lck"
|
||||
#endif
|
||||
#define MDBX_LOCKNAME_W L"\\mdbx.lck"
|
||||
#define MDBX_LOCKNAME_A "\\mdbx.lck"
|
||||
#ifdef UNICODE
|
||||
#define MDBX_LOCKNAME MDBX_LOCKNAME_W
|
||||
#else
|
||||
#define MDBX_LOCKNAME MDBX_LOCKNAME_A
|
||||
#endif /* UNICODE */
|
||||
#endif /* Windows */
|
||||
#endif /* MDBX_LOCKNAME */
|
||||
#ifndef MDBX_DATANAME
|
||||
/** \brief The name of the data file in the environment
|
||||
@ -839,8 +853,14 @@ enum MDBX_constants {
|
||||
#if !(defined(_WIN32) || defined(_WIN64))
|
||||
#define MDBX_DATANAME "/mdbx.dat"
|
||||
#else
|
||||
#define MDBX_DATANAME L"\\mdbx.dat"
|
||||
#endif
|
||||
#define MDBX_DATANAME_W L"\\mdbx.dat"
|
||||
#define MDBX_DATANAME_A "\\mdbx.dat"
|
||||
#ifdef UNICODE
|
||||
#define MDBX_DATANAME MDBX_DATANAME_W
|
||||
#else
|
||||
#define MDBX_DATANAME MDBX_DATANAME_A
|
||||
#endif /* UNICODE */
|
||||
#endif /* Windows */
|
||||
#endif /* MDBX_DATANAME */
|
||||
|
||||
#ifndef MDBX_LOCK_SUFFIX
|
||||
@ -848,8 +868,14 @@ enum MDBX_constants {
|
||||
#if !(defined(_WIN32) || defined(_WIN64))
|
||||
#define MDBX_LOCK_SUFFIX "-lck"
|
||||
#else
|
||||
#define MDBX_LOCK_SUFFIX L"-lck"
|
||||
#endif
|
||||
#define MDBX_LOCK_SUFFIX_W L"-lck"
|
||||
#define MDBX_LOCK_SUFFIX_A "-lck"
|
||||
#ifdef UNICODE
|
||||
#define MDBX_LOCK_SUFFIX MDBX_LOCK_SUFFIX_W
|
||||
#else
|
||||
#define MDBX_LOCK_SUFFIX MDBX_LOCK_SUFFIX_A
|
||||
#endif /* UNICODE */
|
||||
#endif /* Windows */
|
||||
#endif /* MDBX_LOCK_SUFFIX */
|
||||
|
||||
/* DEBUG & LOGGING ************************************************************/
|
||||
@ -970,8 +996,16 @@ DEFINE_ENUM_FLAG_OPERATORS(MDBX_debug_flags_t)
|
||||
* called before printing the message and aborting.
|
||||
* \see mdbx_setup_debug()
|
||||
*
|
||||
* \param [in] env An environment handle returned by \ref mdbx_env_create().
|
||||
* \param [in] msg The assertion message, not including newline. */
|
||||
* \param [in] loglevel The severity of message.
|
||||
* \param [in] function The function name which emits message,
|
||||
* may be NULL.
|
||||
* \param [in] line The source code line number which emits message,
|
||||
* may be zero.
|
||||
* \param [in] fmt The printf-like format string with message.
|
||||
* \param [in] args The variable argument list respectively for the
|
||||
* format-message string passed by `fmt` argument.
|
||||
* May be NULL or invalid if the format-message string
|
||||
* doesn't contain `%`-specification of arguments. */
|
||||
typedef void MDBX_debug_func(MDBX_log_level_t loglevel, const char *function,
|
||||
int line, const char *fmt,
|
||||
va_list args) MDBX_CXX17_NOEXCEPT;
|
||||
@ -990,8 +1024,12 @@ LIBMDBX_API int mdbx_setup_debug(MDBX_log_level_t log_level,
|
||||
* called before printing the message and aborting.
|
||||
* \see mdbx_env_set_assert()
|
||||
*
|
||||
* \param [in] env An environment handle returned by mdbx_env_create().
|
||||
* \param [in] msg The assertion message, not including newline. */
|
||||
* \param [in] env An environment handle.
|
||||
* \param [in] msg The assertion message, not including newline.
|
||||
* \param [in] function The function name where the assertion check failed,
|
||||
* may be NULL.
|
||||
* \param [in] line The line number in the source file
|
||||
* where the assertion check failed, may be zero. */
|
||||
typedef void MDBX_assert_func(const MDBX_env *env, const char *msg,
|
||||
const char *function,
|
||||
unsigned line) MDBX_CXX17_NOEXCEPT;
|
||||
@ -1020,12 +1058,15 @@ LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf,
|
||||
const size_t bufsize);
|
||||
|
||||
/** \brief Panics with message and causes abnormal process termination. */
|
||||
LIBMDBX_API void mdbx_panic(const char *fmt, ...) MDBX_PRINTF_ARGS(1, 2);
|
||||
MDBX_NORETURN LIBMDBX_API void mdbx_panic(const char *fmt, ...)
|
||||
MDBX_PRINTF_ARGS(1, 2);
|
||||
|
||||
/** \brief Panics with assertion failed message and causes abnormal process
|
||||
* termination. */
|
||||
LIBMDBX_API void mdbx_assert_fail(const MDBX_env *env, const char *msg,
|
||||
const char *func, unsigned line);
|
||||
MDBX_NORETURN LIBMDBX_API void mdbx_assert_fail(const MDBX_env *env,
|
||||
const char *msg,
|
||||
const char *func,
|
||||
unsigned line);
|
||||
/** end of c_debug @} */
|
||||
|
||||
/** \brief Environment flags
|
||||
@ -1593,8 +1634,7 @@ enum MDBX_put_flags_t {
|
||||
MDBX_NOOVERWRITE = UINT32_C(0x10),
|
||||
|
||||
/** Has effect only for \ref MDBX_DUPSORT databases.
|
||||
* For upsertion: don't write if the key-value pair already exist.
|
||||
* For deletion: remove all values for key. */
|
||||
* For upsertion: don't write if the key-value pair already exist. */
|
||||
MDBX_NODUPDATA = UINT32_C(0x20),
|
||||
|
||||
/** For upsertion: overwrite the current key/data pair.
|
||||
@ -2424,7 +2464,7 @@ typedef struct MDBX_stat MDBX_stat;
|
||||
/** \brief Return statistics about the MDBX environment.
|
||||
* \ingroup c_statinfo
|
||||
*
|
||||
* At least one of env or txn argument must be non-null. If txn is passed
|
||||
* At least one of `env` or `txn` argument must be non-null. If txn is passed
|
||||
* non-null then stat will be filled according to the given transaction.
|
||||
* Otherwise, if txn is null, then stat will be populated by a snapshot from
|
||||
* the last committed write transaction, and at next time, other information
|
||||
@ -2495,7 +2535,9 @@ struct MDBX_envinfo {
|
||||
uint64_t mi_unsync_volume;
|
||||
/** Current auto-sync threshold, see \ref mdbx_env_set_syncbytes(). */
|
||||
uint64_t mi_autosync_threshold;
|
||||
/** Time since the last steady sync in 1/65536 of second */
|
||||
/** Time since entering to a "dirty" out-of-sync state in units of 1/65536 of
|
||||
* second. In other words, this is the time since the last non-steady commit
|
||||
* or zero if it was steady. */
|
||||
uint32_t mi_since_sync_seconds16dot16;
|
||||
/** Current auto-sync period in 1/65536 of second,
|
||||
* see \ref mdbx_env_set_syncperiod(). */
|
||||
@ -2524,8 +2566,9 @@ struct MDBX_envinfo {
|
||||
uint64_t wops; /**< Number of explicit write operations (not a pages)
|
||||
to a disk */
|
||||
uint64_t
|
||||
gcrtime_seconds16dot16; /**< Time spent loading and searching inside
|
||||
GC (aka FreeDB) in 1/65536 of second. */
|
||||
msync; /**< Number of explicit msync-to-disk operations (not a pages) */
|
||||
uint64_t
|
||||
fsync; /**< Number of explicit fsync-to-disk operations (not a pages) */
|
||||
} mi_pgop_stat;
|
||||
};
|
||||
#ifndef __cplusplus
|
||||
@ -2536,7 +2579,7 @@ typedef struct MDBX_envinfo MDBX_envinfo;
|
||||
/** \brief Return information about the MDBX environment.
|
||||
* \ingroup c_statinfo
|
||||
*
|
||||
* At least one of env or txn argument must be non-null. If txn is passed
|
||||
* At least one of `env` or `txn` argument must be non-null. If txn is passed
|
||||
* non-null then stat will be filled according to the given transaction.
|
||||
* Otherwise, if txn is null, then stat will be populated by a snapshot from
|
||||
* the last committed write transaction, and at next time, other information
|
||||
@ -2782,6 +2825,94 @@ LIBMDBX_INLINE_API(int, mdbx_env_close, (MDBX_env * env)) {
|
||||
return mdbx_env_close_ex(env, false);
|
||||
}
|
||||
|
||||
/** \brief Warming up options
|
||||
* \ingroup c_settings
|
||||
* \anchor warmup_flags
|
||||
* \see mdbx_env_warmup() */
|
||||
enum MDBX_warmup_flags_t {
|
||||
/** By default \ref mdbx_env_warmup() just ask OS kernel to asynchronously
|
||||
* prefetch database pages. */
|
||||
MDBX_warmup_default = 0,
|
||||
|
||||
/** Peeking all pages of allocated portion of the database
|
||||
* to force them to be loaded into memory. However, the pages are just peeked
|
||||
* sequentially, so unused pages that are in GC will be loaded in the same
|
||||
* way as those that contain payload. */
|
||||
MDBX_warmup_force = 1,
|
||||
|
||||
/** Using system calls to peek pages instead of directly accessing them,
|
||||
* which at the cost of additional overhead avoids killing the current
|
||||
* process by OOM-killer in a lack of memory condition.
|
||||
* \note Has effect only on POSIX (non-Windows) systems in conjunction
|
||||
* with the \ref MDBX_warmup_force option. */
|
||||
MDBX_warmup_oomsafe = 2,
|
||||
|
||||
/** Try to lock database pages in memory by `mlock()` on POSIX-systems
|
||||
* or `VirtualLock()` on Windows. Please refer to description of these
|
||||
* functions for reasonability of such locking and the information of
|
||||
* effects, including the system as a whole.
|
||||
*
|
||||
* Such locking in memory requires that the corresponding resource limits
|
||||
* (e.g. `RLIMIT_RSS`, `RLIMIT_MEMLOCK` or process working set size)
|
||||
* and the availability of system RAM are sufficiently high.
|
||||
*
|
||||
* On success, all currently allocated pages, both unused in GC and
|
||||
* containing payload, will be locked in memory until the environment closes,
|
||||
* or explicitly unlocked by using \ref MDBX_warmup_release, or the
|
||||
* database geometry is changed, including its auto-shrinking. */
|
||||
MDBX_warmup_lock = 4,
|
||||
|
||||
/** Alters corresponding current resource limits to be enough for lock pages
|
||||
* by \ref MDBX_warmup_lock. However, this option should be used in the simplest
|
||||
* applications since it takes into account only the current size of this environment
|
||||
* disregarding all other factors. For real-world database application you
|
||||
* will need full-fledged management of resources and their limits with
|
||||
* respective engineering. */
|
||||
MDBX_warmup_touchlimit = 8,
|
||||
|
||||
/** Release the lock that was performed before by \ref MDBX_warmup_lock. */
|
||||
MDBX_warmup_release = 16,
|
||||
};
|
||||
#ifndef __cplusplus
|
||||
typedef enum MDBX_warmup_flags_t MDBX_warmup_flags_t;
|
||||
#else
|
||||
DEFINE_ENUM_FLAG_OPERATORS(MDBX_warmup_flags_t)
|
||||
#endif
|
||||
|
||||
/** \brief Warms up the database by loading pages into memory, optionally locking
|
||||
* them. \ingroup c_settings
|
||||
*
|
||||
* Depending on the specified flags, notifies OS kernel about following access,
|
||||
* force loads the database pages, including locks ones in memory or releases
|
||||
* such a lock. However, the function does not analyze the b-tree nor the GC.
|
||||
* Therefore unused pages that are in GC are handled (i.e. will be loaded) in
|
||||
* the same way as those that contain payload.
|
||||
*
|
||||
* At least one of `env` or `txn` argument must be non-null.
|
||||
*
|
||||
* \param [in] env An environment handle returned
|
||||
* by \ref mdbx_env_create().
|
||||
* \param [in] txn A transaction handle returned
|
||||
* by \ref mdbx_txn_begin().
|
||||
* \param [in] flags The \ref warmup_flags, bitwise OR'ed together.
|
||||
*
|
||||
* \param [in] timeout_seconds_16dot16 Optional timeout which is checked only
|
||||
* during explicitly peeking database pages
|
||||
* for loading ones if the \ref MDBX_warmup_force
|
||||
* option was specified.
|
||||
*
|
||||
* \returns A non-zero error value on failure and 0 on success.
|
||||
* Some possible errors are:
|
||||
*
|
||||
* \retval MDBX_ENOSYS The system does not support requested
|
||||
* operation(s).
|
||||
*
|
||||
* \retval MDBX_RESULT_TRUE The specified timeout was reached while loading
|
||||
* data into memory. */
|
||||
LIBMDBX_API int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn,
|
||||
MDBX_warmup_flags_t flags,
|
||||
unsigned timeout_seconds_16dot16);
|
||||
|
||||
/** \brief Set environment flags.
|
||||
* \ingroup c_settings
|
||||
*
|
||||
@ -3113,6 +3244,21 @@ mdbx_limits_keysize_max(intptr_t pagesize, MDBX_db_flags_t flags);
|
||||
MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t
|
||||
mdbx_limits_valsize_max(intptr_t pagesize, MDBX_db_flags_t flags);
|
||||
|
||||
/** \brief Returns maximal size of key-value pair to fit in a single page with
|
||||
* the given size and database flags, or -1 if pagesize is invalid.
|
||||
* \ingroup c_statinfo
|
||||
* \see db_flags */
|
||||
MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t
|
||||
mdbx_limits_pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags);
|
||||
|
||||
/** \brief Returns maximal data size in bytes to fit in a leaf-page or
|
||||
* single overflow/large-page with the given page size and database flags,
|
||||
* or -1 if pagesize is invalid.
|
||||
* \ingroup c_statinfo
|
||||
* \see db_flags */
|
||||
MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t
|
||||
mdbx_limits_valsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags);
|
||||
|
||||
/** \brief Returns maximal write transaction size (i.e. limit for summary volume
|
||||
* of dirty pages) in bytes for given page size, or -1 if pagesize is invalid.
|
||||
* \ingroup c_statinfo */
|
||||
@ -3268,6 +3414,32 @@ mdbx_env_get_maxvalsize_ex(const MDBX_env *env, MDBX_db_flags_t flags);
|
||||
MDBX_DEPRECATED MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int
|
||||
mdbx_env_get_maxkeysize(const MDBX_env *env);
|
||||
|
||||
/** \brief Returns maximal size of key-value pair to fit in a single page
|
||||
* for specified database flags.
|
||||
* \ingroup c_statinfo
|
||||
*
|
||||
* \param [in] env An environment handle returned by \ref mdbx_env_create().
|
||||
* \param [in] flags Database options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY
|
||||
* and so on). \see db_flags
|
||||
*
|
||||
* \returns The maximum size of data that can be written,
|
||||
* or -1 if something is wrong. */
|
||||
MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int
|
||||
mdbx_env_get_pairsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags);
|
||||
|
||||
/** \brief Returns maximal data size in bytes to fit in a leaf-page or
|
||||
* single overflow/large-page for specified database flags.
|
||||
* \ingroup c_statinfo
|
||||
*
|
||||
* \param [in] env An environment handle returned by \ref mdbx_env_create().
|
||||
* \param [in] flags Database options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY
|
||||
* and so on). \see db_flags
|
||||
*
|
||||
* \returns The maximum size of data that can be written,
|
||||
* or -1 if something is wrong. */
|
||||
MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int
|
||||
mdbx_env_get_valsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags);
|
||||
|
||||
/** \brief Sets application information (a context pointer) associated with
|
||||
* the environment.
|
||||
* \see mdbx_env_get_userctx()
|
||||
@ -3546,8 +3718,8 @@ struct MDBX_commit_latency {
|
||||
/** \brief Duration of preparation (commit child transactions, update
|
||||
* sub-databases records and cursors destroying). */
|
||||
uint32_t preparation;
|
||||
/** \brief Duration of GC/freeDB handling & updation. */
|
||||
uint32_t gc;
|
||||
/** \brief Duration of GC update by wall clock. */
|
||||
uint32_t gc_wallclock;
|
||||
/** \brief Duration of internal audit if enabled. */
|
||||
uint32_t audit;
|
||||
/** \brief Duration of writing dirty/modified data pages to a filesystem,
|
||||
@ -3560,6 +3732,80 @@ struct MDBX_commit_latency {
|
||||
uint32_t ending;
|
||||
/** \brief The total duration of a commit. */
|
||||
uint32_t whole;
|
||||
/** \brief User-mode CPU time spent on GC update. */
|
||||
uint32_t gc_cputime;
|
||||
|
||||
/** \brief Информация для профилирования работы GC.
|
||||
* \note Статистика является общей для всех процессов работающих с одним
|
||||
* файлом БД и хранится в LCK-файле. Данные аккумулируются при фиксации всех
|
||||
* транзакций, но только в сборках libmdbx c установленной опцией
|
||||
* \ref MDBX_ENABLE_PROFGC. Собранная статистика возвращаются любому процессу
|
||||
* при использовании \ref mdbx_txn_commit_ex() и одновременно обнуляется
|
||||
* при завершении транзакций верхнего уровня (не вложенных). */
|
||||
struct {
|
||||
/** \brief Количество итераций обновления GC,
|
||||
* больше 1 если были повторы/перезапуски. */
|
||||
uint32_t wloops;
|
||||
/** \brief Количество итераций слияния записей GC. */
|
||||
uint32_t coalescences;
|
||||
/** \brief Количество уничтожений предыдущих надежных/устойчивых
|
||||
* точек фиксации при работе в режиме \ref MDBX_UTTERLY_NOSYNC. */
|
||||
uint32_t wipes;
|
||||
/** \brief Количество принудительных фиксаций на диск
|
||||
* во избежание приращения БД при работе вне режима
|
||||
* \ref MDBX_UTTERLY_NOSYNC. */
|
||||
uint32_t flushes;
|
||||
/** \brief Количество обращений к механизму Handle-Slow-Readers
|
||||
* во избежание приращения БД.
|
||||
* \see MDBX_hsr_func */
|
||||
uint32_t kicks;
|
||||
|
||||
/** \brief Счетчик выполнения по медленному пути (slow path execution count)
|
||||
* GC ради данных пользователя. */
|
||||
uint32_t work_counter;
|
||||
/** \brief Время "по настенным часам" затраченное на чтение и поиск внутри
|
||||
* GC ради данных пользователя. */
|
||||
uint32_t work_rtime_monotonic;
|
||||
/** \brief Монотонное время по "настенным часам" затраченное
|
||||
* на подготовку страниц извлекаемых из GC для данных пользователя,
|
||||
* включая подкачку с диска. */
|
||||
uint32_t work_xtime_monotonic;
|
||||
/** \brief Время ЦПУ в режиме пользователя затраченное на чтение и поиск
|
||||
* внутри GC ради данных пользователя. */
|
||||
uint32_t work_rtime_cpu;
|
||||
/** \brief Количество итераций поиска внутри GC при выделении страниц
|
||||
* ради данных пользователя. */
|
||||
uint32_t work_rsteps;
|
||||
/** \brief Количество запросов на выделение последовательностей страниц
|
||||
* ради данных пользователя. */
|
||||
uint32_t work_xpages;
|
||||
/** \brief Количество страничных промахов (page faults) внутри GC
|
||||
* при выделении и подготовки страниц для данных пользователя. */
|
||||
uint32_t work_majflt;
|
||||
|
||||
/** \brief Счетчик выполнения по медленному пути (slow path execution count)
|
||||
* GC для целей поддержки и обновления самой GC. */
|
||||
uint32_t self_counter;
|
||||
/** \brief Время "по настенным часам" затраченное на чтение и поиск внутри
|
||||
* GC для целей поддержки и обновления самой GC. */
|
||||
uint32_t self_rtime_monotonic;
|
||||
/** \brief Монотонное время по "настенным часам" затраченное на подготовку
|
||||
* страниц извлекаемых из GC для целей поддержки и обновления самой GC,
|
||||
* включая подкачку с диска. */
|
||||
uint32_t self_xtime_monotonic;
|
||||
/** \brief Время ЦПУ в режиме пользователя затраченное на чтение и поиск
|
||||
* внутри GC для целей поддержки и обновления самой GC. */
|
||||
uint32_t self_rtime_cpu;
|
||||
/** \brief Количество итераций поиска внутри GC при выделении страниц
|
||||
* для целей поддержки и обновления самой GC. */
|
||||
uint32_t self_rsteps;
|
||||
/** \brief Количество запросов на выделение последовательностей страниц
|
||||
* для самой GC. */
|
||||
uint32_t self_xpages;
|
||||
/** \brief Количество страничных промахов (page faults) внутри GC
|
||||
* при выделении и подготовки страниц для самой GC. */
|
||||
uint32_t self_majflt;
|
||||
} gc_prof;
|
||||
};
|
||||
#ifndef __cplusplus
|
||||
/** \ingroup c_statinfo */
|
||||
|
||||
105
crates/storage/libmdbx-rs/mdbx-sys/libmdbx/mdbx.h++
vendored
105
crates/storage/libmdbx-rs/mdbx-sys/libmdbx/mdbx.h++
vendored
@ -287,7 +287,7 @@ namespace mdbx {
|
||||
// To enable all kinds of compiler optimizations we use a byte-like type
|
||||
// that doesn't presume aliases for pointers as does the `char` type and its
|
||||
// derivatives/typedefs.
|
||||
// Please see todo4recovery://erased_by_github/libmdbx/issues/263
|
||||
// Please see https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/263
|
||||
// for reasoning of the use of `char8_t` type and switching to `__restrict__`.
|
||||
using byte = char8_t;
|
||||
#else
|
||||
@ -3177,6 +3177,7 @@ public:
|
||||
/// \brief Returns the minimal values size in bytes for specified values
|
||||
/// mode.
|
||||
static inline size_t value_min(value_mode) noexcept;
|
||||
|
||||
/// \brief Returns the maximal value size in bytes for specified page size
|
||||
/// and database flags.
|
||||
static inline size_t value_max(intptr_t pagesize, MDBX_db_flags_t flags);
|
||||
@ -3189,6 +3190,35 @@ public:
|
||||
/// \brief Returns the maximal value size in bytes for specified page size
|
||||
/// and values mode.
|
||||
static inline size_t value_max(const env &, value_mode);
|
||||
|
||||
/// \brief Returns maximal size of key-value pair to fit in a single page
|
||||
/// for specified page size and database flags.
|
||||
static inline size_t pairsize4page_max(intptr_t pagesize,
|
||||
MDBX_db_flags_t flags);
|
||||
/// \brief Returns maximal size of key-value pair to fit in a single page
|
||||
/// for specified page size and values mode.
|
||||
static inline size_t pairsize4page_max(intptr_t pagesize, value_mode);
|
||||
/// \brief Returns maximal size of key-value pair to fit in a single page
|
||||
/// for given environment and database flags.
|
||||
static inline size_t pairsize4page_max(const env &, MDBX_db_flags_t flags);
|
||||
/// \brief Returns maximal size of key-value pair to fit in a single page
|
||||
/// for given environment and values mode.
|
||||
static inline size_t pairsize4page_max(const env &, value_mode);
|
||||
|
||||
/// \brief Returns maximal data size in bytes to fit in a leaf-page or
|
||||
/// single overflow/large-page for specified page size and database flags.
|
||||
static inline size_t valsize4page_max(intptr_t pagesize,
|
||||
MDBX_db_flags_t flags);
|
||||
/// \brief Returns maximal data size in bytes to fit in a leaf-page or
|
||||
/// single overflow/large-page for specified page size and values mode.
|
||||
static inline size_t valsize4page_max(intptr_t pagesize, value_mode);
|
||||
/// \brief Returns maximal data size in bytes to fit in a leaf-page or
|
||||
/// single overflow/large-page for given environment and database flags.
|
||||
static inline size_t valsize4page_max(const env &, MDBX_db_flags_t flags);
|
||||
/// \brief Returns maximal data size in bytes to fit in a leaf-page or
|
||||
/// single overflow/large-page for given environment and values mode.
|
||||
static inline size_t valsize4page_max(const env &, value_mode);
|
||||
|
||||
/// \brief Returns the maximal write transaction size (i.e. limit for
|
||||
/// summary volume of dirty pages) in bytes for specified page size.
|
||||
static inline size_t transaction_size_max(intptr_t pagesize);
|
||||
@ -3875,12 +3905,31 @@ public:
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/// \brief Abandon all the operations of the transaction instead of saving
|
||||
/// them.
|
||||
/// \brief Abandon all the operations of the transaction
|
||||
/// instead of saving them.
|
||||
void abort();
|
||||
|
||||
/// \brief Commit all the operations of a transaction into the database.
|
||||
void commit();
|
||||
|
||||
using commit_latency = MDBX_commit_latency;
|
||||
|
||||
/// \brief Commit all the operations of a transaction into the database
|
||||
/// and collect latency information.
|
||||
void commit(commit_latency *);
|
||||
|
||||
/// \brief Commit all the operations of a transaction into the database
|
||||
/// and collect latency information.
|
||||
void commit(commit_latency &latency) { return commit(&latency); }
|
||||
|
||||
/// \brief Commit all the operations of a transaction into the database
|
||||
/// and return latency information.
|
||||
/// \returns latency information of commit stages.
|
||||
commit_latency commit_get_latency() {
|
||||
commit_latency result;
|
||||
commit(&result);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
/// \brief Unmanaged cursor.
|
||||
@ -4863,6 +4912,56 @@ inline size_t env::limits::value_max(const env &env, value_mode mode) {
|
||||
return value_max(env, MDBX_db_flags_t(mode));
|
||||
}
|
||||
|
||||
inline size_t env::limits::pairsize4page_max(intptr_t pagesize,
|
||||
MDBX_db_flags_t flags) {
|
||||
const intptr_t result = mdbx_limits_pairsize4page_max(pagesize, flags);
|
||||
if (result < 0)
|
||||
MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL);
|
||||
return static_cast<size_t>(result);
|
||||
}
|
||||
|
||||
inline size_t env::limits::pairsize4page_max(intptr_t pagesize,
|
||||
value_mode mode) {
|
||||
return pairsize4page_max(pagesize, MDBX_db_flags_t(mode));
|
||||
}
|
||||
|
||||
inline size_t env::limits::pairsize4page_max(const env &env,
|
||||
MDBX_db_flags_t flags) {
|
||||
const intptr_t result = mdbx_env_get_pairsize4page_max(env, flags);
|
||||
if (result < 0)
|
||||
MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL);
|
||||
return static_cast<size_t>(result);
|
||||
}
|
||||
|
||||
inline size_t env::limits::pairsize4page_max(const env &env, value_mode mode) {
|
||||
return pairsize4page_max(env, MDBX_db_flags_t(mode));
|
||||
}
|
||||
|
||||
inline size_t env::limits::valsize4page_max(intptr_t pagesize,
|
||||
MDBX_db_flags_t flags) {
|
||||
const intptr_t result = mdbx_limits_valsize4page_max(pagesize, flags);
|
||||
if (result < 0)
|
||||
MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL);
|
||||
return static_cast<size_t>(result);
|
||||
}
|
||||
|
||||
inline size_t env::limits::valsize4page_max(intptr_t pagesize,
|
||||
value_mode mode) {
|
||||
return valsize4page_max(pagesize, MDBX_db_flags_t(mode));
|
||||
}
|
||||
|
||||
inline size_t env::limits::valsize4page_max(const env &env,
|
||||
MDBX_db_flags_t flags) {
|
||||
const intptr_t result = mdbx_env_get_valsize4page_max(env, flags);
|
||||
if (result < 0)
|
||||
MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL);
|
||||
return static_cast<size_t>(result);
|
||||
}
|
||||
|
||||
inline size_t env::limits::valsize4page_max(const env &env, value_mode mode) {
|
||||
return valsize4page_max(env, MDBX_db_flags_t(mode));
|
||||
}
|
||||
|
||||
inline size_t env::limits::transaction_size_max(intptr_t pagesize) {
|
||||
const intptr_t result = mdbx_limits_txnsize_max(pagesize);
|
||||
if (result < 0)
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
* top-level directory of the distribution or, alternatively, at
|
||||
* <http://www.OpenLDAP.org/license.html>. */
|
||||
|
||||
#define MDBX_BUILD_SOURCERY 86a8d6c403a2023fc2df0ab38f71339b78e82f0aa786f480a1cb166c05497134_v0_12_1_0_gb36a07a5
|
||||
#define MDBX_BUILD_SOURCERY e17be563de6f6f85e208ded5aacc1387bc0addf6ce5540c99d0d15db2c3e8edd_v0_12_2_0_g9b062cf0
|
||||
#ifdef MDBX_CONFIG_H
|
||||
#include MDBX_CONFIG_H
|
||||
#endif
|
||||
@ -149,7 +149,11 @@
|
||||
#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* __USE_MINGW_ANSI_STDIO */
|
||||
#endif /* MinGW */
|
||||
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && !defined(UNICODE)
|
||||
#define UNICODE
|
||||
#endif /* UNICODE */
|
||||
|
||||
#include "mdbx.h"
|
||||
/*
|
||||
@ -216,7 +220,7 @@
|
||||
#define SSIZE_MAX INTPTR_MAX
|
||||
#endif
|
||||
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul || defined(_WIN64)
|
||||
#define MDBX_WORDBITS 64
|
||||
#else
|
||||
#define MDBX_WORDBITS 32
|
||||
@ -389,10 +393,6 @@ __extern_C key_t ftok(const char *, int);
|
||||
#elif _WIN32_WINNT < 0x0500
|
||||
#error At least 'Windows 2000' API is required for libmdbx.
|
||||
#endif /* _WIN32_WINNT */
|
||||
#if (defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* MinGW */
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif /* WIN32_LEAN_AND_MEAN */
|
||||
@ -416,8 +416,10 @@ __extern_C key_t ftok(const char *, int);
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#endif /*---------------------------------------------------------------------*/
|
||||
@ -1169,9 +1171,6 @@ static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
|
||||
#define vsnprintf _vsnprintf /* ntdll */
|
||||
#endif
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#else /*----------------------------------------------------------------------*/
|
||||
|
||||
typedef pthread_t osal_thread_t;
|
||||
@ -1202,18 +1201,16 @@ typedef pthread_mutex_t osal_fastmutex_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* OS abstraction layer stuff */
|
||||
|
||||
MDBX_INTERNAL_VAR unsigned sys_pagesize;
|
||||
MDBX_MAYBE_UNUSED MDBX_INTERNAL_VAR unsigned sys_allocation_granularity;
|
||||
|
||||
/* Get the size of a memory page for the system.
|
||||
* This is the basic size that the platform's memory manager uses, and is
|
||||
* fundamental to the use of memory-mapped files. */
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t
|
||||
osal_syspagesize(void) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
SYSTEM_INFO si;
|
||||
GetSystemInfo(&si);
|
||||
return si.dwPageSize;
|
||||
#else
|
||||
return sysconf(_SC_PAGE_SIZE);
|
||||
#endif
|
||||
assert(sys_pagesize > 0 && (sys_pagesize & (sys_pagesize - 1)) == 0);
|
||||
return sys_pagesize;
|
||||
}
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@ -1252,8 +1249,140 @@ typedef union osal_srwlock {
|
||||
} osal_srwlock_t;
|
||||
#endif /* Windows */
|
||||
|
||||
#ifndef MDBX_HAVE_PWRITEV
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
|
||||
#elif defined(__ANDROID_API__)
|
||||
|
||||
#if __ANDROID_API__ < 24
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#endif
|
||||
|
||||
#elif defined(__APPLE__) || defined(__MACH__) || defined(_DARWIN_C_SOURCE)
|
||||
|
||||
#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_VERSION_11_0) && \
|
||||
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
|
||||
/* FIXME: add checks for IOS versions, etc */
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
|
||||
#elif defined(_SC_IOV_MAX) || (defined(IOV_MAX) && IOV_MAX > 1)
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
|
||||
typedef struct ior_item {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
OVERLAPPED ov;
|
||||
#define ior_svg_gap4terminator 1
|
||||
#define ior_sgv_element FILE_SEGMENT_ELEMENT
|
||||
#else
|
||||
size_t offset;
|
||||
#if MDBX_HAVE_PWRITEV
|
||||
size_t sgvcnt;
|
||||
#define ior_svg_gap4terminator 0
|
||||
#define ior_sgv_element struct iovec
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
#endif /* !Windows */
|
||||
union {
|
||||
MDBX_val single;
|
||||
#if defined(ior_sgv_element)
|
||||
ior_sgv_element sgv[1 + ior_svg_gap4terminator];
|
||||
#endif /* ior_sgv_element */
|
||||
};
|
||||
} ior_item_t;
|
||||
|
||||
typedef struct osal_ioring {
|
||||
unsigned slots_left;
|
||||
unsigned allocated;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define IOR_DIRECT 1
|
||||
#define IOR_OVERLAPPED 2
|
||||
#define IOR_STATE_LOCKED 1
|
||||
unsigned pagesize;
|
||||
unsigned last_sgvcnt;
|
||||
size_t last_bytes;
|
||||
uint8_t flags, state, pagesize_ln2;
|
||||
unsigned event_stack;
|
||||
HANDLE *event_pool;
|
||||
volatile LONG async_waiting;
|
||||
volatile LONG async_completed;
|
||||
HANDLE async_done;
|
||||
|
||||
#define ior_last_sgvcnt(ior, item) (ior)->last_sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#elif MDBX_HAVE_PWRITEV
|
||||
unsigned last_bytes;
|
||||
#define ior_last_sgvcnt(ior, item) (item)->sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#else
|
||||
#define ior_last_sgvcnt(ior, item) (1)
|
||||
#define ior_last_bytes(ior, item) (item)->single.iov_len
|
||||
#endif /* !Windows */
|
||||
mdbx_filehandle_t fd;
|
||||
ior_item_t *last;
|
||||
ior_item_t *pool;
|
||||
char *boundary;
|
||||
} osal_ioring_t;
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
/* Actually this is not ioring for now, but on the way. */
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
uint8_t flags,
|
||||
#endif /* Windows */
|
||||
mdbx_filehandle_t fd);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *, size_t items);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_destroy(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_reset(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ctx, const size_t offset,
|
||||
void *data, const size_t bytes);
|
||||
typedef struct osal_ioring_write_result {
|
||||
int err;
|
||||
unsigned wops;
|
||||
} osal_ioring_write_result_t;
|
||||
MDBX_INTERNAL_FUNC osal_ioring_write_result_t
|
||||
osal_ioring_write(osal_ioring_t *ior);
|
||||
|
||||
typedef struct iov_ctx iov_ctx_t;
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_walk(
|
||||
osal_ioring_t *ior, iov_ctx_t *ctx,
|
||||
void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes));
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_left(const osal_ioring_t *ior) {
|
||||
return ior->slots_left;
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_used(const osal_ioring_t *ior) {
|
||||
return ior->allocated - ior->slots_left;
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline int
|
||||
osal_ioring_reserve(osal_ioring_t *ior, size_t items, size_t bytes) {
|
||||
items = (items > 32) ? items : 32;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
const size_t npages = bytes >> ior->pagesize_ln2;
|
||||
items = (items > npages) ? items : npages;
|
||||
#else
|
||||
(void)bytes;
|
||||
#endif
|
||||
items = (items < 65536) ? items : 65536;
|
||||
if (likely(ior->allocated >= items))
|
||||
return MDBX_SUCCESS;
|
||||
return osal_ioring_resize(ior, items);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* libc compatibility stuff */
|
||||
|
||||
@ -1279,10 +1408,53 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny);
|
||||
MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny);
|
||||
|
||||
/* max bytes to write in one call */
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x01000000)
|
||||
#if defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x10000000)
|
||||
#elif defined(_WIN32)
|
||||
#define MAX_WRITE UINT32_C(0x04000000)
|
||||
#else
|
||||
#define MAX_WRITE UINT32_C(0x3fff0000)
|
||||
#define MAX_WRITE UINT32_C(0x3f000000)
|
||||
|
||||
#if defined(F_GETLK64) && defined(F_SETLK64) && defined(F_SETLKW64) && \
|
||||
!defined(__ANDROID_API__)
|
||||
#define MDBX_F_SETLK F_SETLK64
|
||||
#define MDBX_F_SETLKW F_SETLKW64
|
||||
#define MDBX_F_GETLK F_GETLK64
|
||||
#if (__GLIBC_PREREQ(2, 28) && \
|
||||
(defined(__USE_LARGEFILE64) || defined(__LARGEFILE64_SOURCE) || \
|
||||
defined(_USE_LARGEFILE64) || defined(_LARGEFILE64_SOURCE))) || \
|
||||
defined(fcntl64)
|
||||
#define MDBX_FCNTL fcntl64
|
||||
#else
|
||||
#define MDBX_FCNTL fcntl
|
||||
#endif
|
||||
#define MDBX_STRUCT_FLOCK struct flock64
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX UINT64_C(0x7fffFFFFfff00000)
|
||||
#endif /* OFF_T_MAX */
|
||||
#else
|
||||
#define MDBX_F_SETLK F_SETLK
|
||||
#define MDBX_F_SETLKW F_SETLKW
|
||||
#define MDBX_F_GETLK F_GETLK
|
||||
#define MDBX_FCNTL fcntl
|
||||
#define MDBX_STRUCT_FLOCK struct flock
|
||||
#endif /* MDBX_F_SETLK, MDBX_F_SETLKW, MDBX_F_GETLK */
|
||||
|
||||
#if defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64) && !defined(__ANDROID_API__)
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK64
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW64
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK64
|
||||
#else
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX \
|
||||
(((sizeof(off_t) > 4) ? INT64_MAX : INT32_MAX) & ~(size_t)0xFffff)
|
||||
#endif /* OFF_T_MAX */
|
||||
#endif /* MDBX_F_OFD_SETLK64, MDBX_F_OFD_SETLKW64, MDBX_F_OFD_GETLK64 */
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
@ -1325,8 +1497,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex);
|
||||
MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex);
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov,
|
||||
int iovcnt, uint64_t offset,
|
||||
size_t expected_written);
|
||||
size_t sgvcnt, uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count,
|
||||
uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf,
|
||||
@ -1354,12 +1525,16 @@ MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
|
||||
MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
|
||||
|
||||
enum osal_openfile_purpose {
|
||||
MDBX_OPEN_DXB_READ = 0,
|
||||
MDBX_OPEN_DXB_LAZY = 1,
|
||||
MDBX_OPEN_DXB_DSYNC = 2,
|
||||
MDBX_OPEN_LCK = 3,
|
||||
MDBX_OPEN_COPY = 4,
|
||||
MDBX_OPEN_DELETE = 5
|
||||
MDBX_OPEN_DXB_READ,
|
||||
MDBX_OPEN_DXB_LAZY,
|
||||
MDBX_OPEN_DXB_DSYNC,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
MDBX_OPEN_DXB_OVERLAPPED,
|
||||
MDBX_OPEN_DXB_OVERLAPPED_DIRECT,
|
||||
#endif /* Windows */
|
||||
MDBX_OPEN_LCK,
|
||||
MDBX_OPEN_COPY,
|
||||
MDBX_OPEN_DELETE
|
||||
};
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
|
||||
@ -1393,7 +1568,7 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
|
||||
MDBX_INTERNAL_FUNC int
|
||||
osal_resume_threads_after_remap(mdbx_handle_array_t *array);
|
||||
#endif /* Windows */
|
||||
MDBX_INTERNAL_FUNC int osal_msync(osal_mmap_t *map, size_t offset,
|
||||
MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
||||
size_t length,
|
||||
enum osal_syncmode_bits mode_bits);
|
||||
MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle,
|
||||
@ -1436,9 +1611,16 @@ osal_pthread_mutex_lock(pthread_mutex_t *mutex) {
|
||||
#endif /* !Windows */
|
||||
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_monotime(void);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_cputime(size_t *optional_page_faults);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16);
|
||||
MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime);
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline uint32_t
|
||||
osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) {
|
||||
uint32_t seconds_16dot16 = osal_monotime_to_16dot16(monotime);
|
||||
return seconds_16dot16 ? seconds_16dot16 : /* fix underflow */ (monotime > 0);
|
||||
}
|
||||
|
||||
MDBX_INTERNAL_FUNC bin128_t osal_bootid(void);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* lck stuff */
|
||||
@ -1548,6 +1730,9 @@ MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid);
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#define OSAL_MB2WIDE(FROM, TO) \
|
||||
do { \
|
||||
const char *const from_tmp = (FROM); \
|
||||
@ -1681,6 +1866,11 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
|
||||
|
||||
NTSYSAPI ULONG RtlRandomEx(PULONG Seed);
|
||||
|
||||
typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle,
|
||||
PUCHAR OverlappedRangeStart,
|
||||
ULONG Length);
|
||||
MDBX_INTERNAL_VAR MDBX_SetFileIoOverlappedRange mdbx_SetFileIoOverlappedRange;
|
||||
|
||||
#endif /* Windows */
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -1795,6 +1985,13 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_REFUND must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
|
||||
/** Controls profiling of GC search and updates. */
|
||||
#ifndef MDBX_ENABLE_PROFGC
|
||||
#define MDBX_ENABLE_PROFGC 0
|
||||
#elif !(MDBX_ENABLE_PROFGC == 0 || MDBX_ENABLE_PROFGC == 1)
|
||||
#error MDBX_ENABLE_PROFGC must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_PROFGC */
|
||||
|
||||
/** Controls gathering statistics for page operations. */
|
||||
#ifndef MDBX_ENABLE_PGOP_STAT
|
||||
#define MDBX_ENABLE_PGOP_STAT 1
|
||||
@ -1814,7 +2011,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_BIGFOOT */
|
||||
|
||||
/** Controls use of POSIX madvise() hints and friends. */
|
||||
/** Controls using of POSIX' madvise() and/or similar hints. */
|
||||
#ifndef MDBX_ENABLE_MADVISE
|
||||
#define MDBX_ENABLE_MADVISE 1
|
||||
#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1)
|
||||
@ -1843,23 +2040,22 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
|
||||
#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
|
||||
|
||||
/** Basically, this build-option is for TODO. Guess it should be replaced
|
||||
* with MDBX_ENABLE_WRITEMAP_SPILLING with the three variants:
|
||||
* 0/OFF = Don't track dirty pages at all and don't spilling ones.
|
||||
* This should be by-default on Linux and may-be other systems
|
||||
* (not sure: Darwin/OSX, FreeBSD, Windows 10) where kernel provides
|
||||
* properly LRU tracking and async writing on-demand.
|
||||
* 1/ON = Lite tracking of dirty pages but with LRU labels and explicit
|
||||
* spilling with msync(MS_ASYNC). */
|
||||
#ifndef MDBX_FAKE_SPILL_WRITEMAP
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 1 /* msync(MS_ASYNC) is no-op on Linux */
|
||||
/** Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP
|
||||
* mode. 0/OFF = Don't track dirty pages at all, don't spill ones, and use
|
||||
* msync() to persist data. This is by-default on Linux and other systems where
|
||||
* kernel provides proper LRU tracking and effective flushing on-demand. 1/ON
|
||||
* = Tracking of dirty pages but with LRU labels for spilling and explicit
|
||||
* persist ones by write(). This may be reasonable for systems with low
|
||||
* performance of msync() and/or LRU tracking. */
|
||||
#ifndef MDBX_AVOID_MSYNC
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MDBX_AVOID_MSYNC 1
|
||||
#else
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 0
|
||||
#define MDBX_AVOID_MSYNC 0
|
||||
#endif
|
||||
#elif !(MDBX_FAKE_SPILL_WRITEMAP == 0 || MDBX_FAKE_SPILL_WRITEMAP == 1)
|
||||
#error MDBX_FAKE_SPILL_WRITEMAP must be defined as 0 or 1
|
||||
#endif /* MDBX_FAKE_SPILL_WRITEMAP */
|
||||
#elif !(MDBX_AVOID_MSYNC == 0 || MDBX_AVOID_MSYNC == 1)
|
||||
#error MDBX_AVOID_MSYNC must be defined as 0 or 1
|
||||
#endif /* MDBX_AVOID_MSYNC */
|
||||
|
||||
/** Controls sort order of internal page number lists.
|
||||
* This is a mostly experimental/advanced option, not intended for regular MDBX users.
|
||||
@ -1916,6 +2112,27 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#ifndef MDBX_HAVE_C11ATOMICS
|
||||
#endif /* MDBX_HAVE_C11ATOMICS */
|
||||
|
||||
/** If defined then enables use of the GCC's `__builtin_cpu_supports()`
|
||||
* for runtime dispatching depending on the CPU's capabilities. */
|
||||
#ifndef MDBX_HAVE_BUILTIN_CPU_SUPPORTS
|
||||
#if defined(__APPLE__) || defined(BIONIC)
|
||||
/* Never use any modern features on Apple's or Google's OSes
|
||||
* since a lot of troubles with compatibility and/or performance */
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif defined(__e2k__)
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif __has_builtin(__builtin_cpu_supports) || \
|
||||
defined(__BUILTIN_CPU_SUPPORTS__) || \
|
||||
(defined(__ia32__) && __GNUC_PREREQ(4, 8) && __GLIBC_PREREQ(2, 23))
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 1
|
||||
#else
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#endif
|
||||
#elif !(MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 0 || \
|
||||
MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 1)
|
||||
#error MDBX_HAVE_BUILTIN_CPU_SUPPORTS must be defined as 0 or 1
|
||||
#endif /* MDBX_HAVE_BUILTIN_CPU_SUPPORTS */
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/** Win32 File Locking API for \ref MDBX_LOCKING */
|
||||
@ -1971,7 +2188,10 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
|
||||
/** Advanced: Using POSIX OFD-locks (autodetection by default). */
|
||||
#ifndef MDBX_USE_OFDLOCKS
|
||||
#if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK) && \
|
||||
#if ((defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && \
|
||||
defined(F_OFD_GETLK)) || \
|
||||
(defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64))) && \
|
||||
!defined(MDBX_SAFE4QEMU) && \
|
||||
!defined(__sun) /* OFD-lock are broken on Solaris */
|
||||
#define MDBX_USE_OFDLOCKS 1
|
||||
@ -2057,13 +2277,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#endif /* MDBX_64BIT_ATOMIC */
|
||||
|
||||
#ifndef MDBX_64BIT_CAS
|
||||
#if defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if __GCC_ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2075,6 +2289,12 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN)
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2309,7 +2529,7 @@ MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(
|
||||
/* FROZEN: The version number for a database's datafile format. */
|
||||
#define MDBX_DATA_VERSION 3
|
||||
/* The version number for a database's lockfile format. */
|
||||
#define MDBX_LOCK_VERSION 4
|
||||
#define MDBX_LOCK_VERSION 5
|
||||
|
||||
/* handle for the DB used to track free pages. */
|
||||
#define FREE_DBI 0
|
||||
@ -2513,14 +2733,34 @@ typedef struct MDBX_page {
|
||||
: PAGETYPE_WHOLE(p))
|
||||
|
||||
/* Size of the page header, excluding dynamic data at the end */
|
||||
#define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_ptrs))
|
||||
#define PAGEHDRSZ offsetof(MDBX_page, mp_ptrs)
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
typedef struct profgc_stat {
|
||||
/* Монотонное время по "настенным часам"
|
||||
* затраченное на чтение и поиск внутри GC */
|
||||
uint64_t rtime_monotonic;
|
||||
/* Монотонное время по "настенным часам" затраченное
|
||||
* на подготовку страниц извлекаемых из GC, включая подкачку с диска. */
|
||||
uint64_t xtime_monotonic;
|
||||
/* Процессорное время в режиме пользователя
|
||||
* затраченное на чтение и поиск внутри GC */
|
||||
uint64_t rtime_cpu;
|
||||
/* Количество итераций чтения-поиска внутри GC при выделении страниц */
|
||||
uint32_t rsteps;
|
||||
/* Количество запросов на выделение последовательностей страниц,
|
||||
* т.е. когда запрашивает выделение больше одной страницы */
|
||||
uint32_t xpages;
|
||||
/* Счетчик выполнения по медленному пути (slow path execution count) */
|
||||
uint32_t spe_counter;
|
||||
/* page faults (hard page faults) */
|
||||
uint32_t majflt;
|
||||
} profgc_stat_t;
|
||||
|
||||
/* Statistics of page operations overall of all (running, completed and aborted)
|
||||
* transactions */
|
||||
typedef struct {
|
||||
typedef struct pgop_stat {
|
||||
MDBX_atomic_uint64_t newly; /* Quantity of a new pages added */
|
||||
MDBX_atomic_uint64_t cow; /* Quantity of pages copied for update */
|
||||
MDBX_atomic_uint64_t clone; /* Quantity of parent's dirty pages clones
|
||||
@ -2532,10 +2772,31 @@ typedef struct {
|
||||
MDBX_atomic_uint64_t
|
||||
wops; /* Number of explicit write operations (not a pages) to a disk */
|
||||
MDBX_atomic_uint64_t
|
||||
gcrtime; /* Time spending for reading/searching GC (aka FreeDB). The
|
||||
unit/scale is platform-depended, see osal_monotime(). */
|
||||
} MDBX_pgop_stat_t;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||
msync; /* Number of explicit msync/flush-to-disk operations */
|
||||
MDBX_atomic_uint64_t
|
||||
fsync; /* Number of explicit fsync/flush-to-disk operations */
|
||||
|
||||
/* Статистика для профилирования GC.
|
||||
* Логически эти данные может быть стоит вынести в другую структуру,
|
||||
* но разница будет сугубо косметическая. */
|
||||
struct {
|
||||
/* Затраты на поддержку данных пользователя */
|
||||
profgc_stat_t work;
|
||||
/* Затраты на поддержку и обновления самой GC */
|
||||
profgc_stat_t self;
|
||||
/* Итераций обновления GC,
|
||||
* больше 1 если были повторы/перезапуски */
|
||||
uint32_t wloops;
|
||||
/* Итерации слияния записей GC */
|
||||
uint32_t coalescences;
|
||||
/* Уничтожения steady-точек фиксации в MDBX_UTTERLY_NOSYNC */
|
||||
uint32_t wipes;
|
||||
/* Сбросы данных на диск вне MDBX_UTTERLY_NOSYNC */
|
||||
uint32_t flushes;
|
||||
/* Попытки пнуть тормозящих читателей */
|
||||
uint32_t kicks;
|
||||
} gc_prof;
|
||||
} pgop_stat_t;
|
||||
|
||||
#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES
|
||||
#define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
|
||||
@ -2666,13 +2927,16 @@ typedef struct MDBX_lockinfo {
|
||||
/* Marker to distinguish uniqueness of DB/CLK. */
|
||||
MDBX_atomic_uint64_t mti_bait_uniqueness;
|
||||
|
||||
/* Paired counter of processes that have mlock()ed part of mmapped DB.
|
||||
* The (mti_mlcnt[0] - mti_mlcnt[1]) > 0 means at least one process
|
||||
* has locked at least one page, and therefore madvise() could return EINVAL. */
|
||||
MDBX_atomic_uint32_t mti_mlcnt[2];
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* Statistics of costly ops of all (running, completed and aborted)
|
||||
* transactions */
|
||||
MDBX_pgop_stat_t mti_pgop_stat;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT*/
|
||||
pgop_stat_t mti_pgop_stat;
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
@ -2683,20 +2947,20 @@ typedef struct MDBX_lockinfo {
|
||||
|
||||
atomic_txnid_t mti_oldest_reader;
|
||||
|
||||
/* Timestamp of the last steady sync. Value is represented in a suitable
|
||||
* system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) or
|
||||
* clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_sync_timestamp;
|
||||
/* Timestamp of entering an out-of-sync state. Value is represented in a
|
||||
* suitable system-dependent form, for example clock_gettime(CLOCK_BOOTTIME)
|
||||
* or clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_eoos_timestamp;
|
||||
|
||||
/* Number of un-synced-with-disk pages for auto-sync feature. */
|
||||
atomic_pgno_t mti_unsynced_pages;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(MADV_FREE). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
MDBX_atomic_uint64_t mti_unsynced_pages;
|
||||
|
||||
/* Timestamp of the last readers check. */
|
||||
MDBX_atomic_uint64_t mti_reader_check_timestamp;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(DONTNEED). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
|
||||
/* Shared anchor for tracking readahead edge and enabled/disabled status. */
|
||||
pgno_t mti_readahead_anchor;
|
||||
|
||||
@ -2799,7 +3063,7 @@ typedef struct MDBX_dp {
|
||||
MDBX_page *ptr;
|
||||
pgno_t pgno;
|
||||
union {
|
||||
unsigned extra;
|
||||
uint32_t extra;
|
||||
__anonymous_struct_extension__ struct {
|
||||
unsigned multi : 1;
|
||||
unsigned lru : 31;
|
||||
@ -2809,10 +3073,10 @@ typedef struct MDBX_dp {
|
||||
|
||||
/* A DPL (dirty-page list) is a sorted array of MDBX_DPs. */
|
||||
typedef struct MDBX_dpl {
|
||||
unsigned sorted;
|
||||
unsigned length;
|
||||
unsigned pages_including_loose; /* number of pages, but not an entries. */
|
||||
unsigned detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
size_t sorted;
|
||||
size_t length;
|
||||
size_t pages_including_loose; /* number of pages, but not an entries. */
|
||||
size_t detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
||||
(!defined(__cplusplus) && defined(_MSC_VER))
|
||||
MDBX_dp items[] /* dynamic size with holes at zero and after the last */;
|
||||
@ -2831,11 +3095,17 @@ typedef struct MDBX_dpl {
|
||||
((1u << 17) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
|
||||
|
||||
#define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1])
|
||||
#define MDBX_PNL_SIZE(pl) ((pl)[0])
|
||||
#define MDBX_PNL_GETSIZE(pl) ((size_t)((pl)[0]))
|
||||
#define MDBX_PNL_SETSIZE(pl, size) \
|
||||
do { \
|
||||
const size_t __size = size; \
|
||||
assert(__size < INT_MAX); \
|
||||
(pl)[0] = (pgno_t)__size; \
|
||||
} while (0)
|
||||
#define MDBX_PNL_FIRST(pl) ((pl)[1])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_SIZE(pl)])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)])
|
||||
#define MDBX_PNL_BEGIN(pl) (&(pl)[1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_SIZE(pl) + 1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_GETSIZE(pl) + 1])
|
||||
|
||||
#if MDBX_PNL_ASCENDING
|
||||
#define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl)
|
||||
@ -2845,8 +3115,8 @@ typedef struct MDBX_dpl {
|
||||
#define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl)
|
||||
#endif
|
||||
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_SIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_SIZE(pl) == 0)
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0)
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Internal structures */
|
||||
@ -2865,6 +3135,9 @@ typedef struct MDBX_dbx {
|
||||
|
||||
typedef struct troika {
|
||||
uint8_t fsm, recent, prefer_steady, tail_and_flags;
|
||||
#if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */
|
||||
uint32_t unused_pad;
|
||||
#endif
|
||||
#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7)
|
||||
#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64)
|
||||
#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128)
|
||||
@ -2886,9 +3159,13 @@ struct MDBX_txn {
|
||||
/* Additional flag for sync_locked() */
|
||||
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
|
||||
|
||||
#define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
|
||||
#define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not altered */
|
||||
|
||||
#define TXN_FLAGS \
|
||||
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_UPDATE_GC | \
|
||||
MDBX_TXN_FROZEN_RE)
|
||||
|
||||
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
|
||||
((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
|
||||
@ -2947,18 +3224,18 @@ struct MDBX_txn {
|
||||
struct {
|
||||
meta_troika_t troika;
|
||||
/* In write txns, array of cursors for each DB */
|
||||
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
pgno_t *relist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
#if MDBX_ENABLE_REFUND
|
||||
pgno_t loose_refund_wl /* FIXME: describe */;
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
/* a sequence to spilling dirty page with LRU policy */
|
||||
unsigned dirtylru;
|
||||
/* dirtylist room: Dirty array size - dirty pages visible to this txn.
|
||||
* Includes ancestor txns' dirty pages not hidden by other txns'
|
||||
* dirty/spilled pages. Thus commit(nested txn) has room to merge
|
||||
* dirtylist into mt_parent after freeing hidden mt_parent pages. */
|
||||
unsigned dirtyroom;
|
||||
/* a sequence to spilling dirty page with LRU policy */
|
||||
unsigned dirtylru;
|
||||
size_t dirtyroom;
|
||||
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
|
||||
MDBX_dpl *dirtylist;
|
||||
/* The list of reclaimed txns from GC */
|
||||
@ -2969,8 +3246,8 @@ struct MDBX_txn {
|
||||
* in this transaction, linked through `mp_next`. */
|
||||
MDBX_page *loose_pages;
|
||||
/* Number of loose pages (tw.loose_pages) */
|
||||
unsigned loose_count;
|
||||
unsigned spill_least_removed;
|
||||
size_t loose_count;
|
||||
size_t spill_least_removed;
|
||||
/* The sorted list of dirty pages we temporarily wrote to disk
|
||||
* because the dirty list was full. page numbers in here are
|
||||
* shifted left by 1, deleted slots have the LSB set. */
|
||||
@ -3024,9 +3301,7 @@ struct MDBX_cursor {
|
||||
#define C_SUB 0x04 /* Cursor is a sub-cursor */
|
||||
#define C_DEL 0x08 /* last op was a cursor_del */
|
||||
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
|
||||
#define C_RECLAIMING 0x20 /* GC lookup is prohibited */
|
||||
#define C_GCFREEZE 0x40 /* reclaimed_pglist must not be updated */
|
||||
uint8_t mc_flags; /* see mdbx_cursor */
|
||||
uint8_t mc_flags;
|
||||
|
||||
/* Cursor checking flags. */
|
||||
#define CC_BRANCH 0x01 /* same as P_BRANCH for CHECK_LEAF_TYPE() */
|
||||
@ -3037,7 +3312,7 @@ struct MDBX_cursor {
|
||||
#define CC_LEAF2 0x20 /* same as P_LEAF2 for CHECK_LEAF_TYPE() */
|
||||
#define CC_RETIRING 0x40 /* refs to child pages may be invalid */
|
||||
#define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */
|
||||
uint8_t mc_checking; /* page checking level */
|
||||
uint8_t mc_checking;
|
||||
|
||||
MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */
|
||||
indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */
|
||||
@ -3086,14 +3361,20 @@ struct MDBX_env {
|
||||
osal_mmap_t me_dxb_mmap; /* The main data file */
|
||||
#define me_map me_dxb_mmap.dxb
|
||||
#define me_lazy_fd me_dxb_mmap.fd
|
||||
mdbx_filehandle_t me_dsync_fd;
|
||||
#define me_fd4data me_ioring.fd
|
||||
mdbx_filehandle_t me_dsync_fd, me_fd4meta;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
HANDLE me_overlapped_fd, me_data_lock_event;
|
||||
#endif /* Windows */
|
||||
osal_mmap_t me_lck_mmap; /* The lock file */
|
||||
#define me_lfd me_lck_mmap.fd
|
||||
struct MDBX_lockinfo *me_lck;
|
||||
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
unsigned me_branch_nodemax; /* max size of a branch-node */
|
||||
atomic_pgno_t me_mlocked_pgno;
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
|
||||
uint16_t me_merge_threshold,
|
||||
me_merge_threshold_gc; /* pages emptier than this are candidates for
|
||||
@ -3165,6 +3446,7 @@ struct MDBX_env {
|
||||
unsigned me_dp_reserve_len;
|
||||
/* PNL of pages that became unused in a write txn */
|
||||
MDBX_PNL me_retired_pages;
|
||||
osal_ioring_t me_ioring;
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
osal_srwlock_t me_remap_guard;
|
||||
@ -3190,7 +3472,7 @@ struct MDBX_env {
|
||||
#define xMDBX_DEBUG_SPILLING 0
|
||||
#endif
|
||||
#if xMDBX_DEBUG_SPILLING == 2
|
||||
unsigned debug_dirtied_est, debug_dirtied_act;
|
||||
size_t debug_dirtied_est, debug_dirtied_act;
|
||||
#endif /* xMDBX_DEBUG_SPILLING */
|
||||
|
||||
/* ------------------------------------------------- stub for lck-less mode */
|
||||
@ -3295,10 +3577,22 @@ MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line,
|
||||
#define FATAL(fmt, ...) \
|
||||
debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__);
|
||||
|
||||
#if MDBX_DEBUG
|
||||
#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line)
|
||||
#else /* MDBX_DEBUG */
|
||||
MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func,
|
||||
unsigned line);
|
||||
#define ASSERT_FAIL(env, msg, func, line) \
|
||||
do { \
|
||||
(void)(env); \
|
||||
assert_fail(msg, func, line); \
|
||||
} while (0)
|
||||
#endif /* MDBX_DEBUG */
|
||||
|
||||
#define ENSURE_MSG(env, expr, msg) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) \
|
||||
mdbx_assert_fail(env, msg, __func__, __LINE__); \
|
||||
ASSERT_FAIL(env, msg, __func__, __LINE__); \
|
||||
} while (0)
|
||||
|
||||
#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr)
|
||||
@ -3369,7 +3663,9 @@ MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin,
|
||||
MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key);
|
||||
|
||||
MDBX_INTERNAL_FUNC void global_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void global_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void thread_dtor(void *ptr);
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -3490,12 +3786,12 @@ typedef struct MDBX_node {
|
||||
#error "Oops, some flags overlapped or wrong"
|
||||
#endif
|
||||
|
||||
/* max number of pages to commit in one writev() call */
|
||||
#define MDBX_COMMIT_PAGES 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_COMMIT_PAGES /* sysconf(_SC_IOV_MAX) */
|
||||
#undef MDBX_COMMIT_PAGES
|
||||
#define MDBX_COMMIT_PAGES IOV_MAX
|
||||
#endif
|
||||
/* Max length of iov-vector passed to writev() call, used for auxiliary writes */
|
||||
#define MDBX_AUXILARY_IOV_MAX 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX
|
||||
#undef MDBX_AUXILARY_IOV_MAX
|
||||
#define MDBX_AUXILARY_IOV_MAX IOV_MAX
|
||||
#endif /* MDBX_AUXILARY_IOV_MAX */
|
||||
|
||||
/*
|
||||
* /
|
||||
@ -3552,20 +3848,24 @@ ceil_powerof2(size_t value, size_t granularity) {
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned
|
||||
log2n_powerof2(size_t value) {
|
||||
assert(value > 0 && value < INT32_MAX && is_powerof2(value));
|
||||
assert((value & -(int32_t)value) == value);
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl)
|
||||
return __builtin_ctzl(value);
|
||||
log2n_powerof2(size_t value_uintptr) {
|
||||
assert(value_uintptr > 0 && value_uintptr < INT32_MAX &&
|
||||
is_powerof2(value_uintptr));
|
||||
assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr);
|
||||
const uint32_t value_uint32 = (uint32_t)value_uintptr;
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctz)
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(unsigned));
|
||||
return __builtin_ctz(value_uint32);
|
||||
#elif defined(_MSC_VER)
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, (unsigned long)value);
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(long));
|
||||
_BitScanForward(&index, value_uint32);
|
||||
return index;
|
||||
#else
|
||||
static const uint8_t debruijn_ctz32[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
|
||||
return debruijn_ctz32[(uint32_t)(value * 0x077CB531u) >> 27];
|
||||
return debruijn_ctz32[(uint32_t)(value_uint32 * 0x077CB531ul) >> 27];
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -4605,21 +4905,24 @@ bailout:
|
||||
}
|
||||
|
||||
static void usage(char *prog) {
|
||||
fprintf(stderr,
|
||||
"usage: %s [-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s subdb] "
|
||||
"dbpath\n"
|
||||
" -V\t\tprint version and exit\n"
|
||||
" -v\t\tmore verbose, could be used multiple times\n"
|
||||
" -q\t\tbe quiet\n"
|
||||
" -c\t\tforce cooperative mode (don't try exclusive)\n"
|
||||
" -w\t\twrite-mode checking\n"
|
||||
" -d\t\tdisable page-by-page traversal of B-tree\n"
|
||||
" -i\t\tignore wrong order errors (for custom comparators case)\n"
|
||||
" -s subdb\tprocess a specific subdatabase only\n"
|
||||
" -0|1|2\tforce using specific meta-page 0, or 2 for checking\n"
|
||||
" -t\t\tturn to a specified meta-page on successful check\n"
|
||||
" -T\t\tturn to a specified meta-page EVEN ON UNSUCCESSFUL CHECK!\n",
|
||||
prog);
|
||||
fprintf(
|
||||
stderr,
|
||||
"usage: %s "
|
||||
"[-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s subdb] [-u|U] dbpath\n"
|
||||
" -V\t\tprint version and exit\n"
|
||||
" -v\t\tmore verbose, could be used multiple times\n"
|
||||
" -q\t\tbe quiet\n"
|
||||
" -c\t\tforce cooperative mode (don't try exclusive)\n"
|
||||
" -w\t\twrite-mode checking\n"
|
||||
" -d\t\tdisable page-by-page traversal of B-tree\n"
|
||||
" -i\t\tignore wrong order errors (for custom comparators case)\n"
|
||||
" -s subdb\tprocess a specific subdatabase only\n"
|
||||
" -u\t\twarmup database before checking\n"
|
||||
" -U\t\twarmup and try lock database pages in memory before checking\n"
|
||||
" -0|1|2\tforce using specific meta-page 0, or 2 for checking\n"
|
||||
" -t\t\tturn to a specified meta-page on successful check\n"
|
||||
" -T\t\tturn to a specified meta-page EVEN ON UNSUCCESSFUL CHECK!\n",
|
||||
prog);
|
||||
exit(EXIT_INTERRUPTED);
|
||||
}
|
||||
|
||||
@ -4758,6 +5061,8 @@ int main(int argc, char *argv[]) {
|
||||
bool write_locked = false;
|
||||
bool turn_meta = false;
|
||||
bool force_turn_meta = false;
|
||||
bool warmup = false;
|
||||
MDBX_warmup_flags_t warmup_flags = MDBX_warmup_default;
|
||||
|
||||
double elapsed;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@ -4781,6 +5086,7 @@ int main(int argc, char *argv[]) {
|
||||
usage(prog);
|
||||
|
||||
for (int i; (i = getopt(argc, argv,
|
||||
"uU"
|
||||
"0"
|
||||
"1"
|
||||
"2"
|
||||
@ -4840,7 +5146,7 @@ int main(int argc, char *argv[]) {
|
||||
envflags &= ~MDBX_RDONLY;
|
||||
#if MDBX_MMAP_INCOHERENT_FILE_WRITE
|
||||
/* Temporary `workaround` for OpenBSD kernel's flaw.
|
||||
* See todo4recovery://erased_by_github/libmdbx/issues/67 */
|
||||
* See https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/67 */
|
||||
envflags |= MDBX_WRITEMAP;
|
||||
#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */
|
||||
break;
|
||||
@ -4858,6 +5164,14 @@ int main(int argc, char *argv[]) {
|
||||
case 'i':
|
||||
ignore_wrong_order = true;
|
||||
break;
|
||||
case 'u':
|
||||
warmup = true;
|
||||
break;
|
||||
case 'U':
|
||||
warmup = true;
|
||||
warmup_flags =
|
||||
MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock;
|
||||
break;
|
||||
default:
|
||||
usage(prog);
|
||||
}
|
||||
@ -4959,14 +5273,35 @@ int main(int argc, char *argv[]) {
|
||||
(envflags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative");
|
||||
|
||||
if ((envflags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0) {
|
||||
if (verbose) {
|
||||
print(" - taking write lock...");
|
||||
fflush(nullptr);
|
||||
}
|
||||
rc = mdbx_txn_lock(env, false);
|
||||
if (rc != MDBX_SUCCESS) {
|
||||
error("mdbx_txn_lock() failed, error %d %s\n", rc, mdbx_strerror(rc));
|
||||
goto bailout;
|
||||
}
|
||||
if (verbose)
|
||||
print(" done\n");
|
||||
write_locked = true;
|
||||
}
|
||||
|
||||
if (warmup) {
|
||||
if (verbose) {
|
||||
print(" - warming up...");
|
||||
fflush(nullptr);
|
||||
}
|
||||
rc = mdbx_env_warmup(env, nullptr, warmup_flags, 3600 * 65536);
|
||||
if (MDBX_IS_ERROR(rc)) {
|
||||
error("mdbx_env_warmup(flags %u) failed, error %d %s\n", warmup_flags, rc,
|
||||
mdbx_strerror(rc));
|
||||
goto bailout;
|
||||
}
|
||||
if (verbose)
|
||||
print(" %s\n", rc ? "timeout" : "done");
|
||||
}
|
||||
|
||||
rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn);
|
||||
if (rc) {
|
||||
error("mdbx_txn_begin() failed, error %d %s\n", rc, mdbx_strerror(rc));
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
* top-level directory of the distribution or, alternatively, at
|
||||
* <http://www.OpenLDAP.org/license.html>. */
|
||||
|
||||
#define MDBX_BUILD_SOURCERY 86a8d6c403a2023fc2df0ab38f71339b78e82f0aa786f480a1cb166c05497134_v0_12_1_0_gb36a07a5
|
||||
#define MDBX_BUILD_SOURCERY e17be563de6f6f85e208ded5aacc1387bc0addf6ce5540c99d0d15db2c3e8edd_v0_12_2_0_g9b062cf0
|
||||
#ifdef MDBX_CONFIG_H
|
||||
#include MDBX_CONFIG_H
|
||||
#endif
|
||||
@ -149,7 +149,11 @@
|
||||
#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* __USE_MINGW_ANSI_STDIO */
|
||||
#endif /* MinGW */
|
||||
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && !defined(UNICODE)
|
||||
#define UNICODE
|
||||
#endif /* UNICODE */
|
||||
|
||||
#include "mdbx.h"
|
||||
/*
|
||||
@ -216,7 +220,7 @@
|
||||
#define SSIZE_MAX INTPTR_MAX
|
||||
#endif
|
||||
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul || defined(_WIN64)
|
||||
#define MDBX_WORDBITS 64
|
||||
#else
|
||||
#define MDBX_WORDBITS 32
|
||||
@ -389,10 +393,6 @@ __extern_C key_t ftok(const char *, int);
|
||||
#elif _WIN32_WINNT < 0x0500
|
||||
#error At least 'Windows 2000' API is required for libmdbx.
|
||||
#endif /* _WIN32_WINNT */
|
||||
#if (defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* MinGW */
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif /* WIN32_LEAN_AND_MEAN */
|
||||
@ -416,8 +416,10 @@ __extern_C key_t ftok(const char *, int);
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#endif /*---------------------------------------------------------------------*/
|
||||
@ -1169,9 +1171,6 @@ static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
|
||||
#define vsnprintf _vsnprintf /* ntdll */
|
||||
#endif
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#else /*----------------------------------------------------------------------*/
|
||||
|
||||
typedef pthread_t osal_thread_t;
|
||||
@ -1202,18 +1201,16 @@ typedef pthread_mutex_t osal_fastmutex_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* OS abstraction layer stuff */
|
||||
|
||||
MDBX_INTERNAL_VAR unsigned sys_pagesize;
|
||||
MDBX_MAYBE_UNUSED MDBX_INTERNAL_VAR unsigned sys_allocation_granularity;
|
||||
|
||||
/* Get the size of a memory page for the system.
|
||||
* This is the basic size that the platform's memory manager uses, and is
|
||||
* fundamental to the use of memory-mapped files. */
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t
|
||||
osal_syspagesize(void) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
SYSTEM_INFO si;
|
||||
GetSystemInfo(&si);
|
||||
return si.dwPageSize;
|
||||
#else
|
||||
return sysconf(_SC_PAGE_SIZE);
|
||||
#endif
|
||||
assert(sys_pagesize > 0 && (sys_pagesize & (sys_pagesize - 1)) == 0);
|
||||
return sys_pagesize;
|
||||
}
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@ -1252,8 +1249,140 @@ typedef union osal_srwlock {
|
||||
} osal_srwlock_t;
|
||||
#endif /* Windows */
|
||||
|
||||
#ifndef MDBX_HAVE_PWRITEV
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
|
||||
#elif defined(__ANDROID_API__)
|
||||
|
||||
#if __ANDROID_API__ < 24
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#endif
|
||||
|
||||
#elif defined(__APPLE__) || defined(__MACH__) || defined(_DARWIN_C_SOURCE)
|
||||
|
||||
#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_VERSION_11_0) && \
|
||||
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
|
||||
/* FIXME: add checks for IOS versions, etc */
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
|
||||
#elif defined(_SC_IOV_MAX) || (defined(IOV_MAX) && IOV_MAX > 1)
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
|
||||
typedef struct ior_item {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
OVERLAPPED ov;
|
||||
#define ior_svg_gap4terminator 1
|
||||
#define ior_sgv_element FILE_SEGMENT_ELEMENT
|
||||
#else
|
||||
size_t offset;
|
||||
#if MDBX_HAVE_PWRITEV
|
||||
size_t sgvcnt;
|
||||
#define ior_svg_gap4terminator 0
|
||||
#define ior_sgv_element struct iovec
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
#endif /* !Windows */
|
||||
union {
|
||||
MDBX_val single;
|
||||
#if defined(ior_sgv_element)
|
||||
ior_sgv_element sgv[1 + ior_svg_gap4terminator];
|
||||
#endif /* ior_sgv_element */
|
||||
};
|
||||
} ior_item_t;
|
||||
|
||||
typedef struct osal_ioring {
|
||||
unsigned slots_left;
|
||||
unsigned allocated;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define IOR_DIRECT 1
|
||||
#define IOR_OVERLAPPED 2
|
||||
#define IOR_STATE_LOCKED 1
|
||||
unsigned pagesize;
|
||||
unsigned last_sgvcnt;
|
||||
size_t last_bytes;
|
||||
uint8_t flags, state, pagesize_ln2;
|
||||
unsigned event_stack;
|
||||
HANDLE *event_pool;
|
||||
volatile LONG async_waiting;
|
||||
volatile LONG async_completed;
|
||||
HANDLE async_done;
|
||||
|
||||
#define ior_last_sgvcnt(ior, item) (ior)->last_sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#elif MDBX_HAVE_PWRITEV
|
||||
unsigned last_bytes;
|
||||
#define ior_last_sgvcnt(ior, item) (item)->sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#else
|
||||
#define ior_last_sgvcnt(ior, item) (1)
|
||||
#define ior_last_bytes(ior, item) (item)->single.iov_len
|
||||
#endif /* !Windows */
|
||||
mdbx_filehandle_t fd;
|
||||
ior_item_t *last;
|
||||
ior_item_t *pool;
|
||||
char *boundary;
|
||||
} osal_ioring_t;
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
/* Actually this is not ioring for now, but on the way. */
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
uint8_t flags,
|
||||
#endif /* Windows */
|
||||
mdbx_filehandle_t fd);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *, size_t items);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_destroy(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_reset(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ctx, const size_t offset,
|
||||
void *data, const size_t bytes);
|
||||
typedef struct osal_ioring_write_result {
|
||||
int err;
|
||||
unsigned wops;
|
||||
} osal_ioring_write_result_t;
|
||||
MDBX_INTERNAL_FUNC osal_ioring_write_result_t
|
||||
osal_ioring_write(osal_ioring_t *ior);
|
||||
|
||||
typedef struct iov_ctx iov_ctx_t;
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_walk(
|
||||
osal_ioring_t *ior, iov_ctx_t *ctx,
|
||||
void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes));
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_left(const osal_ioring_t *ior) {
|
||||
return ior->slots_left;
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_used(const osal_ioring_t *ior) {
|
||||
return ior->allocated - ior->slots_left;
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline int
|
||||
osal_ioring_reserve(osal_ioring_t *ior, size_t items, size_t bytes) {
|
||||
items = (items > 32) ? items : 32;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
const size_t npages = bytes >> ior->pagesize_ln2;
|
||||
items = (items > npages) ? items : npages;
|
||||
#else
|
||||
(void)bytes;
|
||||
#endif
|
||||
items = (items < 65536) ? items : 65536;
|
||||
if (likely(ior->allocated >= items))
|
||||
return MDBX_SUCCESS;
|
||||
return osal_ioring_resize(ior, items);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* libc compatibility stuff */
|
||||
|
||||
@ -1279,10 +1408,53 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny);
|
||||
MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny);
|
||||
|
||||
/* max bytes to write in one call */
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x01000000)
|
||||
#if defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x10000000)
|
||||
#elif defined(_WIN32)
|
||||
#define MAX_WRITE UINT32_C(0x04000000)
|
||||
#else
|
||||
#define MAX_WRITE UINT32_C(0x3fff0000)
|
||||
#define MAX_WRITE UINT32_C(0x3f000000)
|
||||
|
||||
#if defined(F_GETLK64) && defined(F_SETLK64) && defined(F_SETLKW64) && \
|
||||
!defined(__ANDROID_API__)
|
||||
#define MDBX_F_SETLK F_SETLK64
|
||||
#define MDBX_F_SETLKW F_SETLKW64
|
||||
#define MDBX_F_GETLK F_GETLK64
|
||||
#if (__GLIBC_PREREQ(2, 28) && \
|
||||
(defined(__USE_LARGEFILE64) || defined(__LARGEFILE64_SOURCE) || \
|
||||
defined(_USE_LARGEFILE64) || defined(_LARGEFILE64_SOURCE))) || \
|
||||
defined(fcntl64)
|
||||
#define MDBX_FCNTL fcntl64
|
||||
#else
|
||||
#define MDBX_FCNTL fcntl
|
||||
#endif
|
||||
#define MDBX_STRUCT_FLOCK struct flock64
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX UINT64_C(0x7fffFFFFfff00000)
|
||||
#endif /* OFF_T_MAX */
|
||||
#else
|
||||
#define MDBX_F_SETLK F_SETLK
|
||||
#define MDBX_F_SETLKW F_SETLKW
|
||||
#define MDBX_F_GETLK F_GETLK
|
||||
#define MDBX_FCNTL fcntl
|
||||
#define MDBX_STRUCT_FLOCK struct flock
|
||||
#endif /* MDBX_F_SETLK, MDBX_F_SETLKW, MDBX_F_GETLK */
|
||||
|
||||
#if defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64) && !defined(__ANDROID_API__)
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK64
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW64
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK64
|
||||
#else
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX \
|
||||
(((sizeof(off_t) > 4) ? INT64_MAX : INT32_MAX) & ~(size_t)0xFffff)
|
||||
#endif /* OFF_T_MAX */
|
||||
#endif /* MDBX_F_OFD_SETLK64, MDBX_F_OFD_SETLKW64, MDBX_F_OFD_GETLK64 */
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
@ -1325,8 +1497,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex);
|
||||
MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex);
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov,
|
||||
int iovcnt, uint64_t offset,
|
||||
size_t expected_written);
|
||||
size_t sgvcnt, uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count,
|
||||
uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf,
|
||||
@ -1354,12 +1525,16 @@ MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
|
||||
MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
|
||||
|
||||
enum osal_openfile_purpose {
|
||||
MDBX_OPEN_DXB_READ = 0,
|
||||
MDBX_OPEN_DXB_LAZY = 1,
|
||||
MDBX_OPEN_DXB_DSYNC = 2,
|
||||
MDBX_OPEN_LCK = 3,
|
||||
MDBX_OPEN_COPY = 4,
|
||||
MDBX_OPEN_DELETE = 5
|
||||
MDBX_OPEN_DXB_READ,
|
||||
MDBX_OPEN_DXB_LAZY,
|
||||
MDBX_OPEN_DXB_DSYNC,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
MDBX_OPEN_DXB_OVERLAPPED,
|
||||
MDBX_OPEN_DXB_OVERLAPPED_DIRECT,
|
||||
#endif /* Windows */
|
||||
MDBX_OPEN_LCK,
|
||||
MDBX_OPEN_COPY,
|
||||
MDBX_OPEN_DELETE
|
||||
};
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
|
||||
@ -1393,7 +1568,7 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
|
||||
MDBX_INTERNAL_FUNC int
|
||||
osal_resume_threads_after_remap(mdbx_handle_array_t *array);
|
||||
#endif /* Windows */
|
||||
MDBX_INTERNAL_FUNC int osal_msync(osal_mmap_t *map, size_t offset,
|
||||
MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
||||
size_t length,
|
||||
enum osal_syncmode_bits mode_bits);
|
||||
MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle,
|
||||
@ -1436,9 +1611,16 @@ osal_pthread_mutex_lock(pthread_mutex_t *mutex) {
|
||||
#endif /* !Windows */
|
||||
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_monotime(void);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_cputime(size_t *optional_page_faults);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16);
|
||||
MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime);
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline uint32_t
|
||||
osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) {
|
||||
uint32_t seconds_16dot16 = osal_monotime_to_16dot16(monotime);
|
||||
return seconds_16dot16 ? seconds_16dot16 : /* fix underflow */ (monotime > 0);
|
||||
}
|
||||
|
||||
MDBX_INTERNAL_FUNC bin128_t osal_bootid(void);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* lck stuff */
|
||||
@ -1548,6 +1730,9 @@ MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid);
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#define OSAL_MB2WIDE(FROM, TO) \
|
||||
do { \
|
||||
const char *const from_tmp = (FROM); \
|
||||
@ -1681,6 +1866,11 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
|
||||
|
||||
NTSYSAPI ULONG RtlRandomEx(PULONG Seed);
|
||||
|
||||
typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle,
|
||||
PUCHAR OverlappedRangeStart,
|
||||
ULONG Length);
|
||||
MDBX_INTERNAL_VAR MDBX_SetFileIoOverlappedRange mdbx_SetFileIoOverlappedRange;
|
||||
|
||||
#endif /* Windows */
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -1795,6 +1985,13 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_REFUND must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
|
||||
/** Controls profiling of GC search and updates. */
|
||||
#ifndef MDBX_ENABLE_PROFGC
|
||||
#define MDBX_ENABLE_PROFGC 0
|
||||
#elif !(MDBX_ENABLE_PROFGC == 0 || MDBX_ENABLE_PROFGC == 1)
|
||||
#error MDBX_ENABLE_PROFGC must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_PROFGC */
|
||||
|
||||
/** Controls gathering statistics for page operations. */
|
||||
#ifndef MDBX_ENABLE_PGOP_STAT
|
||||
#define MDBX_ENABLE_PGOP_STAT 1
|
||||
@ -1814,7 +2011,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_BIGFOOT */
|
||||
|
||||
/** Controls use of POSIX madvise() hints and friends. */
|
||||
/** Controls the use of POSIX madvise() and/or similar hints. */
|
||||
#ifndef MDBX_ENABLE_MADVISE
|
||||
#define MDBX_ENABLE_MADVISE 1
|
||||
#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1)
|
||||
@ -1843,23 +2040,22 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
|
||||
#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
|
||||
|
||||
/** Basically, this build-option is for TODO. Guess it should be replaced
|
||||
* with MDBX_ENABLE_WRITEMAP_SPILLING with the three variants:
|
||||
* 0/OFF = Don't track dirty pages at all and don't spilling ones.
|
||||
* This should be by-default on Linux and may-be other systems
|
||||
* (not sure: Darwin/OSX, FreeBSD, Windows 10) where kernel provides
|
||||
* properly LRU tracking and async writing on-demand.
|
||||
* 1/ON = Lite tracking of dirty pages but with LRU labels and explicit
|
||||
* spilling with msync(MS_ASYNC). */
|
||||
#ifndef MDBX_FAKE_SPILL_WRITEMAP
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 1 /* msync(MS_ASYNC) is no-op on Linux */
|
||||
/** Controls dirty page tracking, spilling and persisting in MDBX_WRITEMAP
|
||||
* mode. 0/OFF = Don't track dirty pages at all, don't spill them, and use
|
||||
* msync() to persist data. This is the default on Linux and other systems where
|
||||
* the kernel provides proper LRU tracking and effective on-demand flushing. 1/ON
|
||||
* = Track dirty pages with LRU labels for spilling and explicitly
|
||||
* persist them by write(). This may be reasonable for systems with low
|
||||
* performance of msync() and/or LRU tracking. */
|
||||
#ifndef MDBX_AVOID_MSYNC
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MDBX_AVOID_MSYNC 1
|
||||
#else
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 0
|
||||
#define MDBX_AVOID_MSYNC 0
|
||||
#endif
|
||||
#elif !(MDBX_FAKE_SPILL_WRITEMAP == 0 || MDBX_FAKE_SPILL_WRITEMAP == 1)
|
||||
#error MDBX_FAKE_SPILL_WRITEMAP must be defined as 0 or 1
|
||||
#endif /* MDBX_FAKE_SPILL_WRITEMAP */
|
||||
#elif !(MDBX_AVOID_MSYNC == 0 || MDBX_AVOID_MSYNC == 1)
|
||||
#error MDBX_AVOID_MSYNC must be defined as 0 or 1
|
||||
#endif /* MDBX_AVOID_MSYNC */
|
||||
|
||||
/** Controls sort order of internal page number lists.
|
||||
* This mostly experimental/advanced option with not for regular MDBX users.
|
||||
@ -1916,6 +2112,27 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#ifndef MDBX_HAVE_C11ATOMICS
|
||||
#endif /* MDBX_HAVE_C11ATOMICS */
|
||||
|
||||
/** If defined then enables use the GCC's `__builtin_cpu_supports()`
|
||||
* for runtime dispatching depending on the CPU's capabilities. */
|
||||
#ifndef MDBX_HAVE_BUILTIN_CPU_SUPPORTS
|
||||
#if defined(__APPLE__) || defined(BIONIC)
|
||||
/* Never use any modern features on Apple's or Google's OSes
|
||||
* since a lot of troubles with compatibility and/or performance */
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif defined(__e2k__)
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif __has_builtin(__builtin_cpu_supports) || \
|
||||
defined(__BUILTIN_CPU_SUPPORTS__) || \
|
||||
(defined(__ia32__) && __GNUC_PREREQ(4, 8) && __GLIBC_PREREQ(2, 23))
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 1
|
||||
#else
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#endif
|
||||
#elif !(MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 0 || \
|
||||
MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 1)
|
||||
#error MDBX_HAVE_BUILTIN_CPU_SUPPORTS must be defined as 0 or 1
|
||||
#endif /* MDBX_HAVE_BUILTIN_CPU_SUPPORTS */
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/** Win32 File Locking API for \ref MDBX_LOCKING */
|
||||
@ -1971,7 +2188,10 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
|
||||
/** Advanced: Using POSIX OFD-locks (autodetection by default). */
|
||||
#ifndef MDBX_USE_OFDLOCKS
|
||||
#if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK) && \
|
||||
#if ((defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && \
|
||||
defined(F_OFD_GETLK)) || \
|
||||
(defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64))) && \
|
||||
!defined(MDBX_SAFE4QEMU) && \
|
||||
!defined(__sun) /* OFD-lock are broken on Solaris */
|
||||
#define MDBX_USE_OFDLOCKS 1
|
||||
@ -2057,13 +2277,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#endif /* MDBX_64BIT_ATOMIC */
|
||||
|
||||
#ifndef MDBX_64BIT_CAS
|
||||
#if defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if __GCC_ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2075,6 +2289,12 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN)
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2309,7 +2529,7 @@ MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(
|
||||
/* FROZEN: The version number for a database's datafile format. */
|
||||
#define MDBX_DATA_VERSION 3
|
||||
/* The version number for a database's lockfile format. */
|
||||
#define MDBX_LOCK_VERSION 4
|
||||
#define MDBX_LOCK_VERSION 5
|
||||
|
||||
/* handle for the DB used to track free pages. */
|
||||
#define FREE_DBI 0
|
||||
@ -2513,14 +2733,34 @@ typedef struct MDBX_page {
|
||||
: PAGETYPE_WHOLE(p))
|
||||
|
||||
/* Size of the page header, excluding dynamic data at the end */
|
||||
#define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_ptrs))
|
||||
#define PAGEHDRSZ offsetof(MDBX_page, mp_ptrs)
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* Profiling counters for GC reading/searching and page allocation. */
typedef struct profgc_stat {
|
||||
/* Monotonic wall-clock time
|
||||
* spent on reading and searching inside the GC */
|
||||
uint64_t rtime_monotonic;
|
||||
/* Monotonic wall-clock time spent
|
||||
* on preparing pages retrieved from the GC, including paging-in from disk. */
|
||||
uint64_t xtime_monotonic;
|
||||
/* User-mode CPU time
|
||||
* spent on reading and searching inside the GC */
|
||||
uint64_t rtime_cpu;
|
||||
/* Number of read/search iterations inside the GC while allocating pages */
|
||||
uint32_t rsteps;
|
||||
/* Number of requests to allocate page sequences,
|
||||
* i.e. when more than one page is requested at once */
|
||||
uint32_t xpages;
|
||||
/* Slow path execution count */
|
||||
uint32_t spe_counter;
|
||||
/* page faults (hard page faults) */
|
||||
uint32_t majflt;
|
||||
} profgc_stat_t;
|
||||
|
||||
/* Statistics of page operations overall of all (running, completed and aborted)
|
||||
* transactions */
|
||||
typedef struct {
|
||||
typedef struct pgop_stat {
|
||||
MDBX_atomic_uint64_t newly; /* Quantity of a new pages added */
|
||||
MDBX_atomic_uint64_t cow; /* Quantity of pages copied for update */
|
||||
MDBX_atomic_uint64_t clone; /* Quantity of parent's dirty pages clones
|
||||
@ -2532,10 +2772,31 @@ typedef struct {
|
||||
MDBX_atomic_uint64_t
|
||||
wops; /* Number of explicit write operations (not a pages) to a disk */
|
||||
MDBX_atomic_uint64_t
|
||||
gcrtime; /* Time spending for reading/searching GC (aka FreeDB). The
|
||||
unit/scale is platform-depended, see osal_monotime(). */
|
||||
} MDBX_pgop_stat_t;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||
msync; /* Number of explicit msync/flush-to-disk operations */
|
||||
MDBX_atomic_uint64_t
|
||||
fsync; /* Number of explicit fsync/flush-to-disk operations */
|
||||
|
||||
/* Statistics for GC profiling.
|
||||
* Logically this data might be worth moving into a separate structure,
|
||||
* but the difference would be purely cosmetic. */
|
||||
struct {
|
||||
/* Costs of maintaining user data */
|
||||
profgc_stat_t work;
|
||||
/* Costs of maintaining and updating the GC itself */
|
||||
profgc_stat_t self;
|
||||
/* Number of GC update iterations,
|
||||
* greater than 1 if there were retries/restarts */
|
||||
uint32_t wloops;
|
||||
/* Iterations of coalescing (merging) GC records */
|
||||
uint32_t coalescences;
|
||||
/* Wipes of steady commit points in MDBX_UTTERLY_NOSYNC mode */
|
||||
uint32_t wipes;
|
||||
/* Flushes of data to disk outside of MDBX_UTTERLY_NOSYNC mode */
|
||||
uint32_t flushes;
|
||||
/* Attempts to kick lagging readers */
|
||||
uint32_t kicks;
|
||||
} gc_prof;
|
||||
} pgop_stat_t;
|
||||
|
||||
#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES
|
||||
#define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
|
||||
@ -2666,13 +2927,16 @@ typedef struct MDBX_lockinfo {
|
||||
/* Marker to distinguish uniqueness of DB/CLK. */
|
||||
MDBX_atomic_uint64_t mti_bait_uniqueness;
|
||||
|
||||
/* Paired counter of processes that have mlock()ed part of mmapped DB.
|
||||
* The (mti_mlcnt[0] - mti_mlcnt[1]) > 0 means at least one process
|
||||
* locks at least one page, so therefore madvise() could return EINVAL. */
|
||||
MDBX_atomic_uint32_t mti_mlcnt[2];
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* Statistics of costly ops of all (running, completed and aborted)
|
||||
* transactions */
|
||||
MDBX_pgop_stat_t mti_pgop_stat;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT*/
|
||||
pgop_stat_t mti_pgop_stat;
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
@ -2683,20 +2947,20 @@ typedef struct MDBX_lockinfo {
|
||||
|
||||
atomic_txnid_t mti_oldest_reader;
|
||||
|
||||
/* Timestamp of the last steady sync. Value is represented in a suitable
|
||||
* system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) or
|
||||
* clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_sync_timestamp;
|
||||
/* Timestamp of entering an out-of-sync state. Value is represented in a
|
||||
* suitable system-dependent form, for example clock_gettime(CLOCK_BOOTTIME)
|
||||
* or clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_eoos_timestamp;
|
||||
|
||||
/* Number un-synced-with-disk pages for auto-sync feature. */
|
||||
atomic_pgno_t mti_unsynced_pages;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(MADV_FREE). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
MDBX_atomic_uint64_t mti_unsynced_pages;
|
||||
|
||||
/* Timestamp of the last readers check. */
|
||||
MDBX_atomic_uint64_t mti_reader_check_timestamp;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(DONTNEED). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
|
||||
/* Shared anchor for tracking readahead edge and enabled/disabled status. */
|
||||
pgno_t mti_readahead_anchor;
|
||||
|
||||
@ -2799,7 +3063,7 @@ typedef struct MDBX_dp {
|
||||
MDBX_page *ptr;
|
||||
pgno_t pgno;
|
||||
union {
|
||||
unsigned extra;
|
||||
uint32_t extra;
|
||||
__anonymous_struct_extension__ struct {
|
||||
unsigned multi : 1;
|
||||
unsigned lru : 31;
|
||||
@ -2809,10 +3073,10 @@ typedef struct MDBX_dp {
|
||||
|
||||
/* An DPL (dirty-page list) is a sorted array of MDBX_DPs. */
|
||||
typedef struct MDBX_dpl {
|
||||
unsigned sorted;
|
||||
unsigned length;
|
||||
unsigned pages_including_loose; /* number of pages, but not an entries. */
|
||||
unsigned detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
size_t sorted;
|
||||
size_t length;
|
||||
size_t pages_including_loose; /* number of pages, but not an entries. */
|
||||
size_t detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
||||
(!defined(__cplusplus) && defined(_MSC_VER))
|
||||
MDBX_dp items[] /* dynamic size with holes at zero and after the last */;
|
||||
@ -2831,11 +3095,17 @@ typedef struct MDBX_dpl {
|
||||
((1u << 17) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
|
||||
|
||||
#define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1])
|
||||
#define MDBX_PNL_SIZE(pl) ((pl)[0])
|
||||
#define MDBX_PNL_GETSIZE(pl) ((size_t)((pl)[0]))
|
||||
#define MDBX_PNL_SETSIZE(pl, size) \
|
||||
do { \
|
||||
const size_t __size = size; \
|
||||
assert(__size < INT_MAX); \
|
||||
(pl)[0] = (pgno_t)__size; \
|
||||
} while (0)
|
||||
#define MDBX_PNL_FIRST(pl) ((pl)[1])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_SIZE(pl)])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)])
|
||||
#define MDBX_PNL_BEGIN(pl) (&(pl)[1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_SIZE(pl) + 1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_GETSIZE(pl) + 1])
|
||||
|
||||
#if MDBX_PNL_ASCENDING
|
||||
#define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl)
|
||||
@ -2845,8 +3115,8 @@ typedef struct MDBX_dpl {
|
||||
#define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl)
|
||||
#endif
|
||||
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_SIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_SIZE(pl) == 0)
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0)
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Internal structures */
|
||||
@ -2865,6 +3135,9 @@ typedef struct MDBX_dbx {
|
||||
|
||||
typedef struct troika {
|
||||
uint8_t fsm, recent, prefer_steady, tail_and_flags;
|
||||
#if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */
|
||||
uint32_t unused_pad;
|
||||
#endif
|
||||
#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7)
|
||||
#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64)
|
||||
#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128)
|
||||
@ -2886,9 +3159,13 @@ struct MDBX_txn {
|
||||
/* Additional flag for sync_locked() */
|
||||
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
|
||||
|
||||
#define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
|
||||
#define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not altered */
|
||||
|
||||
#define TXN_FLAGS \
|
||||
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_UPDATE_GC | \
|
||||
MDBX_TXN_FROZEN_RE)
|
||||
|
||||
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
|
||||
((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
|
||||
@ -2947,18 +3224,18 @@ struct MDBX_txn {
|
||||
struct {
|
||||
meta_troika_t troika;
|
||||
/* In write txns, array of cursors for each DB */
|
||||
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
pgno_t *relist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
#if MDBX_ENABLE_REFUND
|
||||
pgno_t loose_refund_wl /* FIXME: describe */;
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
/* a sequence to spilling dirty page with LRU policy */
|
||||
unsigned dirtylru;
|
||||
/* dirtylist room: Dirty array size - dirty pages visible to this txn.
|
||||
* Includes ancestor txns' dirty pages not hidden by other txns'
|
||||
* dirty/spilled pages. Thus commit(nested txn) has room to merge
|
||||
* dirtylist into mt_parent after freeing hidden mt_parent pages. */
|
||||
unsigned dirtyroom;
|
||||
/* a sequence to spilling dirty page with LRU policy */
|
||||
unsigned dirtylru;
|
||||
size_t dirtyroom;
|
||||
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
|
||||
MDBX_dpl *dirtylist;
|
||||
/* The list of reclaimed txns from GC */
|
||||
@ -2969,8 +3246,8 @@ struct MDBX_txn {
|
||||
* in this transaction, linked through `mp_next`. */
|
||||
MDBX_page *loose_pages;
|
||||
/* Number of loose pages (tw.loose_pages) */
|
||||
unsigned loose_count;
|
||||
unsigned spill_least_removed;
|
||||
size_t loose_count;
|
||||
size_t spill_least_removed;
|
||||
/* The sorted list of dirty pages we temporarily wrote to disk
|
||||
* because the dirty list was full. page numbers in here are
|
||||
* shifted left by 1, deleted slots have the LSB set. */
|
||||
@ -3024,9 +3301,7 @@ struct MDBX_cursor {
|
||||
#define C_SUB 0x04 /* Cursor is a sub-cursor */
|
||||
#define C_DEL 0x08 /* last op was a cursor_del */
|
||||
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
|
||||
#define C_RECLAIMING 0x20 /* GC lookup is prohibited */
|
||||
#define C_GCFREEZE 0x40 /* reclaimed_pglist must not be updated */
|
||||
uint8_t mc_flags; /* see mdbx_cursor */
|
||||
uint8_t mc_flags;
|
||||
|
||||
/* Cursor checking flags. */
|
||||
#define CC_BRANCH 0x01 /* same as P_BRANCH for CHECK_LEAF_TYPE() */
|
||||
@ -3037,7 +3312,7 @@ struct MDBX_cursor {
|
||||
#define CC_LEAF2 0x20 /* same as P_LEAF2 for CHECK_LEAF_TYPE() */
|
||||
#define CC_RETIRING 0x40 /* refs to child pages may be invalid */
|
||||
#define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */
|
||||
uint8_t mc_checking; /* page checking level */
|
||||
uint8_t mc_checking;
|
||||
|
||||
MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */
|
||||
indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */
|
||||
@ -3086,14 +3361,20 @@ struct MDBX_env {
|
||||
osal_mmap_t me_dxb_mmap; /* The main data file */
|
||||
#define me_map me_dxb_mmap.dxb
|
||||
#define me_lazy_fd me_dxb_mmap.fd
|
||||
mdbx_filehandle_t me_dsync_fd;
|
||||
#define me_fd4data me_ioring.fd
|
||||
mdbx_filehandle_t me_dsync_fd, me_fd4meta;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
HANDLE me_overlapped_fd, me_data_lock_event;
|
||||
#endif /* Windows */
|
||||
osal_mmap_t me_lck_mmap; /* The lock file */
|
||||
#define me_lfd me_lck_mmap.fd
|
||||
struct MDBX_lockinfo *me_lck;
|
||||
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
unsigned me_branch_nodemax; /* max size of a branch-node */
|
||||
atomic_pgno_t me_mlocked_pgno;
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
|
||||
uint16_t me_merge_threshold,
|
||||
me_merge_threshold_gc; /* pages emptier than this are candidates for
|
||||
@ -3165,6 +3446,7 @@ struct MDBX_env {
|
||||
unsigned me_dp_reserve_len;
|
||||
/* PNL of pages that became unused in a write txn */
|
||||
MDBX_PNL me_retired_pages;
|
||||
osal_ioring_t me_ioring;
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
osal_srwlock_t me_remap_guard;
|
||||
@ -3190,7 +3472,7 @@ struct MDBX_env {
|
||||
#define xMDBX_DEBUG_SPILLING 0
|
||||
#endif
|
||||
#if xMDBX_DEBUG_SPILLING == 2
|
||||
unsigned debug_dirtied_est, debug_dirtied_act;
|
||||
size_t debug_dirtied_est, debug_dirtied_act;
|
||||
#endif /* xMDBX_DEBUG_SPILLING */
|
||||
|
||||
/* ------------------------------------------------- stub for lck-less mode */
|
||||
@ -3295,10 +3577,22 @@ MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line,
|
||||
#define FATAL(fmt, ...) \
|
||||
debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__);
|
||||
|
||||
#if MDBX_DEBUG
|
||||
#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line)
|
||||
#else /* MDBX_DEBUG */
|
||||
MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func,
|
||||
unsigned line);
|
||||
#define ASSERT_FAIL(env, msg, func, line) \
|
||||
do { \
|
||||
(void)(env); \
|
||||
assert_fail(msg, func, line); \
|
||||
} while (0)
|
||||
#endif /* MDBX_DEBUG */
|
||||
|
||||
#define ENSURE_MSG(env, expr, msg) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) \
|
||||
mdbx_assert_fail(env, msg, __func__, __LINE__); \
|
||||
ASSERT_FAIL(env, msg, __func__, __LINE__); \
|
||||
} while (0)
|
||||
|
||||
#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr)
|
||||
@ -3369,7 +3663,9 @@ MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin,
|
||||
MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key);
|
||||
|
||||
MDBX_INTERNAL_FUNC void global_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void global_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void thread_dtor(void *ptr);
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -3490,12 +3786,12 @@ typedef struct MDBX_node {
|
||||
#error "Oops, some flags overlapped or wrong"
|
||||
#endif
|
||||
|
||||
/* max number of pages to commit in one writev() call */
|
||||
#define MDBX_COMMIT_PAGES 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_COMMIT_PAGES /* sysconf(_SC_IOV_MAX) */
|
||||
#undef MDBX_COMMIT_PAGES
|
||||
#define MDBX_COMMIT_PAGES IOV_MAX
|
||||
#endif
|
||||
/* Max length of iov-vector passed to writev() call, used for auxiliary writes */
|
||||
#define MDBX_AUXILARY_IOV_MAX 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX
|
||||
#undef MDBX_AUXILARY_IOV_MAX
|
||||
#define MDBX_AUXILARY_IOV_MAX IOV_MAX
|
||||
#endif /* MDBX_AUXILARY_IOV_MAX */
|
||||
|
||||
/*
|
||||
* /
|
||||
@ -3552,20 +3848,24 @@ ceil_powerof2(size_t value, size_t granularity) {
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned
|
||||
log2n_powerof2(size_t value) {
|
||||
assert(value > 0 && value < INT32_MAX && is_powerof2(value));
|
||||
assert((value & -(int32_t)value) == value);
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl)
|
||||
return __builtin_ctzl(value);
|
||||
log2n_powerof2(size_t value_uintptr) {
|
||||
assert(value_uintptr > 0 && value_uintptr < INT32_MAX &&
|
||||
is_powerof2(value_uintptr));
|
||||
assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr);
|
||||
const uint32_t value_uint32 = (uint32_t)value_uintptr;
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctz)
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(unsigned));
|
||||
return __builtin_ctz(value_uint32);
|
||||
#elif defined(_MSC_VER)
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, (unsigned long)value);
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(long));
|
||||
_BitScanForward(&index, value_uint32);
|
||||
return index;
|
||||
#else
|
||||
static const uint8_t debruijn_ctz32[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
|
||||
return debruijn_ctz32[(uint32_t)(value * 0x077CB531u) >> 27];
|
||||
return debruijn_ctz32[(uint32_t)(value_uint32 * 0x077CB531ul) >> 27];
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -3719,14 +4019,17 @@ static void signal_handler(int sig) {
|
||||
#endif /* !WINDOWS */
|
||||
|
||||
static void usage(const char *prog) {
|
||||
fprintf(stderr,
|
||||
"usage: %s [-V] [-q] [-c] src_path [dest_path]\n"
|
||||
" -V\t\tprint version and exit\n"
|
||||
" -q\t\tbe quiet\n"
|
||||
" -c\t\tenable compactification (skip unused pages)\n"
|
||||
" src_path\tsource database\n"
|
||||
" dest_path\tdestination (stdout if not specified)\n",
|
||||
prog);
|
||||
fprintf(
|
||||
stderr,
|
||||
"usage: %s [-V] [-q] [-c] [-u|U] src_path [dest_path]\n"
|
||||
" -V\t\tprint version and exit\n"
|
||||
" -q\t\tbe quiet\n"
|
||||
" -c\t\tenable compactification (skip unused pages)\n"
|
||||
" -u\t\twarmup database before copying\n"
|
||||
" -U\t\twarmup and try lock database pages in memory before copying\n"
|
||||
" src_path\tsource database\n"
|
||||
" dest_path\tdestination (stdout if not specified)\n",
|
||||
prog);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
@ -3737,6 +4040,8 @@ int main(int argc, char *argv[]) {
|
||||
unsigned flags = MDBX_RDONLY;
|
||||
unsigned cpflags = 0;
|
||||
bool quiet = false;
|
||||
bool warmup = false;
|
||||
MDBX_warmup_flags_t warmup_flags = MDBX_warmup_default;
|
||||
|
||||
for (; argc > 1 && argv[1][0] == '-'; argc--, argv++) {
|
||||
if (argv[1][1] == 'n' && argv[1][2] == '\0')
|
||||
@ -3745,8 +4050,14 @@ int main(int argc, char *argv[]) {
|
||||
cpflags |= MDBX_CP_COMPACT;
|
||||
else if (argv[1][1] == 'q' && argv[1][2] == '\0')
|
||||
quiet = true;
|
||||
else if ((argv[1][1] == 'h' && argv[1][2] == '\0') ||
|
||||
strcmp(argv[1], "--help") == 0)
|
||||
else if (argv[1][1] == 'u' && argv[1][2] == '\0')
|
||||
warmup = true;
|
||||
else if (argv[1][1] == 'U' && argv[1][2] == '\0') {
|
||||
warmup = true;
|
||||
warmup_flags =
|
||||
MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock;
|
||||
} else if ((argv[1][1] == 'h' && argv[1][2] == '\0') ||
|
||||
strcmp(argv[1], "--help") == 0)
|
||||
usage(progname);
|
||||
else if (argv[1][1] == 'V' && argv[1][2] == '\0') {
|
||||
printf("mdbx_copy version %d.%d.%d.%d\n"
|
||||
@ -3795,7 +4106,12 @@ int main(int argc, char *argv[]) {
|
||||
if (rc == MDBX_SUCCESS)
|
||||
rc = mdbx_env_open(env, argv[1], flags, 0);
|
||||
|
||||
if (rc == MDBX_SUCCESS) {
|
||||
if (rc == MDBX_SUCCESS && warmup) {
|
||||
act = "warming up";
|
||||
rc = mdbx_env_warmup(env, nullptr, warmup_flags, 3600 * 65536);
|
||||
}
|
||||
|
||||
if (!MDBX_IS_ERROR(rc)) {
|
||||
act = "copying";
|
||||
if (argc == 2) {
|
||||
mdbx_filehandle_t fd;
|
||||
|
||||
@ -36,7 +36,7 @@
|
||||
* top-level directory of the distribution or, alternatively, at
|
||||
* <http://www.OpenLDAP.org/license.html>. */
|
||||
|
||||
#define MDBX_BUILD_SOURCERY 86a8d6c403a2023fc2df0ab38f71339b78e82f0aa786f480a1cb166c05497134_v0_12_1_0_gb36a07a5
|
||||
#define MDBX_BUILD_SOURCERY e17be563de6f6f85e208ded5aacc1387bc0addf6ce5540c99d0d15db2c3e8edd_v0_12_2_0_g9b062cf0
|
||||
#ifdef MDBX_CONFIG_H
|
||||
#include MDBX_CONFIG_H
|
||||
#endif
|
||||
@ -151,7 +151,11 @@
|
||||
#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* __USE_MINGW_ANSI_STDIO */
|
||||
#endif /* MinGW */
|
||||
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && !defined(UNICODE)
|
||||
#define UNICODE
|
||||
#endif /* UNICODE */
|
||||
|
||||
#include "mdbx.h"
|
||||
/*
|
||||
@ -218,7 +222,7 @@
|
||||
#define SSIZE_MAX INTPTR_MAX
|
||||
#endif
|
||||
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul || defined(_WIN64)
|
||||
#define MDBX_WORDBITS 64
|
||||
#else
|
||||
#define MDBX_WORDBITS 32
|
||||
@ -391,10 +395,6 @@ __extern_C key_t ftok(const char *, int);
|
||||
#elif _WIN32_WINNT < 0x0500
|
||||
#error At least 'Windows 2000' API is required for libmdbx.
|
||||
#endif /* _WIN32_WINNT */
|
||||
#if (defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* MinGW */
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif /* WIN32_LEAN_AND_MEAN */
|
||||
@ -418,8 +418,10 @@ __extern_C key_t ftok(const char *, int);
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#endif /*---------------------------------------------------------------------*/
|
||||
@ -1171,9 +1173,6 @@ static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
|
||||
#define vsnprintf _vsnprintf /* ntdll */
|
||||
#endif
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#else /*----------------------------------------------------------------------*/
|
||||
|
||||
typedef pthread_t osal_thread_t;
|
||||
@ -1204,18 +1203,16 @@ typedef pthread_mutex_t osal_fastmutex_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* OS abstraction layer stuff */
|
||||
|
||||
MDBX_INTERNAL_VAR unsigned sys_pagesize;
|
||||
MDBX_MAYBE_UNUSED MDBX_INTERNAL_VAR unsigned sys_allocation_granularity;
|
||||
|
||||
/* Get the size of a memory page for the system.
|
||||
* This is the basic size that the platform's memory manager uses, and is
|
||||
* fundamental to the use of memory-mapped files. */
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t
|
||||
osal_syspagesize(void) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
SYSTEM_INFO si;
|
||||
GetSystemInfo(&si);
|
||||
return si.dwPageSize;
|
||||
#else
|
||||
return sysconf(_SC_PAGE_SIZE);
|
||||
#endif
|
||||
assert(sys_pagesize > 0 && (sys_pagesize & (sys_pagesize - 1)) == 0);
|
||||
return sys_pagesize;
|
||||
}
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@ -1254,8 +1251,140 @@ typedef union osal_srwlock {
|
||||
} osal_srwlock_t;
|
||||
#endif /* Windows */
|
||||
|
||||
#ifndef MDBX_HAVE_PWRITEV
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
|
||||
#elif defined(__ANDROID_API__)
|
||||
|
||||
#if __ANDROID_API__ < 24
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#endif
|
||||
|
||||
#elif defined(__APPLE__) || defined(__MACH__) || defined(_DARWIN_C_SOURCE)
|
||||
|
||||
#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_VERSION_11_0) && \
|
||||
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
|
||||
/* FIXME: add checks for IOS versions, etc */
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
|
||||
#elif defined(_SC_IOV_MAX) || (defined(IOV_MAX) && IOV_MAX > 1)
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
|
||||
typedef struct ior_item {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
OVERLAPPED ov;
|
||||
#define ior_svg_gap4terminator 1
|
||||
#define ior_sgv_element FILE_SEGMENT_ELEMENT
|
||||
#else
|
||||
size_t offset;
|
||||
#if MDBX_HAVE_PWRITEV
|
||||
size_t sgvcnt;
|
||||
#define ior_svg_gap4terminator 0
|
||||
#define ior_sgv_element struct iovec
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
#endif /* !Windows */
|
||||
union {
|
||||
MDBX_val single;
|
||||
#if defined(ior_sgv_element)
|
||||
ior_sgv_element sgv[1 + ior_svg_gap4terminator];
|
||||
#endif /* ior_sgv_element */
|
||||
};
|
||||
} ior_item_t;
|
||||
|
||||
typedef struct osal_ioring {
|
||||
unsigned slots_left;
|
||||
unsigned allocated;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define IOR_DIRECT 1
|
||||
#define IOR_OVERLAPPED 2
|
||||
#define IOR_STATE_LOCKED 1
|
||||
unsigned pagesize;
|
||||
unsigned last_sgvcnt;
|
||||
size_t last_bytes;
|
||||
uint8_t flags, state, pagesize_ln2;
|
||||
unsigned event_stack;
|
||||
HANDLE *event_pool;
|
||||
volatile LONG async_waiting;
|
||||
volatile LONG async_completed;
|
||||
HANDLE async_done;
|
||||
|
||||
#define ior_last_sgvcnt(ior, item) (ior)->last_sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#elif MDBX_HAVE_PWRITEV
|
||||
unsigned last_bytes;
|
||||
#define ior_last_sgvcnt(ior, item) (item)->sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#else
|
||||
#define ior_last_sgvcnt(ior, item) (1)
|
||||
#define ior_last_bytes(ior, item) (item)->single.iov_len
|
||||
#endif /* !Windows */
|
||||
mdbx_filehandle_t fd;
|
||||
ior_item_t *last;
|
||||
ior_item_t *pool;
|
||||
char *boundary;
|
||||
} osal_ioring_t;
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
/* Actually this is not ioring for now, but on the way. */
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
uint8_t flags,
|
||||
#endif /* Windows */
|
||||
mdbx_filehandle_t fd);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *, size_t items);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_destroy(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_reset(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ctx, const size_t offset,
|
||||
void *data, const size_t bytes);
|
||||
/* Value returned by osal_ioring_write(). */
typedef struct osal_ioring_write_result {
|
||||
/* Presumably an MDBX error code (MDBX_SUCCESS on success) -- TODO confirm. */
int err;
|
||||
/* Presumably the number of write operations issued -- TODO confirm. */
unsigned wops;
|
||||
} osal_ioring_write_result_t;
|
||||
MDBX_INTERNAL_FUNC osal_ioring_write_result_t
|
||||
osal_ioring_write(osal_ioring_t *ior);
|
||||
|
||||
typedef struct iov_ctx iov_ctx_t;
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_walk(
|
||||
osal_ioring_t *ior, iov_ctx_t *ctx,
|
||||
void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes));
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_left(const osal_ioring_t *ior) {
|
||||
return ior->slots_left;
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_used(const osal_ioring_t *ior) {
|
||||
return ior->allocated - ior->slots_left;
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline int
|
||||
osal_ioring_reserve(osal_ioring_t *ior, size_t items, size_t bytes) {
|
||||
items = (items > 32) ? items : 32;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
const size_t npages = bytes >> ior->pagesize_ln2;
|
||||
items = (items > npages) ? items : npages;
|
||||
#else
|
||||
(void)bytes;
|
||||
#endif
|
||||
items = (items < 65536) ? items : 65536;
|
||||
if (likely(ior->allocated >= items))
|
||||
return MDBX_SUCCESS;
|
||||
return osal_ioring_resize(ior, items);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* libc compatibility stuff */
|
||||
|
||||
@ -1281,10 +1410,53 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny);
|
||||
MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny);
|
||||
|
||||
/* max bytes to write in one call */
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x01000000)
|
||||
#if defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x10000000)
|
||||
#elif defined(_WIN32)
|
||||
#define MAX_WRITE UINT32_C(0x04000000)
|
||||
#else
|
||||
#define MAX_WRITE UINT32_C(0x3fff0000)
|
||||
#define MAX_WRITE UINT32_C(0x3f000000)
|
||||
|
||||
#if defined(F_GETLK64) && defined(F_SETLK64) && defined(F_SETLKW64) && \
|
||||
!defined(__ANDROID_API__)
|
||||
#define MDBX_F_SETLK F_SETLK64
|
||||
#define MDBX_F_SETLKW F_SETLKW64
|
||||
#define MDBX_F_GETLK F_GETLK64
|
||||
#if (__GLIBC_PREREQ(2, 28) && \
|
||||
(defined(__USE_LARGEFILE64) || defined(__LARGEFILE64_SOURCE) || \
|
||||
defined(_USE_LARGEFILE64) || defined(_LARGEFILE64_SOURCE))) || \
|
||||
defined(fcntl64)
|
||||
#define MDBX_FCNTL fcntl64
|
||||
#else
|
||||
#define MDBX_FCNTL fcntl
|
||||
#endif
|
||||
#define MDBX_STRUCT_FLOCK struct flock64
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX UINT64_C(0x7fffFFFFfff00000)
|
||||
#endif /* OFF_T_MAX */
|
||||
#else
|
||||
#define MDBX_F_SETLK F_SETLK
|
||||
#define MDBX_F_SETLKW F_SETLKW
|
||||
#define MDBX_F_GETLK F_GETLK
|
||||
#define MDBX_FCNTL fcntl
|
||||
#define MDBX_STRUCT_FLOCK struct flock
|
||||
#endif /* MDBX_F_SETLK, MDBX_F_SETLKW, MDBX_F_GETLK */
|
||||
|
||||
#if defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64) && !defined(__ANDROID_API__)
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK64
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW64
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK64
|
||||
#else
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX \
|
||||
(((sizeof(off_t) > 4) ? INT64_MAX : INT32_MAX) & ~(size_t)0xFffff)
|
||||
#endif /* OFF_T_MAX */
|
||||
#endif /* MDBX_F_OFD_SETLK64, MDBX_F_OFD_SETLKW64, MDBX_F_OFD_GETLK64 */
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
@ -1327,8 +1499,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex);
|
||||
MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex);
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov,
|
||||
int iovcnt, uint64_t offset,
|
||||
size_t expected_written);
|
||||
size_t sgvcnt, uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count,
|
||||
uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf,
|
||||
@ -1356,12 +1527,16 @@ MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
|
||||
MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
|
||||
|
||||
enum osal_openfile_purpose {
|
||||
MDBX_OPEN_DXB_READ = 0,
|
||||
MDBX_OPEN_DXB_LAZY = 1,
|
||||
MDBX_OPEN_DXB_DSYNC = 2,
|
||||
MDBX_OPEN_LCK = 3,
|
||||
MDBX_OPEN_COPY = 4,
|
||||
MDBX_OPEN_DELETE = 5
|
||||
MDBX_OPEN_DXB_READ,
|
||||
MDBX_OPEN_DXB_LAZY,
|
||||
MDBX_OPEN_DXB_DSYNC,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
MDBX_OPEN_DXB_OVERLAPPED,
|
||||
MDBX_OPEN_DXB_OVERLAPPED_DIRECT,
|
||||
#endif /* Windows */
|
||||
MDBX_OPEN_LCK,
|
||||
MDBX_OPEN_COPY,
|
||||
MDBX_OPEN_DELETE
|
||||
};
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
|
||||
@ -1395,7 +1570,7 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
|
||||
MDBX_INTERNAL_FUNC int
|
||||
osal_resume_threads_after_remap(mdbx_handle_array_t *array);
|
||||
#endif /* Windows */
|
||||
MDBX_INTERNAL_FUNC int osal_msync(osal_mmap_t *map, size_t offset,
|
||||
MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
||||
size_t length,
|
||||
enum osal_syncmode_bits mode_bits);
|
||||
MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle,
|
||||
@ -1438,9 +1613,16 @@ osal_pthread_mutex_lock(pthread_mutex_t *mutex) {
|
||||
#endif /* !Windows */
|
||||
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_monotime(void);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_cputime(size_t *optional_page_faults);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16);
|
||||
MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime);
|
||||
|
||||
/* Converts `monotime` with osal_monotime_to_16dot16(), but never lets a
 * non-zero input collapse to zero: when the conversion yields 0, the result
 * is 1 for a positive `monotime` and 0 otherwise. */
MDBX_MAYBE_UNUSED static inline uint32_t
osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) {
  const uint32_t converted = osal_monotime_to_16dot16(monotime);
  if (converted != 0)
    return converted;
  /* fix underflow */
  return monotime > 0;
}
|
||||
|
||||
MDBX_INTERNAL_FUNC bin128_t osal_bootid(void);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* lck stuff */
|
||||
@ -1550,6 +1732,9 @@ MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid);
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#define OSAL_MB2WIDE(FROM, TO) \
|
||||
do { \
|
||||
const char *const from_tmp = (FROM); \
|
||||
@ -1683,6 +1868,11 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
|
||||
|
||||
NTSYSAPI ULONG RtlRandomEx(PULONG Seed);
|
||||
|
||||
typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle,
|
||||
PUCHAR OverlappedRangeStart,
|
||||
ULONG Length);
|
||||
MDBX_INTERNAL_VAR MDBX_SetFileIoOverlappedRange mdbx_SetFileIoOverlappedRange;
|
||||
|
||||
#endif /* Windows */
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -1797,6 +1987,13 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_REFUND must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
|
||||
/** Controls profiling of GC search and updates. */
|
||||
#ifndef MDBX_ENABLE_PROFGC
|
||||
#define MDBX_ENABLE_PROFGC 0
|
||||
#elif !(MDBX_ENABLE_PROFGC == 0 || MDBX_ENABLE_PROFGC == 1)
|
||||
#error MDBX_ENABLE_PROFGC must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_PROFGC */
|
||||
|
||||
/** Controls gathering statistics for page operations. */
|
||||
#ifndef MDBX_ENABLE_PGOP_STAT
|
||||
#define MDBX_ENABLE_PGOP_STAT 1
|
||||
@ -1816,7 +2013,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_BIGFOOT */
|
||||
|
||||
/** Controls use of POSIX madvise() hints and friends. */
|
||||
/** Controls using of POSIX' madvise() and/or similar hints. */
|
||||
#ifndef MDBX_ENABLE_MADVISE
|
||||
#define MDBX_ENABLE_MADVISE 1
|
||||
#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1)
|
||||
@ -1845,23 +2042,22 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
|
||||
#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
|
||||
|
||||
/** Basically, this build-option is for TODO. Guess it should be replaced
|
||||
* with MDBX_ENABLE_WRITEMAP_SPILLING with the three variants:
|
||||
* 0/OFF = Don't track dirty pages at all and don't spilling ones.
|
||||
* This should be by-default on Linux and may-be other systems
|
||||
* (not sure: Darwin/OSX, FreeBSD, Windows 10) where kernel provides
|
||||
* properly LRU tracking and async writing on-demand.
|
||||
* 1/ON = Lite tracking of dirty pages but with LRU labels and explicit
|
||||
* spilling with msync(MS_ASYNC). */
|
||||
#ifndef MDBX_FAKE_SPILL_WRITEMAP
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 1 /* msync(MS_ASYNC) is no-op on Linux */
|
||||
/** Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP
|
||||
* mode. 0/OFF = Don't track dirty pages at all, don't spill ones, and use
|
||||
* msync() to persist data. This is by-default on Linux and other systems where
|
||||
* kernel provides properly LRU tracking and effective flushing on-demand. 1/ON
|
||||
* = Track dirty pages with LRU labels for spilling and explicitly
|
||||
* persist them by write(). This may be reasonable for systems with low
|
||||
* performance of msync() and/or LRU tracking. */
|
||||
#ifndef MDBX_AVOID_MSYNC
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MDBX_AVOID_MSYNC 1
|
||||
#else
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 0
|
||||
#define MDBX_AVOID_MSYNC 0
|
||||
#endif
|
||||
#elif !(MDBX_FAKE_SPILL_WRITEMAP == 0 || MDBX_FAKE_SPILL_WRITEMAP == 1)
|
||||
#error MDBX_FAKE_SPILL_WRITEMAP must be defined as 0 or 1
|
||||
#endif /* MDBX_FAKE_SPILL_WRITEMAP */
|
||||
#elif !(MDBX_AVOID_MSYNC == 0 || MDBX_AVOID_MSYNC == 1)
|
||||
#error MDBX_AVOID_MSYNC must be defined as 0 or 1
|
||||
#endif /* MDBX_AVOID_MSYNC */
|
||||
|
||||
/** Controls sort order of internal page number lists.
|
||||
* This is a mostly experimental/advanced option, not for regular MDBX users.
|
||||
@ -1918,6 +2114,27 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#ifndef MDBX_HAVE_C11ATOMICS
|
||||
#endif /* MDBX_HAVE_C11ATOMICS */
|
||||
|
||||
/** If defined, enables the use of GCC's `__builtin_cpu_supports()`
|
||||
* for runtime dispatching depending on the CPU's capabilities. */
|
||||
#ifndef MDBX_HAVE_BUILTIN_CPU_SUPPORTS
|
||||
#if defined(__APPLE__) || defined(BIONIC)
|
||||
/* Never use any modern features on Apple's or Google's OSes
|
||||
* because of the many compatibility and/or performance troubles */
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif defined(__e2k__)
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif __has_builtin(__builtin_cpu_supports) || \
|
||||
defined(__BUILTIN_CPU_SUPPORTS__) || \
|
||||
(defined(__ia32__) && __GNUC_PREREQ(4, 8) && __GLIBC_PREREQ(2, 23))
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 1
|
||||
#else
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#endif
|
||||
#elif !(MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 0 || \
|
||||
MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 1)
|
||||
#error MDBX_HAVE_BUILTIN_CPU_SUPPORTS must be defined as 0 or 1
|
||||
#endif /* MDBX_HAVE_BUILTIN_CPU_SUPPORTS */
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/** Win32 File Locking API for \ref MDBX_LOCKING */
|
||||
@ -1973,7 +2190,10 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
|
||||
/** Advanced: Using POSIX OFD-locks (autodetection by default). */
|
||||
#ifndef MDBX_USE_OFDLOCKS
|
||||
#if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK) && \
|
||||
#if ((defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && \
|
||||
defined(F_OFD_GETLK)) || \
|
||||
(defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64))) && \
|
||||
!defined(MDBX_SAFE4QEMU) && \
|
||||
!defined(__sun) /* OFD-lock are broken on Solaris */
|
||||
#define MDBX_USE_OFDLOCKS 1
|
||||
@ -2059,13 +2279,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#endif /* MDBX_64BIT_ATOMIC */
|
||||
|
||||
#ifndef MDBX_64BIT_CAS
|
||||
#if defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if __GCC_ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2077,6 +2291,12 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN)
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2311,7 +2531,7 @@ MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(
|
||||
/* FROZEN: The version number for a database's datafile format. */
|
||||
#define MDBX_DATA_VERSION 3
|
||||
/* The version number for a database's lockfile format. */
|
||||
#define MDBX_LOCK_VERSION 4
|
||||
#define MDBX_LOCK_VERSION 5
|
||||
|
||||
/* handle for the DB used to track free pages. */
|
||||
#define FREE_DBI 0
|
||||
@ -2515,14 +2735,34 @@ typedef struct MDBX_page {
|
||||
: PAGETYPE_WHOLE(p))
|
||||
|
||||
/* Size of the page header, excluding dynamic data at the end */
|
||||
#define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_ptrs))
|
||||
#define PAGEHDRSZ offsetof(MDBX_page, mp_ptrs)
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* Counters and timings collected for one category of GC profiling
 * (pgop_stat's `gc_prof` keeps two instances: `work` and `self`). */
typedef struct profgc_stat {
|
||||
/* Monotonic wall-clock time
|
||||
 * spent on reading and searching inside the GC */
|
||||
uint64_t rtime_monotonic;
|
||||
/* Monotonic wall-clock time spent
|
||||
 * on preparing pages fetched from the GC, including paging-in from disk. */
|
||||
uint64_t xtime_monotonic;
|
||||
/* User-mode CPU time
|
||||
 * spent on reading and searching inside the GC */
|
||||
uint64_t rtime_cpu;
|
||||
/* Number of read/search iterations inside the GC while allocating pages */
|
||||
uint32_t rsteps;
|
||||
/* Number of requests to allocate page sequences,
|
||||
 * i.e. when more than one page is requested */
|
||||
uint32_t xpages;
|
||||
/* Slow path execution count */
|
||||
uint32_t spe_counter;
|
||||
/* page faults (hard page faults) */
|
||||
uint32_t majflt;
|
||||
} profgc_stat_t;
|
||||
|
||||
/* Statistics of page operations overall of all (running, completed and aborted)
|
||||
* transactions */
|
||||
typedef struct {
|
||||
typedef struct pgop_stat {
|
||||
MDBX_atomic_uint64_t newly; /* Quantity of a new pages added */
|
||||
MDBX_atomic_uint64_t cow; /* Quantity of pages copied for update */
|
||||
MDBX_atomic_uint64_t clone; /* Quantity of parent's dirty pages clones
|
||||
@ -2534,10 +2774,31 @@ typedef struct {
|
||||
MDBX_atomic_uint64_t
|
||||
wops; /* Number of explicit write operations (not a pages) to a disk */
|
||||
MDBX_atomic_uint64_t
|
||||
gcrtime; /* Time spending for reading/searching GC (aka FreeDB). The
|
||||
unit/scale is platform-depended, see osal_monotime(). */
|
||||
} MDBX_pgop_stat_t;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||
msync; /* Number of explicit msync/flush-to-disk operations */
|
||||
MDBX_atomic_uint64_t
|
||||
fsync; /* Number of explicit fsync/flush-to-disk operations */
|
||||
|
||||
/* Statistics for GC profiling.
|
||||
 * Logically this data might be better moved into a separate structure,
|
||||
 * but the difference would be purely cosmetic. */
|
||||
struct {
|
||||
/* Costs of maintaining user data */
|
||||
profgc_stat_t work;
|
||||
/* Costs of maintaining and updating the GC itself */
|
||||
profgc_stat_t self;
|
||||
/* Number of GC update iterations,
|
||||
 * greater than 1 if there were retries/restarts */
|
||||
uint32_t wloops;
|
||||
/* Iterations of merging (coalescing) GC records */
|
||||
uint32_t coalescences;
|
||||
/* Wipes of steady commit points in MDBX_UTTERLY_NOSYNC mode */
|
||||
uint32_t wipes;
|
||||
/* Flushes of data to disk outside of MDBX_UTTERLY_NOSYNC mode */
|
||||
uint32_t flushes;
|
||||
/* Attempts to kick lagging readers */
|
||||
uint32_t kicks;
|
||||
} gc_prof;
|
||||
} pgop_stat_t;
|
||||
|
||||
#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES
|
||||
#define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
|
||||
@ -2668,13 +2929,16 @@ typedef struct MDBX_lockinfo {
|
||||
/* Marker to distinguish uniqueness of DB/CLK. */
|
||||
MDBX_atomic_uint64_t mti_bait_uniqueness;
|
||||
|
||||
/* Paired counter of processes that have mlock()ed part of mmapped DB.
|
||||
* The (mti_mlcnt[0] - mti_mlcnt[1]) > 0 means at least one process
|
||||
* locks at least one page, so madvise() could return EINVAL. */
|
||||
MDBX_atomic_uint32_t mti_mlcnt[2];
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* Statistics of costly ops of all (running, completed and aborted)
|
||||
* transactions */
|
||||
MDBX_pgop_stat_t mti_pgop_stat;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT*/
|
||||
pgop_stat_t mti_pgop_stat;
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
@ -2685,20 +2949,20 @@ typedef struct MDBX_lockinfo {
|
||||
|
||||
atomic_txnid_t mti_oldest_reader;
|
||||
|
||||
/* Timestamp of the last steady sync. Value is represented in a suitable
|
||||
* system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) or
|
||||
* clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_sync_timestamp;
|
||||
/* Timestamp of entering an out-of-sync state. Value is represented in a
|
||||
* suitable system-dependent form, for example clock_gettime(CLOCK_BOOTTIME)
|
||||
* or clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_eoos_timestamp;
|
||||
|
||||
/* Number of un-synced-with-disk pages for auto-sync feature. */
|
||||
atomic_pgno_t mti_unsynced_pages;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(MADV_FREE). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
MDBX_atomic_uint64_t mti_unsynced_pages;
|
||||
|
||||
/* Timestamp of the last readers check. */
|
||||
MDBX_atomic_uint64_t mti_reader_check_timestamp;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(DONTNEED). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
|
||||
/* Shared anchor for tracking readahead edge and enabled/disabled status. */
|
||||
pgno_t mti_readahead_anchor;
|
||||
|
||||
@ -2801,7 +3065,7 @@ typedef struct MDBX_dp {
|
||||
MDBX_page *ptr;
|
||||
pgno_t pgno;
|
||||
union {
|
||||
unsigned extra;
|
||||
uint32_t extra;
|
||||
__anonymous_struct_extension__ struct {
|
||||
unsigned multi : 1;
|
||||
unsigned lru : 31;
|
||||
@ -2811,10 +3075,10 @@ typedef struct MDBX_dp {
|
||||
|
||||
/* An DPL (dirty-page list) is a sorted array of MDBX_DPs. */
|
||||
typedef struct MDBX_dpl {
|
||||
unsigned sorted;
|
||||
unsigned length;
|
||||
unsigned pages_including_loose; /* number of pages, but not an entries. */
|
||||
unsigned detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
size_t sorted;
|
||||
size_t length;
|
||||
size_t pages_including_loose; /* number of pages, but not an entries. */
|
||||
size_t detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
||||
(!defined(__cplusplus) && defined(_MSC_VER))
|
||||
MDBX_dp items[] /* dynamic size with holes at zero and after the last */;
|
||||
@ -2833,11 +3097,17 @@ typedef struct MDBX_dpl {
|
||||
((1u << 17) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
|
||||
|
||||
#define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1])
|
||||
#define MDBX_PNL_SIZE(pl) ((pl)[0])
|
||||
#define MDBX_PNL_GETSIZE(pl) ((size_t)((pl)[0]))
|
||||
#define MDBX_PNL_SETSIZE(pl, size) \
|
||||
do { \
|
||||
const size_t __size = size; \
|
||||
assert(__size < INT_MAX); \
|
||||
(pl)[0] = (pgno_t)__size; \
|
||||
} while (0)
|
||||
#define MDBX_PNL_FIRST(pl) ((pl)[1])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_SIZE(pl)])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)])
|
||||
#define MDBX_PNL_BEGIN(pl) (&(pl)[1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_SIZE(pl) + 1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_GETSIZE(pl) + 1])
|
||||
|
||||
#if MDBX_PNL_ASCENDING
|
||||
#define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl)
|
||||
@ -2847,8 +3117,8 @@ typedef struct MDBX_dpl {
|
||||
#define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl)
|
||||
#endif
|
||||
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_SIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_SIZE(pl) == 0)
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0)
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Internal structures */
|
||||
@ -2867,6 +3137,9 @@ typedef struct MDBX_dbx {
|
||||
|
||||
typedef struct troika {
|
||||
uint8_t fsm, recent, prefer_steady, tail_and_flags;
|
||||
#if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */
|
||||
uint32_t unused_pad;
|
||||
#endif
|
||||
#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7)
|
||||
#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64)
|
||||
#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128)
|
||||
@ -2888,9 +3161,13 @@ struct MDBX_txn {
|
||||
/* Additional flag for sync_locked() */
|
||||
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
|
||||
|
||||
#define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
|
||||
#define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not altered */
|
||||
|
||||
#define TXN_FLAGS \
|
||||
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_UPDATE_GC | \
|
||||
MDBX_TXN_FROZEN_RE)
|
||||
|
||||
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
|
||||
((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
|
||||
@ -2949,18 +3226,18 @@ struct MDBX_txn {
|
||||
struct {
|
||||
meta_troika_t troika;
|
||||
/* In write txns, array of cursors for each DB */
|
||||
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
pgno_t *relist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
#if MDBX_ENABLE_REFUND
|
||||
pgno_t loose_refund_wl /* FIXME: describe */;
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
/* a sequence to spilling dirty page with LRU policy */
|
||||
unsigned dirtylru;
|
||||
/* dirtylist room: Dirty array size - dirty pages visible to this txn.
|
||||
* Includes ancestor txns' dirty pages not hidden by other txns'
|
||||
* dirty/spilled pages. Thus commit(nested txn) has room to merge
|
||||
* dirtylist into mt_parent after freeing hidden mt_parent pages. */
|
||||
unsigned dirtyroom;
|
||||
/* a sequence to spilling dirty page with LRU policy */
|
||||
unsigned dirtylru;
|
||||
size_t dirtyroom;
|
||||
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
|
||||
MDBX_dpl *dirtylist;
|
||||
/* The list of reclaimed txns from GC */
|
||||
@ -2971,8 +3248,8 @@ struct MDBX_txn {
|
||||
* in this transaction, linked through `mp_next`. */
|
||||
MDBX_page *loose_pages;
|
||||
/* Number of loose pages (tw.loose_pages) */
|
||||
unsigned loose_count;
|
||||
unsigned spill_least_removed;
|
||||
size_t loose_count;
|
||||
size_t spill_least_removed;
|
||||
/* The sorted list of dirty pages we temporarily wrote to disk
|
||||
* because the dirty list was full. page numbers in here are
|
||||
* shifted left by 1, deleted slots have the LSB set. */
|
||||
@ -3026,9 +3303,7 @@ struct MDBX_cursor {
|
||||
#define C_SUB 0x04 /* Cursor is a sub-cursor */
|
||||
#define C_DEL 0x08 /* last op was a cursor_del */
|
||||
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
|
||||
#define C_RECLAIMING 0x20 /* GC lookup is prohibited */
|
||||
#define C_GCFREEZE 0x40 /* reclaimed_pglist must not be updated */
|
||||
uint8_t mc_flags; /* see mdbx_cursor */
|
||||
uint8_t mc_flags;
|
||||
|
||||
/* Cursor checking flags. */
|
||||
#define CC_BRANCH 0x01 /* same as P_BRANCH for CHECK_LEAF_TYPE() */
|
||||
@ -3039,7 +3314,7 @@ struct MDBX_cursor {
|
||||
#define CC_LEAF2 0x20 /* same as P_LEAF2 for CHECK_LEAF_TYPE() */
|
||||
#define CC_RETIRING 0x40 /* refs to child pages may be invalid */
|
||||
#define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */
|
||||
uint8_t mc_checking; /* page checking level */
|
||||
uint8_t mc_checking;
|
||||
|
||||
MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */
|
||||
indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */
|
||||
@ -3088,14 +3363,20 @@ struct MDBX_env {
|
||||
osal_mmap_t me_dxb_mmap; /* The main data file */
|
||||
#define me_map me_dxb_mmap.dxb
|
||||
#define me_lazy_fd me_dxb_mmap.fd
|
||||
mdbx_filehandle_t me_dsync_fd;
|
||||
#define me_fd4data me_ioring.fd
|
||||
mdbx_filehandle_t me_dsync_fd, me_fd4meta;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
HANDLE me_overlapped_fd, me_data_lock_event;
|
||||
#endif /* Windows */
|
||||
osal_mmap_t me_lck_mmap; /* The lock file */
|
||||
#define me_lfd me_lck_mmap.fd
|
||||
struct MDBX_lockinfo *me_lck;
|
||||
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
unsigned me_branch_nodemax; /* max size of a branch-node */
|
||||
atomic_pgno_t me_mlocked_pgno;
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
|
||||
uint16_t me_merge_threshold,
|
||||
me_merge_threshold_gc; /* pages emptier than this are candidates for
|
||||
@ -3167,6 +3448,7 @@ struct MDBX_env {
|
||||
unsigned me_dp_reserve_len;
|
||||
/* PNL of pages that became unused in a write txn */
|
||||
MDBX_PNL me_retired_pages;
|
||||
osal_ioring_t me_ioring;
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
osal_srwlock_t me_remap_guard;
|
||||
@ -3192,7 +3474,7 @@ struct MDBX_env {
|
||||
#define xMDBX_DEBUG_SPILLING 0
|
||||
#endif
|
||||
#if xMDBX_DEBUG_SPILLING == 2
|
||||
unsigned debug_dirtied_est, debug_dirtied_act;
|
||||
size_t debug_dirtied_est, debug_dirtied_act;
|
||||
#endif /* xMDBX_DEBUG_SPILLING */
|
||||
|
||||
/* ------------------------------------------------- stub for lck-less mode */
|
||||
@ -3297,10 +3579,22 @@ MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line,
|
||||
#define FATAL(fmt, ...) \
|
||||
debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__);
|
||||
|
||||
#if MDBX_DEBUG
|
||||
#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line)
|
||||
#else /* MDBX_DEBUG */
|
||||
MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func,
|
||||
unsigned line);
|
||||
#define ASSERT_FAIL(env, msg, func, line) \
|
||||
do { \
|
||||
(void)(env); \
|
||||
assert_fail(msg, func, line); \
|
||||
} while (0)
|
||||
#endif /* MDBX_DEBUG */
|
||||
|
||||
#define ENSURE_MSG(env, expr, msg) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) \
|
||||
mdbx_assert_fail(env, msg, __func__, __LINE__); \
|
||||
ASSERT_FAIL(env, msg, __func__, __LINE__); \
|
||||
} while (0)
|
||||
|
||||
#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr)
|
||||
@ -3371,7 +3665,9 @@ MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin,
|
||||
MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key);
|
||||
|
||||
MDBX_INTERNAL_FUNC void global_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void global_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void thread_dtor(void *ptr);
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -3492,12 +3788,12 @@ typedef struct MDBX_node {
|
||||
#error "Oops, some flags overlapped or wrong"
|
||||
#endif
|
||||
|
||||
/* max number of pages to commit in one writev() call */
|
||||
#define MDBX_COMMIT_PAGES 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_COMMIT_PAGES /* sysconf(_SC_IOV_MAX) */
|
||||
#undef MDBX_COMMIT_PAGES
|
||||
#define MDBX_COMMIT_PAGES IOV_MAX
|
||||
#endif
|
||||
/* Max length of iov-vector passed to writev() call, used for auxiliary writes */
|
||||
#define MDBX_AUXILARY_IOV_MAX 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX
|
||||
#undef MDBX_AUXILARY_IOV_MAX
|
||||
#define MDBX_AUXILARY_IOV_MAX IOV_MAX
|
||||
#endif /* MDBX_AUXILARY_IOV_MAX */
|
||||
|
||||
/*
|
||||
* /
|
||||
@ -3554,20 +3850,24 @@ ceil_powerof2(size_t value, size_t granularity) {
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned
|
||||
log2n_powerof2(size_t value) {
|
||||
assert(value > 0 && value < INT32_MAX && is_powerof2(value));
|
||||
assert((value & -(int32_t)value) == value);
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl)
|
||||
return __builtin_ctzl(value);
|
||||
log2n_powerof2(size_t value_uintptr) {
|
||||
assert(value_uintptr > 0 && value_uintptr < INT32_MAX &&
|
||||
is_powerof2(value_uintptr));
|
||||
assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr);
|
||||
const uint32_t value_uint32 = (uint32_t)value_uintptr;
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctz)
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(unsigned));
|
||||
return __builtin_ctz(value_uint32);
|
||||
#elif defined(_MSC_VER)
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, (unsigned long)value);
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(long));
|
||||
_BitScanForward(&index, value_uint32);
|
||||
return index;
|
||||
#else
|
||||
static const uint8_t debruijn_ctz32[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
|
||||
return debruijn_ctz32[(uint32_t)(value * 0x077CB531u) >> 27];
|
||||
return debruijn_ctz32[(uint32_t)(value_uint32 * 0x077CB531ul) >> 27];
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
* top-level directory of the distribution or, alternatively, at
|
||||
* <http://www.OpenLDAP.org/license.html>. */
|
||||
|
||||
#define MDBX_BUILD_SOURCERY 86a8d6c403a2023fc2df0ab38f71339b78e82f0aa786f480a1cb166c05497134_v0_12_1_0_gb36a07a5
|
||||
#define MDBX_BUILD_SOURCERY e17be563de6f6f85e208ded5aacc1387bc0addf6ce5540c99d0d15db2c3e8edd_v0_12_2_0_g9b062cf0
|
||||
#ifdef MDBX_CONFIG_H
|
||||
#include MDBX_CONFIG_H
|
||||
#endif
|
||||
@ -149,7 +149,11 @@
|
||||
#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* __USE_MINGW_ANSI_STDIO */
|
||||
#endif /* MinGW */
|
||||
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && !defined(UNICODE)
|
||||
#define UNICODE
|
||||
#endif /* UNICODE */
|
||||
|
||||
#include "mdbx.h"
|
||||
/*
|
||||
@ -216,7 +220,7 @@
|
||||
#define SSIZE_MAX INTPTR_MAX
|
||||
#endif
|
||||
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul || defined(_WIN64)
|
||||
#define MDBX_WORDBITS 64
|
||||
#else
|
||||
#define MDBX_WORDBITS 32
|
||||
@ -389,10 +393,6 @@ __extern_C key_t ftok(const char *, int);
|
||||
#elif _WIN32_WINNT < 0x0500
|
||||
#error At least 'Windows 2000' API is required for libmdbx.
|
||||
#endif /* _WIN32_WINNT */
|
||||
#if (defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* MinGW */
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif /* WIN32_LEAN_AND_MEAN */
|
||||
@ -416,8 +416,10 @@ __extern_C key_t ftok(const char *, int);
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#endif /*---------------------------------------------------------------------*/
|
||||
@ -1169,9 +1171,6 @@ static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
|
||||
#define vsnprintf _vsnprintf /* ntdll */
|
||||
#endif
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#else /*----------------------------------------------------------------------*/
|
||||
|
||||
typedef pthread_t osal_thread_t;
|
||||
@ -1202,18 +1201,16 @@ typedef pthread_mutex_t osal_fastmutex_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* OS abstraction layer stuff */
|
||||
|
||||
MDBX_INTERNAL_VAR unsigned sys_pagesize;
|
||||
MDBX_MAYBE_UNUSED MDBX_INTERNAL_VAR unsigned sys_allocation_granularity;
|
||||
|
||||
/* Get the size of a memory page for the system.
|
||||
* This is the basic size that the platform's memory manager uses, and is
|
||||
* fundamental to the use of memory-mapped files. */
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t
|
||||
osal_syspagesize(void) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
SYSTEM_INFO si;
|
||||
GetSystemInfo(&si);
|
||||
return si.dwPageSize;
|
||||
#else
|
||||
return sysconf(_SC_PAGE_SIZE);
|
||||
#endif
|
||||
assert(sys_pagesize > 0 && (sys_pagesize & (sys_pagesize - 1)) == 0);
|
||||
return sys_pagesize;
|
||||
}
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@ -1252,8 +1249,140 @@ typedef union osal_srwlock {
|
||||
} osal_srwlock_t;
|
||||
#endif /* Windows */
|
||||
|
||||
#ifndef MDBX_HAVE_PWRITEV
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
|
||||
#elif defined(__ANDROID_API__)
|
||||
|
||||
#if __ANDROID_API__ < 24
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#endif
|
||||
|
||||
#elif defined(__APPLE__) || defined(__MACH__) || defined(_DARWIN_C_SOURCE)
|
||||
|
||||
#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_VERSION_11_0) && \
|
||||
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
|
||||
/* FIXME: add checks for IOS versions, etc */
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
|
||||
#elif defined(_SC_IOV_MAX) || (defined(IOV_MAX) && IOV_MAX > 1)
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
|
||||
typedef struct ior_item {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
OVERLAPPED ov;
|
||||
#define ior_svg_gap4terminator 1
|
||||
#define ior_sgv_element FILE_SEGMENT_ELEMENT
|
||||
#else
|
||||
size_t offset;
|
||||
#if MDBX_HAVE_PWRITEV
|
||||
size_t sgvcnt;
|
||||
#define ior_svg_gap4terminator 0
|
||||
#define ior_sgv_element struct iovec
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
#endif /* !Windows */
|
||||
union {
|
||||
MDBX_val single;
|
||||
#if defined(ior_sgv_element)
|
||||
ior_sgv_element sgv[1 + ior_svg_gap4terminator];
|
||||
#endif /* ior_sgv_element */
|
||||
};
|
||||
} ior_item_t;
|
||||
|
||||
typedef struct osal_ioring {
|
||||
unsigned slots_left;
|
||||
unsigned allocated;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define IOR_DIRECT 1
|
||||
#define IOR_OVERLAPPED 2
|
||||
#define IOR_STATE_LOCKED 1
|
||||
unsigned pagesize;
|
||||
unsigned last_sgvcnt;
|
||||
size_t last_bytes;
|
||||
uint8_t flags, state, pagesize_ln2;
|
||||
unsigned event_stack;
|
||||
HANDLE *event_pool;
|
||||
volatile LONG async_waiting;
|
||||
volatile LONG async_completed;
|
||||
HANDLE async_done;
|
||||
|
||||
#define ior_last_sgvcnt(ior, item) (ior)->last_sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#elif MDBX_HAVE_PWRITEV
|
||||
unsigned last_bytes;
|
||||
#define ior_last_sgvcnt(ior, item) (item)->sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#else
|
||||
#define ior_last_sgvcnt(ior, item) (1)
|
||||
#define ior_last_bytes(ior, item) (item)->single.iov_len
|
||||
#endif /* !Windows */
|
||||
mdbx_filehandle_t fd;
|
||||
ior_item_t *last;
|
||||
ior_item_t *pool;
|
||||
char *boundary;
|
||||
} osal_ioring_t;
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
/* Actually this is not ioring for now, but on the way. */
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
uint8_t flags,
|
||||
#endif /* Windows */
|
||||
mdbx_filehandle_t fd);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *, size_t items);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_destroy(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_reset(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ctx, const size_t offset,
|
||||
void *data, const size_t bytes);
|
||||
typedef struct osal_ioring_write_result {
|
||||
int err;
|
||||
unsigned wops;
|
||||
} osal_ioring_write_result_t;
|
||||
MDBX_INTERNAL_FUNC osal_ioring_write_result_t
|
||||
osal_ioring_write(osal_ioring_t *ior);
|
||||
|
||||
typedef struct iov_ctx iov_ctx_t;
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_walk(
|
||||
osal_ioring_t *ior, iov_ctx_t *ctx,
|
||||
void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes));
|
||||
|
||||
/* Returns the ring's `slots_left` counter as-is. */
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_left(const osal_ioring_t *ior) {
|
||||
return ior->slots_left;
|
||||
}
|
||||
|
||||
/* Returns `allocated - slots_left`, i.e. the number of allocated slots
 * that are not counted as left. */
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_used(const osal_ioring_t *ior) {
|
||||
return ior->allocated - ior->slots_left;
|
||||
}
|
||||
|
||||
/* Makes sure the ring has room for the requested number of items.
 *
 * The request is first raised to at least 32 items; on Windows it is also
 * raised to `bytes >> pagesize_ln2`; finally it is capped at 65536 items.
 * On non-Windows builds `bytes` is unused.
 *
 * Returns MDBX_SUCCESS when `ior->allocated` already covers the adjusted
 * request, otherwise whatever osal_ioring_resize() returns. */
MDBX_MAYBE_UNUSED static inline int
|
||||
osal_ioring_reserve(osal_ioring_t *ior, size_t items, size_t bytes) {
|
||||
/* lower bound of the request */
items = (items > 32) ? items : 32;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
const size_t npages = bytes >> ior->pagesize_ln2;
|
||||
items = (items > npages) ? items : npages;
|
||||
#else
|
||||
(void)bytes;
|
||||
#endif
|
||||
/* upper bound of the request, applied after the adjustments above */
items = (items < 65536) ? items : 65536;
|
||||
if (likely(ior->allocated >= items))
|
||||
return MDBX_SUCCESS;
|
||||
return osal_ioring_resize(ior, items);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* libc compatibility stuff */
|
||||
|
||||
@ -1279,10 +1408,53 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny);
|
||||
MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny);
|
||||
|
||||
/* max bytes to write in one call */
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x01000000)
|
||||
#if defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x10000000)
|
||||
#elif defined(_WIN32)
|
||||
#define MAX_WRITE UINT32_C(0x04000000)
|
||||
#else
|
||||
#define MAX_WRITE UINT32_C(0x3fff0000)
|
||||
#define MAX_WRITE UINT32_C(0x3f000000)
|
||||
|
||||
#if defined(F_GETLK64) && defined(F_SETLK64) && defined(F_SETLKW64) && \
|
||||
!defined(__ANDROID_API__)
|
||||
#define MDBX_F_SETLK F_SETLK64
|
||||
#define MDBX_F_SETLKW F_SETLKW64
|
||||
#define MDBX_F_GETLK F_GETLK64
|
||||
#if (__GLIBC_PREREQ(2, 28) && \
|
||||
(defined(__USE_LARGEFILE64) || defined(__LARGEFILE64_SOURCE) || \
|
||||
defined(_USE_LARGEFILE64) || defined(_LARGEFILE64_SOURCE))) || \
|
||||
defined(fcntl64)
|
||||
#define MDBX_FCNTL fcntl64
|
||||
#else
|
||||
#define MDBX_FCNTL fcntl
|
||||
#endif
|
||||
#define MDBX_STRUCT_FLOCK struct flock64
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX UINT64_C(0x7fffFFFFfff00000)
|
||||
#endif /* OFF_T_MAX */
|
||||
#else
|
||||
#define MDBX_F_SETLK F_SETLK
|
||||
#define MDBX_F_SETLKW F_SETLKW
|
||||
#define MDBX_F_GETLK F_GETLK
|
||||
#define MDBX_FCNTL fcntl
|
||||
#define MDBX_STRUCT_FLOCK struct flock
|
||||
#endif /* MDBX_F_SETLK, MDBX_F_SETLKW, MDBX_F_GETLK */
|
||||
|
||||
#if defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64) && !defined(__ANDROID_API__)
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK64
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW64
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK64
|
||||
#else
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX \
|
||||
(((sizeof(off_t) > 4) ? INT64_MAX : INT32_MAX) & ~(size_t)0xFffff)
|
||||
#endif /* OFF_T_MAX */
|
||||
#endif /* MDBX_F_OFD_SETLK64, MDBX_F_OFD_SETLKW64, MDBX_F_OFD_GETLK64 */
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
@ -1325,8 +1497,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex);
|
||||
MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex);
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov,
|
||||
int iovcnt, uint64_t offset,
|
||||
size_t expected_written);
|
||||
size_t sgvcnt, uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count,
|
||||
uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf,
|
||||
@ -1354,12 +1525,16 @@ MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
|
||||
MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
|
||||
|
||||
enum osal_openfile_purpose {
|
||||
MDBX_OPEN_DXB_READ = 0,
|
||||
MDBX_OPEN_DXB_LAZY = 1,
|
||||
MDBX_OPEN_DXB_DSYNC = 2,
|
||||
MDBX_OPEN_LCK = 3,
|
||||
MDBX_OPEN_COPY = 4,
|
||||
MDBX_OPEN_DELETE = 5
|
||||
MDBX_OPEN_DXB_READ,
|
||||
MDBX_OPEN_DXB_LAZY,
|
||||
MDBX_OPEN_DXB_DSYNC,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
MDBX_OPEN_DXB_OVERLAPPED,
|
||||
MDBX_OPEN_DXB_OVERLAPPED_DIRECT,
|
||||
#endif /* Windows */
|
||||
MDBX_OPEN_LCK,
|
||||
MDBX_OPEN_COPY,
|
||||
MDBX_OPEN_DELETE
|
||||
};
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
|
||||
@ -1393,7 +1568,7 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
|
||||
MDBX_INTERNAL_FUNC int
|
||||
osal_resume_threads_after_remap(mdbx_handle_array_t *array);
|
||||
#endif /* Windows */
|
||||
MDBX_INTERNAL_FUNC int osal_msync(osal_mmap_t *map, size_t offset,
|
||||
MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
||||
size_t length,
|
||||
enum osal_syncmode_bits mode_bits);
|
||||
MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle,
|
||||
@ -1436,9 +1611,16 @@ osal_pthread_mutex_lock(pthread_mutex_t *mutex) {
|
||||
#endif /* !Windows */
|
||||
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_monotime(void);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_cputime(size_t *optional_page_faults);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16);
|
||||
MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime);
|
||||
|
||||
/* Wrapper around osal_monotime_to_16dot16() that never maps a non-zero
 * `monotime` to zero: when the conversion yields 0, the result is 1 for a
 * non-zero input and 0 only for a zero input. */
MDBX_MAYBE_UNUSED static inline uint32_t
|
||||
osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) {
|
||||
uint32_t seconds_16dot16 = osal_monotime_to_16dot16(monotime);
|
||||
return seconds_16dot16 ? seconds_16dot16 : /* fix underflow */ (monotime > 0);
|
||||
}
|
||||
|
||||
MDBX_INTERNAL_FUNC bin128_t osal_bootid(void);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* lck stuff */
|
||||
@ -1548,6 +1730,9 @@ MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid);
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#define OSAL_MB2WIDE(FROM, TO) \
|
||||
do { \
|
||||
const char *const from_tmp = (FROM); \
|
||||
@ -1681,6 +1866,11 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
|
||||
|
||||
NTSYSAPI ULONG RtlRandomEx(PULONG Seed);
|
||||
|
||||
typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle,
|
||||
PUCHAR OverlappedRangeStart,
|
||||
ULONG Length);
|
||||
MDBX_INTERNAL_VAR MDBX_SetFileIoOverlappedRange mdbx_SetFileIoOverlappedRange;
|
||||
|
||||
#endif /* Windows */
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -1795,6 +1985,13 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_REFUND must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
|
||||
/** Controls profiling of GC search and updates. */
|
||||
#ifndef MDBX_ENABLE_PROFGC
|
||||
#define MDBX_ENABLE_PROFGC 0
|
||||
#elif !(MDBX_ENABLE_PROFGC == 0 || MDBX_ENABLE_PROFGC == 1)
|
||||
#error MDBX_ENABLE_PROFGC must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_PROFGC */
|
||||
|
||||
/** Controls gathering statistics for page operations. */
|
||||
#ifndef MDBX_ENABLE_PGOP_STAT
|
||||
#define MDBX_ENABLE_PGOP_STAT 1
|
||||
@ -1814,7 +2011,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_BIGFOOT */
|
||||
|
||||
/** Controls use of POSIX madvise() hints and friends. */
|
||||
/** Controls the use of POSIX madvise() and/or similar hints. */
|
||||
#ifndef MDBX_ENABLE_MADVISE
|
||||
#define MDBX_ENABLE_MADVISE 1
|
||||
#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1)
|
||||
@ -1843,23 +2040,22 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
|
||||
#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
|
||||
|
||||
/** Basically, this build-option is for TODO. Guess it should be replaced
|
||||
* with MDBX_ENABLE_WRITEMAP_SPILLING with the three variants:
|
||||
* 0/OFF = Don't track dirty pages at all and don't spilling ones.
|
||||
* This should be by-default on Linux and may-be other systems
|
||||
* (not sure: Darwin/OSX, FreeBSD, Windows 10) where kernel provides
|
||||
* properly LRU tracking and async writing on-demand.
|
||||
* 1/ON = Lite tracking of dirty pages but with LRU labels and explicit
|
||||
* spilling with msync(MS_ASYNC). */
|
||||
#ifndef MDBX_FAKE_SPILL_WRITEMAP
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 1 /* msync(MS_ASYNC) is no-op on Linux */
|
||||
/** Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP
|
||||
* mode. 0/OFF = Don't track dirty pages at all, don't spill ones, and use
|
||||
* msync() to persist data. This is by-default on Linux and other systems where
|
||||
* kernel provides properly LRU tracking and effective flushing on-demand. 1/ON
|
||||
* = Tracking of dirty pages but with LRU labels for spilling and explicit
|
||||
* persisting of them by write(). This may be reasonable for systems with low
|
||||
* performance of msync() and/or LRU tracking. */
|
||||
#ifndef MDBX_AVOID_MSYNC
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MDBX_AVOID_MSYNC 1
|
||||
#else
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 0
|
||||
#define MDBX_AVOID_MSYNC 0
|
||||
#endif
|
||||
#elif !(MDBX_FAKE_SPILL_WRITEMAP == 0 || MDBX_FAKE_SPILL_WRITEMAP == 1)
|
||||
#error MDBX_FAKE_SPILL_WRITEMAP must be defined as 0 or 1
|
||||
#endif /* MDBX_FAKE_SPILL_WRITEMAP */
|
||||
#elif !(MDBX_AVOID_MSYNC == 0 || MDBX_AVOID_MSYNC == 1)
|
||||
#error MDBX_AVOID_MSYNC must be defined as 0 or 1
|
||||
#endif /* MDBX_AVOID_MSYNC */
|
||||
|
||||
/** Controls sort order of internal page number lists.
|
||||
* This is a mostly experimental/advanced option, not intended for regular MDBX users.
|
||||
@ -1916,6 +2112,27 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#ifndef MDBX_HAVE_C11ATOMICS
|
||||
#endif /* MDBX_HAVE_C11ATOMICS */
|
||||
|
||||
/** If defined, enables the use of GCC's `__builtin_cpu_supports()`
|
||||
* for runtime dispatching depending on the CPU's capabilities. */
|
||||
#ifndef MDBX_HAVE_BUILTIN_CPU_SUPPORTS
|
||||
#if defined(__APPLE__) || defined(BIONIC)
|
||||
/* Never use any modern features on Apple's or Google's OSes
|
||||
* because of the many troubles with compatibility and/or performance */
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif defined(__e2k__)
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif __has_builtin(__builtin_cpu_supports) || \
|
||||
defined(__BUILTIN_CPU_SUPPORTS__) || \
|
||||
(defined(__ia32__) && __GNUC_PREREQ(4, 8) && __GLIBC_PREREQ(2, 23))
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 1
|
||||
#else
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#endif
|
||||
#elif !(MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 0 || \
|
||||
MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 1)
|
||||
#error MDBX_HAVE_BUILTIN_CPU_SUPPORTS must be defined as 0 or 1
|
||||
#endif /* MDBX_HAVE_BUILTIN_CPU_SUPPORTS */
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/** Win32 File Locking API for \ref MDBX_LOCKING */
|
||||
@ -1971,7 +2188,10 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
|
||||
/** Advanced: Using POSIX OFD-locks (autodetection by default). */
|
||||
#ifndef MDBX_USE_OFDLOCKS
|
||||
#if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK) && \
|
||||
#if ((defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && \
|
||||
defined(F_OFD_GETLK)) || \
|
||||
(defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64))) && \
|
||||
!defined(MDBX_SAFE4QEMU) && \
|
||||
!defined(__sun) /* OFD-lock are broken on Solaris */
|
||||
#define MDBX_USE_OFDLOCKS 1
|
||||
@ -2057,13 +2277,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#endif /* MDBX_64BIT_ATOMIC */
|
||||
|
||||
#ifndef MDBX_64BIT_CAS
|
||||
#if defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if __GCC_ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2075,6 +2289,12 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN)
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2309,7 +2529,7 @@ MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(
|
||||
/* FROZEN: The version number for a database's datafile format. */
|
||||
#define MDBX_DATA_VERSION 3
|
||||
/* The version number for a database's lockfile format. */
|
||||
#define MDBX_LOCK_VERSION 4
|
||||
#define MDBX_LOCK_VERSION 5
|
||||
|
||||
/* handle for the DB used to track free pages. */
|
||||
#define FREE_DBI 0
|
||||
@ -2513,14 +2733,34 @@ typedef struct MDBX_page {
|
||||
: PAGETYPE_WHOLE(p))
|
||||
|
||||
/* Size of the page header, excluding dynamic data at the end */
|
||||
#define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_ptrs))
|
||||
#define PAGEHDRSZ offsetof(MDBX_page, mp_ptrs)
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* Set of counters for GC profiling. */
typedef struct profgc_stat {
|
||||
/* Monotonic "wall clock" time
|
||||
* spent on reading and searching within GC */
|
||||
uint64_t rtime_monotonic;
|
||||
/* Monotonic "wall clock" time spent
|
||||
* on preparing pages retrieved from GC, including paging-in from disk. */
|
||||
uint64_t xtime_monotonic;
|
||||
/* User-mode CPU time
|
||||
* spent on reading and searching within GC */
|
||||
uint64_t rtime_cpu;
|
||||
/* Number of read/search iterations within GC while allocating pages */
|
||||
uint32_t rsteps;
|
||||
/* Number of requests to allocate page sequences,
|
||||
* i.e. when more than one page is requested at once */
|
||||
uint32_t xpages;
|
||||
/* Slow path execution count */
|
||||
uint32_t spe_counter;
|
||||
/* page faults (hard page faults) */
|
||||
uint32_t majflt;
|
||||
} profgc_stat_t;
|
||||
|
||||
/* Statistics of page operations overall of all (running, completed and aborted)
|
||||
* transactions */
|
||||
typedef struct {
|
||||
typedef struct pgop_stat {
|
||||
MDBX_atomic_uint64_t newly; /* Quantity of a new pages added */
|
||||
MDBX_atomic_uint64_t cow; /* Quantity of pages copied for update */
|
||||
MDBX_atomic_uint64_t clone; /* Quantity of parent's dirty pages clones
|
||||
@ -2532,10 +2772,31 @@ typedef struct {
|
||||
MDBX_atomic_uint64_t
|
||||
wops; /* Number of explicit write operations (not a pages) to a disk */
|
||||
MDBX_atomic_uint64_t
|
||||
gcrtime; /* Time spending for reading/searching GC (aka FreeDB). The
|
||||
unit/scale is platform-depended, see osal_monotime(). */
|
||||
} MDBX_pgop_stat_t;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||
msync; /* Number of explicit msync/flush-to-disk operations */
|
||||
MDBX_atomic_uint64_t
|
||||
fsync; /* Number of explicit fsync/flush-to-disk operations */
|
||||
|
||||
/* Statistics for GC profiling.
|
||||
* Logically this data might be worth moving into a separate structure,
|
||||
* but the difference would be purely cosmetic. */
|
||||
struct {
|
||||
/* Costs of maintaining user data */
|
||||
profgc_stat_t work;
|
||||
/* Costs of maintaining and updating the GC itself */
|
||||
profgc_stat_t self;
|
||||
/* Number of GC update iterations,
|
||||
* greater than 1 if there were retries/restarts */
|
||||
uint32_t wloops;
|
||||
/* Iterations of merging (coalescing) GC records */
|
||||
uint32_t coalescences;
|
||||
/* Wipes of steady commit points in MDBX_UTTERLY_NOSYNC mode */
|
||||
uint32_t wipes;
|
||||
/* Flushes of data to disk outside of MDBX_UTTERLY_NOSYNC mode */
|
||||
uint32_t flushes;
|
||||
/* Attempts to kick lagging readers */
|
||||
uint32_t kicks;
|
||||
} gc_prof;
|
||||
} pgop_stat_t;
|
||||
|
||||
#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES
|
||||
#define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
|
||||
@ -2666,13 +2927,16 @@ typedef struct MDBX_lockinfo {
|
||||
/* Marker to distinguish uniqueness of DB/CLK. */
|
||||
MDBX_atomic_uint64_t mti_bait_uniqueness;
|
||||
|
||||
/* Paired counter of processes that have mlock()ed part of mmapped DB.
|
||||
* The (mti_mlcnt[0] - mti_mlcnt[1]) > 0 means at least one process
|
||||
* has locked at least one page, so madvise() could return EINVAL. */
|
||||
MDBX_atomic_uint32_t mti_mlcnt[2];
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* Statistics of costly ops of all (running, completed and aborted)
|
||||
* transactions */
|
||||
MDBX_pgop_stat_t mti_pgop_stat;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT*/
|
||||
pgop_stat_t mti_pgop_stat;
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
@ -2683,20 +2947,20 @@ typedef struct MDBX_lockinfo {
|
||||
|
||||
atomic_txnid_t mti_oldest_reader;
|
||||
|
||||
/* Timestamp of the last steady sync. Value is represented in a suitable
|
||||
* system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) or
|
||||
* clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_sync_timestamp;
|
||||
/* Timestamp of entering an out-of-sync state. Value is represented in a
|
||||
* suitable system-dependent form, for example clock_gettime(CLOCK_BOOTTIME)
|
||||
* or clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_eoos_timestamp;
|
||||
|
||||
/* Number un-synced-with-disk pages for auto-sync feature. */
|
||||
atomic_pgno_t mti_unsynced_pages;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(MADV_FREE). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
MDBX_atomic_uint64_t mti_unsynced_pages;
|
||||
|
||||
/* Timestamp of the last readers check. */
|
||||
MDBX_atomic_uint64_t mti_reader_check_timestamp;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(DONTNEED). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
|
||||
/* Shared anchor for tracking readahead edge and enabled/disabled status. */
|
||||
pgno_t mti_readahead_anchor;
|
||||
|
||||
@ -2799,7 +3063,7 @@ typedef struct MDBX_dp {
|
||||
MDBX_page *ptr;
|
||||
pgno_t pgno;
|
||||
union {
|
||||
unsigned extra;
|
||||
uint32_t extra;
|
||||
__anonymous_struct_extension__ struct {
|
||||
unsigned multi : 1;
|
||||
unsigned lru : 31;
|
||||
@ -2809,10 +3073,10 @@ typedef struct MDBX_dp {
|
||||
|
||||
/* An DPL (dirty-page list) is a sorted array of MDBX_DPs. */
|
||||
typedef struct MDBX_dpl {
|
||||
unsigned sorted;
|
||||
unsigned length;
|
||||
unsigned pages_including_loose; /* number of pages, but not an entries. */
|
||||
unsigned detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
size_t sorted;
|
||||
size_t length;
|
||||
size_t pages_including_loose; /* number of pages, but not an entries. */
|
||||
size_t detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
||||
(!defined(__cplusplus) && defined(_MSC_VER))
|
||||
MDBX_dp items[] /* dynamic size with holes at zero and after the last */;
|
||||
@ -2831,11 +3095,17 @@ typedef struct MDBX_dpl {
|
||||
((1u << 17) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
|
||||
|
||||
#define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1])
|
||||
#define MDBX_PNL_SIZE(pl) ((pl)[0])
|
||||
#define MDBX_PNL_GETSIZE(pl) ((size_t)((pl)[0]))
|
||||
#define MDBX_PNL_SETSIZE(pl, size) \
|
||||
do { \
|
||||
const size_t __size = size; \
|
||||
assert(__size < INT_MAX); \
|
||||
(pl)[0] = (pgno_t)__size; \
|
||||
} while (0)
|
||||
#define MDBX_PNL_FIRST(pl) ((pl)[1])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_SIZE(pl)])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)])
|
||||
#define MDBX_PNL_BEGIN(pl) (&(pl)[1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_SIZE(pl) + 1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_GETSIZE(pl) + 1])
|
||||
|
||||
#if MDBX_PNL_ASCENDING
|
||||
#define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl)
|
||||
@ -2845,8 +3115,8 @@ typedef struct MDBX_dpl {
|
||||
#define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl)
|
||||
#endif
|
||||
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_SIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_SIZE(pl) == 0)
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0)
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Internal structures */
|
||||
@ -2865,6 +3135,9 @@ typedef struct MDBX_dbx {
|
||||
|
||||
typedef struct troika {
|
||||
uint8_t fsm, recent, prefer_steady, tail_and_flags;
|
||||
#if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */
|
||||
uint32_t unused_pad;
|
||||
#endif
|
||||
#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7)
|
||||
#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64)
|
||||
#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128)
|
||||
@ -2886,9 +3159,13 @@ struct MDBX_txn {
|
||||
/* Additional flag for sync_locked() */
|
||||
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
|
||||
|
||||
#define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
|
||||
#define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not altered */
|
||||
|
||||
#define TXN_FLAGS \
|
||||
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_UPDATE_GC | \
|
||||
MDBX_TXN_FROZEN_RE)
|
||||
|
||||
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
|
||||
((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
|
||||
@ -2947,18 +3224,18 @@ struct MDBX_txn {
|
||||
struct {
|
||||
meta_troika_t troika;
|
||||
/* In write txns, array of cursors for each DB */
|
||||
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
pgno_t *relist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
#if MDBX_ENABLE_REFUND
|
||||
pgno_t loose_refund_wl /* FIXME: describe */;
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
/* a sequence to spilling dirty page with LRU policy */
|
||||
unsigned dirtylru;
|
||||
/* dirtylist room: Dirty array size - dirty pages visible to this txn.
|
||||
* Includes ancestor txns' dirty pages not hidden by other txns'
|
||||
* dirty/spilled pages. Thus commit(nested txn) has room to merge
|
||||
* dirtylist into mt_parent after freeing hidden mt_parent pages. */
|
||||
unsigned dirtyroom;
|
||||
/* a sequence to spilling dirty page with LRU policy */
|
||||
unsigned dirtylru;
|
||||
size_t dirtyroom;
|
||||
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
|
||||
MDBX_dpl *dirtylist;
|
||||
/* The list of reclaimed txns from GC */
|
||||
@ -2969,8 +3246,8 @@ struct MDBX_txn {
|
||||
* in this transaction, linked through `mp_next`. */
|
||||
MDBX_page *loose_pages;
|
||||
/* Number of loose pages (tw.loose_pages) */
|
||||
unsigned loose_count;
|
||||
unsigned spill_least_removed;
|
||||
size_t loose_count;
|
||||
size_t spill_least_removed;
|
||||
/* The sorted list of dirty pages we temporarily wrote to disk
|
||||
* because the dirty list was full. page numbers in here are
|
||||
* shifted left by 1, deleted slots have the LSB set. */
|
||||
@ -3024,9 +3301,7 @@ struct MDBX_cursor {
|
||||
#define C_SUB 0x04 /* Cursor is a sub-cursor */
|
||||
#define C_DEL 0x08 /* last op was a cursor_del */
|
||||
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
|
||||
#define C_RECLAIMING 0x20 /* GC lookup is prohibited */
|
||||
#define C_GCFREEZE 0x40 /* reclaimed_pglist must not be updated */
|
||||
uint8_t mc_flags; /* see mdbx_cursor */
|
||||
uint8_t mc_flags;
|
||||
|
||||
/* Cursor checking flags. */
|
||||
#define CC_BRANCH 0x01 /* same as P_BRANCH for CHECK_LEAF_TYPE() */
|
||||
@ -3037,7 +3312,7 @@ struct MDBX_cursor {
|
||||
#define CC_LEAF2 0x20 /* same as P_LEAF2 for CHECK_LEAF_TYPE() */
|
||||
#define CC_RETIRING 0x40 /* refs to child pages may be invalid */
|
||||
#define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */
|
||||
uint8_t mc_checking; /* page checking level */
|
||||
uint8_t mc_checking;
|
||||
|
||||
MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */
|
||||
indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */
|
||||
@ -3086,14 +3361,20 @@ struct MDBX_env {
|
||||
osal_mmap_t me_dxb_mmap; /* The main data file */
|
||||
#define me_map me_dxb_mmap.dxb
|
||||
#define me_lazy_fd me_dxb_mmap.fd
|
||||
mdbx_filehandle_t me_dsync_fd;
|
||||
#define me_fd4data me_ioring.fd
|
||||
mdbx_filehandle_t me_dsync_fd, me_fd4meta;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
HANDLE me_overlapped_fd, me_data_lock_event;
|
||||
#endif /* Windows */
|
||||
osal_mmap_t me_lck_mmap; /* The lock file */
|
||||
#define me_lfd me_lck_mmap.fd
|
||||
struct MDBX_lockinfo *me_lck;
|
||||
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
unsigned me_branch_nodemax; /* max size of a branch-node */
|
||||
atomic_pgno_t me_mlocked_pgno;
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
int8_t me_stuck_meta; /* recovery-only: target meta page or less than zero */
|
||||
uint16_t me_merge_threshold,
|
||||
me_merge_threshold_gc; /* pages emptier than this are candidates for
|
||||
@ -3165,6 +3446,7 @@ struct MDBX_env {
|
||||
unsigned me_dp_reserve_len;
|
||||
/* PNL of pages that became unused in a write txn */
|
||||
MDBX_PNL me_retired_pages;
|
||||
osal_ioring_t me_ioring;
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
osal_srwlock_t me_remap_guard;
|
||||
@ -3190,7 +3472,7 @@ struct MDBX_env {
|
||||
#define xMDBX_DEBUG_SPILLING 0
|
||||
#endif
|
||||
#if xMDBX_DEBUG_SPILLING == 2
|
||||
unsigned debug_dirtied_est, debug_dirtied_act;
|
||||
size_t debug_dirtied_est, debug_dirtied_act;
|
||||
#endif /* xMDBX_DEBUG_SPILLING */
|
||||
|
||||
/* ------------------------------------------------- stub for lck-less mode */
|
||||
@ -3295,10 +3577,22 @@ MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line,
|
||||
#define FATAL(fmt, ...) \
|
||||
debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__);
|
||||
|
||||
#if MDBX_DEBUG
|
||||
#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line)
|
||||
#else /* MDBX_DEBUG */
|
||||
MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func,
|
||||
unsigned line);
|
||||
#define ASSERT_FAIL(env, msg, func, line) \
|
||||
do { \
|
||||
(void)(env); \
|
||||
assert_fail(msg, func, line); \
|
||||
} while (0)
|
||||
#endif /* MDBX_DEBUG */
|
||||
|
||||
#define ENSURE_MSG(env, expr, msg) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) \
|
||||
mdbx_assert_fail(env, msg, __func__, __LINE__); \
|
||||
ASSERT_FAIL(env, msg, __func__, __LINE__); \
|
||||
} while (0)
|
||||
|
||||
#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr)
|
||||
@ -3369,7 +3663,9 @@ MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin,
|
||||
MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key);
|
||||
|
||||
MDBX_INTERNAL_FUNC void global_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void global_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void thread_dtor(void *ptr);
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -3490,12 +3786,12 @@ typedef struct MDBX_node {
|
||||
#error "Oops, some flags overlapped or wrong"
|
||||
#endif
|
||||
|
||||
/* max number of pages to commit in one writev() call */
|
||||
#define MDBX_COMMIT_PAGES 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_COMMIT_PAGES /* sysconf(_SC_IOV_MAX) */
|
||||
#undef MDBX_COMMIT_PAGES
|
||||
#define MDBX_COMMIT_PAGES IOV_MAX
|
||||
#endif
|
||||
/* Max length of iov-vector passed to writev() call, used for auxiliary writes */
|
||||
#define MDBX_AUXILARY_IOV_MAX 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX
|
||||
#undef MDBX_AUXILARY_IOV_MAX
|
||||
#define MDBX_AUXILARY_IOV_MAX IOV_MAX
|
||||
#endif /* MDBX_AUXILARY_IOV_MAX */
|
||||
|
||||
/*
|
||||
* /
|
||||
@ -3552,20 +3848,24 @@ ceil_powerof2(size_t value, size_t granularity) {
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned
|
||||
log2n_powerof2(size_t value) {
|
||||
assert(value > 0 && value < INT32_MAX && is_powerof2(value));
|
||||
assert((value & -(int32_t)value) == value);
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl)
|
||||
return __builtin_ctzl(value);
|
||||
log2n_powerof2(size_t value_uintptr) {
|
||||
assert(value_uintptr > 0 && value_uintptr < INT32_MAX &&
|
||||
is_powerof2(value_uintptr));
|
||||
assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr);
|
||||
const uint32_t value_uint32 = (uint32_t)value_uintptr;
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctz)
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(unsigned));
|
||||
return __builtin_ctz(value_uint32);
|
||||
#elif defined(_MSC_VER)
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, (unsigned long)value);
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(long));
|
||||
_BitScanForward(&index, value_uint32);
|
||||
return index;
|
||||
#else
|
||||
static const uint8_t debruijn_ctz32[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
|
||||
return debruijn_ctz32[(uint32_t)(value * 0x077CB531u) >> 27];
|
||||
return debruijn_ctz32[(uint32_t)(value_uint32 * 0x077CB531ul) >> 27];
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -3892,19 +4192,23 @@ static int dump_sdb(MDBX_txn *txn, MDBX_dbi dbi, char *name) {
|
||||
}
|
||||
|
||||
static void usage(void) {
|
||||
fprintf(stderr,
|
||||
"usage: %s [-V] [-q] [-f file] [-l] [-p] [-r] [-a|-s subdb] "
|
||||
"dbpath\n"
|
||||
" -V\t\tprint version and exit\n"
|
||||
" -q\t\tbe quiet\n"
|
||||
" -f\t\twrite to file instead of stdout\n"
|
||||
" -l\t\tlist subDBs and exit\n"
|
||||
" -p\t\tuse printable characters\n"
|
||||
" -r\t\trescue mode (ignore errors to dump corrupted DB)\n"
|
||||
" -a\t\tdump main DB and all subDBs\n"
|
||||
" -s name\tdump only the specified named subDB\n"
|
||||
" \t\tby default dump only the main DB\n",
|
||||
prog);
|
||||
fprintf(
|
||||
stderr,
|
||||
"usage: %s "
|
||||
"[-V] [-q] [-f file] [-l] [-p] [-r] [-a|-s subdb] [-u|U] "
|
||||
"dbpath\n"
|
||||
" -V\t\tprint version and exit\n"
|
||||
" -q\t\tbe quiet\n"
|
||||
" -f\t\twrite to file instead of stdout\n"
|
||||
" -l\t\tlist subDBs and exit\n"
|
||||
" -p\t\tuse printable characters\n"
|
||||
" -r\t\trescue mode (ignore errors to dump corrupted DB)\n"
|
||||
" -a\t\tdump main DB and all subDBs\n"
|
||||
" -s name\tdump only the specified named subDB\n"
|
||||
" -u\t\twarmup database before dumping\n"
|
||||
" -U\t\twarmup and try lock database pages in memory before dumping\n"
|
||||
" \t\tby default dump only the main DB\n",
|
||||
prog);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
@ -3925,11 +4229,14 @@ int main(int argc, char *argv[]) {
|
||||
char *subname = nullptr, *buf4free = nullptr;
|
||||
unsigned envflags = 0;
|
||||
bool alldbs = false, list = false;
|
||||
bool warmup = false;
|
||||
MDBX_warmup_flags_t warmup_flags = MDBX_warmup_default;
|
||||
|
||||
if (argc < 2)
|
||||
usage();
|
||||
|
||||
while ((i = getopt(argc, argv,
|
||||
"uU"
|
||||
"a"
|
||||
"f:"
|
||||
"l"
|
||||
@ -3986,6 +4293,14 @@ int main(int argc, char *argv[]) {
|
||||
case 'r':
|
||||
rescue = true;
|
||||
break;
|
||||
case 'u':
|
||||
warmup = true;
|
||||
break;
|
||||
case 'U':
|
||||
warmup = true;
|
||||
warmup_flags =
|
||||
MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
}
|
||||
@ -4039,6 +4354,14 @@ int main(int argc, char *argv[]) {
|
||||
goto env_close;
|
||||
}
|
||||
|
||||
if (warmup) {
|
||||
rc = mdbx_env_warmup(env, nullptr, warmup_flags, 3600 * 65536);
|
||||
if (MDBX_IS_ERROR(rc)) {
|
||||
error("mdbx_env_warmup", rc);
|
||||
goto env_close;
|
||||
}
|
||||
}
|
||||
|
||||
rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
error("mdbx_txn_begin", rc);
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
* top-level directory of the distribution or, alternatively, at
|
||||
* <http://www.OpenLDAP.org/license.html>. */
|
||||
|
||||
#define MDBX_BUILD_SOURCERY 86a8d6c403a2023fc2df0ab38f71339b78e82f0aa786f480a1cb166c05497134_v0_12_1_0_gb36a07a5
|
||||
#define MDBX_BUILD_SOURCERY e17be563de6f6f85e208ded5aacc1387bc0addf6ce5540c99d0d15db2c3e8edd_v0_12_2_0_g9b062cf0
|
||||
#ifdef MDBX_CONFIG_H
|
||||
#include MDBX_CONFIG_H
|
||||
#endif
|
||||
@ -149,7 +149,11 @@
|
||||
#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* __USE_MINGW_ANSI_STDIO */
|
||||
#endif /* MinGW */
|
||||
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && !defined(UNICODE)
|
||||
#define UNICODE
|
||||
#endif /* UNICODE */
|
||||
|
||||
#include "mdbx.h"
|
||||
/*
|
||||
@ -216,7 +220,7 @@
|
||||
#define SSIZE_MAX INTPTR_MAX
|
||||
#endif
|
||||
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul || defined(_WIN64)
|
||||
#define MDBX_WORDBITS 64
|
||||
#else
|
||||
#define MDBX_WORDBITS 32
|
||||
@ -389,10 +393,6 @@ __extern_C key_t ftok(const char *, int);
|
||||
#elif _WIN32_WINNT < 0x0500
|
||||
#error At least 'Windows 2000' API is required for libmdbx.
|
||||
#endif /* _WIN32_WINNT */
|
||||
#if (defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* MinGW */
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif /* WIN32_LEAN_AND_MEAN */
|
||||
@ -416,8 +416,10 @@ __extern_C key_t ftok(const char *, int);
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#endif /*---------------------------------------------------------------------*/
|
||||
@ -1169,9 +1171,6 @@ static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
|
||||
#define vsnprintf _vsnprintf /* ntdll */
|
||||
#endif
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#else /*----------------------------------------------------------------------*/
|
||||
|
||||
typedef pthread_t osal_thread_t;
|
||||
@ -1202,18 +1201,16 @@ typedef pthread_mutex_t osal_fastmutex_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* OS abstraction layer stuff */
|
||||
|
||||
MDBX_INTERNAL_VAR unsigned sys_pagesize;
|
||||
MDBX_MAYBE_UNUSED MDBX_INTERNAL_VAR unsigned sys_allocation_granularity;
|
||||
|
||||
/* Get the size of a memory page for the system.
|
||||
* This is the basic size that the platform's memory manager uses, and is
|
||||
* fundamental to the use of memory-mapped files. */
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t
|
||||
osal_syspagesize(void) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
SYSTEM_INFO si;
|
||||
GetSystemInfo(&si);
|
||||
return si.dwPageSize;
|
||||
#else
|
||||
return sysconf(_SC_PAGE_SIZE);
|
||||
#endif
|
||||
assert(sys_pagesize > 0 && (sys_pagesize & (sys_pagesize - 1)) == 0);
|
||||
return sys_pagesize;
|
||||
}
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@ -1252,8 +1249,140 @@ typedef union osal_srwlock {
|
||||
} osal_srwlock_t;
|
||||
#endif /* Windows */
|
||||
|
||||
#ifndef MDBX_HAVE_PWRITEV
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
|
||||
#elif defined(__ANDROID_API__)
|
||||
|
||||
#if __ANDROID_API__ < 24
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#endif
|
||||
|
||||
#elif defined(__APPLE__) || defined(__MACH__) || defined(_DARWIN_C_SOURCE)
|
||||
|
||||
#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_VERSION_11_0) && \
|
||||
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
|
||||
/* FIXME: add checks for IOS versions, etc */
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
|
||||
#elif defined(_SC_IOV_MAX) || (defined(IOV_MAX) && IOV_MAX > 1)
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
|
||||
typedef struct ior_item {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
OVERLAPPED ov;
|
||||
#define ior_svg_gap4terminator 1
|
||||
#define ior_sgv_element FILE_SEGMENT_ELEMENT
|
||||
#else
|
||||
size_t offset;
|
||||
#if MDBX_HAVE_PWRITEV
|
||||
size_t sgvcnt;
|
||||
#define ior_svg_gap4terminator 0
|
||||
#define ior_sgv_element struct iovec
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
#endif /* !Windows */
|
||||
union {
|
||||
MDBX_val single;
|
||||
#if defined(ior_sgv_element)
|
||||
ior_sgv_element sgv[1 + ior_svg_gap4terminator];
|
||||
#endif /* ior_sgv_element */
|
||||
};
|
||||
} ior_item_t;
|
||||
|
||||
typedef struct osal_ioring {
|
||||
unsigned slots_left;
|
||||
unsigned allocated;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define IOR_DIRECT 1
|
||||
#define IOR_OVERLAPPED 2
|
||||
#define IOR_STATE_LOCKED 1
|
||||
unsigned pagesize;
|
||||
unsigned last_sgvcnt;
|
||||
size_t last_bytes;
|
||||
uint8_t flags, state, pagesize_ln2;
|
||||
unsigned event_stack;
|
||||
HANDLE *event_pool;
|
||||
volatile LONG async_waiting;
|
||||
volatile LONG async_completed;
|
||||
HANDLE async_done;
|
||||
|
||||
#define ior_last_sgvcnt(ior, item) (ior)->last_sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#elif MDBX_HAVE_PWRITEV
|
||||
unsigned last_bytes;
|
||||
#define ior_last_sgvcnt(ior, item) (item)->sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#else
|
||||
#define ior_last_sgvcnt(ior, item) (1)
|
||||
#define ior_last_bytes(ior, item) (item)->single.iov_len
|
||||
#endif /* !Windows */
|
||||
mdbx_filehandle_t fd;
|
||||
ior_item_t *last;
|
||||
ior_item_t *pool;
|
||||
char *boundary;
|
||||
} osal_ioring_t;
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
/* Actually this is not ioring for now, but on the way. */
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
uint8_t flags,
|
||||
#endif /* Windows */
|
||||
mdbx_filehandle_t fd);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *, size_t items);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_destroy(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_reset(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ctx, const size_t offset,
|
||||
void *data, const size_t bytes);
|
||||
typedef struct osal_ioring_write_result {
|
||||
int err;
|
||||
unsigned wops;
|
||||
} osal_ioring_write_result_t;
|
||||
MDBX_INTERNAL_FUNC osal_ioring_write_result_t
|
||||
osal_ioring_write(osal_ioring_t *ior);
|
||||
|
||||
typedef struct iov_ctx iov_ctx_t;
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_walk(
|
||||
osal_ioring_t *ior, iov_ctx_t *ctx,
|
||||
void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes));
|
||||
|
||||
/* Returns the ring's `slots_left` counter as-is. */
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_left(const osal_ioring_t *ior) {
|
||||
return ior->slots_left;
|
||||
}
|
||||
|
||||
/* Returns `allocated - slots_left`, i.e. the part of the allocated
 * slots that is not counted as left. */
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_used(const osal_ioring_t *ior) {
|
||||
return ior->allocated - ior->slots_left;
|
||||
}
|
||||
|
||||
/* Ensures the ring has at least the requested number of items allocated.
 *
 * The requested `items` count is raised to a minimum of 32, on Windows
 * additionally raised to `bytes >> pagesize_ln2`, and finally capped at 65536.
 * On non-Windows builds `bytes` is unused.
 *
 * Returns MDBX_SUCCESS when `ior->allocated` already covers the adjusted
 * count, otherwise the result of osal_ioring_resize(). */
MDBX_MAYBE_UNUSED static inline int
|
||||
osal_ioring_reserve(osal_ioring_t *ior, size_t items, size_t bytes) {
|
||||
items = (items > 32) ? items : 32; /* lower bound: 32 items */
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
const size_t npages = bytes >> ior->pagesize_ln2; /* bytes scaled down by the ring's page-size shift */
|
||||
items = (items > npages) ? items : npages; /* at least one item per such page */
|
||||
#else
|
||||
(void)bytes; /* silence the unused-parameter warning */
|
||||
#endif
|
||||
items = (items < 65536) ? items : 65536; /* upper bound: 65536 items */
|
||||
if (likely(ior->allocated >= items))
|
||||
return MDBX_SUCCESS; /* already large enough, nothing to do */
|
||||
return osal_ioring_resize(ior, items);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* libc compatibility stuff */
|
||||
|
||||
@ -1279,10 +1408,53 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny);
|
||||
MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny);
|
||||
|
||||
/* max bytes to write in one call */
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x01000000)
|
||||
#if defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x10000000)
|
||||
#elif defined(_WIN32)
|
||||
#define MAX_WRITE UINT32_C(0x04000000)
|
||||
#else
|
||||
#define MAX_WRITE UINT32_C(0x3fff0000)
|
||||
#define MAX_WRITE UINT32_C(0x3f000000)
|
||||
|
||||
#if defined(F_GETLK64) && defined(F_SETLK64) && defined(F_SETLKW64) && \
|
||||
!defined(__ANDROID_API__)
|
||||
#define MDBX_F_SETLK F_SETLK64
|
||||
#define MDBX_F_SETLKW F_SETLKW64
|
||||
#define MDBX_F_GETLK F_GETLK64
|
||||
#if (__GLIBC_PREREQ(2, 28) && \
|
||||
(defined(__USE_LARGEFILE64) || defined(__LARGEFILE64_SOURCE) || \
|
||||
defined(_USE_LARGEFILE64) || defined(_LARGEFILE64_SOURCE))) || \
|
||||
defined(fcntl64)
|
||||
#define MDBX_FCNTL fcntl64
|
||||
#else
|
||||
#define MDBX_FCNTL fcntl
|
||||
#endif
|
||||
#define MDBX_STRUCT_FLOCK struct flock64
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX UINT64_C(0x7fffFFFFfff00000)
|
||||
#endif /* OFF_T_MAX */
|
||||
#else
|
||||
#define MDBX_F_SETLK F_SETLK
|
||||
#define MDBX_F_SETLKW F_SETLKW
|
||||
#define MDBX_F_GETLK F_GETLK
|
||||
#define MDBX_FCNTL fcntl
|
||||
#define MDBX_STRUCT_FLOCK struct flock
|
||||
#endif /* MDBX_F_SETLK, MDBX_F_SETLKW, MDBX_F_GETLK */
|
||||
|
||||
#if defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64) && !defined(__ANDROID_API__)
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK64
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW64
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK64
|
||||
#else
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX \
|
||||
(((sizeof(off_t) > 4) ? INT64_MAX : INT32_MAX) & ~(size_t)0xFffff)
|
||||
#endif /* OFF_T_MAX */
|
||||
#endif /* MDBX_F_OFD_SETLK64, MDBX_F_OFD_SETLKW64, MDBX_F_OFD_GETLK64 */
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
@ -1325,8 +1497,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex);
|
||||
MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex);
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov,
|
||||
int iovcnt, uint64_t offset,
|
||||
size_t expected_written);
|
||||
size_t sgvcnt, uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count,
|
||||
uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf,
|
||||
@ -1354,12 +1525,16 @@ MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
|
||||
MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
|
||||
|
||||
enum osal_openfile_purpose {
|
||||
MDBX_OPEN_DXB_READ = 0,
|
||||
MDBX_OPEN_DXB_LAZY = 1,
|
||||
MDBX_OPEN_DXB_DSYNC = 2,
|
||||
MDBX_OPEN_LCK = 3,
|
||||
MDBX_OPEN_COPY = 4,
|
||||
MDBX_OPEN_DELETE = 5
|
||||
MDBX_OPEN_DXB_READ,
|
||||
MDBX_OPEN_DXB_LAZY,
|
||||
MDBX_OPEN_DXB_DSYNC,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
MDBX_OPEN_DXB_OVERLAPPED,
|
||||
MDBX_OPEN_DXB_OVERLAPPED_DIRECT,
|
||||
#endif /* Windows */
|
||||
MDBX_OPEN_LCK,
|
||||
MDBX_OPEN_COPY,
|
||||
MDBX_OPEN_DELETE
|
||||
};
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
|
||||
@ -1393,7 +1568,7 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
|
||||
MDBX_INTERNAL_FUNC int
|
||||
osal_resume_threads_after_remap(mdbx_handle_array_t *array);
|
||||
#endif /* Windows */
|
||||
MDBX_INTERNAL_FUNC int osal_msync(osal_mmap_t *map, size_t offset,
|
||||
MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
||||
size_t length,
|
||||
enum osal_syncmode_bits mode_bits);
|
||||
MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle,
|
||||
@ -1436,9 +1611,16 @@ osal_pthread_mutex_lock(pthread_mutex_t *mutex) {
|
||||
#endif /* !Windows */
|
||||
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_monotime(void);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_cputime(size_t *optional_page_faults);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16);
|
||||
MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime);
|
||||
|
||||
/* Wrapper around osal_monotime_to_16dot16() that never maps a non-zero
 * `monotime` to zero: when the conversion yields 0, the result is 1 if
 * `monotime > 0` and 0 otherwise. */
MDBX_MAYBE_UNUSED static inline uint32_t
|
||||
osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) {
|
||||
uint32_t seconds_16dot16 = osal_monotime_to_16dot16(monotime);
|
||||
return seconds_16dot16 ? seconds_16dot16 : /* fix underflow */ (monotime > 0);
|
||||
}
|
||||
|
||||
MDBX_INTERNAL_FUNC bin128_t osal_bootid(void);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* lck stuff */
|
||||
@ -1548,6 +1730,9 @@ MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid);
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#define OSAL_MB2WIDE(FROM, TO) \
|
||||
do { \
|
||||
const char *const from_tmp = (FROM); \
|
||||
@ -1681,6 +1866,11 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
|
||||
|
||||
NTSYSAPI ULONG RtlRandomEx(PULONG Seed);
|
||||
|
||||
typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle,
|
||||
PUCHAR OverlappedRangeStart,
|
||||
ULONG Length);
|
||||
MDBX_INTERNAL_VAR MDBX_SetFileIoOverlappedRange mdbx_SetFileIoOverlappedRange;
|
||||
|
||||
#endif /* Windows */
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -1795,6 +1985,13 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_REFUND must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
|
||||
/** Controls profiling of GC search and updates. */
|
||||
#ifndef MDBX_ENABLE_PROFGC
|
||||
#define MDBX_ENABLE_PROFGC 0
|
||||
#elif !(MDBX_ENABLE_PROFGC == 0 || MDBX_ENABLE_PROFGC == 1)
|
||||
#error MDBX_ENABLE_PROFGC must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_PROFGC */
|
||||
|
||||
/** Controls gathering statistics for page operations. */
|
||||
#ifndef MDBX_ENABLE_PGOP_STAT
|
||||
#define MDBX_ENABLE_PGOP_STAT 1
|
||||
@ -1814,7 +2011,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_BIGFOOT */
|
||||
|
||||
/** Controls use of POSIX madvise() hints and friends. */
|
||||
/** Controls the use of POSIX madvise() and/or similar hints. */
|
||||
#ifndef MDBX_ENABLE_MADVISE
|
||||
#define MDBX_ENABLE_MADVISE 1
|
||||
#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1)
|
||||
@ -1843,23 +2040,22 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
|
||||
#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
|
||||
|
||||
/** Basically, this build-option is for TODO. Guess it should be replaced
|
||||
* with MDBX_ENABLE_WRITEMAP_SPILLING with the three variants:
|
||||
* 0/OFF = Don't track dirty pages at all and don't spilling ones.
|
||||
* This should be by-default on Linux and may-be other systems
|
||||
* (not sure: Darwin/OSX, FreeBSD, Windows 10) where kernel provides
|
||||
* properly LRU tracking and async writing on-demand.
|
||||
* 1/ON = Lite tracking of dirty pages but with LRU labels and explicit
|
||||
* spilling with msync(MS_ASYNC). */
|
||||
#ifndef MDBX_FAKE_SPILL_WRITEMAP
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 1 /* msync(MS_ASYNC) is no-op on Linux */
|
||||
/** Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP
|
||||
* mode. 0/OFF = Don't track dirty pages at all, don't spill ones, and use
|
||||
* msync() to persist data. This is by-default on Linux and other systems where
|
||||
* kernel provides properly LRU tracking and effective flushing on-demand. 1/ON
|
||||
* = Tracking of dirty pages but with LRU labels for spilling and explicit
|
||||
* persist ones by write(). This may be reasonable for systems with low
|
||||
* performance of msync() and/or LRU tracking. */
|
||||
#ifndef MDBX_AVOID_MSYNC
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MDBX_AVOID_MSYNC 1
|
||||
#else
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 0
|
||||
#define MDBX_AVOID_MSYNC 0
|
||||
#endif
|
||||
#elif !(MDBX_FAKE_SPILL_WRITEMAP == 0 || MDBX_FAKE_SPILL_WRITEMAP == 1)
|
||||
#error MDBX_FAKE_SPILL_WRITEMAP must be defined as 0 or 1
|
||||
#endif /* MDBX_FAKE_SPILL_WRITEMAP */
|
||||
#elif !(MDBX_AVOID_MSYNC == 0 || MDBX_AVOID_MSYNC == 1)
|
||||
#error MDBX_AVOID_MSYNC must be defined as 0 or 1
|
||||
#endif /* MDBX_AVOID_MSYNC */
|
||||
|
||||
/** Controls sort order of internal page number lists.
|
||||
* This is a mostly experimental/advanced option, not intended for regular MDBX users.
|
||||
@ -1916,6 +2112,27 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#ifndef MDBX_HAVE_C11ATOMICS
|
||||
#endif /* MDBX_HAVE_C11ATOMICS */
|
||||
|
||||
/** If defined, enables use of GCC's `__builtin_cpu_supports()`
|
||||
* for runtime dispatching depending on the CPU's capabilities. */
|
||||
#ifndef MDBX_HAVE_BUILTIN_CPU_SUPPORTS
|
||||
#if defined(__APPLE__) || defined(BIONIC)
|
||||
/* Never use any modern features on Apple's or Google's OSes
|
||||
* because of numerous compatibility and/or performance problems */
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif defined(__e2k__)
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif __has_builtin(__builtin_cpu_supports) || \
|
||||
defined(__BUILTIN_CPU_SUPPORTS__) || \
|
||||
(defined(__ia32__) && __GNUC_PREREQ(4, 8) && __GLIBC_PREREQ(2, 23))
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 1
|
||||
#else
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#endif
|
||||
#elif !(MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 0 || \
|
||||
MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 1)
|
||||
#error MDBX_HAVE_BUILTIN_CPU_SUPPORTS must be defined as 0 or 1
|
||||
#endif /* MDBX_HAVE_BUILTIN_CPU_SUPPORTS */
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/** Win32 File Locking API for \ref MDBX_LOCKING */
|
||||
@ -1971,7 +2188,10 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
|
||||
/** Advanced: Using POSIX OFD-locks (autodetection by default). */
|
||||
#ifndef MDBX_USE_OFDLOCKS
|
||||
#if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK) && \
|
||||
#if ((defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && \
|
||||
defined(F_OFD_GETLK)) || \
|
||||
(defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64))) && \
|
||||
!defined(MDBX_SAFE4QEMU) && \
|
||||
!defined(__sun) /* OFD-lock are broken on Solaris */
|
||||
#define MDBX_USE_OFDLOCKS 1
|
||||
@ -2057,13 +2277,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#endif /* MDBX_64BIT_ATOMIC */
|
||||
|
||||
#ifndef MDBX_64BIT_CAS
|
||||
#if defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if __GCC_ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2075,6 +2289,12 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN)
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2309,7 +2529,7 @@ MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(
|
||||
/* FROZEN: The version number for a database's datafile format. */
|
||||
#define MDBX_DATA_VERSION 3
|
||||
/* The version number for a database's lockfile format. */
|
||||
#define MDBX_LOCK_VERSION 4
|
||||
#define MDBX_LOCK_VERSION 5
|
||||
|
||||
/* handle for the DB used to track free pages. */
|
||||
#define FREE_DBI 0
|
||||
@ -2513,14 +2733,34 @@ typedef struct MDBX_page {
|
||||
: PAGETYPE_WHOLE(p))
|
||||
|
||||
/* Size of the page header, excluding dynamic data at the end */
|
||||
#define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_ptrs))
|
||||
#define PAGEHDRSZ offsetof(MDBX_page, mp_ptrs)
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* GC profiling counters: timings and event counts. */
typedef struct profgc_stat {
|
||||
/* Monotonic wall-clock time
|
||||
* spent on reading and searching inside the GC */
|
||||
uint64_t rtime_monotonic;
|
||||
/* Monotonic wall-clock time spent
|
||||
* on preparing pages retrieved from the GC, including paging-in from disk. */
|
||||
uint64_t xtime_monotonic;
|
||||
/* User-mode CPU time
|
||||
* spent on reading and searching inside the GC */
|
||||
uint64_t rtime_cpu;
|
||||
/* Number of read/search iterations inside the GC while allocating pages */
|
||||
uint32_t rsteps;
|
||||
/* Number of requests to allocate page sequences,
|
||||
* i.e. when more than one page is requested */
|
||||
uint32_t xpages;
|
||||
/* Slow path execution count */
|
||||
uint32_t spe_counter;
|
||||
/* page faults (hard page faults) */
|
||||
uint32_t majflt;
|
||||
} profgc_stat_t;
|
||||
|
||||
/* Statistics of page operations overall of all (running, completed and aborted)
|
||||
* transactions */
|
||||
typedef struct {
|
||||
typedef struct pgop_stat {
|
||||
MDBX_atomic_uint64_t newly; /* Quantity of a new pages added */
|
||||
MDBX_atomic_uint64_t cow; /* Quantity of pages copied for update */
|
||||
MDBX_atomic_uint64_t clone; /* Quantity of parent's dirty pages clones
|
||||
@ -2532,10 +2772,31 @@ typedef struct {
|
||||
MDBX_atomic_uint64_t
|
||||
wops; /* Number of explicit write operations (not a pages) to a disk */
|
||||
MDBX_atomic_uint64_t
|
||||
gcrtime; /* Time spending for reading/searching GC (aka FreeDB). The
|
||||
unit/scale is platform-dependent, see osal_monotime(). */
|
||||
} MDBX_pgop_stat_t;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||
msync; /* Number of explicit msync/flush-to-disk operations */
|
||||
MDBX_atomic_uint64_t
|
||||
fsync; /* Number of explicit fsync/flush-to-disk operations */
|
||||
|
||||
/* Statistics for GC profiling.
|
||||
* Logically this data could perhaps be moved into a separate structure,
|
||||
* but the difference would be purely cosmetic. */
|
||||
struct {
|
||||
/* Затраты на поддержку данных пользователя */
|
||||
profgc_stat_t work;
|
||||
/* Затраты на поддержку и обновления самой GC */
|
||||
profgc_stat_t self;
|
||||
/* Итераций обновления GC,
|
||||
* больше 1 если были повторы/перезапуски */
|
||||
uint32_t wloops;
|
||||
/* Итерации слияния записей GC */
|
||||
uint32_t coalescences;
|
||||
/* Уничтожения steady-точек фиксации в MDBX_UTTERLY_NOSYNC */
|
||||
uint32_t wipes;
|
||||
/* Сбросы данных на диск вне MDBX_UTTERLY_NOSYNC */
|
||||
uint32_t flushes;
|
||||
/* Попытки пнуть тормозящих читателей */
|
||||
uint32_t kicks;
|
||||
} gc_prof;
|
||||
} pgop_stat_t;
|
||||
|
||||
#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES
|
||||
#define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
|
||||
@ -2666,13 +2927,16 @@ typedef struct MDBX_lockinfo {
|
||||
/* Marker to distinguish uniqueness of DB/CLK. */
|
||||
MDBX_atomic_uint64_t mti_bait_uniqueness;
|
||||
|
||||
/* Paired counter of processes that have mlock()ed part of mmapped DB.
|
||||
* The (mti_mlcnt[0] - mti_mlcnt[1]) > 0 means at least one process
|
||||
* has locked at least one page, so madvise() could return EINVAL. */
|
||||
MDBX_atomic_uint32_t mti_mlcnt[2];
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* Statistics of costly ops of all (running, completed and aborted)
|
||||
* transactions */
|
||||
MDBX_pgop_stat_t mti_pgop_stat;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT*/
|
||||
pgop_stat_t mti_pgop_stat;
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
@ -2683,20 +2947,20 @@ typedef struct MDBX_lockinfo {
|
||||
|
||||
atomic_txnid_t mti_oldest_reader;
|
||||
|
||||
/* Timestamp of the last steady sync. Value is represented in a suitable
|
||||
* system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) or
|
||||
* clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_sync_timestamp;
|
||||
/* Timestamp of entering an out-of-sync state. Value is represented in a
|
||||
* suitable system-dependent form, for example clock_gettime(CLOCK_BOOTTIME)
|
||||
* or clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_eoos_timestamp;
|
||||
|
||||
/* Number of un-synced-with-disk pages for the auto-sync feature. */
|
||||
atomic_pgno_t mti_unsynced_pages;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(MADV_FREE). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
MDBX_atomic_uint64_t mti_unsynced_pages;
|
||||
|
||||
/* Timestamp of the last readers check. */
|
||||
MDBX_atomic_uint64_t mti_reader_check_timestamp;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(DONTNEED). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
|
||||
/* Shared anchor for tracking readahead edge and enabled/disabled status. */
|
||||
pgno_t mti_readahead_anchor;
|
||||
|
||||
@ -2799,7 +3063,7 @@ typedef struct MDBX_dp {
|
||||
MDBX_page *ptr;
|
||||
pgno_t pgno;
|
||||
union {
|
||||
unsigned extra;
|
||||
uint32_t extra;
|
||||
__anonymous_struct_extension__ struct {
|
||||
unsigned multi : 1;
|
||||
unsigned lru : 31;
|
||||
@ -2809,10 +3073,10 @@ typedef struct MDBX_dp {
|
||||
|
||||
/* A DPL (dirty-page list) is a sorted array of MDBX_DPs. */
|
||||
typedef struct MDBX_dpl {
|
||||
unsigned sorted;
|
||||
unsigned length;
|
||||
unsigned pages_including_loose; /* number of pages, but not an entries. */
|
||||
unsigned detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
size_t sorted;
|
||||
size_t length;
|
||||
size_t pages_including_loose; /* number of pages, but not an entries. */
|
||||
size_t detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
||||
(!defined(__cplusplus) && defined(_MSC_VER))
|
||||
MDBX_dp items[] /* dynamic size with holes at zero and after the last */;
|
||||
@ -2831,11 +3095,17 @@ typedef struct MDBX_dpl {
|
||||
((1u << 17) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
|
||||
|
||||
#define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1])
|
||||
#define MDBX_PNL_SIZE(pl) ((pl)[0])
|
||||
#define MDBX_PNL_GETSIZE(pl) ((size_t)((pl)[0]))
|
||||
#define MDBX_PNL_SETSIZE(pl, size) \
|
||||
do { \
|
||||
const size_t __size = size; \
|
||||
assert(__size < INT_MAX); \
|
||||
(pl)[0] = (pgno_t)__size; \
|
||||
} while (0)
|
||||
#define MDBX_PNL_FIRST(pl) ((pl)[1])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_SIZE(pl)])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)])
|
||||
#define MDBX_PNL_BEGIN(pl) (&(pl)[1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_SIZE(pl) + 1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_GETSIZE(pl) + 1])
|
||||
|
||||
#if MDBX_PNL_ASCENDING
|
||||
#define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl)
|
||||
@ -2845,8 +3115,8 @@ typedef struct MDBX_dpl {
|
||||
#define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl)
|
||||
#endif
|
||||
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_SIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_SIZE(pl) == 0)
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0)
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Internal structures */
|
||||
@ -2865,6 +3135,9 @@ typedef struct MDBX_dbx {
|
||||
|
||||
typedef struct troika {
|
||||
uint8_t fsm, recent, prefer_steady, tail_and_flags;
|
||||
#if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */
|
||||
uint32_t unused_pad;
|
||||
#endif
|
||||
#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7)
|
||||
#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64)
|
||||
#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128)
|
||||
@ -2886,9 +3159,13 @@ struct MDBX_txn {
|
||||
/* Additional flag for sync_locked() */
|
||||
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
|
||||
|
||||
#define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
|
||||
#define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not altered */
|
||||
|
||||
#define TXN_FLAGS \
|
||||
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_UPDATE_GC | \
|
||||
MDBX_TXN_FROZEN_RE)
|
||||
|
||||
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
|
||||
((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
|
||||
@ -2947,18 +3224,18 @@ struct MDBX_txn {
|
||||
struct {
|
||||
meta_troika_t troika;
|
||||
/* In write txns, array of cursors for each DB */
|
||||
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
pgno_t *relist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
#if MDBX_ENABLE_REFUND
|
||||
pgno_t loose_refund_wl /* FIXME: describe */;
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
/* a sequence for spilling dirty pages with LRU policy */
|
||||
unsigned dirtylru;
|
||||
/* dirtylist room: Dirty array size - dirty pages visible to this txn.
|
||||
* Includes ancestor txns' dirty pages not hidden by other txns'
|
||||
* dirty/spilled pages. Thus commit(nested txn) has room to merge
|
||||
* dirtylist into mt_parent after freeing hidden mt_parent pages. */
|
||||
unsigned dirtyroom;
|
||||
/* a sequence for spilling dirty pages with LRU policy */
|
||||
unsigned dirtylru;
|
||||
size_t dirtyroom;
|
||||
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
|
||||
MDBX_dpl *dirtylist;
|
||||
/* The list of reclaimed txns from GC */
|
||||
@ -2969,8 +3246,8 @@ struct MDBX_txn {
|
||||
* in this transaction, linked through `mp_next`. */
|
||||
MDBX_page *loose_pages;
|
||||
/* Number of loose pages (tw.loose_pages) */
|
||||
unsigned loose_count;
|
||||
unsigned spill_least_removed;
|
||||
size_t loose_count;
|
||||
size_t spill_least_removed;
|
||||
/* The sorted list of dirty pages we temporarily wrote to disk
|
||||
* because the dirty list was full. page numbers in here are
|
||||
* shifted left by 1, deleted slots have the LSB set. */
|
||||
@ -3024,9 +3301,7 @@ struct MDBX_cursor {
|
||||
#define C_SUB 0x04 /* Cursor is a sub-cursor */
|
||||
#define C_DEL 0x08 /* last op was a cursor_del */
|
||||
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
|
||||
#define C_RECLAIMING 0x20 /* GC lookup is prohibited */
|
||||
#define C_GCFREEZE 0x40 /* reclaimed_pglist must not be updated */
|
||||
uint8_t mc_flags; /* see mdbx_cursor */
|
||||
uint8_t mc_flags;
|
||||
|
||||
/* Cursor checking flags. */
|
||||
#define CC_BRANCH 0x01 /* same as P_BRANCH for CHECK_LEAF_TYPE() */
|
||||
@ -3037,7 +3312,7 @@ struct MDBX_cursor {
|
||||
#define CC_LEAF2 0x20 /* same as P_LEAF2 for CHECK_LEAF_TYPE() */
|
||||
#define CC_RETIRING 0x40 /* refs to child pages may be invalid */
|
||||
#define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */
|
||||
uint8_t mc_checking; /* page checking level */
|
||||
uint8_t mc_checking;
|
||||
|
||||
MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */
|
||||
indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */
|
||||
@ -3086,14 +3361,20 @@ struct MDBX_env {
|
||||
osal_mmap_t me_dxb_mmap; /* The main data file */
|
||||
#define me_map me_dxb_mmap.dxb
|
||||
#define me_lazy_fd me_dxb_mmap.fd
|
||||
mdbx_filehandle_t me_dsync_fd;
|
||||
#define me_fd4data me_ioring.fd
|
||||
mdbx_filehandle_t me_dsync_fd, me_fd4meta;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
HANDLE me_overlapped_fd, me_data_lock_event;
|
||||
#endif /* Windows */
|
||||
osal_mmap_t me_lck_mmap; /* The lock file */
|
||||
#define me_lfd me_lck_mmap.fd
|
||||
struct MDBX_lockinfo *me_lck;
|
||||
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
unsigned me_branch_nodemax; /* max size of a branch-node */
|
||||
atomic_pgno_t me_mlocked_pgno;
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
|
||||
uint16_t me_merge_threshold,
|
||||
me_merge_threshold_gc; /* pages emptier than this are candidates for
|
||||
@ -3165,6 +3446,7 @@ struct MDBX_env {
|
||||
unsigned me_dp_reserve_len;
|
||||
/* PNL of pages that became unused in a write txn */
|
||||
MDBX_PNL me_retired_pages;
|
||||
osal_ioring_t me_ioring;
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
osal_srwlock_t me_remap_guard;
|
||||
@ -3190,7 +3472,7 @@ struct MDBX_env {
|
||||
#define xMDBX_DEBUG_SPILLING 0
|
||||
#endif
|
||||
#if xMDBX_DEBUG_SPILLING == 2
|
||||
unsigned debug_dirtied_est, debug_dirtied_act;
|
||||
size_t debug_dirtied_est, debug_dirtied_act;
|
||||
#endif /* xMDBX_DEBUG_SPILLING */
|
||||
|
||||
/* ------------------------------------------------- stub for lck-less mode */
|
||||
@ -3295,10 +3577,22 @@ MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line,
|
||||
#define FATAL(fmt, ...) \
|
||||
debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__);
|
||||
|
||||
#if MDBX_DEBUG
|
||||
#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line)
|
||||
#else /* MDBX_DEBUG */
|
||||
MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func,
|
||||
unsigned line);
|
||||
#define ASSERT_FAIL(env, msg, func, line) \
|
||||
do { \
|
||||
(void)(env); \
|
||||
assert_fail(msg, func, line); \
|
||||
} while (0)
|
||||
#endif /* MDBX_DEBUG */
|
||||
|
||||
#define ENSURE_MSG(env, expr, msg) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) \
|
||||
mdbx_assert_fail(env, msg, __func__, __LINE__); \
|
||||
ASSERT_FAIL(env, msg, __func__, __LINE__); \
|
||||
} while (0)
|
||||
|
||||
#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr)
|
||||
@ -3369,7 +3663,9 @@ MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin,
|
||||
MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key);
|
||||
|
||||
MDBX_INTERNAL_FUNC void global_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void global_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void thread_dtor(void *ptr);
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -3490,12 +3786,12 @@ typedef struct MDBX_node {
|
||||
#error "Oops, some flags overlapped or wrong"
|
||||
#endif
|
||||
|
||||
/* max number of pages to commit in one writev() call */
|
||||
#define MDBX_COMMIT_PAGES 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_COMMIT_PAGES /* sysconf(_SC_IOV_MAX) */
|
||||
#undef MDBX_COMMIT_PAGES
|
||||
#define MDBX_COMMIT_PAGES IOV_MAX
|
||||
#endif
|
||||
/* Max length of iov-vector passed to writev() call, used for auxiliary writes */
|
||||
#define MDBX_AUXILARY_IOV_MAX 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX
|
||||
#undef MDBX_AUXILARY_IOV_MAX
|
||||
#define MDBX_AUXILARY_IOV_MAX IOV_MAX
|
||||
#endif /* MDBX_AUXILARY_IOV_MAX */
|
||||
|
||||
/*
|
||||
* /
|
||||
@ -3552,20 +3848,24 @@ ceil_powerof2(size_t value, size_t granularity) {
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned
|
||||
log2n_powerof2(size_t value) {
|
||||
assert(value > 0 && value < INT32_MAX && is_powerof2(value));
|
||||
assert((value & -(int32_t)value) == value);
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl)
|
||||
return __builtin_ctzl(value);
|
||||
log2n_powerof2(size_t value_uintptr) {
|
||||
assert(value_uintptr > 0 && value_uintptr < INT32_MAX &&
|
||||
is_powerof2(value_uintptr));
|
||||
assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr);
|
||||
const uint32_t value_uint32 = (uint32_t)value_uintptr;
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctz)
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(unsigned));
|
||||
return __builtin_ctz(value_uint32);
|
||||
#elif defined(_MSC_VER)
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, (unsigned long)value);
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(long));
|
||||
_BitScanForward(&index, value_uint32);
|
||||
return index;
|
||||
#else
|
||||
static const uint8_t debruijn_ctz32[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
|
||||
return debruijn_ctz32[(uint32_t)(value * 0x077CB531u) >> 27];
|
||||
return debruijn_ctz32[(uint32_t)(value_uint32 * 0x077CB531ul) >> 27];
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
* top-level directory of the distribution or, alternatively, at
|
||||
* <http://www.OpenLDAP.org/license.html>. */
|
||||
|
||||
#define MDBX_BUILD_SOURCERY 86a8d6c403a2023fc2df0ab38f71339b78e82f0aa786f480a1cb166c05497134_v0_12_1_0_gb36a07a5
|
||||
#define MDBX_BUILD_SOURCERY e17be563de6f6f85e208ded5aacc1387bc0addf6ce5540c99d0d15db2c3e8edd_v0_12_2_0_g9b062cf0
|
||||
#ifdef MDBX_CONFIG_H
|
||||
#include MDBX_CONFIG_H
|
||||
#endif
|
||||
@ -149,7 +149,11 @@
|
||||
#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* __USE_MINGW_ANSI_STDIO */
|
||||
#endif /* MinGW */
|
||||
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && !defined(UNICODE)
|
||||
#define UNICODE
|
||||
#endif /* UNICODE */
|
||||
|
||||
#include "mdbx.h"
|
||||
/*
|
||||
@ -216,7 +220,7 @@
|
||||
#define SSIZE_MAX INTPTR_MAX
|
||||
#endif
|
||||
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
|
||||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul || defined(_WIN64)
|
||||
#define MDBX_WORDBITS 64
|
||||
#else
|
||||
#define MDBX_WORDBITS 32
|
||||
@ -389,10 +393,6 @@ __extern_C key_t ftok(const char *, int);
|
||||
#elif _WIN32_WINNT < 0x0500
|
||||
#error At least 'Windows 2000' API is required for libmdbx.
|
||||
#endif /* _WIN32_WINNT */
|
||||
#if (defined(__MINGW32__) || defined(__MINGW64__)) && \
|
||||
!defined(__USE_MINGW_ANSI_STDIO)
|
||||
#define __USE_MINGW_ANSI_STDIO 1
|
||||
#endif /* MinGW */
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif /* WIN32_LEAN_AND_MEAN */
|
||||
@ -416,8 +416,10 @@ __extern_C key_t ftok(const char *, int);
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#endif /*---------------------------------------------------------------------*/
|
||||
@ -1169,9 +1171,6 @@ static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
|
||||
#define vsnprintf _vsnprintf /* ntdll */
|
||||
#endif
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#else /*----------------------------------------------------------------------*/
|
||||
|
||||
typedef pthread_t osal_thread_t;
|
||||
@ -1202,18 +1201,16 @@ typedef pthread_mutex_t osal_fastmutex_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* OS abstraction layer stuff */
|
||||
|
||||
MDBX_INTERNAL_VAR unsigned sys_pagesize;
|
||||
MDBX_MAYBE_UNUSED MDBX_INTERNAL_VAR unsigned sys_allocation_granularity;
|
||||
|
||||
/* Get the size of a memory page for the system.
|
||||
* This is the basic size that the platform's memory manager uses, and is
|
||||
* fundamental to the use of memory-mapped files. */
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t
|
||||
osal_syspagesize(void) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
SYSTEM_INFO si;
|
||||
GetSystemInfo(&si);
|
||||
return si.dwPageSize;
|
||||
#else
|
||||
return sysconf(_SC_PAGE_SIZE);
|
||||
#endif
|
||||
assert(sys_pagesize > 0 && (sys_pagesize & (sys_pagesize - 1)) == 0);
|
||||
return sys_pagesize;
|
||||
}
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@ -1252,8 +1249,140 @@ typedef union osal_srwlock {
|
||||
} osal_srwlock_t;
|
||||
#endif /* Windows */
|
||||
|
||||
#ifndef MDBX_HAVE_PWRITEV
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
|
||||
#elif defined(__ANDROID_API__)
|
||||
|
||||
#if __ANDROID_API__ < 24
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#endif
|
||||
|
||||
#elif defined(__APPLE__) || defined(__MACH__) || defined(_DARWIN_C_SOURCE)
|
||||
|
||||
#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_VERSION_11_0) && \
|
||||
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
|
||||
/* FIXME: add checks for IOS versions, etc */
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
|
||||
#elif defined(_SC_IOV_MAX) || (defined(IOV_MAX) && IOV_MAX > 1)
|
||||
#define MDBX_HAVE_PWRITEV 1
|
||||
#else
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
#endif
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
|
||||
typedef struct ior_item {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
OVERLAPPED ov;
|
||||
#define ior_svg_gap4terminator 1
|
||||
#define ior_sgv_element FILE_SEGMENT_ELEMENT
|
||||
#else
|
||||
size_t offset;
|
||||
#if MDBX_HAVE_PWRITEV
|
||||
size_t sgvcnt;
|
||||
#define ior_svg_gap4terminator 0
|
||||
#define ior_sgv_element struct iovec
|
||||
#endif /* MDBX_HAVE_PWRITEV */
|
||||
#endif /* !Windows */
|
||||
union {
|
||||
MDBX_val single;
|
||||
#if defined(ior_sgv_element)
|
||||
ior_sgv_element sgv[1 + ior_svg_gap4terminator];
|
||||
#endif /* ior_sgv_element */
|
||||
};
|
||||
} ior_item_t;
|
||||
|
||||
typedef struct osal_ioring {
|
||||
unsigned slots_left;
|
||||
unsigned allocated;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define IOR_DIRECT 1
|
||||
#define IOR_OVERLAPPED 2
|
||||
#define IOR_STATE_LOCKED 1
|
||||
unsigned pagesize;
|
||||
unsigned last_sgvcnt;
|
||||
size_t last_bytes;
|
||||
uint8_t flags, state, pagesize_ln2;
|
||||
unsigned event_stack;
|
||||
HANDLE *event_pool;
|
||||
volatile LONG async_waiting;
|
||||
volatile LONG async_completed;
|
||||
HANDLE async_done;
|
||||
|
||||
#define ior_last_sgvcnt(ior, item) (ior)->last_sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#elif MDBX_HAVE_PWRITEV
|
||||
unsigned last_bytes;
|
||||
#define ior_last_sgvcnt(ior, item) (item)->sgvcnt
|
||||
#define ior_last_bytes(ior, item) (ior)->last_bytes
|
||||
#else
|
||||
#define ior_last_sgvcnt(ior, item) (1)
|
||||
#define ior_last_bytes(ior, item) (item)->single.iov_len
|
||||
#endif /* !Windows */
|
||||
mdbx_filehandle_t fd;
|
||||
ior_item_t *last;
|
||||
ior_item_t *pool;
|
||||
char *boundary;
|
||||
} osal_ioring_t;
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
/* Actually this is not ioring for now, but on the way. */
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
uint8_t flags,
|
||||
#endif /* Windows */
|
||||
mdbx_filehandle_t fd);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *, size_t items);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_destroy(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_reset(osal_ioring_t *);
|
||||
MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ctx, const size_t offset,
|
||||
void *data, const size_t bytes);
|
||||
typedef struct osal_ioring_write_result {
|
||||
int err;
|
||||
unsigned wops;
|
||||
} osal_ioring_write_result_t;
|
||||
MDBX_INTERNAL_FUNC osal_ioring_write_result_t
|
||||
osal_ioring_write(osal_ioring_t *ior);
|
||||
|
||||
typedef struct iov_ctx iov_ctx_t;
|
||||
MDBX_INTERNAL_FUNC void osal_ioring_walk(
|
||||
osal_ioring_t *ior, iov_ctx_t *ctx,
|
||||
void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes));
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_left(const osal_ioring_t *ior) {
|
||||
return ior->slots_left;
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline unsigned
|
||||
osal_ioring_used(const osal_ioring_t *ior) {
|
||||
return ior->allocated - ior->slots_left;
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline int
|
||||
osal_ioring_reserve(osal_ioring_t *ior, size_t items, size_t bytes) {
|
||||
items = (items > 32) ? items : 32;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
const size_t npages = bytes >> ior->pagesize_ln2;
|
||||
items = (items > npages) ? items : npages;
|
||||
#else
|
||||
(void)bytes;
|
||||
#endif
|
||||
items = (items < 65536) ? items : 65536;
|
||||
if (likely(ior->allocated >= items))
|
||||
return MDBX_SUCCESS;
|
||||
return osal_ioring_resize(ior, items);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* libc compatibility stuff */
|
||||
|
||||
@ -1279,10 +1408,53 @@ MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny);
|
||||
MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny);
|
||||
|
||||
/* max bytes to write in one call */
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x01000000)
|
||||
#if defined(_WIN64)
|
||||
#define MAX_WRITE UINT32_C(0x10000000)
|
||||
#elif defined(_WIN32)
|
||||
#define MAX_WRITE UINT32_C(0x04000000)
|
||||
#else
|
||||
#define MAX_WRITE UINT32_C(0x3fff0000)
|
||||
#define MAX_WRITE UINT32_C(0x3f000000)
|
||||
|
||||
#if defined(F_GETLK64) && defined(F_SETLK64) && defined(F_SETLKW64) && \
|
||||
!defined(__ANDROID_API__)
|
||||
#define MDBX_F_SETLK F_SETLK64
|
||||
#define MDBX_F_SETLKW F_SETLKW64
|
||||
#define MDBX_F_GETLK F_GETLK64
|
||||
#if (__GLIBC_PREREQ(2, 28) && \
|
||||
(defined(__USE_LARGEFILE64) || defined(__LARGEFILE64_SOURCE) || \
|
||||
defined(_USE_LARGEFILE64) || defined(_LARGEFILE64_SOURCE))) || \
|
||||
defined(fcntl64)
|
||||
#define MDBX_FCNTL fcntl64
|
||||
#else
|
||||
#define MDBX_FCNTL fcntl
|
||||
#endif
|
||||
#define MDBX_STRUCT_FLOCK struct flock64
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX UINT64_C(0x7fffFFFFfff00000)
|
||||
#endif /* OFF_T_MAX */
|
||||
#else
|
||||
#define MDBX_F_SETLK F_SETLK
|
||||
#define MDBX_F_SETLKW F_SETLKW
|
||||
#define MDBX_F_GETLK F_GETLK
|
||||
#define MDBX_FCNTL fcntl
|
||||
#define MDBX_STRUCT_FLOCK struct flock
|
||||
#endif /* MDBX_F_SETLK, MDBX_F_SETLKW, MDBX_F_GETLK */
|
||||
|
||||
#if defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64) && !defined(__ANDROID_API__)
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK64
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW64
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK64
|
||||
#else
|
||||
#define MDBX_F_OFD_SETLK F_OFD_SETLK
|
||||
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW
|
||||
#define MDBX_F_OFD_GETLK F_OFD_GETLK
|
||||
#ifndef OFF_T_MAX
|
||||
#define OFF_T_MAX \
|
||||
(((sizeof(off_t) > 4) ? INT64_MAX : INT32_MAX) & ~(size_t)0xFffff)
|
||||
#endif /* OFF_T_MAX */
|
||||
#endif /* MDBX_F_OFD_SETLK64, MDBX_F_OFD_SETLKW64, MDBX_F_OFD_GETLK64 */
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
@ -1325,8 +1497,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex);
|
||||
MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex);
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov,
|
||||
int iovcnt, uint64_t offset,
|
||||
size_t expected_written);
|
||||
size_t sgvcnt, uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count,
|
||||
uint64_t offset);
|
||||
MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf,
|
||||
@ -1354,12 +1525,16 @@ MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
|
||||
MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
|
||||
|
||||
enum osal_openfile_purpose {
|
||||
MDBX_OPEN_DXB_READ = 0,
|
||||
MDBX_OPEN_DXB_LAZY = 1,
|
||||
MDBX_OPEN_DXB_DSYNC = 2,
|
||||
MDBX_OPEN_LCK = 3,
|
||||
MDBX_OPEN_COPY = 4,
|
||||
MDBX_OPEN_DELETE = 5
|
||||
MDBX_OPEN_DXB_READ,
|
||||
MDBX_OPEN_DXB_LAZY,
|
||||
MDBX_OPEN_DXB_DSYNC,
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
MDBX_OPEN_DXB_OVERLAPPED,
|
||||
MDBX_OPEN_DXB_OVERLAPPED_DIRECT,
|
||||
#endif /* Windows */
|
||||
MDBX_OPEN_LCK,
|
||||
MDBX_OPEN_COPY,
|
||||
MDBX_OPEN_DELETE
|
||||
};
|
||||
|
||||
MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
|
||||
@ -1393,7 +1568,7 @@ osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
|
||||
MDBX_INTERNAL_FUNC int
|
||||
osal_resume_threads_after_remap(mdbx_handle_array_t *array);
|
||||
#endif /* Windows */
|
||||
MDBX_INTERNAL_FUNC int osal_msync(osal_mmap_t *map, size_t offset,
|
||||
MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
||||
size_t length,
|
||||
enum osal_syncmode_bits mode_bits);
|
||||
MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle,
|
||||
@ -1436,9 +1611,16 @@ osal_pthread_mutex_lock(pthread_mutex_t *mutex) {
|
||||
#endif /* !Windows */
|
||||
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_monotime(void);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_cputime(size_t *optional_page_faults);
|
||||
MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16);
|
||||
MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime);
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline uint32_t
|
||||
osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) {
|
||||
uint32_t seconds_16dot16 = osal_monotime_to_16dot16(monotime);
|
||||
return seconds_16dot16 ? seconds_16dot16 : /* fix underflow */ (monotime > 0);
|
||||
}
|
||||
|
||||
MDBX_INTERNAL_FUNC bin128_t osal_bootid(void);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* lck stuff */
|
||||
@ -1548,6 +1730,9 @@ MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid);
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
MDBX_INTERNAL_FUNC size_t osal_mb2w(wchar_t *dst, size_t dst_n, const char *src,
|
||||
size_t src_n);
|
||||
|
||||
#define OSAL_MB2WIDE(FROM, TO) \
|
||||
do { \
|
||||
const char *const from_tmp = (FROM); \
|
||||
@ -1681,6 +1866,11 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
|
||||
|
||||
NTSYSAPI ULONG RtlRandomEx(PULONG Seed);
|
||||
|
||||
typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle,
|
||||
PUCHAR OverlappedRangeStart,
|
||||
ULONG Length);
|
||||
MDBX_INTERNAL_VAR MDBX_SetFileIoOverlappedRange mdbx_SetFileIoOverlappedRange;
|
||||
|
||||
#endif /* Windows */
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -1795,6 +1985,13 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_REFUND must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
|
||||
/** Controls profiling of GC search and updates. */
|
||||
#ifndef MDBX_ENABLE_PROFGC
|
||||
#define MDBX_ENABLE_PROFGC 0
|
||||
#elif !(MDBX_ENABLE_PROFGC == 0 || MDBX_ENABLE_PROFGC == 1)
|
||||
#error MDBX_ENABLE_PROFGC must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_PROFGC */
|
||||
|
||||
/** Controls gathering statistics for page operations. */
|
||||
#ifndef MDBX_ENABLE_PGOP_STAT
|
||||
#define MDBX_ENABLE_PGOP_STAT 1
|
||||
@ -1814,7 +2011,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1
|
||||
#endif /* MDBX_ENABLE_BIGFOOT */
|
||||
|
||||
/** Controls use of POSIX madvise() hints and friends. */
|
||||
/** Controls the use of POSIX madvise() and/or similar hints. */
|
||||
#ifndef MDBX_ENABLE_MADVISE
|
||||
#define MDBX_ENABLE_MADVISE 1
|
||||
#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1)
|
||||
@ -1843,23 +2040,22 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
|
||||
#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
|
||||
|
||||
/** Basically, this build-option is for TODO. Guess it should be replaced
|
||||
* with MDBX_ENABLE_WRITEMAP_SPILLING with the three variants:
|
||||
* 0/OFF = Don't track dirty pages at all and don't spilling ones.
|
||||
* This should be by-default on Linux and may-be other systems
|
||||
* (not sure: Darwin/OSX, FreeBSD, Windows 10) where kernel provides
|
||||
* properly LRU tracking and async writing on-demand.
|
||||
* 1/ON = Lite tracking of dirty pages but with LRU labels and explicit
|
||||
* spilling with msync(MS_ASYNC). */
|
||||
#ifndef MDBX_FAKE_SPILL_WRITEMAP
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 1 /* msync(MS_ASYNC) is no-op on Linux */
|
||||
/** Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP
|
||||
* mode. 0/OFF = Don't track dirty pages at all, don't spill ones, and use
|
||||
* msync() to persist data. This is by-default on Linux and other systems where
|
||||
* kernel provides properly LRU tracking and effective flushing on-demand. 1/ON
|
||||
* = Tracking of dirty pages but with LRU labels for spilling and explicit
|
||||
* persist ones by write(). This may be reasonable for systems with low
|
||||
* performance of msync() and/or LRU tracking. */
|
||||
#ifndef MDBX_AVOID_MSYNC
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define MDBX_AVOID_MSYNC 1
|
||||
#else
|
||||
#define MDBX_FAKE_SPILL_WRITEMAP 0
|
||||
#define MDBX_AVOID_MSYNC 0
|
||||
#endif
|
||||
#elif !(MDBX_FAKE_SPILL_WRITEMAP == 0 || MDBX_FAKE_SPILL_WRITEMAP == 1)
|
||||
#error MDBX_FAKE_SPILL_WRITEMAP must be defined as 0 or 1
|
||||
#endif /* MDBX_FAKE_SPILL_WRITEMAP */
|
||||
#elif !(MDBX_AVOID_MSYNC == 0 || MDBX_AVOID_MSYNC == 1)
|
||||
#error MDBX_AVOID_MSYNC must be defined as 0 or 1
|
||||
#endif /* MDBX_AVOID_MSYNC */
|
||||
|
||||
/** Controls sort order of internal page number lists.
|
||||
* This is a mostly experimental/advanced option, not intended for regular MDBX users.
|
||||
@ -1916,6 +2112,27 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#ifndef MDBX_HAVE_C11ATOMICS
|
||||
#endif /* MDBX_HAVE_C11ATOMICS */
|
||||
|
||||
/** If defined, enables the use of GCC's `__builtin_cpu_supports()`
|
||||
* for runtime dispatching depending on the CPU's capabilities. */
|
||||
#ifndef MDBX_HAVE_BUILTIN_CPU_SUPPORTS
|
||||
#if defined(__APPLE__) || defined(BIONIC)
|
||||
/* Never use any modern features on Apple's or Google's OSes
|
||||
* because of numerous problems with compatibility and/or performance */
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif defined(__e2k__)
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#elif __has_builtin(__builtin_cpu_supports) || \
|
||||
defined(__BUILTIN_CPU_SUPPORTS__) || \
|
||||
(defined(__ia32__) && __GNUC_PREREQ(4, 8) && __GLIBC_PREREQ(2, 23))
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 1
|
||||
#else
|
||||
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
|
||||
#endif
|
||||
#elif !(MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 0 || \
|
||||
MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 1)
|
||||
#error MDBX_HAVE_BUILTIN_CPU_SUPPORTS must be defined as 0 or 1
|
||||
#endif /* MDBX_HAVE_BUILTIN_CPU_SUPPORTS */
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/** Win32 File Locking API for \ref MDBX_LOCKING */
|
||||
@ -1971,7 +2188,10 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
|
||||
/** Advanced: Using POSIX OFD-locks (autodetection by default). */
|
||||
#ifndef MDBX_USE_OFDLOCKS
|
||||
#if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK) && \
|
||||
#if ((defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && \
|
||||
defined(F_OFD_GETLK)) || \
|
||||
(defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && \
|
||||
defined(F_OFD_GETLK64))) && \
|
||||
!defined(MDBX_SAFE4QEMU) && \
|
||||
!defined(__sun) /* OFD-lock are broken on Solaris */
|
||||
#define MDBX_USE_OFDLOCKS 1
|
||||
@ -2057,13 +2277,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#endif /* MDBX_64BIT_ATOMIC */
|
||||
|
||||
#ifndef MDBX_64BIT_CAS
|
||||
#if defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
|
||||
#if __GCC_ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2075,6 +2289,12 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(ATOMIC_LLONG_LOCK_FREE)
|
||||
#if ATOMIC_LLONG_LOCK_FREE > 1
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
#define MDBX_64BIT_CAS 0
|
||||
#endif
|
||||
#elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN)
|
||||
#define MDBX_64BIT_CAS 1
|
||||
#else
|
||||
@ -2309,7 +2529,7 @@ MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(
|
||||
/* FROZEN: The version number for a database's datafile format. */
|
||||
#define MDBX_DATA_VERSION 3
|
||||
/* The version number for a database's lockfile format. */
|
||||
#define MDBX_LOCK_VERSION 4
|
||||
#define MDBX_LOCK_VERSION 5
|
||||
|
||||
/* handle for the DB used to track free pages. */
|
||||
#define FREE_DBI 0
|
||||
@ -2513,14 +2733,34 @@ typedef struct MDBX_page {
|
||||
: PAGETYPE_WHOLE(p))
|
||||
|
||||
/* Size of the page header, excluding dynamic data at the end */
|
||||
#define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_ptrs))
|
||||
#define PAGEHDRSZ offsetof(MDBX_page, mp_ptrs)
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
typedef struct profgc_stat {
|
||||
/* Monotonic "wall-clock" time
|
||||
* spent on reading and searching within the GC */
|
||||
uint64_t rtime_monotonic;
|
||||
/* Monotonic "wall-clock" time spent
|
||||
* on preparing pages retrieved from the GC, including paging-in from disk. */
|
||||
uint64_t xtime_monotonic;
|
||||
/* User-mode CPU time
|
||||
* spent on reading and searching within the GC */
|
||||
uint64_t rtime_cpu;
|
||||
/* Number of read/search iterations within the GC during page allocation */
|
||||
uint32_t rsteps;
|
||||
/* Number of requests to allocate page sequences,
|
||||
* i.e. when allocation of more than one page is requested */
|
||||
uint32_t xpages;
|
||||
/* Slow path execution count */
|
||||
uint32_t spe_counter;
|
||||
/* page faults (hard page faults) */
|
||||
uint32_t majflt;
|
||||
} profgc_stat_t;
|
||||
|
||||
/* Statistics of page operations overall of all (running, completed and aborted)
|
||||
* transactions */
|
||||
typedef struct {
|
||||
typedef struct pgop_stat {
|
||||
MDBX_atomic_uint64_t newly; /* Quantity of a new pages added */
|
||||
MDBX_atomic_uint64_t cow; /* Quantity of pages copied for update */
|
||||
MDBX_atomic_uint64_t clone; /* Quantity of parent's dirty pages clones
|
||||
@ -2532,10 +2772,31 @@ typedef struct {
|
||||
MDBX_atomic_uint64_t
|
||||
wops; /* Number of explicit write operations (not a pages) to a disk */
|
||||
MDBX_atomic_uint64_t
|
||||
gcrtime; /* Time spending for reading/searching GC (aka FreeDB). The
|
||||
unit/scale is platform-dependent, see osal_monotime(). */
|
||||
} MDBX_pgop_stat_t;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||
msync; /* Number of explicit msync/flush-to-disk operations */
|
||||
MDBX_atomic_uint64_t
|
||||
fsync; /* Number of explicit fsync/flush-to-disk operations */
|
||||
|
||||
/* Statistics for GC profiling.
|
||||
* Logically, this data could perhaps be moved into a separate structure,
|
||||
* but the difference would be purely cosmetic. */
|
||||
struct {
|
||||
/* Costs of maintaining user data */
|
||||
profgc_stat_t work;
|
||||
/* Costs of maintaining and updating the GC itself */
|
||||
profgc_stat_t self;
|
||||
/* Number of GC update iterations,
|
||||
* greater than 1 if there were retries/restarts */
|
||||
uint32_t wloops;
|
||||
/* Iterations of coalescing (merging) GC records */
|
||||
uint32_t coalescences;
|
||||
/* Wipes of steady commit points in MDBX_UTTERLY_NOSYNC mode */
|
||||
uint32_t wipes;
|
||||
/* Flushes of data to disk outside of MDBX_UTTERLY_NOSYNC mode */
|
||||
uint32_t flushes;
|
||||
/* Attempts to kick laggard readers */
|
||||
uint32_t kicks;
|
||||
} gc_prof;
|
||||
} pgop_stat_t;
|
||||
|
||||
#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES
|
||||
#define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
|
||||
@ -2666,13 +2927,16 @@ typedef struct MDBX_lockinfo {
|
||||
/* Marker to distinguish uniqueness of DB/CLK. */
|
||||
MDBX_atomic_uint64_t mti_bait_uniqueness;
|
||||
|
||||
/* Paired counter of processes that have mlock()ed part of mmapped DB.
|
||||
* The (mti_mlcnt[0] - mti_mlcnt[1]) > 0 means at least one process
|
||||
* has locked at least one page, so madvise() could return EINVAL. */
|
||||
MDBX_atomic_uint32_t mti_mlcnt[2];
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
/* Statistics of costly ops of all (running, completed and aborted)
|
||||
* transactions */
|
||||
MDBX_pgop_stat_t mti_pgop_stat;
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT*/
|
||||
pgop_stat_t mti_pgop_stat;
|
||||
|
||||
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
||||
|
||||
@ -2683,20 +2947,20 @@ typedef struct MDBX_lockinfo {
|
||||
|
||||
atomic_txnid_t mti_oldest_reader;
|
||||
|
||||
/* Timestamp of the last steady sync. Value is represented in a suitable
|
||||
* system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) or
|
||||
* clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_sync_timestamp;
|
||||
/* Timestamp of entering an out-of-sync state. Value is represented in a
|
||||
* suitable system-dependent form, for example clock_gettime(CLOCK_BOOTTIME)
|
||||
* or clock_gettime(CLOCK_MONOTONIC). */
|
||||
MDBX_atomic_uint64_t mti_eoos_timestamp;
|
||||
|
||||
/* Number of un-synced-with-disk pages for the auto-sync feature. */
|
||||
atomic_pgno_t mti_unsynced_pages;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(MADV_FREE). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
MDBX_atomic_uint64_t mti_unsynced_pages;
|
||||
|
||||
/* Timestamp of the last readers check. */
|
||||
MDBX_atomic_uint64_t mti_reader_check_timestamp;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(DONTNEED). */
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
|
||||
/* Shared anchor for tracking readahead edge and enabled/disabled status. */
|
||||
pgno_t mti_readahead_anchor;
|
||||
|
||||
@ -2799,7 +3063,7 @@ typedef struct MDBX_dp {
|
||||
MDBX_page *ptr;
|
||||
pgno_t pgno;
|
||||
union {
|
||||
unsigned extra;
|
||||
uint32_t extra;
|
||||
__anonymous_struct_extension__ struct {
|
||||
unsigned multi : 1;
|
||||
unsigned lru : 31;
|
||||
@ -2809,10 +3073,10 @@ typedef struct MDBX_dp {
|
||||
|
||||
/* A DPL (dirty-page list) is a sorted array of MDBX_DPs. */
|
||||
typedef struct MDBX_dpl {
|
||||
unsigned sorted;
|
||||
unsigned length;
|
||||
unsigned pages_including_loose; /* number of pages, but not an entries. */
|
||||
unsigned detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
size_t sorted;
|
||||
size_t length;
|
||||
size_t pages_including_loose; /* number of pages, but not an entries. */
|
||||
size_t detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
||||
(!defined(__cplusplus) && defined(_MSC_VER))
|
||||
MDBX_dp items[] /* dynamic size with holes at zero and after the last */;
|
||||
@ -2831,11 +3095,17 @@ typedef struct MDBX_dpl {
|
||||
((1u << 17) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
|
||||
|
||||
#define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1])
|
||||
#define MDBX_PNL_SIZE(pl) ((pl)[0])
|
||||
#define MDBX_PNL_GETSIZE(pl) ((size_t)((pl)[0]))
|
||||
#define MDBX_PNL_SETSIZE(pl, size) \
|
||||
do { \
|
||||
const size_t __size = size; \
|
||||
assert(__size < INT_MAX); \
|
||||
(pl)[0] = (pgno_t)__size; \
|
||||
} while (0)
|
||||
#define MDBX_PNL_FIRST(pl) ((pl)[1])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_SIZE(pl)])
|
||||
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)])
|
||||
#define MDBX_PNL_BEGIN(pl) (&(pl)[1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_SIZE(pl) + 1])
|
||||
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_GETSIZE(pl) + 1])
|
||||
|
||||
#if MDBX_PNL_ASCENDING
|
||||
#define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl)
|
||||
@ -2845,8 +3115,8 @@ typedef struct MDBX_dpl {
|
||||
#define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl)
|
||||
#endif
|
||||
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_SIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_SIZE(pl) == 0)
|
||||
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t))
|
||||
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0)
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Internal structures */
|
||||
@ -2865,6 +3135,9 @@ typedef struct MDBX_dbx {
|
||||
|
||||
typedef struct troika {
|
||||
uint8_t fsm, recent, prefer_steady, tail_and_flags;
|
||||
#if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */
|
||||
uint32_t unused_pad;
|
||||
#endif
|
||||
#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7)
|
||||
#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64)
|
||||
#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128)
|
||||
@ -2886,9 +3159,13 @@ struct MDBX_txn {
|
||||
/* Additional flag for sync_locked() */
|
||||
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
|
||||
|
||||
#define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
|
||||
#define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not altered */
|
||||
|
||||
#define TXN_FLAGS \
|
||||
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
|
||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_UPDATE_GC | \
|
||||
MDBX_TXN_FROZEN_RE)
|
||||
|
||||
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
|
||||
((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
|
||||
@ -2947,18 +3224,18 @@ struct MDBX_txn {
|
||||
struct {
|
||||
meta_troika_t troika;
|
||||
/* In write txns, array of cursors for each DB */
|
||||
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
pgno_t *relist; /* Reclaimed GC pages */
|
||||
txnid_t last_reclaimed; /* ID of last used record */
|
||||
#if MDBX_ENABLE_REFUND
|
||||
pgno_t loose_refund_wl /* FIXME: describe */;
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
/* a sequence for spilling dirty pages with LRU policy */
|
||||
unsigned dirtylru;
|
||||
/* dirtylist room: Dirty array size - dirty pages visible to this txn.
|
||||
* Includes ancestor txns' dirty pages not hidden by other txns'
|
||||
* dirty/spilled pages. Thus commit(nested txn) has room to merge
|
||||
* dirtylist into mt_parent after freeing hidden mt_parent pages. */
|
||||
unsigned dirtyroom;
|
||||
/* a sequence for spilling dirty pages with LRU policy */
|
||||
unsigned dirtylru;
|
||||
size_t dirtyroom;
|
||||
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
|
||||
MDBX_dpl *dirtylist;
|
||||
/* The list of reclaimed txns from GC */
|
||||
@ -2969,8 +3246,8 @@ struct MDBX_txn {
|
||||
* in this transaction, linked through `mp_next`. */
|
||||
MDBX_page *loose_pages;
|
||||
/* Number of loose pages (tw.loose_pages) */
|
||||
unsigned loose_count;
|
||||
unsigned spill_least_removed;
|
||||
size_t loose_count;
|
||||
size_t spill_least_removed;
|
||||
/* The sorted list of dirty pages we temporarily wrote to disk
|
||||
* because the dirty list was full. page numbers in here are
|
||||
* shifted left by 1, deleted slots have the LSB set. */
|
||||
@ -3024,9 +3301,7 @@ struct MDBX_cursor {
|
||||
#define C_SUB 0x04 /* Cursor is a sub-cursor */
|
||||
#define C_DEL 0x08 /* last op was a cursor_del */
|
||||
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
|
||||
#define C_RECLAIMING 0x20 /* GC lookup is prohibited */
|
||||
#define C_GCFREEZE 0x40 /* reclaimed_pglist must not be updated */
|
||||
uint8_t mc_flags; /* see mdbx_cursor */
|
||||
uint8_t mc_flags;
|
||||
|
||||
/* Cursor checking flags. */
|
||||
#define CC_BRANCH 0x01 /* same as P_BRANCH for CHECK_LEAF_TYPE() */
|
||||
@ -3037,7 +3312,7 @@ struct MDBX_cursor {
|
||||
#define CC_LEAF2 0x20 /* same as P_LEAF2 for CHECK_LEAF_TYPE() */
|
||||
#define CC_RETIRING 0x40 /* refs to child pages may be invalid */
|
||||
#define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */
|
||||
uint8_t mc_checking; /* page checking level */
|
||||
uint8_t mc_checking;
|
||||
|
||||
MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */
|
||||
indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */
|
||||
@ -3086,14 +3361,20 @@ struct MDBX_env {
|
||||
osal_mmap_t me_dxb_mmap; /* The main data file */
|
||||
#define me_map me_dxb_mmap.dxb
|
||||
#define me_lazy_fd me_dxb_mmap.fd
|
||||
mdbx_filehandle_t me_dsync_fd;
|
||||
#define me_fd4data me_ioring.fd
|
||||
mdbx_filehandle_t me_dsync_fd, me_fd4meta;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
HANDLE me_overlapped_fd, me_data_lock_event;
|
||||
#endif /* Windows */
|
||||
osal_mmap_t me_lck_mmap; /* The lock file */
|
||||
#define me_lfd me_lck_mmap.fd
|
||||
struct MDBX_lockinfo *me_lck;
|
||||
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
||||
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
||||
unsigned me_branch_nodemax; /* max size of a branch-node */
|
||||
atomic_pgno_t me_mlocked_pgno;
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
|
||||
uint16_t me_merge_threshold,
|
||||
me_merge_threshold_gc; /* pages emptier than this are candidates for
|
||||
@ -3165,6 +3446,7 @@ struct MDBX_env {
|
||||
unsigned me_dp_reserve_len;
|
||||
/* PNL of pages that became unused in a write txn */
|
||||
MDBX_PNL me_retired_pages;
|
||||
osal_ioring_t me_ioring;
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
osal_srwlock_t me_remap_guard;
|
||||
@ -3190,7 +3472,7 @@ struct MDBX_env {
|
||||
#define xMDBX_DEBUG_SPILLING 0
|
||||
#endif
|
||||
#if xMDBX_DEBUG_SPILLING == 2
|
||||
unsigned debug_dirtied_est, debug_dirtied_act;
|
||||
size_t debug_dirtied_est, debug_dirtied_act;
|
||||
#endif /* xMDBX_DEBUG_SPILLING */
|
||||
|
||||
/* ------------------------------------------------- stub for lck-less mode */
|
||||
@ -3295,10 +3577,22 @@ MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line,
|
||||
#define FATAL(fmt, ...) \
|
||||
debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__);
|
||||
|
||||
#if MDBX_DEBUG
|
||||
#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line)
|
||||
#else /* MDBX_DEBUG */
|
||||
MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func,
|
||||
unsigned line);
|
||||
#define ASSERT_FAIL(env, msg, func, line) \
|
||||
do { \
|
||||
(void)(env); \
|
||||
assert_fail(msg, func, line); \
|
||||
} while (0)
|
||||
#endif /* MDBX_DEBUG */
|
||||
|
||||
#define ENSURE_MSG(env, expr, msg) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) \
|
||||
mdbx_assert_fail(env, msg, __func__, __LINE__); \
|
||||
ASSERT_FAIL(env, msg, __func__, __LINE__); \
|
||||
} while (0)
|
||||
|
||||
#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr)
|
||||
@ -3369,7 +3663,9 @@ MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin,
|
||||
MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key);
|
||||
|
||||
MDBX_INTERNAL_FUNC void global_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_ctor(void);
|
||||
MDBX_INTERNAL_FUNC void global_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void osal_dtor(void);
|
||||
MDBX_INTERNAL_FUNC void thread_dtor(void *ptr);
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
@ -3490,12 +3786,12 @@ typedef struct MDBX_node {
|
||||
#error "Oops, some flags overlapped or wrong"
|
||||
#endif
|
||||
|
||||
/* max number of pages to commit in one writev() call */
|
||||
#define MDBX_COMMIT_PAGES 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_COMMIT_PAGES /* sysconf(_SC_IOV_MAX) */
|
||||
#undef MDBX_COMMIT_PAGES
|
||||
#define MDBX_COMMIT_PAGES IOV_MAX
|
||||
#endif
|
||||
/* Max length of iov-vector passed to writev() call, used for auxiliary writes */
|
||||
#define MDBX_AUXILARY_IOV_MAX 64
|
||||
#if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX
|
||||
#undef MDBX_AUXILARY_IOV_MAX
|
||||
#define MDBX_AUXILARY_IOV_MAX IOV_MAX
|
||||
#endif /* MDBX_AUXILARY_IOV_MAX */
|
||||
|
||||
/*
|
||||
* /
|
||||
@ -3552,20 +3848,24 @@ ceil_powerof2(size_t value, size_t granularity) {
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned
|
||||
log2n_powerof2(size_t value) {
|
||||
assert(value > 0 && value < INT32_MAX && is_powerof2(value));
|
||||
assert((value & -(int32_t)value) == value);
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl)
|
||||
return __builtin_ctzl(value);
|
||||
log2n_powerof2(size_t value_uintptr) {
|
||||
assert(value_uintptr > 0 && value_uintptr < INT32_MAX &&
|
||||
is_powerof2(value_uintptr));
|
||||
assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr);
|
||||
const uint32_t value_uint32 = (uint32_t)value_uintptr;
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctz)
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(unsigned));
|
||||
return __builtin_ctz(value_uint32);
|
||||
#elif defined(_MSC_VER)
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, (unsigned long)value);
|
||||
STATIC_ASSERT(sizeof(value_uint32) <= sizeof(long));
|
||||
_BitScanForward(&index, value_uint32);
|
||||
return index;
|
||||
#else
|
||||
static const uint8_t debruijn_ctz32[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
|
||||
return debruijn_ctz32[(uint32_t)(value * 0x077CB531u) >> 27];
|
||||
return debruijn_ctz32[(uint32_t)(value_uint32 * 0x077CB531ul) >> 27];
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user