Skip to content

Commit d6d1718

Browse files
committed
Merge tag 'v5.18.3'
2 parents 8f6a123 + 641fae6 commit d6d1718

File tree

360 files changed

+16273
-4569
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

360 files changed

+16273
-4569
lines changed

CMakeLists.txt

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ option(WITH_SNAPPY "build with SNAPPY" OFF)
4949
option(WITH_LZ4 "build with lz4" OFF)
5050
option(WITH_ZLIB "build with zlib" OFF)
5151
option(WITH_ZSTD "build with zstd" OFF)
52+
option(WITH_WINDOWS_UTF8_FILENAMES "use UTF8 as characterset for opening files, regardles of the system code page" OFF)
53+
if (WITH_WINDOWS_UTF8_FILENAMES)
54+
add_definitions(-DROCKSDB_WINDOWS_UTF8_FILENAMES)
55+
endif()
5256
if(MSVC)
5357
# Defaults currently different for GFLAGS.
5458
# We will address find_package work a little later
@@ -362,7 +366,7 @@ endif()
362366
option(ROCKSDB_LITE "Build RocksDBLite version" OFF)
363367
if(ROCKSDB_LITE)
364368
add_definitions(-DROCKSDB_LITE)
365-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
369+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions -Os")
366370
endif()
367371

368372
if(CMAKE_SYSTEM_NAME MATCHES "Cygwin")
@@ -467,6 +471,7 @@ set(SOURCES
467471
db/compaction_iterator.cc
468472
db/compaction_job.cc
469473
db/compaction_picker.cc
474+
db/compaction_picker_fifo.cc
470475
db/compaction_picker_universal.cc
471476
db/convenience.cc
472477
db/db_filesnapshot.cc
@@ -499,6 +504,7 @@ set(SOURCES
499504
db/merge_helper.cc
500505
db/merge_operator.cc
501506
db/range_del_aggregator.cc
507+
db/range_tombstone_fragmenter.cc
502508
db/repair.cc
503509
db/snapshot_impl.cc
504510
db/table_cache.cc
@@ -572,6 +578,7 @@ set(SOURCES
572578
table/plain_table_index.cc
573579
table/plain_table_key_coding.cc
574580
table/plain_table_reader.cc
581+
table/sst_file_reader.cc
575582
table/sst_file_writer.cc
576583
table/table_properties.cc
577584
table/two_level_iterator.cc
@@ -598,6 +605,7 @@ set(SOURCES
598605
util/filename.cc
599606
util/filter_policy.cc
600607
util/hash.cc
608+
util/jemalloc_nodump_allocator.cc
601609
util/log_buffer.cc
602610
util/murmurhash.cc
603611
util/random.cc
@@ -679,12 +687,10 @@ set(SOURCES
679687
utilities/write_batch_with_index/write_batch_with_index_internal.cc
680688
$<TARGET_OBJECTS:build_version>)
681689

682-
if(HAVE_SSE42 AND NOT FORCE_SSE42)
683-
if(NOT MSVC)
684-
set_source_files_properties(
685-
util/crc32c.cc
686-
PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
687-
endif()
690+
if(HAVE_SSE42 AND NOT MSVC)
691+
set_source_files_properties(
692+
util/crc32c.cc
693+
PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
688694
endif()
689695

690696
if(HAVE_POWER8)
@@ -899,6 +905,8 @@ if(WITH_TESTS)
899905
db/perf_context_test.cc
900906
db/plain_table_db_test.cc
901907
db/prefix_test.cc
908+
db/range_del_aggregator_test.cc
909+
db/range_tombstone_fragmenter_test.cc
902910
db/repair_test.cc
903911
db/table_properties_collector_test.cc
904912
db/version_builder_test.cc
@@ -927,6 +935,7 @@ if(WITH_TESTS)
927935
table/data_block_hash_index_test.cc
928936
table/full_filter_block_test.cc
929937
table/merger_test.cc
938+
table/sst_file_reader_test.cc
930939
table/table_test.cc
931940
tools/ldb_cmd_test.cc
932941
tools/reduce_levels_test.cc

HISTORY.md

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,50 @@
11
# Rocksdb Change Log
2-
3-
# 5.17.2 (10/24/2018)
2+
## 5.18.3 (2/11/2019)
43
### Bug Fixes
5-
* Fix the bug that WriteBatchWithIndex's SeekForPrev() doesn't see the entries with the same key.
4+
* Fix possible LSM corruption when both range deletions and subcompactions are used. The symptom of this corruption is L1+ files overlapping in the user key space.
5+
6+
## 5.18.2 (01/31/2019)
7+
### Public API Change
8+
* Change time resolution in FileOperationInfo.
9+
* Deleting Blob files also goes through SstFileManager.
10+
11+
## 5.18.0 (11/30/2018)
12+
### New Features
13+
* Introduced `JemallocNodumpAllocator` memory allocator. When it is in use, the block cache will be excluded from core dumps.
14+
* Introduced `PerfContextByLevel` as part of `PerfContext` which allows storing perf context at each level. Also replaced `__thread` with `thread_local` keyword for perf_context. Added per-level perf context for bloom filter and `Get` query.
15+
* With level_compaction_dynamic_level_bytes = true, level multiplier may be adjusted automatically when Level 0 to 1 compaction lags behind.
16+
* Introduced DB option `atomic_flush`. If true, RocksDB supports flushing multiple column families and atomically committing the result to MANIFEST. Useful when WAL is disabled.
17+
* Added `num_deletions` and `num_merge_operands` members to `TableProperties`.
18+
* Added "rocksdb.min-obsolete-sst-number-to-keep" DB property that reports the lower bound on SST file numbers that are being kept from deletion, even if the SSTs are obsolete.
19+
* Add xxhash64 checksum support
20+
* Introduced `MemoryAllocator`, which lets the user specify custom memory allocator for block based table.
21+
* Improved `DeleteRange` to prevent read performance degradation. The feature is no longer marked as experimental.
22+
* Enabled checkpoint on readonly db (DBImplReadOnly).
23+
24+
### Public API Change
25+
* `DBOptions::use_direct_reads` now affects reads issued by `BackupEngine` on the database's SSTs.
26+
* `NO_ITERATORS` is divided into two counters `NO_ITERATOR_CREATED` and `NO_ITERATOR_DELETE`. Both of them are only increasing now, just as other counters.
627

7-
# 5.17.1 (10/16/2018)
828
### Bug Fixes
9-
* Fix slow flush/compaction when DB contains many snapshots. The problem became noticeable to us in DBs with 100,000+ snapshots, though it will affect others at different thresholds.
10-
* Properly set the stop key for a truncated manual CompactRange
1129
* Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set.
12-
13-
### New Features
14-
* Introduced CacheAllocator, which lets the user specify custom allocator for memory in block cache.
30+
* Fix in-memory range tombstone truncation to avoid erroneously covering newer keys at a lower level, and include range tombstones in compacted files whose largest key is the range tombstone's start key.
31+
* Properly set the stop key for a truncated manual CompactRange
32+
* Fix slow flush/compaction when DB contains many snapshots. The problem became noticeable to us in DBs with 100,000+ snapshots, though it will affect others at different thresholds.
33+
* Fix the bug that WriteBatchWithIndex's SeekForPrev() doesn't see the entries with the same key.
34+
* Fix the bug where user comparator was sometimes fed with InternalKey instead of the user key. The bug manifests during GenerateBottommostFiles.
35+
* Fix a bug in WritePrepared txns where if the number of old snapshots goes beyond the snapshot cache size (128 default) the rest will not be checked when evicting a commit entry from the commit cache.
36+
* Fixed Get correctness bug in the presence of range tombstones where merge operands covered by a range tombstone always result in NotFound.
37+
* Start populating `NO_FILE_CLOSES` ticker statistic, which was always zero previously.
38+
* The default value of NewBloomFilterPolicy()'s argument use_block_based_builder is changed to false. Note that this new default may cause large temp memory usage when building very large SST files.
39+
* Fix a deadlock caused by compaction and file ingestion waiting for each other in the event of write stalls.
40+
* Make DB ignore dropped column families while committing results of atomic flush.
1541

1642
## 5.17.0 (10/05/2018)
1743
### Public API Change
1844
* `OnTableFileCreated` will now be called for empty files generated during compaction. In that case, `TableFileCreationInfo::file_path` will be "(nil)" and `TableFileCreationInfo::file_size` will be zero.
1945
* Add `FlushOptions::allow_write_stall`, which controls whether Flush calls start working immediately, even if it causes user writes to stall, or will wait until flush can be performed without causing write stall (similar to `CompactRangeOptions::allow_write_stall`). Note that the default value is false, meaning we add delay to Flush calls until stalling can be avoided when possible. This is a behavior change compared to previous RocksDB versions, where Flush calls didn't check if they might cause stall or not.
2046
* Application using PessimisticTransactionDB is expected to rollback/commit recovered transactions before starting new ones. This assumption is used to skip concurrency control during recovery.
47+
* Expose column family id to `OnCompactionCompleted`.
2148

2249
### New Features
2350
* TransactionOptions::skip_concurrency_control allows pessimistic transactions to skip the overhead of concurrency control. Could be used for optimizing certain transactions or during recovery.

Makefile

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,19 +93,38 @@ ifeq ($(MAKECMDGOALS),rocksdbjavastaticpublish)
9393
DEBUG_LEVEL=0
9494
endif
9595

96+
# Lite build flag.
97+
LITE ?= 0
98+
ifeq ($(LITE), 0)
99+
ifneq ($(filter -DROCKSDB_LITE,$(OPT)),)
100+
# Be backward compatible and support older format where OPT=-DROCKSDB_LITE is
101+
# specified instead of LITE=1 on the command line.
102+
LITE=1
103+
endif
104+
else ifeq ($(LITE), 1)
105+
ifeq ($(filter -DROCKSDB_LITE,$(OPT)),)
106+
OPT += -DROCKSDB_LITE
107+
endif
108+
endif
109+
110+
# Figure out optimize level.
111+
ifneq ($(DEBUG_LEVEL), 2)
112+
ifeq ($(LITE), 0)
113+
OPT += -O2
114+
else
115+
OPT += -Os
116+
endif
117+
endif
118+
96119
# keep frame pointers (except in leaf functions, where supported) if debug level is not 2
97120
ifneq ($(DEBUG_LEVEL), 2)
98-
OPT += -O2 -fno-omit-frame-pointer
121+
OPT += -fno-omit-frame-pointer
99122
# Skip for archs that don't support -momit-leaf-frame-pointer
100123
ifeq (,$(shell $(CXX) -fsyntax-only -momit-leaf-frame-pointer -xc /dev/null 2>&1))
101124
OPT += -momit-leaf-frame-pointer
102125
endif
103126
endif
104127

105-
ifeq (,$(shell $(CXX) -fsyntax-only -faligned-new -xc++ /dev/null 2>&1))
106-
CXXFLAGS += -faligned-new -DHAVE_ALIGNED_NEW
107-
endif
108-
109128
ifeq (,$(shell $(CXX) -fsyntax-only -maltivec -xc /dev/null 2>&1))
110129
CXXFLAGS += -DHAS_ALTIVEC
111130
CFLAGS += -DHAS_ALTIVEC
@@ -322,7 +341,7 @@ endif
322341
ifeq ("$(wildcard $(LUA_LIB))", "") # LUA_LIB does not exist
323342
$(error $(LUA_LIB) does not exist. Try to specify both LUA_PATH and LUA_LIB manually)
324343
endif
325-
LDFLAGS += $(LUA_LIB)
344+
EXEC_LDFLAGS += $(LUA_LIB)
326345

327346
endif
328347

@@ -526,7 +545,6 @@ TESTS = \
526545
persistent_cache_test \
527546
statistics_test \
528547
lua_test \
529-
range_del_aggregator_test \
530548
lru_cache_test \
531549
object_registry_test \
532550
repair_test \
@@ -536,6 +554,9 @@ TESTS = \
536554
db_universal_compaction_test \
537555
trace_analyzer_test \
538556
repeatable_thread_test \
557+
range_tombstone_fragmenter_test \
558+
range_del_aggregator_test \
559+
sst_file_reader_test \
539560

540561
PARALLEL_TEST = \
541562
backupable_db_test \
@@ -886,6 +907,7 @@ crash_test: whitebox_crash_test blackbox_crash_test
886907

887908
blackbox_crash_test: db_stress
888909
python -u tools/db_crashtest.py --simple blackbox $(CRASH_TEST_EXT_ARGS)
910+
python -u tools/db_crashtest.py --enable_atomic_flush blackbox $(CRASH_TEST_EXT_ARGS)
889911
python -u tools/db_crashtest.py blackbox $(CRASH_TEST_EXT_ARGS)
890912

891913
ifeq ($(CRASH_TEST_KILL_ODD),)
@@ -894,6 +916,8 @@ endif
894916

895917
whitebox_crash_test: db_stress
896918
python -u tools/db_crashtest.py --simple whitebox --random_kill_odd \
919+
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
920+
python -u tools/db_crashtest.py --enable_atomic_flush whitebox --random_kill_odd \
897921
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
898922
python -u tools/db_crashtest.py whitebox --random_kill_odd \
899923
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
@@ -1568,6 +1592,12 @@ blob_db_test: utilities/blob_db/blob_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
15681592
repeatable_thread_test: util/repeatable_thread_test.o $(LIBOBJECTS) $(TESTHARNESS)
15691593
$(AM_LINK)
15701594

1595+
range_tombstone_fragmenter_test: db/range_tombstone_fragmenter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
1596+
$(AM_LINK)
1597+
1598+
sst_file_reader_test: table/sst_file_reader_test.o $(LIBOBJECTS) $(TESTHARNESS)
1599+
$(AM_LINK)
1600+
15711601
#-------------------------------------------------
15721602
# make install related stuff
15731603
INSTALL_PATH ?= /usr/local

TARGETS

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,14 @@ is_opt_mode = build_mode.startswith("opt")
6767
if is_opt_mode:
6868
rocksdb_compiler_flags.append("-DNDEBUG")
6969

70+
sanitizer = read_config("fbcode", "sanitizer")
71+
72+
# Do not enable jemalloc if sanitizer presents. RocksDB will further detect
73+
# whether the binary is linked with jemalloc at runtime.
74+
if sanitizer == "":
75+
rocksdb_compiler_flags.append("-DROCKSDB_JEMALLOC")
76+
rocksdb_external_deps.append(("jemalloc", None, "headers"))
77+
7078
cpp_library(
7179
name = "rocksdb_lib",
7280
srcs = [
@@ -81,6 +89,7 @@ cpp_library(
8189
"db/compaction_iterator.cc",
8290
"db/compaction_job.cc",
8391
"db/compaction_picker.cc",
92+
"db/compaction_picker_fifo.cc",
8493
"db/compaction_picker_universal.cc",
8594
"db/convenience.cc",
8695
"db/db_filesnapshot.cc",
@@ -113,6 +122,7 @@ cpp_library(
113122
"db/merge_helper.cc",
114123
"db/merge_operator.cc",
115124
"db/range_del_aggregator.cc",
125+
"db/range_tombstone_fragmenter.cc",
116126
"db/repair.cc",
117127
"db/snapshot_impl.cc",
118128
"db/table_cache.cc",
@@ -190,6 +200,7 @@ cpp_library(
190200
"table/plain_table_index.cc",
191201
"table/plain_table_key_coding.cc",
192202
"table/plain_table_reader.cc",
203+
"table/sst_file_reader.cc",
193204
"table/sst_file_writer.cc",
194205
"table/table_properties.cc",
195206
"table/two_level_iterator.cc",
@@ -215,6 +226,7 @@ cpp_library(
215226
"util/filename.cc",
216227
"util/filter_policy.cc",
217228
"util/hash.cc",
229+
"util/jemalloc_nodump_allocator.cc",
218230
"util/log_buffer.cc",
219231
"util/murmurhash.cc",
220232
"util/random.cc",
@@ -920,6 +932,11 @@ ROCKS_TESTS = [
920932
"db/range_del_aggregator_test.cc",
921933
"serial",
922934
],
935+
[
936+
"range_tombstone_fragmenter_test",
937+
"db/range_tombstone_fragmenter_test.cc",
938+
"serial",
939+
],
923940
[
924941
"rate_limiter_test",
925942
"util/rate_limiter_test.cc",
@@ -965,6 +982,11 @@ ROCKS_TESTS = [
965982
"tools/sst_dump_test.cc",
966983
"serial",
967984
],
985+
[
986+
"sst_file_reader_test",
987+
"table/sst_file_reader_test.cc",
988+
"serial",
989+
],
968990
[
969991
"statistics_test",
970992
"monitoring/statistics_test.cc",

buckifier/targets_cfg.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,14 @@
7070
# doesn't harm and avoid forgetting to add it.
7171
if is_opt_mode:
7272
rocksdb_compiler_flags.append("-DNDEBUG")
73+
74+
sanitizer = read_config("fbcode", "sanitizer")
75+
76+
# Do not enable jemalloc if sanitizer presents. RocksDB will further detect
77+
# whether the binary is linked with jemalloc at runtime.
78+
if sanitizer == "":
79+
rocksdb_compiler_flags.append("-DROCKSDB_JEMALLOC")
80+
rocksdb_external_deps.append(("jemalloc", None, "headers"))
7381
"""
7482

7583

build_tools/RocksDBCommonHelper.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
// Name of the environment variables which need to be set by the entity which
88
// triggers continuous runs so that code at the end of the file gets executed
99
// and Sandcastle run starts.
10-
define("ENV_POST_RECEIVE_HOOK", "POST_RECEIVE_HOOK");
11-
define("ENV_HTTPS_APP_VALUE", "HTTPS_APP_VALUE");
12-
define("ENV_HTTPS_TOKEN_VALUE", "HTTPS_TOKEN_VALUE");
10+
const ENV_POST_RECEIVE_HOOK = "POST_RECEIVE_HOOK";
11+
const ENV_HTTPS_APP_VALUE = "HTTPS_APP_VALUE";
12+
const ENV_HTTPS_TOKEN_VALUE = "HTTPS_TOKEN_VALUE";
1313

14-
define("PRIMARY_TOKEN_FILE", '/home/krad/.sandcastle');
15-
define("CONT_RUN_ALIAS", "leveldb");
14+
const PRIMARY_TOKEN_FILE = '/home/krad/.sandcastle';
15+
const CONT_RUN_ALIAS = "leveldb";
1616

1717
//////////////////////////////////////////////////////////////////////
1818
/* Run tests in sandcastle */

build_tools/build_detect_platform

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,17 @@ EOF
474474
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SCHED_GETCPU_PRESENT"
475475
fi
476476
fi
477+
478+
if ! test $ROCKSDB_DISABLE_ALIGNED_NEW; then
479+
# Test whether c++17 aligned-new is supported
480+
$CXX $PLATFORM_CXXFLAGS -faligned-new -x c++ - -o /dev/null 2>/dev/null <<EOF
481+
struct alignas(1024) t {int a;};
482+
int main() {}
483+
EOF
484+
if [ "$?" = 0 ]; then
485+
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS -faligned-new -DHAVE_ALIGNED_NEW"
486+
fi
487+
fi
477488
fi
478489

479490
# TODO(tec): Fix -Wshorten-64-to-32 errors on FreeBSD and enable the warning.

build_tools/gnu_parallel

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5082,8 +5082,8 @@ sub openoutputfiles {
50825082
# Set reading FD if using --group (--ungroup does not need)
50835083
for my $fdno (1,2) {
50845084
# Re-open the file for reading
5085-
# so fdw can be closed seperately
5086-
# and fdr can be seeked seperately (for --line-buffer)
5085+
# so fdw can be closed separately
5086+
# and fdr can be seeked separately (for --line-buffer)
50875087
open(my $fdr,"<", $self->fh($fdno,'name')) ||
50885088
::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
50895089
$self->set_fh($fdno,'r',$fdr);

0 commit comments

Comments
 (0)