Skip to content

Commit befb0ca

Browse files
author
Paweł Andruszkiewicz
committed
BUG#35180061 mysqlsh produces wrong dump in case of complex primary key
When dumping with `chunking` enabled, if PK or a unique index selected to chunk a table contained an ENUM column with values specified in non-alphabetic order, some rows from that table were missing in the dump. The chunking algorithm is using '>' and '<' operators to create ranges defining each chunk. In case of an ENUM, these operators use lexicographical ordering of the string literals assigned to the index values and not the index values themselves. If values of an ENUM column are not specified in the alphabetic order, chunks will not cover the whole table and some rows will be missing. As a fix, PKs and unique indexes which contain one or more ENUM columns are ignored when selecting an index for the chunking algorithm. Change-Id: I188351135231d8821c0bb26b6c0b9badc243acf6 (cherry picked from commit c827b268540d26ed64fe4f5d854a2bc27fe46ea2)
1 parent 53f9b04 commit befb0ca

File tree

2 files changed

+95
-26
lines changed

2 files changed

+95
-26
lines changed

modules/util/dump/instance_cache.cc

Lines changed: 47 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -755,12 +755,15 @@ void Instance_cache_builder::fetch_table_indexes() {
755755
info.where = "COLUMN_NAME IS NOT NULL AND NON_UNIQUE=0";
756756

757757
const std::string primary_index = "PRIMARY";
758-
// schema -> table -> index name -> columns
758+
struct Index_info {
759+
std::vector<Instance_cache::Column *> columns;
760+
bool unsafe = false;
761+
};
762+
// schema -> table -> index name -> index info
763+
// indexes are ordered to ensure repeatability of the algorithm
759764
std::unordered_map<
760765
std::string,
761-
std::unordered_map<
762-
std::string,
763-
std::map<std::string, std::map<uint64_t, Instance_cache::Column *>>>>
766+
std::unordered_map<std::string, std::map<std::string, Index_info>>>
764767
indexes;
765768

766769
iterate_tables(
@@ -781,18 +784,38 @@ void Instance_cache_builder::fetch_table_indexes() {
781784
// column will not be found if user is missing SELECT privilege and it
782785
// was not possible to fetch column information
783786
if (t->all_columns.end() != column) {
784-
indexes[schema_name][table_name][row->get_string(2, "")].emplace(
785-
row->get_uint(4), &(*column)); // INDEX_NAME, SEQ_IN_INDEX
787+
auto &index_info = indexes[schema_name][table_name]
788+
[row->get_string(2, {})]; // INDEX_NAME
789+
const auto seq = row->get_uint(4); // SEQ_IN_INDEX
790+
791+
// SEQ_IN_INDEX is 1-based
792+
if (seq > index_info.columns.size()) {
793+
index_info.columns.resize(seq);
794+
}
795+
796+
index_info.columns[seq - 1] = &(*column);
797+
// BUG#35180061 - do not use indexes on ENUM columns
798+
index_info.unsafe |= (mysqlshdk::db::Type::Enum == column->type);
786799
} else {
787800
assert(t->all_columns.empty());
788801
}
789802
});
790803

791-
for (const auto &schema : indexes) {
792-
auto &s = m_cache.schemas.at(schema.first);
804+
for (auto &schema : indexes) {
805+
for (auto &table : schema.second) {
806+
for (auto index = table.second.begin(); index != table.second.end();) {
807+
if (index->second.unsafe) {
808+
index = table.second.erase(index);
809+
} else {
810+
++index;
811+
}
812+
}
793813

794-
for (const auto &table : schema.second) {
795-
auto &t = s.tables.at(table.first);
814+
if (table.second.empty()) {
815+
continue;
816+
}
817+
818+
auto &t = m_cache.schemas.at(schema.first).tables.at(table.first);
796819

797820
auto index = table.second.find(primary_index);
798821
bool primary = false;
@@ -803,12 +826,11 @@ void Instance_cache_builder::fetch_table_indexes() {
803826
const auto mark_integer_columns = [](decltype(index) idx) {
804827
std::vector<bool> result;
805828

806-
result.reserve(idx->second.size());
829+
result.reserve(idx->second.columns.size());
807830

808-
for (const auto &c : idx->second) {
809-
result.emplace_back(
810-
c.second->type == mysqlshdk::db::Type::Integer ||
811-
c.second->type == mysqlshdk::db::Type::UInteger);
831+
for (const auto &c : idx->second.columns) {
832+
result.emplace_back(c->type == mysqlshdk::db::Type::Integer ||
833+
c->type == mysqlshdk::db::Type::UInteger);
812834
}
813835

814836
return result;
@@ -829,23 +851,23 @@ void Instance_cache_builder::fetch_table_indexes() {
829851

830852
// skip first index
831853
for (auto it = std::next(index); it != table.second.end(); ++it) {
832-
const auto size = it->second.size();
854+
const auto size = it->second.columns.size();
833855

834-
if (size < index->second.size()) {
856+
if (size < index->second.columns.size()) {
835857
// prefer shorter index
836858
use_index(it);
837-
} else if (size == index->second.size()) {
859+
} else if (size == index->second.columns.size()) {
838860
// if indexes have equal length, prefer the one with longer run of
839861
// integer columns
840862
auto candidate = mark_integer_columns(it);
841-
auto candidate_column = it->second.begin();
842-
auto current_column = index->second.begin();
863+
auto candidate_column = it->second.columns.begin();
864+
auto current_column = index->second.columns.begin();
843865

844866
for (std::size_t i = 0; i < size; ++i) {
845867
if (candidate[i] == current[i]) {
846868
// both columns are of the same type, check if they are nullable
847-
if (candidate_column->second->nullable ==
848-
current_column->second->nullable) {
869+
if ((*candidate_column)->nullable ==
870+
(*current_column)->nullable) {
849871
// both columns are NULL or NOT NULL
850872
if (!current[i]) {
851873
// both columns are not integers, use the current index
@@ -854,7 +876,7 @@ void Instance_cache_builder::fetch_table_indexes() {
854876
// else, both column are integers, check next column
855877
} else {
856878
// prefer NOT NULL column
857-
if (!candidate_column->second->nullable) {
879+
if (!(*candidate_column)->nullable) {
858880
// candidate is NOT NULL, use that
859881
use_given_index(it, std::move(candidate));
860882
}
@@ -883,8 +905,8 @@ void Instance_cache_builder::fetch_table_indexes() {
883905

884906
t.index.set_primary(primary);
885907

886-
for (auto &column : index->second) {
887-
t.index.add_column(column.second);
908+
for (auto &column : index->second.columns) {
909+
t.index.add_column(column);
888910
}
889911
}
890912
}

unittest/scripts/auto/py_shell/scripts/util_dump_tables_norecord.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3126,12 +3126,59 @@ def generate_gaps():
31263126

31273127
session.run_sql("DROP SCHEMA IF EXISTS !;", [ tested_schema ])
31283128
session.run_sql("CREATE SCHEMA !", [ tested_schema ])
3129-
session.run_sql(f"CREATE TABLE !.! (a TEXT, b TEXT, c GEOMETRY NOT NULL, d GEOMETRY NOT NULL, FULLTEXT (a), FULLTEXT (b), SPATIAL KEY (c), SPATIAL KEY (d))", [ tested_schema, tested_table ])
3129+
session.run_sql("CREATE TABLE !.! (a TEXT, b TEXT, c GEOMETRY NOT NULL, d GEOMETRY NOT NULL, FULLTEXT (a), FULLTEXT (b), SPATIAL KEY (c), SPATIAL KEY (d))", [ tested_schema, tested_table ])
31303130

31313131
TEST_DUMP_AND_LOAD(tested_schema, [ tested_table ], { "showProgress": False })
31323132

31333133
session.run_sql("DROP SCHEMA !;", [ tested_schema ])
31343134

3135+
#@<> BUG#35180061 - dumping a table which has a PK containing an ENUM column, whose values are not specified in an alphabetic order
3136+
tested_schema = "tested_schema"
3137+
tested_table = "tested_table"
3138+
3139+
session.run_sql("DROP SCHEMA IF EXISTS !;", [ tested_schema ])
3140+
session.run_sql("CREATE SCHEMA !", [ tested_schema ])
3141+
session.run_sql("""CREATE TABLE !.! (
3142+
`content_type` ENUM('music','music_video','ringtone') NOT NULL,
3143+
`service` ENUM('individual','individual_annual','match','student','family','trial') NOT NULL,
3144+
`product_id` int unsigned NOT NULL,
3145+
`ordinal` int unsigned NOT NULL,
3146+
PRIMARY KEY (`content_type`,`service`),
3147+
KEY `product_id` (`product_id`),
3148+
UNIQUE KEY `ordinal` (`ordinal`)
3149+
)""", [ tested_schema, tested_table ])
3150+
session.run_sql("""INSERT INTO !.! VALUES
3151+
('music_video', 'individual', 255, 1),
3152+
('music', 'trial', 327, 2),
3153+
('ringtone', 'trial', 327, 3),
3154+
('music', 'family', 383, 4),
3155+
('ringtone', 'family', 383, 5),
3156+
('music', 'individual', 384, 6),
3157+
('ringtone', 'individual', 384, 7),
3158+
('music_video', 'family', 385, 8),
3159+
('music_video', 'trial', 386, 9),
3160+
('music', 'student', 392, 10),
3161+
('ringtone', 'student', 392, 11),
3162+
('music_video', 'student', 393, 12),
3163+
('music', 'match', 394, 13),
3164+
('ringtone', 'match', 394, 14),
3165+
('music_video', 'match', 395, 15),
3166+
('music', 'individual_annual', 431, 16),
3167+
('ringtone', 'individual_annual', 431, 17),
3168+
('music_video', 'individual_annual', 432, 18)
3169+
""", [ tested_schema, tested_table ])
3170+
3171+
# PK consists of ENUM columns, but there's a unique index, it's going to be used for chunking and only one chunk is going to be created
3172+
TEST_DUMP_AND_LOAD(tested_schema, [ tested_table ], { "showProgress": False })
3173+
EXPECT_STDOUT_CONTAINS("1 chunks (18 rows, 447 bytes) for 1 tables in 1 schemas were loaded")
3174+
3175+
# PK consists of ENUM columns, there are no more indexes, table is not going to be chunked, chunks are created while dumping the table, two chunks are going to be created
3176+
session.run_sql("DROP INDEX `ordinal` ON !.!", [ tested_schema, tested_table ])
3177+
TEST_DUMP_AND_LOAD(tested_schema, [ tested_table ], { "showProgress": False })
3178+
EXPECT_STDOUT_CONTAINS("2 chunks (18 rows, 447 bytes) for 1 tables in 1 schemas were loaded")
3179+
3180+
session.run_sql("DROP SCHEMA !;", [ tested_schema ])
3181+
31353182
#@<> Cleanup
31363183
drop_all_schemas()
31373184
session.run_sql("SET GLOBAL local_infile = false;")

0 commit comments

Comments
 (0)