Skip to content

Commit 65c6deb

Browse files
author
Paweł Andruszkiewicz
committed
BUG#35895247 "One-file-per-thread" inadequate for single files with escaped wildcards
When importing a single file which contained only escaped wildcards characters, util.importTable() was loading the whole file using a single thread. With this fix, that file is loaded in chunks by multiple threads. Change-Id: I75f4eba97f1aa54ac7274e6d20d8f4165ea31595 (cherry picked from commit 191093263d5ab1582d4c8ce83a1ab5dfd9e75d93)
1 parent 3cb0b6d commit 65c6deb

File tree

7 files changed

+136
-8
lines changed

7 files changed

+136
-8
lines changed

modules/util/import_table/import_table_options.cc

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "mysqlshdk/libs/storage/ifile.h"
4141
#include "mysqlshdk/libs/utils/strformat.h"
4242
#include "mysqlshdk/libs/utils/utils_file.h"
43+
#include "mysqlshdk/libs/utils/utils_general.h"
4344
#include "mysqlshdk/libs/utils/utils_path.h"
4445

4546
namespace {
@@ -212,9 +213,24 @@ void Import_table_option_pack::set_decode_columns(
212213
}
213214
}
214215

215-
bool Import_table_option_pack::check_if_multifile() const {
216+
bool Import_table_option_pack::check_if_multifile() {
216217
if (m_filelist_from_user.size() == 1) {
217-
return has_wildcard(m_filelist_from_user[0]);
218+
if (storage_config() ||
219+
shcore::OperatingSystem::WINDOWS != shcore::get_os_type()) {
220+
// local non-Windows and remote filesystems allow for * and ? characters
221+
// in file names
222+
// BUG#35895247: if all wildcard characters are escaped, load the file in
223+
// chunks
224+
if (auto path = shcore::unescape_glob(m_filelist_from_user[0]);
225+
path.has_value()) {
226+
m_filelist_from_user[0] = std::move(*path);
227+
return false;
228+
} else {
229+
return true;
230+
}
231+
} else {
232+
return has_wildcard(m_filelist_from_user[0]);
233+
}
218234
}
219235

220236
return true;

modules/util/import_table/import_table_options.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ class Import_table_option_pack {
146146
void set_bytes_per_chunk(const std::string &value);
147147
void set_max_rate(const std::string &value);
148148
void on_unpacked_options();
149-
bool check_if_multifile() const;
149+
bool check_if_multifile();
150150

151151
protected:
152152
std::vector<std::string> m_filelist_from_user;

mysqlshdk/libs/utils/utils_general.cc

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,6 +1118,10 @@ std::string to_string(OperatingSystem os_type) {
11181118

11191119
namespace {
11201120

1121+
constexpr char k_glob_escape = '\\';
1122+
constexpr char k_glob_match_single = '?';
1123+
constexpr char k_glob_match_many = '*';
1124+
11211125
/**
11221126
* https://research.swtch.com/glob
11231127
*/
@@ -1134,23 +1138,25 @@ bool _match_glob(const std::string_view pat, const std::string_view str) {
11341138
char c = pat[px];
11351139

11361140
switch (c) {
1137-
case '?':
1141+
case k_glob_match_single:
11381142
if (sx < send) {
11391143
++px;
11401144
++sx;
11411145
continue;
11421146
}
11431147
break;
11441148

1145-
case '*':
1149+
case k_glob_match_many:
11461150
ppx = px;
11471151
psx = sx + 1;
11481152
++px;
11491153
continue;
11501154

1151-
case '\\':
1152-
// if '\' is followed by * or ?, it's an escape sequence, skip '\'
1153-
if ((px + 1) < pend && (pat[px + 1] == '*' || pat[px + 1] == '?')) {
1155+
case k_glob_escape:
1156+
// if '\' is followed by \, * or ?, it's an escape sequence, skip '\'
1157+
if ((px + 1) < pend && (pat[px + 1] == k_glob_escape ||
1158+
pat[px + 1] == k_glob_match_many ||
1159+
pat[px + 1] == k_glob_match_single)) {
11541160
++px;
11551161
c = pat[px];
11561162
}
@@ -1168,6 +1174,11 @@ bool _match_glob(const std::string_view pat, const std::string_view str) {
11681174
}
11691175

11701176
if (0 < psx && psx <= send) {
1177+
// original code has here:
1178+
// px = ppx;
1179+
// sx = psx;
1180+
// which jumps back to the '*' case, we skip than one iteration by setting
1181+
// the correct values right away
11711182
px = ppx + 1;
11721183
sx = psx;
11731184
++psx;
@@ -1200,6 +1211,42 @@ bool match_glob(const std::string_view pattern, const std::string_view s,
12001211
return _match_glob(pattern, s);
12011212
}
12021213

1214+
std::optional<std::string> unescape_glob(const std::string_view pattern) {
1215+
const auto length = pattern.length();
1216+
std::string result;
1217+
result.reserve(length);
1218+
1219+
for (auto i = decltype(length){0}; i < length; ++i) {
1220+
auto c = pattern[i];
1221+
1222+
switch (c) {
1223+
case k_glob_match_single:
1224+
case k_glob_match_many:
1225+
// unescaped wildcard
1226+
return std::nullopt;
1227+
1228+
case k_glob_escape:
1229+
if (i + 1 < length) {
1230+
if (const auto cc = pattern[i + 1]; k_glob_escape == cc ||
1231+
k_glob_match_single == cc ||
1232+
k_glob_match_many == cc) {
1233+
// escaped wildcard, skip escape character and copy wildcard
1234+
// verbatim
1235+
++i;
1236+
c = cc;
1237+
}
1238+
}
1239+
[[fallthrough]];
1240+
1241+
default:
1242+
result.push_back(c);
1243+
break;
1244+
}
1245+
}
1246+
1247+
return result;
1248+
}
1249+
12031250
const char *get_long_version() {
12041251
return "Ver " MYSH_FULL_VERSION " for " SYSTEM_TYPE " on " MACHINE_TYPE
12051252
" - for MySQL " LIBMYSQL_VERSION " (" MYSQL_COMPILATION_COMMENT ")";

mysqlshdk/libs/utils/utils_general.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <chrono>
2929
#include <functional>
3030
#include <list>
31+
#include <optional>
3132
#include <set>
3233
#include <sstream>
3334
#include <string>
@@ -196,6 +197,18 @@ bool SHCORE_PUBLIC match_glob(const std::string_view pattern,
196197
const std::string_view s,
197198
bool case_sensitive = false);
198199

200+
/**
201+
* Replaces all escaped wildcard characters with their unescaped counterparts.
202+
* If pattern already contains an unescaped wildcard characters, does not return
203+
* a value.
204+
*
205+
* @param pattern A pattern to process.
206+
*
207+
* @returns Processed pattern or no value.
208+
*/
209+
std::optional<std::string> SHCORE_PUBLIC
210+
unescape_glob(const std::string_view pattern);
211+
199212
std::string SHCORE_PUBLIC to_camel_case(std::string_view name);
200213
std::string SHCORE_PUBLIC from_camel_case(std::string_view name);
201214
std::string SHCORE_PUBLIC from_camel_case_to_dashes(std::string_view name);

unittest/mysqlshdk/libs/utils/utils_general_t.cc

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,7 @@ TEST(utils_general, match_glob) {
512512
EXPECT_TRUE(match_glob("\\", "\\"));
513513
EXPECT_FALSE(match_glob("\\", "x"));
514514
EXPECT_TRUE(match_glob("\\opti*", "\\option"));
515+
EXPECT_TRUE(match_glob("\\\\opti*", "\\option"));
515516
EXPECT_FALSE(match_glob("\\opti\\*", "\\option"));
516517
EXPECT_TRUE(match_glob("*\\*", "abcdefgh*"));
517518

@@ -555,6 +556,39 @@ TEST(utils_general, match_glob) {
555556
EXPECT_TRUE(match_glob("*x", "xxx"));
556557
}
557558

559+
TEST(utils_general, unescape_glob) {
560+
EXPECT_EQ("", unescape_glob(""));
561+
562+
EXPECT_EQ(std::nullopt, unescape_glob("*"));
563+
EXPECT_EQ(std::nullopt, unescape_glob("?"));
564+
EXPECT_EQ("\\", unescape_glob("\\"));
565+
EXPECT_EQ("a", unescape_glob("a"));
566+
567+
EXPECT_EQ(std::nullopt, unescape_glob("\\??"));
568+
EXPECT_EQ(std::nullopt, unescape_glob("\\?*"));
569+
EXPECT_EQ("?\\", unescape_glob("\\?\\"));
570+
EXPECT_EQ("?\\", unescape_glob("\\?\\\\"));
571+
EXPECT_EQ("?a", unescape_glob("\\?a"));
572+
EXPECT_EQ("??", unescape_glob("\\?\\?"));
573+
EXPECT_EQ("?*", unescape_glob("\\?\\*"));
574+
575+
EXPECT_EQ(std::nullopt, unescape_glob("\\*?"));
576+
EXPECT_EQ(std::nullopt, unescape_glob("\\**"));
577+
EXPECT_EQ("*\\", unescape_glob("\\*\\"));
578+
EXPECT_EQ("*\\", unescape_glob("\\*\\\\"));
579+
EXPECT_EQ("*a", unescape_glob("\\*a"));
580+
EXPECT_EQ("*?", unescape_glob("\\*\\?"));
581+
EXPECT_EQ("**", unescape_glob("\\*\\*"));
582+
583+
EXPECT_EQ("\\option", unescape_glob("\\option"));
584+
EXPECT_EQ(std::nullopt, unescape_glob("\\optio*"));
585+
EXPECT_EQ(std::nullopt, unescape_glob("\\optio?"));
586+
EXPECT_EQ("\\optio?", unescape_glob("\\optio\\?"));
587+
EXPECT_EQ("\\optio*", unescape_glob("\\optio\\*"));
588+
EXPECT_EQ("\\optio\\", unescape_glob("\\optio\\"));
589+
EXPECT_EQ("\\optio\\n", unescape_glob("\\optio\\n"));
590+
}
591+
558592
TEST(utils_general, get_long_version) {
559593
if (*MYSH_BUILD_ID) {
560594
std::cout << "Verifying values for the build server" << std::endl;

unittest/scripts/auto/py_oci/scripts/util_import_table_oci_norecord.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,13 @@
152152

153153
testutil.clear_traps("os_bucket")
154154

155+
#@<> BUG#35895247 - importing a file with escaped wildcard characters should load it in chunks {__os_type != "windows"}
156+
testutil.anycopy(os.path.join(__import_data_path, SOURCE_FILE), {'osBucketName': OS_BUCKET_NAME, 'osNamespace': OS_NAMESPACE, 'ociConfigFile': OCI_CONFIG_FILE, 'name': "will *this work?"})
157+
session.run_sql(f"TRUNCATE TABLE {quote_identifier(TARGET_SCHEMA, 'cities')}")
158+
159+
EXPECT_NO_THROWS(lambda: util.import_table("will \\*this work\\?", { "schema": TARGET_SCHEMA, "table": "cities", "showProgress": False, 'osBucketName': OS_BUCKET_NAME, 'osNamespace': OS_NAMESPACE, 'ociConfigFile': OCI_CONFIG_FILE }), "import should not fail")
160+
EXPECT_STDOUT_CONTAINS(f"Importing from file 'will *this work?' to table ")
161+
155162
#@<> Cleanup
156163
delete_bucket(OS_BUCKET_NAME)
157164

unittest/scripts/auto/py_shell/scripts/util_import_table_norecord.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#@<> This is unit test file for WL12193 Parallel data import
44
import os
55
import os.path
6+
import shutil
67

78
target_port = __mysql_port
89
target_xport = __port
@@ -395,6 +396,16 @@ def TEST_STRING_OPTION(option):
395396
#@<> s3EndpointOverride is using wrong scheme
396397
EXPECT_THROWS(lambda: util.import_table(world_x_cities_dump, { "s3BucketName": "bucket", "s3EndpointOverride": "FTp://endpoint", "schema": target_schema, "table": target_table }), "ValueError: Util.import_table: Argument #2: The value of the option 's3EndpointOverride' uses an invalid scheme 'FTp://', expected: http:// or https://.")
397398

399+
#@<> BUG#35895247 - importing a file with escaped wildcard characters should load it in chunks {__os_type != "windows"}
400+
full_path = os.path.join(__tmp_dir, "will *this work?")
401+
shutil.copyfile(world_x_cities_dump, full_path)
402+
truncate_table()
403+
404+
EXPECT_NO_THROWS(lambda: util.import_table(os.path.join(__tmp_dir, "will \\*this work\\?"), { "schema": target_schema, "table": target_table, "showProgress": False }), "import should not fail")
405+
EXPECT_STDOUT_CONTAINS(f"Importing from file '{filename_for_output(full_path)}' to table ")
406+
407+
os.remove(full_path)
408+
398409
#@<> Teardown
399410
session.run_sql("DROP SCHEMA IF EXISTS " + target_schema)
400411
session.close()

0 commit comments

Comments
 (0)