Skip to content

Commit 5ea86f4

Browse files
[3.12] gh-115712: Support CSV dialects with delimiter=' ' and skipinitialspace=True (GH-115721) (GH-115729)
csv.writer() now quotes empty fields if the delimiter is a space and skipinitialspace is true, and raises an exception if quoting is not possible. (cherry picked from commit 937d282)
1 parent 20907ca commit 5ea86f4

File tree

3 files changed

+92
-8
lines changed

3 files changed

+92
-8
lines changed

Lib/test/test_csv.py

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,20 @@ def _test_arg_valid(self, ctor, arg):
4646
quoting=csv.QUOTE_ALL, quotechar=None)
4747
self.assertRaises(TypeError, ctor, arg,
4848
quoting=csv.QUOTE_NONE, quotechar='')
49+
ctor(arg, delimiter=' ')
50+
ctor(arg, escapechar=' ')
51+
ctor(arg, quotechar=' ')
52+
ctor(arg, delimiter='\t', skipinitialspace=True)
53+
ctor(arg, escapechar='\t', skipinitialspace=True)
54+
ctor(arg, quotechar='\t', skipinitialspace=True)
55+
ctor(arg, delimiter=' ', skipinitialspace=True)
56+
ctor(arg, delimiter='^')
57+
ctor(arg, escapechar='^')
58+
ctor(arg, quotechar='^')
59+
ctor(arg, delimiter='\x85')
60+
ctor(arg, escapechar='\x85')
61+
ctor(arg, quotechar='\x85')
62+
ctor(arg, lineterminator='\x85')
4963

5064
def test_reader_arg_valid(self):
5165
self._test_arg_valid(csv.reader, [])
@@ -152,9 +166,6 @@ def _write_error_test(self, exc, fields, **kwargs):
152166

153167
def test_write_arg_valid(self):
154168
self._write_error_test(csv.Error, None)
155-
self._write_test((), '')
156-
self._write_test([None], '""')
157-
self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE)
158169
# Check that exceptions are passed up the chain
159170
self._write_error_test(OSError, BadIterable())
160171
class BadList:
@@ -168,7 +179,6 @@ class BadItem:
168179
def __str__(self):
169180
raise OSError
170181
self._write_error_test(OSError, [BadItem()])
171-
172182
def test_write_bigfield(self):
173183
# This exercises the buffer realloc functionality
174184
bigstring = 'X' * 50000
@@ -275,6 +285,49 @@ def test_writerows_with_none(self):
275285
fileobj.seek(0)
276286
self.assertEqual(fileobj.read(), 'a\r\n""\r\n')
277287

288+
289+
def test_write_empty_fields(self):
290+
self._write_test((), '')
291+
self._write_test([''], '""')
292+
self._write_error_test(csv.Error, [''], quoting=csv.QUOTE_NONE)
293+
self._write_test([''], '""', quoting=csv.QUOTE_STRINGS)
294+
self._write_test([''], '""', quoting=csv.QUOTE_NOTNULL)
295+
self._write_test([None], '""')
296+
self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NONE)
297+
self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_STRINGS)
298+
self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NOTNULL)
299+
self._write_test(['', ''], ',')
300+
self._write_test([None, None], ',')
301+
302+
def test_write_empty_fields_space_delimiter(self):
303+
self._write_test([''], '""', delimiter=' ', skipinitialspace=False)
304+
self._write_test([''], '""', delimiter=' ', skipinitialspace=True)
305+
self._write_test([None], '""', delimiter=' ', skipinitialspace=False)
306+
self._write_test([None], '""', delimiter=' ', skipinitialspace=True)
307+
308+
self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False)
309+
self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True)
310+
self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False)
311+
self._write_test([None, None], '"" ""', delimiter=' ', skipinitialspace=True)
312+
313+
self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False,
314+
quoting=csv.QUOTE_NONE)
315+
self._write_error_test(csv.Error, ['', ''],
316+
delimiter=' ', skipinitialspace=True,
317+
quoting=csv.QUOTE_NONE)
318+
for quoting in csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL:
319+
self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=False,
320+
quoting=quoting)
321+
self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True,
322+
quoting=quoting)
323+
324+
for quoting in csv.QUOTE_NONE, csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL:
325+
self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False,
326+
quoting=quoting)
327+
self._write_error_test(csv.Error, [None, None],
328+
delimiter=' ', skipinitialspace=True,
329+
quoting=quoting)
330+
278331
def test_writerows_errors(self):
279332
with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
280333
writer = csv.writer(fileobj)
@@ -376,6 +429,14 @@ def test_read_skipinitialspace(self):
376429
[['no space', 'space', 'spaces', '\ttab']],
377430
skipinitialspace=True)
378431

432+
def test_read_space_delimiter(self):
433+
self._read_test(['a   b', '  a  ', '  ', ''],
434+
[['a', '', '', 'b'], ['', '', 'a', '', ''], ['', '', ''], []],
435+
delimiter=' ', skipinitialspace=False)
436+
self._read_test(['a   b', '  a  ', '  ', ''],
437+
[['a', 'b'], ['a', ''], [''], []],
438+
delimiter=' ', skipinitialspace=True)
439+
379440
def test_read_bigfield(self):
380441
# This exercises the buffer realloc functionality and field size
381442
# limits.
@@ -502,10 +563,10 @@ class space(csv.excel):
502563
escapechar = "\\"
503564

504565
with TemporaryFile("w+", encoding="utf-8") as fileobj:
505-
fileobj.write("abc def\nc1ccccc1 benzene\n")
566+
fileobj.write("abc   def\nc1ccccc1 benzene\n")
506567
fileobj.seek(0)
507568
reader = csv.reader(fileobj, dialect=space())
508-
self.assertEqual(next(reader), ["abc", "def"])
569+
self.assertEqual(next(reader), ["abc", "", "", "def"])
509570
self.assertEqual(next(reader), ["c1ccccc1", "benzene"])
510571

511572
def compare_dialect_123(self, expected, *writeargs, **kwwriteargs):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`csv.writer()` now quotes empty fields if delimiter is a
2+
space and skipinitialspace is true, and raises an exception if quoting is not
3+
possible.

Modules/_csv.c

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,7 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
11801180
static int
11811181
join_append(WriterObj *self, PyObject *field, int quoted)
11821182
{
1183+
DialectObj *dialect = self->dialect;
11831184
int field_kind = -1;
11841185
const void *field_data = NULL;
11851186
Py_ssize_t field_len = 0;
@@ -1192,6 +1193,19 @@ join_append(WriterObj *self, PyObject *field, int quoted)
11921193
field_data = PyUnicode_DATA(field);
11931194
field_len = PyUnicode_GET_LENGTH(field);
11941195
}
1196+
if (!field_len && dialect->delimiter == ' ' && dialect->skipinitialspace) {
1197+
if (dialect->quoting == QUOTE_NONE ||
1198+
(field == NULL &&
1199+
(dialect->quoting == QUOTE_STRINGS ||
1200+
dialect->quoting == QUOTE_NOTNULL)))
1201+
{
1202+
PyErr_Format(self->error_obj,
1203+
"empty field must be quoted if delimiter is a space "
1204+
"and skipinitialspace is true");
1205+
return 0;
1206+
}
1207+
quoted = 1;
1208+
}
11951209
rec_len = join_append_data(self, field_kind, field_data, field_len,
11961210
&quoted, 0);
11971211
if (rec_len < 0)
@@ -1243,6 +1257,7 @@ csv_writerow(WriterObj *self, PyObject *seq)
12431257
{
12441258
DialectObj *dialect = self->dialect;
12451259
PyObject *iter, *field, *line, *result;
1260+
bool null_field = false;
12461261

12471262
iter = PyObject_GetIter(seq);
12481263
if (iter == NULL) {
@@ -1279,11 +1294,12 @@ csv_writerow(WriterObj *self, PyObject *seq)
12791294
break;
12801295
}
12811296

1297+
null_field = (field == Py_None);
12821298
if (PyUnicode_Check(field)) {
12831299
append_ok = join_append(self, field, quoted);
12841300
Py_DECREF(field);
12851301
}
1286-
else if (field == Py_None) {
1302+
else if (null_field) {
12871303
append_ok = join_append(self, NULL, quoted);
12881304
Py_DECREF(field);
12891305
}
@@ -1309,7 +1325,11 @@ csv_writerow(WriterObj *self, PyObject *seq)
13091325
return NULL;
13101326

13111327
if (self->num_fields > 0 && self->rec_len == 0) {
1312-
if (dialect->quoting == QUOTE_NONE) {
1328+
if (dialect->quoting == QUOTE_NONE ||
1329+
(null_field &&
1330+
(dialect->quoting == QUOTE_STRINGS ||
1331+
dialect->quoting == QUOTE_NOTNULL)))
1332+
{
13131333
PyErr_Format(self->error_obj,
13141334
"single empty field record must be quoted");
13151335
return NULL;

0 commit comments

Comments
 (0)