Skip to content

Commit a7b8613

Browse files
committed
Merge pull request scrapy#1499 from scrapy/py3-port-exporters
[MRG+1] PY3 exporters
2 parents b204631 + d0eacfe commit a7b8613

File tree

4 files changed

+180
-77
lines changed

4 files changed

+180
-77
lines changed

scrapy/exporters.py

Lines changed: 51 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import csv
6+
import io
67
import sys
78
import pprint
89
import marshal
@@ -11,7 +12,11 @@
1112
from xml.sax.saxutils import XMLGenerator
1213

1314
from scrapy.utils.serialize import ScrapyJSONEncoder
15+
from scrapy.utils.python import to_bytes, to_unicode, to_native_str, is_listlike
1416
from scrapy.item import BaseItem
17+
from scrapy.exceptions import ScrapyDeprecationWarning
18+
import warnings
19+
1520

1621
__all__ = ['BaseItemExporter', 'PprintItemExporter', 'PickleItemExporter',
1722
'CsvItemExporter', 'XmlItemExporter', 'JsonLinesItemExporter',
@@ -38,7 +43,7 @@ def export_item(self, item):
3843
raise NotImplementedError
3944

4045
def serialize_field(self, field, name, value):
41-
serializer = field.get('serializer', self._to_str_if_unicode)
46+
serializer = field.get('serializer', lambda x: x)
4247
return serializer(value)
4348

4449
def start_exporting(self):
@@ -47,9 +52,6 @@ def start_exporting(self):
4752
def finish_exporting(self):
4853
pass
4954

50-
def _to_str_if_unicode(self, value):
51-
return value.encode(self.encoding) if isinstance(value, unicode) else value
52-
5355
def _get_serialized_fields(self, item, default_value=None, include_empty=None):
5456
"""Return the fields to export as an iterable of tuples
5557
(name, serialized_value)
@@ -86,10 +88,10 @@ def __init__(self, file, **kwargs):
8688

8789
def export_item(self, item):
8890
itemdict = dict(self._get_serialized_fields(item))
89-
self.file.write(self.encoder.encode(itemdict) + '\n')
91+
self.file.write(to_bytes(self.encoder.encode(itemdict) + '\n'))
9092

9193

92-
class JsonItemExporter(JsonLinesItemExporter):
94+
class JsonItemExporter(BaseItemExporter):
9395

9496
def __init__(self, file, **kwargs):
9597
self._configure(kwargs, dont_fail=True)
@@ -98,18 +100,18 @@ def __init__(self, file, **kwargs):
98100
self.first_item = True
99101

100102
def start_exporting(self):
101-
self.file.write("[")
103+
self.file.write(b"[")
102104

103105
def finish_exporting(self):
104-
self.file.write("]")
106+
self.file.write(b"]")
105107

106108
def export_item(self, item):
107109
if self.first_item:
108110
self.first_item = False
109111
else:
110-
self.file.write(',\n')
112+
self.file.write(b',\n')
111113
itemdict = dict(self._get_serialized_fields(item))
112-
self.file.write(self.encoder.encode(itemdict))
114+
self.file.write(to_bytes(self.encoder.encode(itemdict)))
113115

114116

115117
class XmlItemExporter(BaseItemExporter):
@@ -139,7 +141,7 @@ def _export_xml_field(self, name, serialized_value):
139141
if hasattr(serialized_value, 'items'):
140142
for subname, value in serialized_value.items():
141143
self._export_xml_field(subname, value)
142-
elif hasattr(serialized_value, '__iter__'):
144+
elif is_listlike(serialized_value):
143145
for value in serialized_value:
144146
self._export_xml_field('value', value)
145147
else:
@@ -153,10 +155,10 @@ def _export_xml_field(self, name, serialized_value):
153155
# and Python 3.x will require unicode, so ">= 2.7.4" should be fine.
154156
if sys.version_info[:3] >= (2, 7, 4):
155157
def _xg_characters(self, serialized_value):
156-
if not isinstance(serialized_value, unicode):
158+
if not isinstance(serialized_value, six.text_type):
157159
serialized_value = serialized_value.decode(self.encoding)
158160
return self.xg.characters(serialized_value)
159-
else:
161+
else: # pragma: no cover
160162
def _xg_characters(self, serialized_value):
161163
return self.xg.characters(serialized_value)
162164

@@ -166,17 +168,22 @@ class CsvItemExporter(BaseItemExporter):
166168
def __init__(self, file, include_headers_line=True, join_multivalued=',', **kwargs):
167169
self._configure(kwargs, dont_fail=True)
168170
self.include_headers_line = include_headers_line
171+
file = file if six.PY2 else io.TextIOWrapper(file, line_buffering=True)
169172
self.csv_writer = csv.writer(file, **kwargs)
170173
self._headers_not_written = True
171174
self._join_multivalued = join_multivalued
172175

173-
def _to_str_if_unicode(self, value):
176+
def serialize_field(self, field, name, value):
177+
serializer = field.get('serializer', self._join_if_needed)
178+
return serializer(value)
179+
180+
def _join_if_needed(self, value):
174181
if isinstance(value, (list, tuple)):
175182
try:
176-
value = self._join_multivalued.join(value)
183+
return self._join_multivalued.join(value)
177184
except TypeError: # list in value may not contain strings
178185
pass
179-
return super(CsvItemExporter, self)._to_str_if_unicode(value)
186+
return value
180187

181188
def export_item(self, item):
182189
if self._headers_not_written:
@@ -185,9 +192,16 @@ def export_item(self, item):
185192

186193
fields = self._get_serialized_fields(item, default_value='',
187194
include_empty=True)
188-
values = [x[1] for x in fields]
195+
values = list(self._build_row(x for _, x in fields))
189196
self.csv_writer.writerow(values)
190197

198+
def _build_row(self, values):
199+
for s in values:
200+
try:
201+
yield to_native_str(s)
202+
except TypeError:
203+
yield to_native_str(repr(s))
204+
191205
def _write_headers_and_set_fields_to_export(self, item):
192206
if self.include_headers_line:
193207
if not self.fields_to_export:
@@ -197,7 +211,8 @@ def _write_headers_and_set_fields_to_export(self, item):
197211
else:
198212
# use fields declared in Item
199213
self.fields_to_export = list(item.fields.keys())
200-
self.csv_writer.writerow(self.fields_to_export)
214+
row = list(self._build_row(self.fields_to_export))
215+
self.csv_writer.writerow(row)
201216

202217

203218
class PickleItemExporter(BaseItemExporter):
@@ -230,7 +245,7 @@ def __init__(self, file, **kwargs):
230245

231246
def export_item(self, item):
232247
itemdict = dict(self._get_serialized_fields(item))
233-
self.file.write(pprint.pformat(itemdict) + '\n')
248+
self.file.write(to_bytes(pprint.pformat(itemdict) + '\n'))
234249

235250

236251
class PythonItemExporter(BaseItemExporter):
@@ -239,6 +254,13 @@ class PythonItemExporter(BaseItemExporter):
239254
json, msgpack, binc, etc) can be used on top of it. Its main goal is to
240255
seamlessly support what BaseItemExporter does plus nested items.
241256
"""
257+
def _configure(self, options, dont_fail=False):
258+
self.binary = options.pop('binary', True)
259+
super(PythonItemExporter, self)._configure(options, dont_fail)
260+
if self.binary:
261+
warnings.warn(
262+
"PythonItemExporter will drop support for binary export in the future",
263+
ScrapyDeprecationWarning)
242264

243265
def serialize_field(self, field, name, value):
244266
serializer = field.get('serializer', self._serialize_value)
@@ -249,13 +271,20 @@ def _serialize_value(self, value):
249271
return self.export_item(value)
250272
if isinstance(value, dict):
251273
return dict(self._serialize_dict(value))
252-
if hasattr(value, '__iter__'):
274+
if is_listlike(value):
253275
return [self._serialize_value(v) for v in value]
254-
return self._to_str_if_unicode(value)
276+
if self.binary:
277+
return to_bytes(value, encoding=self.encoding)
278+
else:
279+
return to_unicode(value, encoding=self.encoding)
255280

256281
def _serialize_dict(self, value):
257282
for key, val in six.iteritems(value):
283+
key = to_bytes(key) if self.binary else key
258284
yield key, self._serialize_value(val)
259285

260286
def export_item(self, item):
261-
return dict(self._get_serialized_fields(item))
287+
result = dict(self._get_serialized_fields(item))
288+
if self.binary:
289+
result = dict(self._serialize_dict(result))
290+
return result

tests/py3-ignores.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
tests/test_exporters.py
21
tests/test_linkextractors_deprecated.py
32
tests/test_proxy_connect.py
43

0 commit comments

Comments
 (0)