Closed
Description
Bug report
`test_tarfile` fails in non-UTF-8 locales. For example:
$ LC_ALL=uk_UA ./python -m test -vuall test_tarfile -m 'NoneInfoExtractTests_*' -m test_data_filter -m test_tar_filter
======================================================================
ERROR: setUpClass (test.test_tarfile.NoneInfoExtractTests_Data)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/serhiy/py/cpython/Lib/test/test_tarfile.py", line 3264, in setUpClass
tar.extractall(cls.control_dir, filter=cls.extraction_filter)
~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 2389, in extractall
tarinfo = self._get_extract_tarinfo(member, filter_function, path)
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 2441, in _get_extract_tarinfo
tarinfo = filter_function(tarinfo, path)
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 842, in data_filter
new_attrs = _get_filtered_attrs(member, dest_path, True)
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 782, in _get_filtered_attrs
target_path = os.path.realpath(os.path.join(dest_path, name))
File "/home/serhiy/py/cpython/Lib/posixpath.py", line 405, in realpath
return _realpath(filename, strict, sep, curdir, pardir, getcwd)
File "/home/serhiy/py/cpython/Lib/posixpath.py", line 452, in _realpath
st_mode = lstat(newpath).st_mode
~~~~~^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/encodings/koi8_u.py", line 12, in encode
return codecs.charmap_encode(input,errors,encoding_table)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode characters in position 112-118: character maps to <undefined>
encoding with 'koi8-u' codec failed
======================================================================
ERROR: setUpClass (test.test_tarfile.NoneInfoExtractTests_Default)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/serhiy/py/cpython/Lib/test/test_tarfile.py", line 3264, in setUpClass
tar.extractall(cls.control_dir, filter=cls.extraction_filter)
~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 2389, in extractall
tarinfo = self._get_extract_tarinfo(member, filter_function, path)
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 2441, in _get_extract_tarinfo
tarinfo = filter_function(tarinfo, path)
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 842, in data_filter
new_attrs = _get_filtered_attrs(member, dest_path, True)
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 782, in _get_filtered_attrs
target_path = os.path.realpath(os.path.join(dest_path, name))
File "/home/serhiy/py/cpython/Lib/posixpath.py", line 405, in realpath
return _realpath(filename, strict, sep, curdir, pardir, getcwd)
File "/home/serhiy/py/cpython/Lib/posixpath.py", line 452, in _realpath
st_mode = lstat(newpath).st_mode
~~~~~^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/encodings/koi8_u.py", line 12, in encode
return codecs.charmap_encode(input,errors,encoding_table)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode characters in position 112-118: character maps to <undefined>
encoding with 'koi8-u' codec failed
======================================================================
ERROR: setUpClass (test.test_tarfile.NoneInfoExtractTests_FullyTrusted)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/serhiy/py/cpython/Lib/test/test_tarfile.py", line 3264, in setUpClass
tar.extractall(cls.control_dir, filter=cls.extraction_filter)
~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 2397, in extractall
self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
numeric_owner=numeric_owner)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 2460, in _extract_one
self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
set_attrs=set_attrs,
^^^^^^^^^^^^^^^^^^^^
numeric_owner=numeric_owner)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 2543, in _extract_member
self.makefile(tarinfo, targetpath)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 2589, in makefile
with bltn_open(targetpath, "wb") as target:
~~~~~~~~~^^^^^^^^^^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/encodings/koi8_u.py", line 12, in encode
return codecs.charmap_encode(input,errors,encoding_table)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode characters in position 112-118: character maps to <undefined>
encoding with 'koi8-u' codec failed
======================================================================
ERROR: setUpClass (test.test_tarfile.NoneInfoExtractTests_Tar)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/serhiy/py/cpython/Lib/test/test_tarfile.py", line 3264, in setUpClass
tar.extractall(cls.control_dir, filter=cls.extraction_filter)
~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 2389, in extractall
tarinfo = self._get_extract_tarinfo(member, filter_function, path)
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 2441, in _get_extract_tarinfo
tarinfo = filter_function(tarinfo, path)
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 836, in tar_filter
new_attrs = _get_filtered_attrs(member, dest_path, False)
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 782, in _get_filtered_attrs
target_path = os.path.realpath(os.path.join(dest_path, name))
File "/home/serhiy/py/cpython/Lib/posixpath.py", line 405, in realpath
return _realpath(filename, strict, sep, curdir, pardir, getcwd)
File "/home/serhiy/py/cpython/Lib/posixpath.py", line 452, in _realpath
st_mode = lstat(newpath).st_mode
~~~~~^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/encodings/koi8_u.py", line 12, in encode
return codecs.charmap_encode(input,errors,encoding_table)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode characters in position 112-118: character maps to <undefined>
encoding with 'koi8-u' codec failed
======================================================================
ERROR: test_data_filter (test.test_tarfile.TestExtractionFilters.test_data_filter)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/serhiy/py/cpython/Lib/test/test_tarfile.py", line 4086, in test_data_filter
filtered = tarfile.data_filter(tarinfo, '')
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 842, in data_filter
new_attrs = _get_filtered_attrs(member, dest_path, True)
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 782, in _get_filtered_attrs
target_path = os.path.realpath(os.path.join(dest_path, name))
File "/home/serhiy/py/cpython/Lib/posixpath.py", line 405, in realpath
return _realpath(filename, strict, sep, curdir, pardir, getcwd)
File "/home/serhiy/py/cpython/Lib/posixpath.py", line 452, in _realpath
st_mode = lstat(newpath).st_mode
~~~~~^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/encodings/koi8_u.py", line 12, in encode
return codecs.charmap_encode(input,errors,encoding_table)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode characters in position 69-75: character maps to <undefined>
encoding with 'koi8-u' codec failed
======================================================================
ERROR: test_tar_filter (test.test_tarfile.TestExtractionFilters.test_tar_filter)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/serhiy/py/cpython/Lib/test/test_tarfile.py", line 4076, in test_tar_filter
filtered = tarfile.tar_filter(tarinfo, '')
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 836, in tar_filter
new_attrs = _get_filtered_attrs(member, dest_path, False)
File "/home/serhiy/py/cpython/Lib/tarfile.py", line 782, in _get_filtered_attrs
target_path = os.path.realpath(os.path.join(dest_path, name))
File "/home/serhiy/py/cpython/Lib/posixpath.py", line 405, in realpath
return _realpath(filename, strict, sep, curdir, pardir, getcwd)
File "/home/serhiy/py/cpython/Lib/posixpath.py", line 452, in _realpath
st_mode = lstat(newpath).st_mode
~~~~~^^^^^^^^^
File "/home/serhiy/py/cpython/Lib/encodings/koi8_u.py", line 12, in encode
return codecs.charmap_encode(input,errors,encoding_table)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode characters in position 69-75: character maps to <undefined>
encoding with 'koi8-u' codec failed
----------------------------------------------------------------------
This happens because they use `os.path.realpath()` for paths in a tar archive, which uses `os.stat()`, which fails with an unexpected `UnicodeEncodeError` if the path in the tar archive can't be encoded in the current filesystem encoding. This error should be handled at some level, either in `os.path.realpath()` or in `tarfile`. `os.stat()` can also raise `ValueError` if the path contains null bytes. I don't know if this is relevant here; we should test.
Linked PRs
Metadata
Metadata
Assignees
Labels
Projects
Status
Done