From 625c6c198ae82d0130c085adcd5868edda3c6f12 Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sat, 24 Aug 2024 06:59:42 -0700 Subject: [PATCH 01/40] Drop python 2 and make python 3.8 be the minimum. Python 3.7 is end-of-life: https://devguide.python.org/versions/ --- .github/workflows/build.yml | 2 +- setup.cfg | 3 +- shapefile.py | 137 +++++++++++------------------------- test_shapefile.py | 11 +-- 4 files changed, 45 insertions(+), 108 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1f46613..df83f0d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["2.7.18", "3.5.10", "3.6.15", "3.7.17", "3.8.18", "3.9.18", "3.10.13", "3.11.7", "3.12.1", "3.13.0a2"] + python-version: ["3.8.18", "3.9.18", "3.10.13", "3.11.7", "3.12.1", "3.13.0a2"] runs-on: ubuntu-latest container: diff --git a/setup.cfg b/setup.cfg index 906abd3..f5e113c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,6 @@ keywords = gis, geospatial, geographic, shapefile, shapefiles classifiers = Development Status :: 5 - Production/Stable Programming Language :: Python - Programming Language :: Python :: 2.7 Programming Language :: Python :: 3 Topic :: Scientific/Engineering :: GIS Topic :: Software Development :: Libraries @@ -24,7 +23,7 @@ classifiers = [options] py_modules = shapefile -python_requires = >=2.7 +python_requires = >=3.8 [bdist_wheel] universal=1 diff --git a/shapefile.py b/shapefile.py index 3b4e529..9c7c49f 100644 --- a/shapefile.py +++ b/shapefile.py @@ -74,112 +74,55 @@ 5: 'RING'} -# Python 2-3 handling +xrange = range +izip = zip -PYTHON3 = sys.version_info[0] == 3 - -if PYTHON3: - xrange = range - izip = zip - - from urllib.parse import urlparse, urlunparse - from urllib.error import HTTPError - from urllib.request import urlopen, Request +from urllib.parse import urlparse, urlunparse +from urllib.error import HTTPError +from urllib.request 
import urlopen, Request -else: - from itertools import izip - - from urlparse import urlparse, urlunparse - from urllib2 import HTTPError - from urllib2 import urlopen, Request - - # Helpers MISSING = [None,''] NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. -if PYTHON3: - def b(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, str): - # For python 3 encode str to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return b"" - else: - # Force string representation. - return str(v).encode(encoding, encodingErrors) - - def u(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, bytes): - # For python 3 decode bytes to str. - return v.decode(encoding, encodingErrors) - elif isinstance(v, str): - # Already str. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, str) - -else: - def b(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, unicode): - # For python 2 encode unicode to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return unicode(v).encode(encoding, encodingErrors) - - def u(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, bytes): - # For python 2 decode bytes to unicode. - return v.decode(encoding, encodingErrors) - elif isinstance(v, unicode): - # Already unicode. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return u"" - else: - # Force string representation. 
- return bytes(v).decode(encoding, encodingErrors) +def b(v, encoding='utf-8', encodingErrors='strict'): + if isinstance(v, str): + # For python 3 encode str to bytes. + return v.encode(encoding, encodingErrors) + elif isinstance(v, bytes): + # Already bytes. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return b"" + else: + # Force string representation. + return str(v).encode(encoding, encodingErrors) + +def u(v, encoding='utf-8', encodingErrors='strict'): + if isinstance(v, bytes): + # For python 3 decode bytes to str. + return v.decode(encoding, encodingErrors) + elif isinstance(v, str): + # Already str. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return "" + else: + # Force string representation. + return bytes(v).decode(encoding, encodingErrors) - def is_string(v): - return isinstance(v, basestring) +def is_string(v): + return isinstance(v, str) -if sys.version_info[0:2] >= (3, 6): - def pathlike_obj(path): - if isinstance(path, os.PathLike): - return os.fsdecode(path) - else: - return path -else: - def pathlike_obj(path): - if is_string(path): - return path - elif hasattr(path, "__fspath__"): - return path.__fspath__() - else: - try: - return str(path) - except: - return path + +def pathlike_obj(path): + if isinstance(path, os.PathLike): + return os.fsdecode(path) + else: + return path # Begin diff --git a/test_shapefile.py b/test_shapefile.py index 774e59c..501c215 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -2,18 +2,13 @@ This module tests the functionality of shapefile.py. 
""" # std lib imports +import datetime +import json import os.path -import sys -if sys.version_info.major == 3: - from pathlib import Path +from pathlib import Path # third party imports import pytest -import json -import datetime -if sys.version_info.major == 2: - # required by pytest for python <36 - from pathlib2 import Path # our imports import shapefile From 4b4743ab9e319afee297bc3b5eb1f1a6723dac27 Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sat, 24 Aug 2024 07:17:21 -0700 Subject: [PATCH 02/40] shapefile.py: Drop inheriting from object. https://www.python.org/doc/newstyle/ > New-style classes has been integrated into Python 2.7 and old-style classes has been removed in Python 3. --- shapefile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/shapefile.py b/shapefile.py index 9c7c49f..a9c3922 100644 --- a/shapefile.py +++ b/shapefile.py @@ -395,7 +395,7 @@ def organize_polygon_rings(rings, return_errors=None): polys = [[ext] for ext in exteriors] return polys -class Shape(object): +class Shape: def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None): """Stores the geometry of the different shape types specified in the Shapefile spec. Shape types are @@ -768,7 +768,7 @@ def __dir__(self): fnames = list(self.__field_positions.keys()) # plus field names (random order if Python version < 3.6) return default + fnames -class ShapeRecord(object): +class ShapeRecord: """A ShapeRecord object containing a shape along with its attributes. Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" def __init__(self, shape=None, record=None): @@ -853,7 +853,7 @@ class ShapefileException(Exception): # msg = '\n'.join(messages) # logger.warning(msg) -class Reader(object): +class Reader: """Reads the three files of a shapefile as a unit or separately. 
If one of the three files (.shp, .shx, .dbf) is missing no exception is thrown until you try @@ -1735,7 +1735,7 @@ def iterShapeRecords(self, fields=None, bbox=None): yield ShapeRecord(shape=shape, record=record) -class Writer(object): +class Writer: """Provides write support for ESRI Shapefiles.""" def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): self.target = target From d0259ae95c4951fb8f27e4de2e6c4bd252f4443a Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sat, 24 Aug 2024 07:39:48 -0700 Subject: [PATCH 03/40] shapefile.py: Minor cleanup of module comment and order includes. --- shapefile.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/shapefile.py b/shapefile.py index a9c3922..4ecbde7 100644 --- a/shapefile.py +++ b/shapefile.py @@ -3,22 +3,26 @@ Provides read and write support for ESRI Shapefiles. authors: jlawheadgeospatialpython.com maintainer: karim.bahgat.norwaygmail.com -Compatible with Python versions 2.7-3.x +Compatible with Python versions >= 3.8 """ __version__ = "2.3.1" -from struct import pack, unpack, calcsize, error, Struct +import array +from datetime import date +import io +import logging import os +from struct import pack, unpack, calcsize, error, Struct import sys -import time -import array import tempfile -import logging -import io -from datetime import date +import time import zipfile +from urllib.error import HTTPError +from urllib.parse import urlparse, urlunparse +from urllib.request import urlopen, Request + # Create named logger logger = logging.getLogger(__name__) @@ -76,10 +80,6 @@ xrange = range izip = zip - -from urllib.parse import urlparse, urlunparse -from urllib.error import HTTPError -from urllib.request import urlopen, Request # Helpers From e364fa05a999660efa92125f0e78d631c7a4bb45 Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sat, 24 Aug 2024 07:44:06 -0700 Subject: [PATCH 04/40] build.yml: Allow workflow on all branches when pushing. 
This should let me see the workflow run in my work on my fork. --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index df83f0d..5ed591f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,7 +5,6 @@ name: build on: push: - branches: [ master ] pull_request: branches: [ master ] workflow_dispatch: From 0146bc6aefab2b3591f5ce2a118878430f6cf69c Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sat, 24 Aug 2024 07:51:43 -0700 Subject: [PATCH 05/40] build.yml: Do not specify the patch level of python versions. - Still specify the exact python 3.13 as it is not yet released. - Updated to release candidate 1. 3.13 release is expected in October 2024. - Also fix the more information link. The old one one is dead. --- .github/workflows/build.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5ed591f..f808147 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,5 +1,6 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions +# For more information see: +# https://docs.github.com/en/actions/use-cases-and-examples/building-and-testing/building-and-testing-python name: build @@ -15,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8.18", "3.9.18", "3.10.13", "3.11.7", "3.12.1", "3.13.0a2"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13.0rc1"] runs-on: ubuntu-latest container: From 50d08ec3a147b28a139c12a6d2754ceba63b54ef Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sat, 24 Aug 2024 08:03:04 -0700 Subject: [PATCH 06/40] build.yml: Used checkout v4 --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/.github/workflows/build.yml b/.github/workflows/build.yml index f808147..242d898 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,7 @@ jobs: image: python:${{ matrix.python-version }}-slim steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install dependencies run: | python -m pip install --upgrade pip From fd4e56436c5b2e07596f9f1b3ca604ab65f0bb4a Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sat, 24 Aug 2024 06:59:42 -0700 Subject: [PATCH 07/40] build.yml: Allow workflow on all branches when pushing. Squash in preparation for rebasing This should let me see the workflow run in my work on my fork. build.yml: Used checkout v4 build.yml: Do not specify the patch level of python versions. - Still specify the exact python 3.13 as it is not yet released. - Updated to release candidate 1. 3.13 release is expected in October 2024. - Also fix the more information link. The old one one is dead. shapefile.py: Minor cleanup of module comment and order includes. shapefile.py: Drop inheriting from object. https://www.python.org/doc/newstyle/ > New-style classes has been integrated into Python 2.7 and old-style classes has been removed in Python 3. Drop python 2 and make python 3.8 be the minimum. 
Python 3.7 is end-of-life: https://devguide.python.org/versions/ --- .github/workflows/build.yml | 3 +- setup.cfg | 3 +- shapefile.py | 167 ++++++++++++------------------------ test_shapefile.py | 11 +-- 4 files changed, 61 insertions(+), 123 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dfeae58..5b53b50 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,5 +1,6 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions +# For more information see: +# https://docs.github.com/en/actions/use-cases-and-examples/building-and-testing/building-and-testing-python name: build diff --git a/setup.cfg b/setup.cfg index 906abd3..f5e113c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,6 @@ keywords = gis, geospatial, geographic, shapefile, shapefiles classifiers = Development Status :: 5 - Production/Stable Programming Language :: Python - Programming Language :: Python :: 2.7 Programming Language :: Python :: 3 Topic :: Scientific/Engineering :: GIS Topic :: Software Development :: Libraries @@ -24,7 +23,7 @@ classifiers = [options] py_modules = shapefile -python_requires = >=2.7 +python_requires = >=3.8 [bdist_wheel] universal=1 diff --git a/shapefile.py b/shapefile.py index beab236..879fead 100644 --- a/shapefile.py +++ b/shapefile.py @@ -3,22 +3,26 @@ Provides read and write support for ESRI Shapefiles. 
authors: jlawheadgeospatialpython.com maintainer: karim.bahgat.norwaygmail.com -Compatible with Python versions 2.7-3.x +Compatible with Python versions >= 3.8 """ __version__ = "2.3.1" -from struct import pack, unpack, calcsize, error, Struct +import array +from datetime import date +import io +import logging import os +from struct import pack, unpack, calcsize, error, Struct import sys -import time -import array import tempfile -import logging -import io -from datetime import date +import time import zipfile +from urllib.error import HTTPError +from urllib.parse import urlparse, urlunparse +from urllib.request import urlopen, Request + # Create named logger logger = logging.getLogger(__name__) @@ -74,112 +78,51 @@ 5: 'RING'} -# Python 2-3 handling - -PYTHON3 = sys.version_info[0] == 3 - -if PYTHON3: - xrange = range - izip = zip - - from urllib.parse import urlparse, urlunparse - from urllib.error import HTTPError - from urllib.request import urlopen, Request - -else: - from itertools import izip - - from urlparse import urlparse, urlunparse - from urllib2 import HTTPError - from urllib2 import urlopen, Request - - +xrange = range +izip = zip + # Helpers MISSING = [None,''] NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. -if PYTHON3: - def b(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, str): - # For python 3 encode str to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return b"" - else: - # Force string representation. - return str(v).encode(encoding, encodingErrors) - - def u(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, bytes): - # For python 3 decode bytes to str. - return v.decode(encoding, encodingErrors) - elif isinstance(v, str): - # Already str. 
- return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, str) - -else: - def b(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, unicode): - # For python 2 encode unicode to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return unicode(v).encode(encoding, encodingErrors) - - def u(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, bytes): - # For python 2 decode bytes to unicode. - return v.decode(encoding, encodingErrors) - elif isinstance(v, unicode): - # Already unicode. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return u"" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) +def b(v, encoding='utf-8', encodingErrors='strict'): + if isinstance(v, str): + # For python 3 encode str to bytes. + return v.encode(encoding, encodingErrors) + elif isinstance(v, bytes): + # Already bytes. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return b"" + else: + # Force string representation. + return str(v).encode(encoding, encodingErrors) + +def u(v, encoding='utf-8', encodingErrors='strict'): + if isinstance(v, bytes): + # For python 3 decode bytes to str. + return v.decode(encoding, encodingErrors) + elif isinstance(v, str): + # Already str. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return "" + else: + # Force string representation. 
+ return bytes(v).decode(encoding, encodingErrors) - def is_string(v): - return isinstance(v, basestring) +def is_string(v): + return isinstance(v, str) -if sys.version_info[0:2] >= (3, 6): - def pathlike_obj(path): - if isinstance(path, os.PathLike): - return os.fsdecode(path) - else: - return path -else: - def pathlike_obj(path): - if is_string(path): - return path - elif hasattr(path, "__fspath__"): - return path.__fspath__() - else: - try: - return str(path) - except: - return path + +def pathlike_obj(path): + if isinstance(path, os.PathLike): + return os.fsdecode(path) + else: + return path # Begin @@ -452,7 +395,7 @@ def organize_polygon_rings(rings, return_errors=None): polys = [[ext] for ext in exteriors] return polys -class Shape(object): +class Shape: def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None): """Stores the geometry of the different shape types specified in the Shapefile spec. Shape types are @@ -823,9 +766,9 @@ def __dir__(self): """ default = list(dir(type(self))) # default list methods and attributes of this class fnames = list(self.__field_positions.keys()) # plus field names (random order if Python version < 3.6) - return default + fnames - -class ShapeRecord(object): + return default + fnames + +class ShapeRecord: """A ShapeRecord object containing a shape along with its attributes. Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" def __init__(self, shape=None, record=None): @@ -874,7 +817,7 @@ class ShapefileException(Exception): """An exception to handle shapefile specific problems.""" pass -class Reader(object): +class Reader: """Reads the three files of a shapefile as a unit or separately. 
If one of the three files (.shp, .shx, .dbf) is missing no exception is thrown until you try @@ -1756,7 +1699,7 @@ def iterShapeRecords(self, fields=None, bbox=None): yield ShapeRecord(shape=shape, record=record) -class Writer(object): +class Writer: """Provides write support for ESRI Shapefiles.""" def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): self.target = target diff --git a/test_shapefile.py b/test_shapefile.py index ec73b45..f4a786f 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -2,18 +2,13 @@ This module tests the functionality of shapefile.py. """ # std lib imports +import datetime +import json import os.path -import sys -if sys.version_info.major == 3: - from pathlib import Path +from pathlib import Path # third party imports import pytest -import json -import datetime -if sys.version_info.major == 2: - # required by pytest for python <36 - from pathlib2 import Path # our imports import shapefile From c7d7ee0f5dab857e7a5a57fc14aa1cbe170378fb Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 8 Sep 2024 14:33:14 +0100 Subject: [PATCH 08/40] Don't test on dropped Python versions --- .github/workflows/build.yml | 4 ---- README.md | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5b53b50..203aebe 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,10 +17,6 @@ jobs: fail-fast: false matrix: python-version: [ - "2.7", - "3.5", - "3.6", - "3.7", "3.8", "3.9", "3.10", diff --git a/README.md b/README.md index 94861bb..8a92f84 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Both the Esri and XBase file-formats are very simple in design and memory efficient which is part of the reason the shapefile format remains popular despite the numerous ways to store and exchange GIS data available today. -Pyshp is compatible with Python 2.7-3.x. 
+Pyshp is compatible with Pythons >= 3.8. This document provides examples for using PyShp to read and write shapefiles. However many more examples are continually added to the blog [http://GeospatialPython.com](http://GeospatialPython.com), From f1684fbbaff42be03763496b04e03d13c508a0fd Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 8 Sep 2024 14:35:36 +0100 Subject: [PATCH 09/40] Run build workflow on PRs for all branches --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 203aebe..32d7fc8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,7 +7,6 @@ name: build on: push: pull_request: - branches: [ master ] workflow_dispatch: jobs: From 29c5c68bbf5ab50b5d719831ac673251d1cf3e04 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 8 Sep 2024 14:47:10 +0100 Subject: [PATCH 10/40] Drop Python 3.8 - its EOL is due in just over 3 weeks --- .github/workflows/build.yml | 1 - README.md | 2 +- setup.cfg | 2 +- shapefile.py | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 32d7fc8..5387f4b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,7 +16,6 @@ jobs: fail-fast: false matrix: python-version: [ - "3.8", "3.9", "3.10", "3.11", diff --git a/README.md b/README.md index 8a92f84..f16f808 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Both the Esri and XBase file-formats are very simple in design and memory efficient which is part of the reason the shapefile format remains popular despite the numerous ways to store and exchange GIS data available today. -Pyshp is compatible with Pythons >= 3.8. +Pyshp is compatible with Pythons >= 3.9. This document provides examples for using PyShp to read and write shapefiles. 
However many more examples are continually added to the blog [http://GeospatialPython.com](http://GeospatialPython.com), diff --git a/setup.cfg b/setup.cfg index f5e113c..d13d43b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,7 +23,7 @@ classifiers = [options] py_modules = shapefile -python_requires = >=3.8 +python_requires = >=3.9 [bdist_wheel] universal=1 diff --git a/shapefile.py b/shapefile.py index 879fead..5001f36 100644 --- a/shapefile.py +++ b/shapefile.py @@ -3,7 +3,7 @@ Provides read and write support for ESRI Shapefiles. authors: jlawheadgeospatialpython.com maintainer: karim.bahgat.norwaygmail.com -Compatible with Python versions >= 3.8 +Compatible with Python versions >= 3.9 """ __version__ = "2.3.1" From b7d377aeaf65d8d58c0782870c15696fdef05e39 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 8 Sep 2024 15:05:53 +0100 Subject: [PATCH 11/40] Add pre-commit config file and settings for ruff in pyproject.toml --- .pre-commit-config.yaml | 13 +++++++++ pyproject.toml | 63 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..3d90e84 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.4 + hooks: + - id: ruff + args: [ --fix ] + - id: ruff-format diff --git a/pyproject.toml b/pyproject.toml index fed528d..e6c43f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,66 @@ [build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" + +[tool.ruff] +# Exclude a variety of commonly ignored directories. 
+exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] + +# Same as Black. +line-length = 88 +indent-width = 4 + +# Assume Python 3.9 +target-version = "py39" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +select = ["E4", "E7", "E9", "F"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. 
+line-ending = "auto" From 2cfd48bbb43dc034d2a33c5ef9e606ca4386d28a Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 8 Sep 2024 15:38:17 +0100 Subject: [PATCH 12/40] Run Ruff and Ruff-format on PyShp 3.0-dev --- shapefile.py | 1038 ++++++++++++++++++++++++++++----------------- test_shapefile.py | 931 +++++++++++++++++++++++++--------------- 2 files changed, 1235 insertions(+), 734 deletions(-) diff --git a/shapefile.py b/shapefile.py index 5001f36..1064c99 100644 --- a/shapefile.py +++ b/shapefile.py @@ -47,20 +47,21 @@ MULTIPATCH = 31 SHAPETYPE_LOOKUP = { - 0: 'NULL', - 1: 'POINT', - 3: 'POLYLINE', - 5: 'POLYGON', - 8: 'MULTIPOINT', - 11: 'POINTZ', - 13: 'POLYLINEZ', - 15: 'POLYGONZ', - 18: 'MULTIPOINTZ', - 21: 'POINTM', - 23: 'POLYLINEM', - 25: 'POLYGONM', - 28: 'MULTIPOINTM', - 31: 'MULTIPATCH'} + 0: "NULL", + 1: "POINT", + 3: "POLYLINE", + 5: "POLYGON", + 8: "MULTIPOINT", + 11: "POINTZ", + 13: "POLYLINEZ", + 15: "POLYGONZ", + 18: "MULTIPOINTZ", + 21: "POINTM", + 23: "POLYLINEM", + 25: "POLYGONM", + 28: "MULTIPOINTM", + 31: "MULTIPATCH", +} TRIANGLE_STRIP = 0 TRIANGLE_FAN = 1 @@ -70,23 +71,25 @@ RING = 5 PARTTYPE_LOOKUP = { - 0: 'TRIANGLE_STRIP', - 1: 'TRIANGLE_FAN', - 2: 'OUTER_RING', - 3: 'INNER_RING', - 4: 'FIRST_RING', - 5: 'RING'} + 0: "TRIANGLE_STRIP", + 1: "TRIANGLE_FAN", + 2: "OUTER_RING", + 3: "INNER_RING", + 4: "FIRST_RING", + 5: "RING", +} xrange = range izip = zip - + # Helpers -MISSING = [None,''] -NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. +MISSING = [None, ""] +NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. + -def b(v, encoding='utf-8', encodingErrors='strict'): +def b(v, encoding="utf-8", encodingErrors="strict"): if isinstance(v, str): # For python 3 encode str to bytes. return v.encode(encoding, encodingErrors) @@ -100,7 +103,8 @@ def b(v, encoding='utf-8', encodingErrors='strict'): # Force string representation. 
return str(v).encode(encoding, encodingErrors) -def u(v, encoding='utf-8', encodingErrors='strict'): + +def u(v, encoding="utf-8", encodingErrors="strict"): if isinstance(v, bytes): # For python 3 decode bytes to str. return v.decode(encoding, encodingErrors) @@ -114,6 +118,7 @@ def u(v, encoding='utf-8', encodingErrors='strict'): # Force string representation. return bytes(v).decode(encoding, encodingErrors) + def is_string(v): return isinstance(v, str) @@ -127,27 +132,31 @@ def pathlike_obj(path): # Begin + class _Array(array.array): """Converts python tuples to lists of the appropriate type. Used to unpack different shapefile header parts.""" + def __repr__(self): return str(self.tolist()) + def signed_area(coords, fast=False): """Return the signed area enclosed by a ring using the linear time algorithm. A value >= 0 indicates a counter-clockwise oriented ring. A faster version is possible by setting 'fast' to True, which returns 2x the area, e.g. if you're only interested in the sign of the area. """ - xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values + xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values xs.append(xs[1]) ys.append(ys[1]) - area2 = sum(xs[i]*(ys[i+1]-ys[i-1]) for i in range(1, len(coords))) + area2 = sum(xs[i] * (ys[i + 1] - ys[i - 1]) for i in range(1, len(coords))) if fast: return area2 else: return area2 / 2.0 + def is_cw(coords): """Returns True if a polygon ring has clockwise orientation, determined by a negatively signed area. @@ -155,34 +164,35 @@ def is_cw(coords): area2 = signed_area(coords, fast=True) return area2 < 0 + def rewind(coords): - """Returns the input coords in reversed order. - """ + """Returns the input coords in reversed order.""" return list(reversed(coords)) + def ring_bbox(coords): - """Calculates and returns the bounding box of a ring. 
- """ - xs,ys = zip(*coords) - bbox = min(xs),min(ys),max(xs),max(ys) + """Calculates and returns the bounding box of a ring.""" + xs, ys = zip(*coords) + bbox = min(xs), min(ys), max(xs), max(ys) return bbox + def bbox_overlap(bbox1, bbox2): - """Tests whether two bounding boxes overlap, returning a boolean - """ - xmin1,ymin1,xmax1,ymax1 = bbox1 - xmin2,ymin2,xmax2,ymax2 = bbox2 - overlap = (xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2) + """Tests whether two bounding boxes overlap, returning a boolean""" + xmin1, ymin1, xmax1, ymax1 = bbox1 + xmin2, ymin2, xmax2, ymax2 = bbox2 + overlap = xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2 return overlap + def bbox_contains(bbox1, bbox2): - """Tests whether bbox1 fully contains bbox2, returning a boolean - """ - xmin1,ymin1,xmax1,ymax1 = bbox1 - xmin2,ymin2,xmax2,ymax2 = bbox2 - contains = (xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2) + """Tests whether bbox1 fully contains bbox2, returning a boolean""" + xmin1, ymin1, xmax1, ymax1 = bbox1 + xmin2, ymin2, xmax2, ymax2 = bbox2 + contains = xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2 return contains + def ring_contains_point(coords, p): """Fast point-in-polygon crossings algorithm, MacMartin optimization. @@ -194,29 +204,31 @@ def ring_contains_point(coords, p): compare vertex Y values to the testing point's Y and quickly discard edges which are entirely to one side of the test ray. """ - tx,ty = p + tx, ty = p # get initial test bit for above/below X axis vtx0 = coords[0] - yflag0 = ( vtx0[1] >= ty ) + yflag0 = vtx0[1] >= ty inside_flag = False for vtx1 in coords[1:]: - yflag1 = ( vtx1[1] >= ty ) + yflag1 = vtx1[1] >= ty # check if endpoints straddle (are on opposite sides) of X axis # (i.e. the Y's differ); if so, +X ray could intersect this edge. 
if yflag0 != yflag1: - xflag0 = ( vtx0[0] >= tx ) + xflag0 = vtx0[0] >= tx # check if endpoints are on same side of the Y axis (i.e. X's # are the same); if so, it's easy to test if edge hits or misses. - if xflag0 == ( vtx1[0] >= tx ): + if xflag0 == (vtx1[0] >= tx): # if edge's X values both right of the point, must hit if xflag0: inside_flag = not inside_flag else: # compute intersection of pgon segment with +X ray, note # if >= point's X; if so, the ray hits it. - if ( vtx1[0] - (vtx1[1]-ty) * ( vtx0[0]-vtx1[0]) / (vtx0[1]-vtx1[1]) ) >= tx: + if ( + vtx1[0] - (vtx1[1] - ty) * (vtx0[0] - vtx1[0]) / (vtx0[1] - vtx1[1]) + ) >= tx: inside_flag = not inside_flag # move to next pair of vertices, retaining info as possible @@ -225,6 +237,7 @@ def ring_contains_point(coords, p): return inside_flag + def ring_sample(coords, ccw=False): """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation @@ -233,6 +246,7 @@ def ring_sample(coords, ccw=False): (counter-clockwise) is set to True. 
""" triplet = [] + def itercoords(): # iterate full closed ring for p in coords: @@ -248,7 +262,9 @@ def itercoords(): # new triplet, try to get sample if len(triplet) == 3: # check that triplet does not form a straight line (not a triangle) - is_straight_line = (triplet[0][1] - triplet[1][1]) * (triplet[0][0] - triplet[2][0]) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) + is_straight_line = (triplet[0][1] - triplet[1][1]) * ( + triplet[0][0] - triplet[2][0] + ) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) if not is_straight_line: # get triplet orientation closed_triplet = triplet + [triplet[0]] @@ -256,26 +272,27 @@ def itercoords(): # check that triplet has the same orientation as the ring (means triangle is inside the ring) if ccw == triplet_ccw: # get triplet centroid - xs,ys = zip(*triplet) - xmean,ymean = sum(xs) / 3.0, sum(ys) / 3.0 + xs, ys = zip(*triplet) + xmean, ymean = sum(xs) / 3.0, sum(ys) / 3.0 # check that triplet centroid is truly inside the ring - if ring_contains_point(coords, (xmean,ymean)): - return xmean,ymean + if ring_contains_point(coords, (xmean, ymean)): + return xmean, ymean # failed to get sample point from this triplet # remove oldest triplet coord to allow iterating to next triplet triplet.pop(0) else: - raise Exception('Unexpected error: Unable to find a ring sample point.') + raise Exception("Unexpected error: Unable to find a ring sample point.") + def ring_contains_ring(coords1, coords2): - '''Returns True if all vertexes in coords2 are fully inside coords1. - ''' + """Returns True if all vertexes in coords2 are fully inside coords1.""" return all((ring_contains_point(coords1, p2) for p2 in coords2)) + def organize_polygon_rings(rings, return_errors=None): - '''Organize a list of coordinate rings into one or more polygons with holes. + """Organize a list of coordinate rings into one or more polygons with holes. 
Returns a list of polygons, where each polygon is composed of a single exterior ring, and one or more interior holes. If a return_errors dict is provided (optional), any errors encountered will be added to it. @@ -285,7 +302,7 @@ def organize_polygon_rings(rings, return_errors=None): holes if they run in counter-clockwise direction. This method is used to construct GeoJSON (multi)polygons from the shapefile polygon shape type, which does not explicitly store the structure of the polygons beyond exterior/interior ring orientation. - ''' + """ # first iterate rings and classify as exterior or hole exteriors = [] holes = [] @@ -319,17 +336,16 @@ def organize_polygon_rings(rings, return_errors=None): return polys # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = dict([(hole_i,[]) for hole_i in xrange(len(holes))]) + hole_exteriors = dict([(hole_i, []) for hole_i in xrange(len(holes))]) exterior_bboxes = [ring_bbox(ring) for ring in exteriors] for hole_i in hole_exteriors.keys(): hole_bbox = ring_bbox(holes[hole_i]) - for ext_i,ext_bbox in enumerate(exterior_bboxes): + for ext_i, ext_bbox in enumerate(exterior_bboxes): if bbox_contains(ext_bbox, hole_bbox): - hole_exteriors[hole_i].append( ext_i ) + hole_exteriors[hole_i].append(ext_i) # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test - for hole_i,exterior_candidates in hole_exteriors.items(): - + for hole_i, exterior_candidates in hole_exteriors.items(): if len(exterior_candidates) > 1: # get hole sample point ccw = not is_cw(holes[hole_i]) @@ -338,7 +354,9 @@ def organize_polygon_rings(rings, return_errors=None): new_exterior_candidates = [] for ext_i in exterior_candidates: # check that hole sample point is inside exterior - hole_in_exterior = ring_contains_point(exteriors[ext_i], hole_sample) + hole_in_exterior = ring_contains_point( + exteriors[ext_i], hole_sample + ) if hole_in_exterior: 
new_exterior_candidates.append(ext_i) @@ -346,31 +364,33 @@ def organize_polygon_rings(rings, return_errors=None): hole_exteriors[hole_i] = new_exterior_candidates # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole - for hole_i,exterior_candidates in hole_exteriors.items(): - + for hole_i, exterior_candidates in hole_exteriors.items(): if len(exterior_candidates) > 1: # exterior candidate with the smallest area is the hole's most immediate parent - ext_i = sorted(exterior_candidates, key=lambda x: abs(signed_area(exteriors[x], fast=True)))[0] + ext_i = sorted( + exterior_candidates, + key=lambda x: abs(signed_area(exteriors[x], fast=True)), + )[0] hole_exteriors[hole_i] = [ext_i] # separate out holes that are orphaned (not contained by any exterior) orphan_holes = [] - for hole_i,exterior_candidates in list(hole_exteriors.items()): + for hole_i, exterior_candidates in list(hole_exteriors.items()): if not exterior_candidates: - orphan_holes.append( hole_i ) + orphan_holes.append(hole_i) del hole_exteriors[hole_i] continue # each hole should now only belong to one exterior, group into exterior-holes polygons polys = [] - for ext_i,ext in enumerate(exteriors): + for ext_i, ext in enumerate(exteriors): poly = [ext] # find relevant holes poly_holes = [] - for hole_i,exterior_candidates in list(hole_exteriors.items()): + for hole_i, exterior_candidates in list(hole_exteriors.items()): # hole is relevant if previously matched with this exterior if exterior_candidates[0] == ext_i: - poly_holes.append( holes[hole_i] ) + poly_holes.append(holes[hole_i]) poly += poly_holes polys.append(poly) @@ -382,21 +402,24 @@ def organize_polygon_rings(rings, return_errors=None): polys.append(poly) if orphan_holes and return_errors is not None: - return_errors['polygon_orphaned_holes'] = len(orphan_holes) + return_errors["polygon_orphaned_holes"] = len(orphan_holes) return polys # no exteriors, be nice and assume due 
to incorrect winding order else: if return_errors is not None: - return_errors['polygon_only_holes'] = len(holes) + return_errors["polygon_only_holes"] = len(holes) exteriors = holes # add as single exterior without any holes polys = [[ext] for ext in exteriors] return polys + class Shape: - def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None): + def __init__( + self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None + ): """Stores the geometry of the different shape types specified in the Shapefile spec. Shape types are usually point, polyline, or polygons. Every shape type @@ -431,42 +454,39 @@ def __geo_interface__(self): # the shape has no coordinate information, i.e. is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'Point', 'coordinates':tuple()} + return {"type": "Point", "coordinates": tuple()} else: - return { - 'type': 'Point', - 'coordinates': tuple(self.points[0]) - } + return {"type": "Point", "coordinates": tuple(self.points[0])} elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: if len(self.points) == 0: # the shape has no coordinate information, i.e. is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'MultiPoint', 'coordinates':[]} + return {"type": "MultiPoint", "coordinates": []} else: # multipoint return { - 'type': 'MultiPoint', - 'coordinates': [tuple(p) for p in self.points] + "type": "MultiPoint", + "coordinates": [tuple(p) for p in self.points], } elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: if len(self.parts) == 0: # the shape has no coordinate information, i.e. 
is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'LineString', 'coordinates':[]} + return {"type": "LineString", "coordinates": []} elif len(self.parts) == 1: # linestring return { - 'type': 'LineString', - 'coordinates': [tuple(p) for p in self.points] + "type": "LineString", + "coordinates": [tuple(p) for p in self.points], } else: # multilinestring ps = None coordinates = [] for part in self.parts: - if ps == None: + if ps is None: ps = part continue else: @@ -474,16 +494,13 @@ def __geo_interface__(self): ps = part else: coordinates.append([tuple(p) for p in self.points[part:]]) - return { - 'type': 'MultiLineString', - 'coordinates': coordinates - } + return {"type": "MultiLineString", "coordinates": coordinates} elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: if len(self.parts) == 0: # the shape has no coordinate information, i.e. is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'Polygon', 'coordinates':[]} + return {"type": "Polygon", "coordinates": []} else: # get all polygon rings rings = [] @@ -491,7 +508,7 @@ def __geo_interface__(self): # get indexes of start and end points of the ring start = self.parts[i] try: - end = self.parts[i+1] + end = self.parts[i + 1] except IndexError: end = len(self.points) @@ -506,35 +523,40 @@ def __geo_interface__(self): # if VERBOSE is True, issue detailed warning about any shape errors # encountered during the Shapefile to GeoJSON conversion if VERBOSE and self._errors: - header = 'Possible issue encountered when converting Shape #{} to GeoJSON: '.format(self.oid) - orphans = self._errors.get('polygon_orphaned_holes', None) + header = "Possible issue encountered when converting Shape #{} to GeoJSON: ".format( + self.oid + ) + 
orphans = self._errors.get("polygon_orphaned_holes", None) if orphans: - msg = header + 'Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ + msg = ( + header + + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ -encoded as GeoJSON exterior rings instead of holes.' +encoded as GeoJSON exterior rings instead of holes." + ) logger.warning(msg) - only_holes = self._errors.get('polygon_only_holes', None) + only_holes = self._errors.get("polygon_only_holes", None) if only_holes: - msg = header + 'Shapefile format requires that polygons contain at least one exterior ring, \ + msg = ( + header + + "Shapefile format requires that polygons contain at least one exterior ring, \ but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ -still included but were encoded as GeoJSON exterior rings instead of holes.' +still included but were encoded as GeoJSON exterior rings instead of holes." + ) logger.warning(msg) # return as geojson if len(polys) == 1: - return { - 'type': 'Polygon', - 'coordinates': polys[0] - } + return {"type": "Polygon", "coordinates": polys[0]} else: - return { - 'type': 'MultiPolygon', - 'coordinates': polys - } + return {"type": "MultiPolygon", "coordinates": polys} else: - raise Exception('Shape type "%s" cannot be represented as GeoJSON.' % SHAPETYPE_LOOKUP[self.shapeType]) + raise Exception( + 'Shape type "%s" cannot be represented as GeoJSON.' 
+ % SHAPETYPE_LOOKUP[self.shapeType] + ) @staticmethod def _from_geojson(geoj): @@ -562,16 +584,16 @@ def _from_geojson(geoj): # set points and parts if geojType == "Point": - shape.points = [ geoj["coordinates"] ] + shape.points = [geoj["coordinates"]] shape.parts = [0] - elif geojType in ("MultiPoint","LineString"): + elif geojType in ("MultiPoint", "LineString"): shape.points = geoj["coordinates"] shape.parts = [0] elif geojType in ("Polygon"): points = [] parts = [] index = 0 - for i,ext_or_hole in enumerate(geoj["coordinates"]): + for i, ext_or_hole in enumerate(geoj["coordinates"]): # although the latest GeoJSON spec states that exterior rings should have # counter-clockwise orientation, we explicitly check orientation since older # GeoJSONs might not enforce this. @@ -601,7 +623,7 @@ def _from_geojson(geoj): parts = [] index = 0 for polygon in geoj["coordinates"]: - for i,ext_or_hole in enumerate(polygon): + for i, ext_or_hole in enumerate(polygon): # although the latest GeoJSON spec states that exterior rings should have # counter-clockwise orientation, we explicitly check orientation since older # GeoJSONs might not enforce this. 
@@ -628,7 +650,8 @@ def shapeTypeName(self): return SHAPETYPE_LOOKUP[self.shapeType] def __repr__(self): - return 'Shape #{}: {}'.format(self.__oid, self.shapeTypeName) + return "Shape #{}: {}".format(self.__oid, self.shapeTypeName) + class _Record(list): """ @@ -673,14 +696,16 @@ def __getattr__(self, item): corresponding value in the Record does not exist """ try: - if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() - raise AttributeError('_Record does not implement __setstate__') + if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() + raise AttributeError("_Record does not implement __setstate__") index = self.__field_positions[item] return list.__getitem__(self, index) except KeyError: - raise AttributeError('{} is not a field name'.format(item)) + raise AttributeError("{} is not a field name".format(item)) except IndexError: - raise IndexError('{} found as a field but not enough values available.'.format(item)) + raise IndexError( + "{} found as a field but not enough values available.".format(item) + ) def __setattr__(self, key, value): """ @@ -690,13 +715,13 @@ def __setattr__(self, key, value): :return: None :raises: AttributeError, if key is not a field of the shapefile """ - if key.startswith('_'): # Prevent infinite loop when setting mangled attribute + if key.startswith("_"): # Prevent infinite loop when setting mangled attribute return list.__setattr__(self, key, value) try: index = self.__field_positions[key] return list.__setitem__(self, index, value) except KeyError: - raise AttributeError('{} is not a field name'.format(key)) + raise AttributeError("{} is not a field name".format(key)) def __getitem__(self, item): """ @@ -735,7 +760,7 @@ def __setitem__(self, key, value): if index is not None: return list.__setitem__(self, index, value) else: - raise IndexError('{} is not a field name and not an int'.format(key)) + raise IndexError("{} is not a field name and not an int".format(key)) @property def oid(self): 
@@ -749,13 +774,13 @@ def as_dict(self, date_strings=False): """ dct = dict((f, self[i]) for f, i in self.__field_positions.items()) if date_strings: - for k,v in dct.items(): + for k, v in dct.items(): if isinstance(v, date): - dct[k] = '{:04d}{:02d}{:02d}'.format(v.year, v.month, v.day) + dct[k] = "{:04d}{:02d}{:02d}".format(v.year, v.month, v.day) return dct def __repr__(self): - return 'Record #{}: {}'.format(self.__oid, list(self)) + return "Record #{}: {}".format(self.__oid, list(self)) def __dir__(self): """ @@ -764,22 +789,33 @@ def __dir__(self): :return: List of method names and fields """ - default = list(dir(type(self))) # default list methods and attributes of this class - fnames = list(self.__field_positions.keys()) # plus field names (random order if Python version < 3.6) - return default + fnames - + default = list( + dir(type(self)) + ) # default list methods and attributes of this class + fnames = list( + self.__field_positions.keys() + ) # plus field names (random order if Python version < 3.6) + return default + fnames + + class ShapeRecord: """A ShapeRecord object containing a shape along with its attributes. Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" + def __init__(self, shape=None, record=None): self.shape = shape self.record = record @property def __geo_interface__(self): - return {'type': 'Feature', - 'properties': self.record.as_dict(date_strings=True), - 'geometry': None if self.shape.shapeType == NULL else self.shape.__geo_interface__} + return { + "type": "Feature", + "properties": self.record.as_dict(date_strings=True), + "geometry": None + if self.shape.shapeType == NULL + else self.shape.__geo_interface__, + } + class Shapes(list): """A class to hold a list of Shape objects. 
Subclasses list to ensure compatibility with @@ -788,16 +824,19 @@ class Shapes(list): to return a GeometryCollection dictionary.""" def __repr__(self): - return 'Shapes: {}'.format(list(self)) + return "Shapes: {}".format(list(self)) @property def __geo_interface__(self): # Note: currently this will fail if any of the shapes are null-geometries # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords - collection = {'type': 'GeometryCollection', - 'geometries': [shape.__geo_interface__ for shape in self]} + collection = { + "type": "GeometryCollection", + "geometries": [shape.__geo_interface__ for shape in self], + } return collection + class ShapeRecords(list): """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with former work and to reuse all the optimizations of the builtin list. @@ -805,18 +844,23 @@ class ShapeRecords(list): to return a FeatureCollection dictionary.""" def __repr__(self): - return 'ShapeRecords: {}'.format(list(self)) + return "ShapeRecords: {}".format(list(self)) @property def __geo_interface__(self): - collection = {'type': 'FeatureCollection', - 'features': [shaperec.__geo_interface__ for shaperec in self]} + collection = { + "type": "FeatureCollection", + "features": [shaperec.__geo_interface__ for shaperec in self], + } return collection + class ShapefileException(Exception): """An exception to handle shapefile specific problems.""" + pass + class Reader: """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, @@ -837,6 +881,7 @@ class Reader: efficiently as possible. Shapefiles are usually not large but they can be. 
""" + def __init__(self, *args, **kwargs): self.shp = None self.shx = None @@ -850,106 +895,140 @@ def __init__(self, *args, **kwargs): self.fields = [] self.__dbfHdrLength = 0 self.__fieldLookup = {} - self.encoding = kwargs.pop('encoding', 'utf-8') - self.encodingErrors = kwargs.pop('encodingErrors', 'strict') + self.encoding = kwargs.pop("encoding", "utf-8") + self.encodingErrors = kwargs.pop("encodingErrors", "strict") # See if a shapefile name was passed as the first argument if len(args) > 0: path = pathlike_obj(args[0]) if is_string(path): - - if '.zip' in path: + if ".zip" in path: # Shapefile is inside a zipfile - if path.count('.zip') > 1: + if path.count(".zip") > 1: # Multiple nested zipfiles - raise ShapefileException('Reading from multiple nested zipfiles is not supported: %s' % path) + raise ShapefileException( + "Reading from multiple nested zipfiles is not supported: %s" + % path + ) # Split into zipfile and shapefile paths - if path.endswith('.zip'): + if path.endswith(".zip"): zpath = path shapefile = None else: - zpath = path[:path.find('.zip')+4] - shapefile = path[path.find('.zip')+4+1:] + zpath = path[: path.find(".zip") + 4] + shapefile = path[path.find(".zip") + 4 + 1 :] # Create a zip file handle - if zpath.startswith('http'): + if zpath.startswith("http"): # Zipfile is from a url # Download to a temporary url and treat as normal zipfile - req = Request(zpath, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) + req = Request( + zpath, + headers={ + "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" + }, + ) resp = urlopen(req) # write zipfile data to a read+write tempfile and use as source, gets deleted when garbage collected - zipfileobj = tempfile.NamedTemporaryFile(mode='w+b', suffix='.zip', delete=True) + zipfileobj = tempfile.NamedTemporaryFile( + 
mode="w+b", suffix=".zip", delete=True + ) zipfileobj.write(resp.read()) zipfileobj.seek(0) else: # Zipfile is from a file - zipfileobj = open(zpath, mode='rb') + zipfileobj = open(zpath, mode="rb") # Open the zipfile archive - with zipfile.ZipFile(zipfileobj, 'r') as archive: + with zipfile.ZipFile(zipfileobj, "r") as archive: if not shapefile: # Only the zipfile path is given # Inspect zipfile contents to find the full shapefile path - shapefiles = [name - for name in archive.namelist() - if (name.endswith('.SHP') or name.endswith('.shp'))] + shapefiles = [ + name + for name in archive.namelist() + if (name.endswith(".SHP") or name.endswith(".shp")) + ] # The zipfile must contain exactly one shapefile if len(shapefiles) == 0: - raise ShapefileException('Zipfile does not contain any shapefiles') + raise ShapefileException( + "Zipfile does not contain any shapefiles" + ) elif len(shapefiles) == 1: shapefile = shapefiles[0] else: - raise ShapefileException('Zipfile contains more than one shapefile: %s. Please specify the full \ - path to the shapefile you would like to open.' % shapefiles ) + raise ShapefileException( + "Zipfile contains more than one shapefile: %s. Please specify the full \ + path to the shapefile you would like to open." + % shapefiles + ) # Try to extract file-like objects from zipfile - shapefile = os.path.splitext(shapefile)[0] # root shapefile name - for ext in ['SHP','SHX','DBF','shp','shx','dbf']: + shapefile = os.path.splitext(shapefile)[ + 0 + ] # root shapefile name + for ext in ["SHP", "SHX", "DBF", "shp", "shx", "dbf"]: try: - member = archive.open(shapefile+'.'+ext) + member = archive.open(shapefile + "." 
+ ext) + except zipfile.BadZipFile: + pass + else: # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) + fileobj = tempfile.NamedTemporaryFile( + mode="w+b", delete=True + ) fileobj.write(member.read()) fileobj.seek(0) setattr(self, ext.lower(), fileobj) self._files_to_close.append(fileobj) - except: - pass # Close and delete the temporary zipfile - try: zipfileobj.close() - except: pass + try: + zipfileobj.close() + except OSError: + pass # Try to load shapefile - if (self.shp or self.dbf): + if self.shp or self.dbf: # Load and exit early self.load() return else: - raise ShapefileException("No shp or dbf file found in zipfile: %s" % path) + raise ShapefileException( + "No shp or dbf file found in zipfile: %s" % path + ) - elif path.startswith('http'): + elif path.startswith("http"): # Shapefile is from a url # Download each file to temporary path and treat as normal shapefile path urlinfo = urlparse(path) urlpath = urlinfo[2] - urlpath,_ = os.path.splitext(urlpath) + urlpath, _ = os.path.splitext(urlpath) shapefile = os.path.basename(urlpath) - for ext in ['shp','shx','dbf']: + for ext in ["shp", "shx", "dbf"]: try: _urlinfo = list(urlinfo) - _urlinfo[2] = urlpath + '.' + ext + _urlinfo[2] = urlpath + "." 
+ ext _path = urlunparse(_urlinfo) - req = Request(_path, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) + req = Request( + _path, + headers={ + "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" + }, + ) resp = urlopen(req) # write url data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) + fileobj = tempfile.NamedTemporaryFile( + mode="w+b", delete=True + ) fileobj.write(resp.read()) fileobj.seek(0) setattr(self, ext, fileobj) self._files_to_close.append(fileobj) except HTTPError: pass - if (self.shp or self.dbf): + if self.shp or self.dbf: # Load and exit early self.load() return else: - raise ShapefileException("No shp or dbf file found at url: %s" % path) + raise ShapefileException( + "No shp or dbf file found at url: %s" % path + ) else: # Local file path to a shapefile @@ -1002,14 +1081,18 @@ def __str__(self): """ Use some general info on the shapefile as __str__ """ - info = ['shapefile Reader'] + info = ["shapefile Reader"] if self.shp: - info.append(" {} shapes (type '{}')".format( - len(self), SHAPETYPE_LOOKUP[self.shapeType])) + info.append( + " {} shapes (type '{}')".format( + len(self), SHAPETYPE_LOOKUP[self.shapeType] + ) + ) if self.dbf: - info.append(' {} records ({} fields)'.format( - len(self), len(self.fields))) - return '\n'.join(info) + info.append( + " {} records ({} fields)".format(len(self), len(self.fields)) + ) + return "\n".join(info) def __enter__(self): """ @@ -1046,11 +1129,11 @@ def __len__(self): # Determine length of shp file shp = self.shp checkpoint = shp.tell() - shp.seek(0,2) + shp.seek(0, 2) shpLength = shp.tell() shp.seek(100) # Do a fast shape iteration until end of file. 
- unpack = Struct('>2i').unpack + unpack = Struct(">2i").unpack offsets = [] pos = shp.tell() while pos < shpLength: @@ -1081,7 +1164,7 @@ def __iter__(self): def __geo_interface__(self): shaperecords = self.shapeRecords() fcollection = shaperecords.__geo_interface__ - fcollection['bbox'] = list(self.bbox) + fcollection["bbox"] = list(self.bbox) return fcollection @property @@ -1099,7 +1182,9 @@ def load(self, shapefile=None): self.load_shx(shapeName) self.load_dbf(shapeName) if not (self.shp or self.dbf): - raise ShapefileException("Unable to open %s.dbf or %s.shp." % (shapeName, shapeName)) + raise ShapefileException( + "Unable to open %s.dbf or %s.shp." % (shapeName, shapeName) + ) if self.shp: self.__shpHeader() if self.dbf: @@ -1111,7 +1196,7 @@ def load_shp(self, shapefile_name): """ Attempts to load file with .shp extension as both lower and upper case """ - shp_ext = 'shp' + shp_ext = "shp" try: self.shp = open("%s.%s" % (shapefile_name, shp_ext), "rb") self._files_to_close.append(self.shp) @@ -1126,7 +1211,7 @@ def load_shx(self, shapefile_name): """ Attempts to load file with .shx extension as both lower and upper case """ - shx_ext = 'shx' + shx_ext = "shx" try: self.shx = open("%s.%s" % (shapefile_name, shx_ext), "rb") self._files_to_close.append(self.shx) @@ -1141,7 +1226,7 @@ def load_dbf(self, shapefile_name): """ Attempts to load file with .dbf extension as both lower and upper case """ - dbf_ext = 'dbf' + dbf_ext = "dbf" try: self.dbf = open("%s.%s" % (shapefile_name, dbf_ext), "rb") self._files_to_close.append(self.dbf) @@ -1158,7 +1243,7 @@ def __del__(self): def close(self): # Close any files that the reader opened (but not those given by user) for attribute in self._files_to_close: - if hasattr(attribute, 'close'): + if hasattr(attribute, "close"): try: attribute.close() except IOError: @@ -1169,7 +1254,9 @@ def __getFileObj(self, f): """Checks to see if the requested shapefile file object is available. 
If not a ShapefileException is raised.""" if not f: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object.") + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object." + ) if self.shp and self.shpLength is None: self.load() if self.dbf and len(self.fields) == 0: @@ -1183,27 +1270,30 @@ def __restrictIndex(self, i): rmax = self.numRecords - 1 if abs(i) > rmax: raise IndexError("Shape or Record index out of range.") - if i < 0: i = range(self.numRecords)[i] + if i < 0: + i = range(self.numRecords)[i] return i def __shpHeader(self): """Reads the header information from a .shp file.""" if not self.shp: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shp file found") + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no shp file found" + ) shp = self.shp # File length (16-bit word * 2 = bytes) shp.seek(24) self.shpLength = unpack(">i", shp.read(4))[0] * 2 # Shape type shp.seek(32) - self.shapeType= unpack(" NODATA: self.mbox.append(m) @@ -1224,8 +1314,8 @@ def __shape(self, oid=None, bbox=None): if shapeType == 0: record.points = [] # All shape types capable of having a bounding box - elif shapeType in (3,5,8,13,15,18,23,25,28,31): - record.bbox = _Array('d', unpack("<4d", f.read(32))) + elif shapeType in (3, 5, 8, 13, 15, 18, 23, 25, 28, 31): + record.bbox = _Array("d", unpack("<4d", f.read(32))) # if bbox specified and no overlap, skip this shape if bbox is not None and not bbox_overlap(bbox, record.bbox): # because we stop parsing this shape, skip to beginning of @@ -1233,33 +1323,33 @@ def __shape(self, oid=None, bbox=None): f.seek(next) return None # Shape types with parts - if shapeType in (3,5,13,15,23,25,31): + if shapeType in (3, 5, 13, 15, 23, 25, 31): nParts = unpack("= 16: (mmin, mmax) = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec if next - f.tell() >= 
nPoints * 8: record.m = [] - for m in _Array('d', unpack("<%sd" % nPoints, f.read(nPoints * 8))): + for m in _Array("d", unpack("<%sd" % nPoints, f.read(nPoints * 8))): if m > NODATA: record.m.append(m) else: @@ -1267,8 +1357,8 @@ def __shape(self, oid=None, bbox=None): else: record.m = [None for _ in range(nPoints)] # Read a single point - if shapeType in (1,11,21): - record.points = [_Array('d', unpack("<2d", f.read(16)))] + if shapeType in (1, 11, 21): + record.points = [_Array("d", unpack("<2d", f.read(16)))] if bbox is not None: # create bounding box for Point by duplicating coordinates point_bbox = list(record.points[0] + record.points[0]) @@ -1280,7 +1370,7 @@ def __shape(self, oid=None, bbox=None): if shapeType == 11: record.z = list(unpack("= 8: (m,) = unpack("i", shx.read(4))[0] * 2) - 100 self.numShapes = shxRecordLength // 8 def __shxOffsets(self): - '''Reads the shape offset positions from a .shx file''' + """Reads the shape offset positions from a .shx file""" shx = self.shx if not shx: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found") + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no shx file found" + ) # Jump to the first record. shx.seek(100) # Each index record consists of two nrs, we only want the first one - shxRecords = _Array('i', shx.read(2 * self.numShapes * 4) ) - if sys.byteorder != 'big': + shxRecords = _Array("i", shx.read(2 * self.numShapes * 4)) + if sys.byteorder != "big": shxRecords.byteswap() self._offsets = [2 * el for el in shxRecords[::2]] @@ -1324,7 +1418,7 @@ def __shapeIndex(self, i=None): in the .shx index file.""" shx = self.shx # Return None if no shx or no index requested - if not shx or i == None: + if not shx or i is None: return None # At this point, we know the shx file exists if not self._offsets: @@ -1343,11 +1437,11 @@ def shape(self, i=0, bbox=None): if not offset: # Shx index not available. 
# Determine length of shp file - shp.seek(0,2) + shp.seek(0, 2) shpLength = shp.tell() shp.seek(100) # Do a fast shape iteration until the requested index or end of file. - unpack = Struct('>2i').unpack + unpack = Struct(">2i").unpack _i = 0 offset = shp.tell() while offset < shpLength: @@ -1362,7 +1456,11 @@ def shape(self, i=0, bbox=None): _i += 1 # If the index was not found, it likely means the .shp file is incomplete if _i != i: - raise ShapefileException('Shape index {} is out of bounds; the .shp file only contains {} shapes'.format(i, _i)) + raise ShapefileException( + "Shape index {} is out of bounds; the .shp file only contains {} shapes".format( + i, _i + ) + ) # Seek to the offset and read the shape shp.seek(offset) @@ -1388,7 +1486,7 @@ def iterShapes(self, bbox=None): # shp file length in the header. Can't trust # that so we seek to the end of the file # and figure it out. - shp.seek(0,2) + shp.seek(0, 2) shpLength = shp.tell() shp.seek(100) @@ -1422,12 +1520,15 @@ def iterShapes(self, bbox=None): def __dbfHeader(self): """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" if not self.dbf: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no dbf file found)") + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. 
(no dbf file found)" + ) dbf = self.dbf # read relevant header parts dbf.seek(0) - self.numRecords, self.__dbfHdrLength, self.__recordLength = \ - unpack("6i", 9994,0,0,0,0,0)) + f.write(pack(">6i", 9994, 0, 0, 0, 0, 0)) # File length (Bytes / 2 = 16-bit words) - if headerType == 'shp': + if headerType == "shp": f.write(pack(">i", self.__shpFileLength())) - elif headerType == 'shx': - f.write(pack('>i', ((100 + (self.shpNum * 8)) // 2))) + elif headerType == "shx": + f.write(pack(">i", ((100 + (self.shpNum * 8)) // 2))) # Version, Shape type if self.shapeType is None: self.shapeType = NULL @@ -1942,37 +2086,41 @@ def __shapefileHeader(self, fileObj, headerType='shp'): # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. - bbox = [0,0,0,0] + bbox = [0, 0, 0, 0] f.write(pack("<4d", *bbox)) except error: - raise ShapefileException("Failed to write shapefile bounding box. Floats required.") + raise ShapefileException( + "Failed to write shapefile bounding box. Floats required." 
+ ) else: - f.write(pack("<4d", 0,0,0,0)) + f.write(pack("<4d", 0, 0, 0, 0)) # Elevation - if self.shapeType in (11,13,15,18): + if self.shapeType in (11, 13, 15, 18): # Z values are present in Z type zbox = self.zbox() if zbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - zbox = [0,0] + zbox = [0, 0] else: # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s - zbox = [0,0] + zbox = [0, 0] # Measure - if self.shapeType in (11,13,15,18,21,23,25,28,31): + if self.shapeType in (11, 13, 15, 18, 21, 23, 25, 28, 31): # M values are present in M or Z type mbox = self.mbox() if mbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - mbox = [0,0] + mbox = [0, 0] else: # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s - mbox = [0,0] + mbox = [0, 0] # Try writing try: f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) except error: - raise ShapefileException("Failed to write shapefile elevation and measure values. Floats required.") + raise ShapefileException( + "Failed to write shapefile elevation and measure values. Floats required." + ) def __dbfHeader(self): """Writes the dbf header and field descriptors.""" @@ -1982,32 +2130,43 @@ def __dbfHeader(self): year, month, day = time.localtime()[:3] year -= 1900 # Get all fields, ignoring DeletionFlag if specified - fields = [field for field in self.fields if field[0] != 'DeletionFlag'] + fields = [field for field in self.fields if field[0] != "DeletionFlag"] # Ensure has at least one field if not fields: - raise ShapefileException("Shapefile dbf file must contain at least one field.") + raise ShapefileException( + "Shapefile dbf file must contain at least one field." 
+ ) numRecs = self.recNum numFields = len(fields) headerLength = numFields * 32 + 33 if headerLength >= 65535: raise ShapefileException( - "Shapefile dbf header length exceeds maximum length.") + "Shapefile dbf header length exceeds maximum length." + ) recordLength = sum([int(field[2]) for field in fields]) + 1 - header = pack(' 2 else 0)) for p in s.points] except error: - raise ShapefileException("Failed to write elevation values for record %s. Expected floats." % self.shpNum) + raise ShapefileException( + "Failed to write elevation values for record %s. Expected floats." + % self.shpNum + ) # Write m extremes and values # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA # Note: missing m values are autoset to NODATA. - if s.shapeType in (13,15,18,23,25,28,31): + if s.shapeType in (13, 15, 18, 23, 25, 28, 31): try: f.write(pack("<2d", *self.__mbox(s))) except error: - raise ShapefileException("Failed to write measure extremes for record %s. Expected floats" % self.shpNum) + raise ShapefileException( + "Failed to write measure extremes for record %s. Expected floats" + % self.shpNum + ) try: - if hasattr(s,"m"): + if hasattr(s, "m"): # if m values are stored in attribute - f.write(pack("<%sd" % len(s.m), *[m if m is not None else NODATA for m in s.m])) + f.write( + pack( + "<%sd" % len(s.m), + *[m if m is not None else NODATA for m in s.m], + ) + ) else: # if m values are stored as 3rd/4th dimension # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) - mpos = 3 if s.shapeType in (13,15,18,31) else 2 - [f.write(pack(" mpos and p[mpos] is not None else NODATA)) for p in s.points] + mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2 + [ + f.write( + pack( + " mpos and p[mpos] is not None + else NODATA, + ) + ) + for p in s.points + ] except error: - raise ShapefileException("Failed to write measure values for record %s. 
Expected floats" % self.shpNum) + raise ShapefileException( + "Failed to write measure values for record %s. Expected floats" + % self.shpNum + ) # Write a single point - if s.shapeType in (1,11,21): + if s.shapeType in (1, 11, 21): try: f.write(pack("<2d", s.points[0][0], s.points[0][1])) except error: - raise ShapefileException("Failed to write point for record %s. Expected floats." % self.shpNum) + raise ShapefileException( + "Failed to write point for record %s. Expected floats." + % self.shpNum + ) # Write a single Z value # Note: missing z values are autoset to 0, but not sure if this is ideal. if s.shapeType == 11: @@ -2127,7 +2327,10 @@ def __shpRecord(self, s): s.z = (0,) f.write(pack("i", length)) f.seek(finish) - return offset,length + return offset, length def __shxRecord(self, offset, length): - """Writes the shx records.""" - f = self.__getFileObj(self.shx) - try: - f.write(pack(">i", offset // 2)) - except error: - raise ShapefileException('The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones.') - f.write(pack(">i", length)) + """Writes the shx records.""" + f = self.__getFileObj(self.shx) + try: + f.write(pack(">i", offset // 2)) + except error: + raise ShapefileException( + "The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones." + ) + f.write(pack(">i", length)) def record(self, *recordList, **recordDict): """Creates a dbf attribute record. 
You can submit either a sequence of @@ -2192,7 +2406,7 @@ def record(self, *recordList, **recordDict): if self.autoBalance and self.recNum > self.shpNum: self.balance() - fieldCount = sum((1 for field in self.fields if field[0] != 'DeletionFlag')) + fieldCount = sum((1 for field in self.fields if field[0] != "DeletionFlag")) if recordList: record = list(recordList) while len(record) < fieldCount: @@ -2200,8 +2414,8 @@ def record(self, *recordList, **recordDict): elif recordDict: record = [] for field in self.fields: - if field[0] == 'DeletionFlag': - continue # ignore deletionflag field in case it was specified + if field[0] == "DeletionFlag": + continue # ignore deletionflag field in case it was specified if field[0] in recordDict: val = recordDict[field[0]] if val is None: @@ -2209,7 +2423,7 @@ def record(self, *recordList, **recordDict): else: record.append(val) else: - record.append("") # need empty value for missing dict entries + record.append("") # need empty value for missing dict entries else: # Blank fields for empty record record = ["" for _ in range(fieldCount)] @@ -2224,18 +2438,20 @@ def __dbfRecord(self, record): # cannot change the fields after this point self.__dbfHeader() # first byte of the record is deletion flag, always disabled - f.write(b' ') + f.write(b" ") # begin self.recNum += 1 - fields = (field for field in self.fields if field[0] != 'DeletionFlag') # ignore deletionflag field in case it was specified + fields = ( + field for field in self.fields if field[0] != "DeletionFlag" + ) # ignore deletionflag field in case it was specified for (fieldName, fieldType, size, deci), value in zip(fields, record): # write fieldType = fieldType.upper() size = int(size) - if fieldType in ("N","F"): + if fieldType in ("N", "F"): # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. 
if value in MISSING: - value = b"*"*size # QGIS NULL + value = b"*" * size # QGIS NULL elif not deci: # force to int try: @@ -2246,42 +2462,54 @@ def __dbfRecord(self, record): except ValueError: # forcing directly to int failed, so was probably a float. value = int(float(value)) - value = format(value, "d")[:size].rjust(size) # caps the size if exceeds the field size + value = format(value, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size else: value = float(value) - value = format(value, ".%sf"%deci)[:size].rjust(size) # caps the size if exceeds the field size + value = format(value, ".%sf" % deci)[:size].rjust( + size + ) # caps the size if exceeds the field size elif fieldType == "D": # date: 8 bytes - date stored as a string in the format YYYYMMDD. if isinstance(value, date): - value = '{:04d}{:02d}{:02d}'.format(value.year, value.month, value.day) + value = "{:04d}{:02d}{:02d}".format( + value.year, value.month, value.day + ) elif isinstance(value, list) and len(value) == 3: - value = '{:04d}{:02d}{:02d}'.format(*value) + value = "{:04d}{:02d}{:02d}".format(*value) elif value in MISSING: - value = b'0' * 8 # QGIS NULL for date type + value = b"0" * 8 # QGIS NULL for date type elif is_string(value) and len(value) == 8: - pass # value is already a date string + pass # value is already a date string else: - raise ShapefileException("Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value.") - elif fieldType == 'L': + raise ShapefileException( + "Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value." + ) + elif fieldType == "L": # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
if value in MISSING: - value = b' ' # missing is set to space - elif value in [True,1]: - value = b'T' - elif value in [False,0]: - value = b'F' + value = b" " # missing is set to space + elif value in [True, 1]: + value = b"T" + elif value in [False, 0]: + value = b"F" else: - value = b' ' # unknown is set to space + value = b" " # unknown is set to space else: # anything else is forced to string, truncated to the length of the field value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) if not isinstance(value, bytes): # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) - value = b(value, 'ascii', self.encodingErrors) # should be default ascii encoding + value = b( + value, "ascii", self.encodingErrors + ) # should be default ascii encoding if len(value) != size: raise ShapefileException( "Shapefile Writer unable to pack incorrect sized value" - " (size %d) into field '%s' (size %d)." % (len(value), fieldName, size)) + " (size %d) into field '%s' (size %d)." + % (len(value), fieldName, size) + ) f.write(value) def balance(self): @@ -2293,12 +2521,10 @@ def balance(self): while self.recNum < self.shpNum: self.record() - def null(self): """Creates a null shape.""" self.shape(Shape(NULL)) - def point(self, x, y): """Creates a POINT shape.""" shapeType = POINT @@ -2323,12 +2549,13 @@ def pointz(self, x, y, z=0, m=None): pointShape.points.append([x, y, z, m]) self.shape(pointShape) - def multipoint(self, points): """Creates a MULTIPOINT shape. Points is a list of xy values.""" shapeType = MULTIPOINT - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=points, shapeType=shapeType) def multipointm(self, points): @@ -2336,7 +2563,9 @@ def multipointm(self, points): Points is a list of xym values. 
If the m (measure) value is not included, it defaults to None (NoData).""" shapeType = MULTIPOINTM - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=points, shapeType=shapeType) def multipointz(self, points): @@ -2345,10 +2574,11 @@ def multipointz(self, points): If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" shapeType = MULTIPOINTZ - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=points, shapeType=shapeType) - def line(self, lines): """Creates a POLYLINE shape. Lines is a collection of lines, each made up of a list of xy values.""" @@ -2370,7 +2600,6 @@ def linez(self, lines): shapeType = POLYLINEZ self._shapeparts(parts=lines, shapeType=shapeType) - def poly(self, polys): """Creates a POLYGON shape. Polys is a collection of polygons, each made up of a list of xy values. @@ -2398,7 +2627,6 @@ def polyz(self, polys): shapeType = POLYGONZ self._shapeparts(parts=polys, shapeType=shapeType) - def multipatch(self, parts, partTypes): """Creates a MULTIPATCH shape. Parts is a collection of 3D surface patches, each made up of a list of xyzm values. @@ -2424,7 +2652,6 @@ def multipatch(self, parts, partTypes): # write the shape self.shape(polyShape) - def _shapeparts(self, parts, shapeType): """Internal method for adding a shape that has multiple collections of points (parts): lines, polygons, and multipoint shapes. 
@@ -2433,7 +2660,7 @@ def _shapeparts(self, parts, shapeType): polyShape.parts = [] polyShape.points = [] # Make sure polygon rings (parts) are closed - if shapeType in (5,15,25,31): + if shapeType in (5, 15, 25, 31): for part in parts: if part[0] != part[-1]: part.append(part[0]) @@ -2460,20 +2687,23 @@ def field(self, name, fieldType="C", size="50", decimal=0): decimal = 0 if len(self.fields) >= 2046: raise ShapefileException( - "Shapefile Writer reached maximum number of fields: 2046.") + "Shapefile Writer reached maximum number of fields: 2046." + ) self.fields.append((name, fieldType, size, decimal)) # Begin Testing def test(**kwargs): import doctest + doctest.NORMALIZE_WHITESPACE = 1 - verbosity = kwargs.get('verbose', 0) + verbosity = kwargs.get("verbose", 0) if verbosity == 0: - print('Running doctests...') + print("Running doctests...") # ignore py2-3 unicode differences import re + class Py23DocChecker(doctest.OutputChecker): def check_output(self, want, got, optionflags): if sys.version_info[0] == 2: @@ -2481,13 +2711,20 @@ def check_output(self, want, got, optionflags): got = re.sub('u"(.*?)"', '"\\1"', got) res = doctest.OutputChecker.check_output(self, want, got, optionflags) return res + def summarize(self): doctest.OutputChecker.summarize(True) # run tests runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity) - with open("README.md","rb") as fobj: - test = doctest.DocTestParser().get_doctest(string=fobj.read().decode("utf8").replace('\r\n','\n'), globs={}, name="README", filename="README.md", lineno=0) + with open("README.md", "rb") as fobj: + test = doctest.DocTestParser().get_doctest( + string=fobj.read().decode("utf8").replace("\r\n", "\n"), + globs={}, + name="README", + filename="README.md", + lineno=0, + ) failure_count, test_count = runner.run(test) # print results @@ -2495,12 +2732,13 @@ def summarize(self): runner.summarize(True) else: if failure_count == 0: - print('All test passed successfully') + print("All test 
passed successfully") elif failure_count > 0: runner.summarize(verbosity) return failure_count + if __name__ == "__main__": """ Doctests are contained in the file 'README.md', and are tested using the built-in diff --git a/test_shapefile.py b/test_shapefile.py index f4a786f..a90d4d1 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -1,6 +1,7 @@ """ This module tests the functionality of shapefile.py. """ + # std lib imports import datetime import json @@ -15,184 +16,418 @@ # define various test shape tuples of (type, points, parts indexes, and expected geo interface output) -geo_interface_tests = [ (shapefile.POINT, # point - [(1,1)], - [], - {'type':'Point','coordinates':(1,1)} - ), - (shapefile.MULTIPOINT, # multipoint - [(1,1),(2,1),(2,2)], - [], - {'type':'MultiPoint','coordinates':[(1,1),(2,1),(2,2)]} - ), - (shapefile.POLYLINE, # single linestring - [(1,1),(2,1)], - [0], - {'type':'LineString','coordinates':[(1,1),(2,1)]} - ), - (shapefile.POLYLINE, # multi linestring - [(1,1),(2,1), # line 1 - (10,10),(20,10)], # line 2 - [0,2], - {'type':'MultiLineString','coordinates':[ - [(1,1),(2,1)], # line 1 - [(10,10),(20,10)] # line 2 - ]} - ), - (shapefile.POLYGON, # single polygon, no holes - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior - ], - [0], - {'type':'Polygon','coordinates':[ - [(1,1),(1,9),(9,9),(9,1),(1,1)], - ]} - ), - (shapefile.POLYGON, # single polygon, holes (ordered) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1 - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 2 - ], - [0,5,5+5], - {'type':'Polygon','coordinates':[ - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ]} - ), - (shapefile.POLYGON, # single polygon, holes (unordered) - [ - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1 - (1,1),(1,9),(9,9),(9,1),(1,1), # exterior - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 2 - ], - [0,5,5+5], - {'type':'Polygon','coordinates':[ - 
[(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ]} - ), - (shapefile.POLYGON, # multi polygon, no holes - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior - (11,11),(11,19),(19,19),(19,11),(11,11), # exterior - ], - [0,5], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], - ], - [ # poly 2 - [(11,11),(11,19),(19,19),(19,11),(11,11)], - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, holes (unordered) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - (11,11),(11,19),(19,19),(19,11),(11,11), # exterior 2 - (12,12),(14,12),(14,14),(12,14),(12,12), # hole 2.1 - (15,15),(17,15),(17,17),(15,17),(15,15), # hole 2.2 - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1.1 - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 1.2 - ], - [0,5,10,15,20,25], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ], - [ # poly 2 - [(11,11),(11,19),(19,19),(19,11),(11,11)], # exterior - [(12,12),(14,12),(14,14),(12,14),(12,12)], # hole 1 - [(15,15),(17,15),(17,17),(15,17),(15,15)], # hole 2 - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - (3,3),(3,7),(7,7),(7,3),(3,3), # exterior 2 - (4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5), # exterior 3 - (4,4),(6,4),(6,6),(4,6),(4,4), # hole 2.1 - (2,2),(8,2),(8,8),(2,8),(2,2), # hole 1.1 - ], - [0,5,10,15,20], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior 1 - [(2,2),(8,2),(8,8),(2,8),(2,2)], # hole 1.1 - ], - [ # poly 2 - [(3,3),(3,7),(7,7),(7,3),(3,3)], # exterior 2 - [(4,4),(6,4),(6,6),(4,6),(4,4)], # hole 2.1 - ], - [ # poly 3 - [(4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5)], # exterior 3 - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, nested 
exteriors with holes (unordered and tricky holes designed to throw off ring_sample() test) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - (3,3),(3,7),(7,7),(7,3),(3,3), # exterior 2 - (4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5), # exterior 3 - (4,4),(4,4),(6,4),(6,4),(6,4),(6,6),(4,6),(4,4), # hole 2.1 (hole has duplicate coords) - (2,2),(3,3),(4,2),(8,2),(8,8),(4,8),(2,8),(2,4),(2,2), # hole 1.1 (hole coords form straight line and starts in concave orientation) - ], - [0,5,10,15,20+3], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior 1 - [(2,2),(3,3),(4,2),(8,2),(8,8),(4,8),(2,8),(2,4),(2,2)], # hole 1.1 - ], - [ # poly 2 - [(3,3),(3,7),(7,7),(7,3),(3,3)], # exterior 2 - [(4,4),(4,4),(6,4),(6,4),(6,4),(6,6),(4,6),(4,4)], # hole 2.1 - ], - [ # poly 3 - [(4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5)], # exterior 3 - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, holes incl orphaned holes (unordered), should raise warning - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - (11,11),(11,19),(19,19),(19,11),(11,11), # exterior 2 - (12,12),(14,12),(14,14),(12,14),(12,12), # hole 2.1 - (15,15),(17,15),(17,17),(15,17),(15,15), # hole 2.2 - (95,95),(97,95),(97,97),(95,97),(95,95), # hole x.1 (orphaned hole, should be interpreted as exterior) - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1.1 - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 1.2 - ], - [0,5,10,15,20,25,30], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ], - [ # poly 2 - [(11,11),(11,19),(19,19),(19,11),(11,11)], # exterior - [(12,12),(14,12),(14,14),(12,14),(12,12)], # hole 1 - [(15,15),(17,15),(17,17),(15,17),(15,15)], # hole 2 - ], - [ # poly 3 (orphaned hole) - [(95,95),(97,95),(97,97),(95,97),(95,95)], # exterior - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, exteriors with wrong orientation (be nice and 
interpret as such), should raise warning - [(1,1),(9,1),(9,9),(1,9),(1,1), # exterior with hole-orientation - (11,11),(19,11),(19,19),(11,19),(11,11), # exterior with hole-orientation - ], - [0,5], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(9,1),(9,9),(1,9),(1,1)], - ], - [ # poly 2 - [(11,11),(19,11),(19,19),(11,19),(11,11)], - ], - ]} - ), - ] +geo_interface_tests = [ + ( + shapefile.POINT, # point + [(1, 1)], + [], + {"type": "Point", "coordinates": (1, 1)}, + ), + ( + shapefile.MULTIPOINT, # multipoint + [(1, 1), (2, 1), (2, 2)], + [], + {"type": "MultiPoint", "coordinates": [(1, 1), (2, 1), (2, 2)]}, + ), + ( + shapefile.POLYLINE, # single linestring + [(1, 1), (2, 1)], + [0], + {"type": "LineString", "coordinates": [(1, 1), (2, 1)]}, + ), + ( + shapefile.POLYLINE, # multi linestring + [ + (1, 1), + (2, 1), # line 1 + (10, 10), + (20, 10), + ], # line 2 + [0, 2], + { + "type": "MultiLineString", + "coordinates": [ + [(1, 1), (2, 1)], # line 1 + [(10, 10), (20, 10)], # line 2 + ], + }, + ), + ( + shapefile.POLYGON, # single polygon, no holes + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + ], + [0], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], + ], + }, + ), + ( + shapefile.POLYGON, # single polygon, holes (ordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1 + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 2 + ], + [0, 5, 5 + 5], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + }, + ), + ( + shapefile.POLYGON, # single polygon, holes (unordered) + [ + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1 + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 2 + ], + 
[0, 5, 5 + 5], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, no holes + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior + ], + [0, 5], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], + ], + [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, holes (unordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior 2 + (12, 12), + (14, 12), + (14, 14), + (12, 14), + (12, 12), # hole 2.1 + (15, 15), + (17, 15), + (17, 17), + (15, 17), + (15, 15), # hole 2.2 + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1.1 + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 1.2 + ], + [0, 5, 10, 15, 20, 25], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], # exterior + [(12, 12), (14, 12), (14, 14), (12, 14), (12, 12)], # hole 1 + [(15, 15), (17, 15), (17, 17), (15, 17), (15, 15)], # hole 2 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (3, 3), + (3, 7), + (7, 7), + (7, 3), + (3, 3), # exterior 2 + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), # exterior 3 + (4, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), # hole 2.1 + (2, 2), + (8, 2), + (8, 8), + (2, 8), + (2, 2), # hole 1.1 
+ ], + [0, 5, 10, 15, 20], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior 1 + [(2, 2), (8, 2), (8, 8), (2, 8), (2, 2)], # hole 1.1 + ], + [ # poly 2 + [(3, 3), (3, 7), (7, 7), (7, 3), (3, 3)], # exterior 2 + [(4, 4), (6, 4), (6, 6), (4, 6), (4, 4)], # hole 2.1 + ], + [ # poly 3 + [ + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), + ], # exterior 3 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered and tricky holes designed to throw off ring_sample() test) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (3, 3), + (3, 7), + (7, 7), + (7, 3), + (3, 3), # exterior 2 + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), # exterior 3 + (4, 4), + (4, 4), + (6, 4), + (6, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), # hole 2.1 (hole has duplicate coords) + (2, 2), + (3, 3), + (4, 2), + (8, 2), + (8, 8), + (4, 8), + (2, 8), + (2, 4), + ( + 2, + 2, + ), # hole 1.1 (hole coords form straight line and starts in concave orientation) + ], + [0, 5, 10, 15, 20 + 3], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior 1 + [ + (2, 2), + (3, 3), + (4, 2), + (8, 2), + (8, 8), + (4, 8), + (2, 8), + (2, 4), + (2, 2), + ], # hole 1.1 + ], + [ # poly 2 + [(3, 3), (3, 7), (7, 7), (7, 3), (3, 3)], # exterior 2 + [ + (4, 4), + (4, 4), + (6, 4), + (6, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), + ], # hole 2.1 + ], + [ # poly 3 + [ + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), + ], # exterior 3 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, holes incl orphaned holes (unordered), should raise warning + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior 2 + (12, 12), + (14, 12), + (14, 14), + (12, 14), + (12, 12), # hole 2.1 + (15, 15), 
+ (17, 15), + (17, 17), + (15, 17), + (15, 15), # hole 2.2 + (95, 95), + (97, 95), + (97, 97), + (95, 97), + (95, 95), # hole x.1 (orphaned hole, should be interpreted as exterior) + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1.1 + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 1.2 + ], + [0, 5, 10, 15, 20, 25, 30], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], # exterior + [(12, 12), (14, 12), (14, 14), (12, 14), (12, 12)], # hole 1 + [(15, 15), (17, 15), (17, 17), (15, 17), (15, 15)], # hole 2 + ], + [ # poly 3 (orphaned hole) + [(95, 95), (97, 95), (97, 97), (95, 97), (95, 95)], # exterior + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, exteriors with wrong orientation (be nice and interpret as such), should raise warning + [ + (1, 1), + (9, 1), + (9, 9), + (1, 9), + (1, 1), # exterior with hole-orientation + (11, 11), + (19, 11), + (19, 19), + (11, 19), + (11, 11), # exterior with hole-orientation + ], + [0, 5], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (9, 1), (9, 9), (1, 9), (1, 1)], + ], + [ # poly 2 + [(11, 11), (19, 11), (19, 19), (11, 19), (11, 11)], + ], + ], + }, + ), +] + def test_empty_shape_geo_interface(): """ @@ -204,6 +439,7 @@ def test_empty_shape_geo_interface(): with pytest.raises(Exception): shape.__geo_interface__ + @pytest.mark.parametrize("typ,points,parts,expected", geo_interface_tests) def test_expected_shape_geo_interface(typ, points, parts, expected): """ @@ -218,22 +454,22 @@ def test_expected_shape_geo_interface(typ, points, parts, expected): def test_reader_geo_interface(): with shapefile.Reader("shapefiles/blockgroups") as r: geoj = r.__geo_interface__ - assert geoj['type'] == 'FeatureCollection' - assert 'bbox' in geoj + 
assert geoj["type"] == "FeatureCollection" + assert "bbox" in geoj assert json.dumps(geoj) def test_shapes_geo_interface(): with shapefile.Reader("shapefiles/blockgroups") as r: geoj = r.shapes().__geo_interface__ - assert geoj['type'] == 'GeometryCollection' + assert geoj["type"] == "GeometryCollection" assert json.dumps(geoj) def test_shaperecords_geo_interface(): with shapefile.Reader("shapefiles/blockgroups") as r: geoj = r.shapeRecords().__geo_interface__ - assert geoj['type'] == 'FeatureCollection' + assert geoj["type"] == "FeatureCollection" assert json.dumps(geoj) @@ -253,7 +489,7 @@ def test_reader_url(): with shapefile.Reader(url) as sf: for recShape in sf.iterShapeRecords(): pass - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test without extension url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries?raw=true" @@ -261,7 +497,7 @@ def test_reader_url(): for recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test no files found url = "https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/README.md" @@ -275,7 +511,7 @@ def test_reader_url(): for recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True def test_reader_zip(): @@ -287,7 +523,7 @@ def test_reader_zip(): for recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test require specific path when reading multi-shapefile zipfile with pytest.raises(shapefile.ShapefileException): @@ -295,18 +531,22 @@ def test_reader_zip(): 
pass # test specifying the path when reading multi-shapefile zipfile (with extension) - with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp") as sf: + with shapefile.Reader( + "shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp" + ) as sf: for recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test specifying the path when reading multi-shapefile zipfile (without extension) - with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2") as sf: + with shapefile.Reader( + "shapefiles/blockgroups_multishapefile.zip/blockgroups2" + ) as sf: for recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test raising error when can't find shapefile inside zipfile with pytest.raises(shapefile.ShapefileException): @@ -342,9 +582,9 @@ def test_reader_close_filelike(): """ # note uses an actual shapefile from # the projects "shapefiles" directory - shp = open("shapefiles/blockgroups.shp", mode='rb') - shx = open("shapefiles/blockgroups.shx", mode='rb') - dbf = open("shapefiles/blockgroups.dbf", mode='rb') + shp = open("shapefiles/blockgroups.shp", mode="rb") + shx = open("shapefiles/blockgroups.shx", mode="rb") + dbf = open("shapefiles/blockgroups.dbf", mode="rb") sf = shapefile.Reader(shp=shp, shx=shx, dbf=dbf) sf.close() @@ -385,9 +625,9 @@ def test_reader_context_filelike(): """ # note uses an actual shapefile from # the projects "shapefiles" directory - shp = open("shapefiles/blockgroups.shp", mode='rb') - shx = open("shapefiles/blockgroups.shx", mode='rb') - dbf = open("shapefiles/blockgroups.dbf", mode='rb') + shp = open("shapefiles/blockgroups.shp", mode="rb") + shx = open("shapefiles/blockgroups.shx", mode="rb") + dbf = 
open("shapefiles/blockgroups.dbf", mode="rb") with shapefile.Reader(shp=shp, shx=shx, dbf=dbf) as sf: pass @@ -406,7 +646,7 @@ def test_reader_shapefile_type(): is returned correctly. """ with shapefile.Reader("shapefiles/blockgroups") as sf: - assert sf.shapeType == 5 # 5 means Polygon + assert sf.shapeType == 5 # 5 means Polygon assert sf.shapeType == shapefile.POLYGON assert sf.shapeTypeName == "POLYGON" @@ -424,7 +664,7 @@ def test_reader_shapefile_length(): def test_shape_metadata(): with shapefile.Reader("shapefiles/blockgroups") as sf: shape = sf.shape(0) - assert shape.shapeType == 5 # Polygon + assert shape.shapeType == 5 # Polygon assert shape.shapeType == shapefile.POLYGON assert sf.shapeTypeName == "POLYGON" @@ -441,10 +681,10 @@ def test_reader_fields(): assert isinstance(fields, list) field = fields[0] - assert isinstance(field[0], str) # field name - assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type - assert isinstance(field[2], int) # field length - assert isinstance(field[3], int) # decimal length + assert isinstance(field[0], str) # field name + assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type + assert isinstance(field[2], int) # field length + assert isinstance(field[3], int) # decimal length def test_reader_shapefile_extension_ignored(): @@ -480,7 +720,7 @@ def test_reader_dbf_only(): with shapefile.Reader(dbf="shapefiles/blockgroups.dbf") as sf: assert len(sf) == 663 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_shp_shx_only(): @@ -489,7 +729,9 @@ def test_reader_shp_shx_only(): shp and shx argument to the shapefile reader reads just the shp and shx file. 
""" - with shapefile.Reader(shp="shapefiles/blockgroups.shp", shx="shapefiles/blockgroups.shx") as sf: + with shapefile.Reader( + shp="shapefiles/blockgroups.shp", shx="shapefiles/blockgroups.shx" + ) as sf: assert len(sf) == 663 shape = sf.shape(3) assert len(shape.points) == 173 @@ -501,12 +743,14 @@ def test_reader_shp_dbf_only(): shp and shx argument to the shapefile reader reads just the shp and dbf file. """ - with shapefile.Reader(shp="shapefiles/blockgroups.shp", dbf="shapefiles/blockgroups.dbf") as sf: + with shapefile.Reader( + shp="shapefiles/blockgroups.shp", dbf="shapefiles/blockgroups.dbf" + ) as sf: assert len(sf) == 663 shape = sf.shape(3) assert len(shape.points) == 173 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_shp_only(): @@ -530,7 +774,7 @@ def test_reader_filelike_dbf_only(): with shapefile.Reader(dbf=open("shapefiles/blockgroups.dbf", "rb")) as sf: assert len(sf) == 663 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_filelike_shp_shx_only(): @@ -539,7 +783,10 @@ def test_reader_filelike_shp_shx_only(): shp and shx argument to the shapefile reader reads just the shp and shx file. """ - with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb"), shx=open("shapefiles/blockgroups.shx", "rb")) as sf: + with shapefile.Reader( + shp=open("shapefiles/blockgroups.shp", "rb"), + shx=open("shapefiles/blockgroups.shx", "rb"), + ) as sf: assert len(sf) == 663 shape = sf.shape(3) assert len(shape.points) == 173 @@ -551,12 +798,15 @@ def test_reader_filelike_shp_dbf_only(): shp and shx argument to the shapefile reader reads just the shp and dbf file. 
""" - with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb"), dbf=open("shapefiles/blockgroups.dbf", "rb")) as sf: + with shapefile.Reader( + shp=open("shapefiles/blockgroups.shp", "rb"), + dbf=open("shapefiles/blockgroups.dbf", "rb"), + ) as sf: assert len(sf) == 663 shape = sf.shape(3) assert len(shape.points) == 173 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_filelike_shp_only(): @@ -615,7 +865,9 @@ def test_record_attributes(fields=None): else: # default all fields record = full_record - fields = [field[0] for field in sf.fields[1:]] # fieldnames, sans del flag + fields = [ + field[0] for field in sf.fields[1:] + ] # fieldnames, sans del flag # check correct length assert len(record) == len(set(fields)) # check record values (should be in same order as shapefile fields) @@ -623,7 +875,9 @@ def test_record_attributes(fields=None): for field in sf.fields: field_name = field[0] if field_name in fields: - assert record[i] == record[field_name] == getattr(record, field_name) + assert ( + record[i] == record[field_name] == getattr(record, field_name) + ) i += 1 @@ -632,7 +886,7 @@ def test_record_subfields(): Assert that reader correctly retrieves only a subset of fields when specified. """ - fields = ["AREA","POP1990","MALES","FEMALES","MOBILEHOME"] + fields = ["AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"] test_record_attributes(fields=fields) @@ -642,7 +896,7 @@ def test_record_subfields_unordered(): of fields when specified, given in random order but retrieved in the order of the shapefile fields. """ - fields = sorted(["AREA","POP1990","MALES","FEMALES","MOBILEHOME"]) + fields = sorted(["AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"]) test_record_attributes(fields=fields) @@ -650,7 +904,7 @@ def test_record_subfields_delflag_notvalid(): """ Assert that reader does not consider DeletionFlag as a valid field name. 
""" - fields = ["DeletionFlag","AREA","POP1990","MALES","FEMALES","MOBILEHOME"] + fields = ["DeletionFlag", "AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"] with pytest.raises(ValueError): test_record_attributes(fields=fields) @@ -660,7 +914,7 @@ def test_record_subfields_duplicates(): Assert that reader correctly retrieves only a subset of fields when specified, handling duplicate input fields. """ - fields = ["AREA","AREA","AREA","MALES","MALES","MOBILEHOME"] + fields = ["AREA", "AREA", "AREA", "MALES", "MALES", "MOBILEHOME"] test_record_attributes(fields=fields) # check that only 3 values with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -705,13 +959,13 @@ def test_record_oid(): record = sf.record(i) assert record.oid == i - for i,record in enumerate(sf.records()): + for i, record in enumerate(sf.records()): assert record.oid == i - for i,record in enumerate(sf.iterRecords()): + for i, record in enumerate(sf.iterRecords()): assert record.oid == i - for i,shaperec in enumerate(sf.iterShapeRecords()): + for i, shaperec in enumerate(sf.iterShapeRecords()): assert shaperec.record.oid == i @@ -725,13 +979,13 @@ def test_shape_oid(): shape = sf.shape(i) assert shape.oid == i - for i,shape in enumerate(sf.shapes()): + for i, shape in enumerate(sf.shapes()): assert shape.oid == i - for i,shape in enumerate(sf.iterShapes()): + for i, shape in enumerate(sf.iterShapes()): assert shape.oid == i - for i,shaperec in enumerate(sf.iterShapeRecords()): + for i, shaperec in enumerate(sf.iterShapeRecords()): assert shaperec.shape.oid == i @@ -741,27 +995,29 @@ def test_shape_oid_no_shx(): its index in the shapefile, when shx file is missing. 
""" basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - dbf = open(basename + ".dbf", 'rb') - with shapefile.Reader(shp=shp, dbf=dbf) as sf, \ - shapefile.Reader(basename) as sf_expected: + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") + with ( + shapefile.Reader(shp=shp, dbf=dbf) as sf, + shapefile.Reader(basename) as sf_expected, + ): for i in range(len(sf)): shape = sf.shape(i) assert shape.oid == i shape_expected = sf_expected.shape(i) assert shape.__geo_interface__ == shape_expected.__geo_interface__ - for i,shape in enumerate(sf.shapes()): + for i, shape in enumerate(sf.shapes()): assert shape.oid == i shape_expected = sf_expected.shape(i) assert shape.__geo_interface__ == shape_expected.__geo_interface__ - for i,shape in enumerate(sf.iterShapes()): + for i, shape in enumerate(sf.iterShapes()): assert shape.oid == i shape_expected = sf_expected.shape(i) assert shape.__geo_interface__ == shape_expected.__geo_interface__ - for i,shaperec in enumerate(sf.iterShapeRecords()): + for i, shaperec in enumerate(sf.iterShapeRecords()): assert shaperec.shape.oid == i shape_expected = sf_expected.shape(i) assert shaperec.shape.__geo_interface__ == shape_expected.__geo_interface__ @@ -777,7 +1033,7 @@ def test_reader_offsets(): # shx offsets should not be read during loading assert not sf._offsets # reading a shape index should trigger reading offsets from shx file - shape = sf.shape(3) + __shape = sf.shape(3) assert len(sf._offsets) == len(sf.shapes()) @@ -787,21 +1043,20 @@ def test_reader_offsets_no_shx(): the offsets unless necessary, i.e. reading all the shapes. 
""" basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - dbf = open(basename + ".dbf", 'rb') + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") with shapefile.Reader(shp=shp, dbf=dbf) as sf: # offsets should not be built during loading assert not sf._offsets # reading a shape index should iterate to the shape # but the list of offsets should remain empty - shape = sf.shape(3) + __shape = sf.shape(3) assert not sf._offsets # reading all the shapes should build the list of offsets shapes = sf.shapes() assert len(sf._offsets) == len(shapes) - def test_reader_numshapes(): """ Assert that reader reads the numShapes attribute from the @@ -810,7 +1065,7 @@ def test_reader_numshapes(): basename = "shapefiles/blockgroups" with shapefile.Reader(basename) as sf: # numShapes should be set during loading - assert sf.numShapes != None + assert sf.numShapes is not None # numShapes should equal the number of shapes assert sf.numShapes == len(sf.shapes()) @@ -822,11 +1077,11 @@ def test_reader_numshapes_no_shx(): reading all the shapes will set the numShapes attribute. """ basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - dbf = open(basename + ".dbf", 'rb') + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") with shapefile.Reader(shp=shp, dbf=dbf) as sf: # numShapes should be unknown due to missing shx file - assert sf.numShapes == None + assert sf.numShapes is None # numShapes should be set after reading all the shapes shapes = sf.shapes() assert sf.numShapes == len(shapes) @@ -857,7 +1112,7 @@ def test_reader_len_dbf_only(): is equal to length of all records. """ basename = "shapefiles/blockgroups" - dbf = open(basename + ".dbf", 'rb') + dbf = open(basename + ".dbf", "rb") with shapefile.Reader(dbf=dbf) as sf: assert len(sf) == len(sf.records()) @@ -868,8 +1123,8 @@ def test_reader_len_no_dbf(): is equal to length of all shapes. 
""" basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - shx = open(basename + ".shx", 'rb') + shp = open(basename + ".shp", "rb") + shx = open(basename + ".shx", "rb") with shapefile.Reader(shp=shp, shx=shx) as sf: assert len(sf) == len(sf.shapes()) @@ -880,7 +1135,7 @@ def test_reader_len_no_dbf_shx(): is equal to length of all shapes. """ basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') + shp = open(basename + ".shp", "rb") with shapefile.Reader(shp=shp) as sf: assert len(sf) == len(sf.shapes()) @@ -898,10 +1153,10 @@ def test_reader_corrupt_files(): # add 10 line geoms for _ in range(10): w.record("value") - w.line([[(1,1),(1,2),(2,2)]]) + w.line([[(1, 1), (1, 2), (2, 2)]]) # add junk byte data to end of dbf and shp files - w.dbf.write(b'12345') - w.shp.write(b'12345') + w.dbf.write(b"12345") + w.shp.write(b"12345") # read the corrupt shapefile and assert that it reads correctly with shapefile.Reader(basename) as sf: @@ -954,7 +1209,7 @@ def test_bboxfilter_shapes(): # compare assert len(shapes) == len(manual) # check that they line up - for shape,man in zip(shapes,manual): + for shape, man in zip(shapes, manual): assert shape.oid == man.oid assert shape.__geo_interface__ == man.__geo_interface__ @@ -987,7 +1242,7 @@ def test_bboxfilter_itershapes(): # compare assert len(shapes) == len(manual) # check that they line up - for shape,man in zip(shapes,manual): + for shape, man in zip(shapes, manual): assert shape.oid == man.oid assert shape.__geo_interface__ == man.__geo_interface__ @@ -1027,7 +1282,7 @@ def test_bboxfilter_shaperecords(): # compare assert len(shaperecs) == len(manual) # check that they line up - for shaperec,man in zip(shaperecs,manual): + for shaperec, man in zip(shaperecs, manual): # oids assert shaperec.shape.oid == shaperec.record.oid # same shape as manual @@ -1055,7 +1310,7 @@ def test_bboxfilter_itershaperecords(): # compare assert len(shaperecs) == len(manual) # check that they line up - for 
shaperec,man in zip(shaperecs,manual): + for shaperec, man in zip(shaperecs, manual): # oids assert shaperec.shape.oid == shaperec.record.oid # same shape as manual @@ -1108,7 +1363,7 @@ def test_shaperecord_record(): shaperec = sf.shapeRecord(3) record = shaperec.record - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_write_field_name_limit(tmpdir): @@ -1117,11 +1372,11 @@ def test_write_field_name_limit(tmpdir): """ filename = tmpdir.join("test.shp").strpath with shapefile.Writer(filename) as writer: - writer.field('a'*5, 'C') # many under length limit - writer.field('a'*9, 'C') # 1 under length limit - writer.field('a'*10, 'C') # at length limit - writer.field('a'*11, 'C') # 1 over length limit - writer.field('a'*20, 'C') # many over limit + writer.field("a" * 5, "C") # many under length limit + writer.field("a" * 9, "C") # 1 under length limit + writer.field("a" * 10, "C") # at length limit + writer.field("a" * 11, "C") # 1 over length limit + writer.field("a" * 20, "C") # many over limit with shapefile.Reader(filename) as reader: fields = reader.fields[1:] @@ -1139,27 +1394,30 @@ def test_write_shp_only(tmpdir): creates just a shp file. 
""" filename = tmpdir.join("test").strpath - with shapefile.Writer(shp=filename+'.shp') as writer: + with shapefile.Writer(shp=filename + ".shp") as writer: writer.point(1, 1) assert writer.shp and not writer.shx and not writer.dbf assert writer.shpNum == 1 assert len(writer) == 1 - assert writer.shp.closed == True + assert writer.shp.closed is True # assert test.shp exists - assert os.path.exists(filename+'.shp') + assert os.path.exists(filename + ".shp") # test that can read shapes - with shapefile.Reader(shp=filename+'.shp') as reader: + with shapefile.Reader(shp=filename + ".shp") as reader: assert reader.shp and not reader.shx and not reader.dbf - assert (reader.numRecords, reader.numShapes) == (None, None) # numShapes is unknown in the absence of shx file + assert (reader.numRecords, reader.numShapes) == ( + None, + None, + ) # numShapes is unknown in the absence of shx file assert len(reader.shapes()) == 1 # assert test.shx does not exist - assert not os.path.exists(filename+'.shx') + assert not os.path.exists(filename + ".shx") # assert test.dbf does not exist - assert not os.path.exists(filename+'.dbf') + assert not os.path.exists(filename + ".dbf") def test_write_shp_shx_only(tmpdir): @@ -1169,29 +1427,29 @@ def test_write_shp_shx_only(tmpdir): creates just a shp and shx file. 
""" filename = tmpdir.join("test").strpath - with shapefile.Writer(shp=filename+'.shp', shx=filename+'.shx') as writer: + with shapefile.Writer(shp=filename + ".shp", shx=filename + ".shx") as writer: writer.point(1, 1) assert writer.shp and writer.shx and not writer.dbf assert writer.shpNum == 1 assert len(writer) == 1 - assert writer.shp.closed == writer.shx.closed == True + assert writer.shp.closed is writer.shx.closed is True # assert test.shp exists - assert os.path.exists(filename+'.shp') + assert os.path.exists(filename + ".shp") # assert test.shx exists - assert os.path.exists(filename+'.shx') + assert os.path.exists(filename + ".shx") # test that can read shapes and offsets - with shapefile.Reader(shp=filename+'.shp', shx=filename+'.shx') as reader: + with shapefile.Reader(shp=filename + ".shp", shx=filename + ".shx") as reader: assert reader.shp and reader.shx and not reader.dbf assert (reader.numRecords, reader.numShapes) == (None, 1) - reader.shape(0) # trigger reading of shx offsets + reader.shape(0) # trigger reading of shx offsets assert len(reader._offsets) == 1 assert len(reader.shapes()) == 1 # assert test.dbf does not exist - assert not os.path.exists(filename+'.dbf') + assert not os.path.exists(filename + ".dbf") def test_write_shp_dbf_only(tmpdir): @@ -1201,30 +1459,33 @@ def test_write_shp_dbf_only(tmpdir): creates just a shp and dbf file. 
""" filename = tmpdir.join("test").strpath - with shapefile.Writer(shp=filename+'.shp', dbf=filename+'.dbf') as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + with shapefile.Writer(shp=filename + ".shp", dbf=filename + ".dbf") as writer: + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") writer.point(1, 1) assert writer.shp and not writer.shx and writer.dbf assert writer.shpNum == writer.recNum == 1 assert len(writer) == 1 - assert writer.shp.closed == writer.dbf.closed == True + assert writer.shp.closed is writer.dbf.closed is True # assert test.shp exists - assert os.path.exists(filename+'.shp') + assert os.path.exists(filename + ".shp") # assert test.dbf exists - assert os.path.exists(filename+'.dbf') + assert os.path.exists(filename + ".dbf") # test that can read records and shapes - with shapefile.Reader(shp=filename+'.shp', dbf=filename+'.dbf') as reader: + with shapefile.Reader(shp=filename + ".shp", dbf=filename + ".dbf") as reader: assert reader.shp and not reader.shx and reader.dbf - assert (reader.numRecords, reader.numShapes) == (1, None) # numShapes is unknown in the absence of shx file + assert (reader.numRecords, reader.numShapes) == ( + 1, + None, + ) # numShapes is unknown in the absence of shx file assert len(reader.records()) == 1 assert len(reader.shapes()) == 1 # assert test.shx does not exist - assert not os.path.exists(filename+'.shx') + assert not os.path.exists(filename + ".shx") def test_write_dbf_only(tmpdir): @@ -1234,28 +1495,28 @@ def test_write_dbf_only(tmpdir): creates just a dbf file. 
""" filename = tmpdir.join("test").strpath - with shapefile.Writer(dbf=filename+'.dbf') as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + with shapefile.Writer(dbf=filename + ".dbf") as writer: + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") assert not writer.shp and not writer.shx and writer.dbf assert writer.recNum == 1 assert len(writer) == 1 - assert writer.dbf.closed == True + assert writer.dbf.closed is True # assert test.dbf exists - assert os.path.exists(filename+'.dbf') + assert os.path.exists(filename + ".dbf") # test that can read records - with shapefile.Reader(dbf=filename+'.dbf') as reader: + with shapefile.Reader(dbf=filename + ".dbf") as reader: assert not writer.shp and not writer.shx and writer.dbf assert (reader.numRecords, reader.numShapes) == (1, None) assert len(reader.records()) == 1 # assert test.shp does not exist - assert not os.path.exists(filename+'.shp') + assert not os.path.exists(filename + ".shp") # assert test.shx does not exist - assert not os.path.exists(filename+'.shx') + assert not os.path.exists(filename + ".shx") def test_write_default_shp_shx_dbf(tmpdir): @@ -1266,8 +1527,8 @@ def test_write_default_shp_shx_dbf(tmpdir): """ filename = tmpdir.join("test").strpath with shapefile.Writer(filename) as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") writer.null() # assert shp, shx, dbf files exist @@ -1284,8 +1545,8 @@ def test_write_pathlike(tmpdir): filename = tmpdir.join("test") assert not isinstance(filename, str) with shapefile.Writer(filename) as writer: - writer.field('field1', 'C') - writer.record('value') + writer.field("field1", "C") + writer.record("value") writer.null() assert (filename + ".shp").ensure() assert (filename + ".shx").ensure() @@ -1296,12 +1557,12 @@ def 
test_write_filelike(tmpdir): """ Assert that file-like objects are written correctly. """ - shp = open(tmpdir.join("test.shp").strpath, mode='wb+') - shx = open(tmpdir.join("test.shx").strpath, mode='wb+') - dbf = open(tmpdir.join("test.dbf").strpath, mode='wb+') + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") with shapefile.Writer(shx=shx, dbf=dbf, shp=shp) as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") writer.null() # test that filelike objects were written correctly @@ -1316,9 +1577,9 @@ def test_write_close_path(tmpdir): closes the shp, shx, and dbf files on exit, if given paths. """ - sf = shapefile.Writer(tmpdir.join('test')) - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + sf = shapefile.Writer(tmpdir.join("test")) + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() sf.close() @@ -1327,7 +1588,7 @@ def test_write_close_path(tmpdir): assert sf.shx.closed is True # test that opens and reads correctly after - with shapefile.Reader(tmpdir.join('test')) as reader: + with shapefile.Reader(tmpdir.join("test")) as reader: assert len(reader) == 1 assert reader.shape(0).shapeType == shapefile.NULL @@ -1338,12 +1599,12 @@ def test_write_close_filelike(tmpdir): leaves the shp, shx, and dbf files open on exit, if given filelike objects. 
""" - shp = open(tmpdir.join("test.shp").strpath, mode='wb+') - shx = open(tmpdir.join("test.shx").strpath, mode='wb+') - dbf = open(tmpdir.join("test.dbf").strpath, mode='wb+') + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") sf = shapefile.Writer(shx=shx, dbf=dbf, shp=shp) - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() sf.close() @@ -1363,9 +1624,9 @@ def test_write_context_path(tmpdir): closes the shp, shx, and dbf files on exit, if given paths. """ - with shapefile.Writer(tmpdir.join('test')) as sf: - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + with shapefile.Writer(tmpdir.join("test")) as sf: + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() assert sf.shp.closed is True @@ -1373,7 +1634,7 @@ def test_write_context_path(tmpdir): assert sf.shx.closed is True # test that opens and reads correctly after - with shapefile.Reader(tmpdir.join('test')) as reader: + with shapefile.Reader(tmpdir.join("test")) as reader: assert len(reader) == 1 assert reader.shape(0).shapeType == shapefile.NULL @@ -1384,12 +1645,12 @@ def test_write_context_filelike(tmpdir): leaves the shp, shx, and dbf files open on exit, if given filelike objects. 
""" - shp = open(tmpdir.join("test.shp").strpath, mode='wb+') - shx = open(tmpdir.join("test.shx").strpath, mode='wb+') - dbf = open(tmpdir.join("test.dbf").strpath, mode='wb+') + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") with shapefile.Writer(shx=shx, dbf=dbf, shp=shp) as sf: - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() assert sf.shp.closed is False @@ -1411,7 +1672,7 @@ def test_write_shapefile_extension_ignored(tmpdir): ext = ".abc" filename = tmpdir.join(base + ext).strpath with shapefile.Writer(filename) as writer: - writer.field('field1', 'C') # required to create a valid dbf file + writer.field("field1", "C") # required to create a valid dbf file # assert shp, shx, dbf files exist basepath = tmpdir.join(base).strpath @@ -1432,12 +1693,12 @@ def test_write_record(tmpdir): with shapefile.Writer(filename) as writer: writer.autoBalance = True - writer.field('one', 'C') - writer.field('two', 'C') - writer.field('three', 'C') - writer.field('four', 'C') + writer.field("one", "C") + writer.field("two", "C") + writer.field("three", "C") + writer.field("four", "C") - values = ['one','two','three','four'] + values = ["one", "two", "three", "four"] writer.record(*values) writer.record(*values) @@ -1459,12 +1720,12 @@ def test_write_partial_record(tmpdir): with shapefile.Writer(filename) as writer: writer.autoBalance = True - writer.field('one', 'C') - writer.field('two', 'C') - writer.field('three', 'C') - writer.field('four', 'C') + writer.field("one", "C") + writer.field("two", "C") + writer.field("three", "C") + writer.field("four", "C") - values = ['one','two'] + values = ["one", "two"] writer.record(*values) writer.record(*values) @@ -1474,7 +1735,7 @@ def test_write_partial_record(tmpdir): with 
shapefile.Reader(filename) as reader: expected = list(values) - expected.extend(['','']) + expected.extend(["", ""]) for record in reader.iterRecords(): assert record == expected @@ -1487,13 +1748,13 @@ def test_write_geojson(tmpdir): """ filename = tmpdir.join("test").strpath with shapefile.Writer(filename) as w: - w.field('TEXT', 'C') - w.field('NUMBER', 'N') - w.field('DATE', 'D') - w.record('text', 123, datetime.date(1898,1,30)) - w.record('text', 123, [1998,1,30]) - w.record('text', 123, '19980130') - w.record('text', 123, '-9999999') # faulty date + w.field("TEXT", "C") + w.field("NUMBER", "N") + w.field("DATE", "D") + w.record("text", 123, datetime.date(1898, 1, 30)) + w.record("text", 123, [1998, 1, 30]) + w.record("text", 123, "19980130") + w.record("text", 123, "-9999999") # faulty date w.record(None, None, None) w.null() w.null() @@ -1508,7 +1769,9 @@ def test_write_geojson(tmpdir): assert json.dumps(r.__geo_interface__) -shape_types = [k for k in shapefile.SHAPETYPE_LOOKUP.keys() if k != 31] # exclude multipatch +shape_types = [ + k for k in shapefile.SHAPETYPE_LOOKUP.keys() if k != 31 +] # exclude multipatch @pytest.mark.parametrize("shape_type", shape_types) @@ -1518,7 +1781,7 @@ def test_write_empty_shapefile(tmpdir, shape_type): """ filename = tmpdir.join("test").strpath with shapefile.Writer(filename, shapeType=shape_type) as w: - w.field('field1', 'C') # required to create a valid dbf file + w.field("field1", "C") # required to create a valid dbf file with shapefile.Reader(filename) as r: # test correct shape type From 191b9cda64cc5c8da08c44600c21fec8031d2350 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 8 Sep 2024 15:49:47 +0100 Subject: [PATCH 13/40] Restore a catch all except clause to preserve previous file processing logic. 
--- shapefile.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/shapefile.py b/shapefile.py index 1064c99..ead9628 100644 --- a/shapefile.py +++ b/shapefile.py @@ -966,9 +966,6 @@ def __init__(self, *args, **kwargs): for ext in ["SHP", "SHX", "DBF", "shp", "shx", "dbf"]: try: member = archive.open(shapefile + "." + ext) - except zipfile.BadZipFile: - pass - else: # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() fileobj = tempfile.NamedTemporaryFile( mode="w+b", delete=True @@ -977,6 +974,9 @@ def __init__(self, *args, **kwargs): fileobj.seek(0) setattr(self, ext.lower(), fileobj) self._files_to_close.append(fileobj) + + except BaseException: + pass # Close and delete the temporary zipfile try: zipfileobj.close() From 104e06bc21e654efd58100cba823994cb9748279 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 14 Sep 2024 23:35:20 +0100 Subject: [PATCH 14/40] Install mypy using pre-commit --- .pre-commit-config.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3d90e84..f9c6975 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,3 +11,9 @@ repos: - id: ruff args: [ --fix ] - id: ruff-format +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.11.2 + hooks: + - id: mypy + args: [--strict, --ignore-missing-imports, "."] + additional_dependencies: [tokenize-rt==6.0.0] From 3bd25b4db194fd3aef797ed3e98516e51a13d536 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 00:53:40 +0100 Subject: [PATCH 15/40] Add mypy to pre-commit and some basic type hints for it to check --- .dmypy.json | 1 + .pre-commit-config.yaml | 7 ++++++- shapefile.py | 40 +++++++++++++++++++++++++++------------- 3 files changed, 34 insertions(+), 14 deletions(-) create mode 100644 .dmypy.json diff --git a/.dmypy.json 
b/.dmypy.json new file mode 100644 index 0000000..4eab44d --- /dev/null +++ b/.dmypy.json @@ -0,0 +1 @@ +{"pid": 7632, "connection_name": "\\\\.\\pipe\\dmypy-_CCjf3E3.pipe"} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f9c6975..4ad9670 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,5 +15,10 @@ repos: rev: v1.11.2 hooks: - id: mypy - args: [--strict, --ignore-missing-imports, "."] + name: mypy + entry: dmypy + files: \.py$ + language: python + require_serial: true + args: ["run", "--", "--implicit-reexport", "--warn-unused-ignores", "--cache-fine-grained", "--ignore-missing-imports"] additional_dependencies: [tokenize-rt==6.0.0] diff --git a/shapefile.py b/shapefile.py index ead9628..84b22cc 100644 --- a/shapefile.py +++ b/shapefile.py @@ -23,6 +23,9 @@ from urllib.parse import urlparse, urlunparse from urllib.request import urlopen, Request +from typing import Any, Union, Iterable, ByteString +from collections.abc import Sequence + # Create named logger logger = logging.getLogger(__name__) @@ -89,7 +92,7 @@ NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. -def b(v, encoding="utf-8", encodingErrors="strict"): +def b(v: Any, encoding="utf-8", encodingErrors="strict") -> bytes: if isinstance(v, str): # For python 3 encode str to bytes. return v.encode(encoding, encodingErrors) @@ -104,7 +107,9 @@ def b(v, encoding="utf-8", encodingErrors="strict"): return str(v).encode(encoding, encodingErrors) -def u(v, encoding="utf-8", encodingErrors="strict"): +def u( + v: Union[bytes, str, None, int, ByteString], encoding="utf-8", encodingErrors="strict" +) -> str: if isinstance(v, bytes): # For python 3 decode bytes to str. 
return v.decode(encoding, encodingErrors) @@ -119,11 +124,11 @@ def u(v, encoding="utf-8", encodingErrors="strict"): return bytes(v).decode(encoding, encodingErrors) -def is_string(v): +def is_string(v: Any) -> bool: return isinstance(v, str) -def pathlike_obj(path): +def pathlike_obj(path: Any) -> Any: if isinstance(path, os.PathLike): return os.fsdecode(path) else: @@ -140,8 +145,11 @@ class _Array(array.array): def __repr__(self): return str(self.tolist()) +Point_T = Sequence[float] +Coords_T = Sequence[Point_T] +BBox_T = tuple[float, float, float, float] -def signed_area(coords, fast=False): +def signed_area(coords: Coords_T, fast: bool = False) -> float: """Return the signed area enclosed by a ring using the linear time algorithm. A value >= 0 indicates a counter-clockwise oriented ring. A faster version is possible by setting 'fast' to True, which returns @@ -157,7 +165,7 @@ def signed_area(coords, fast=False): return area2 / 2.0 -def is_cw(coords): +def is_cw(coords: Coords_T) -> bool: """Returns True if a polygon ring has clockwise orientation, determined by a negatively signed area. 
""" @@ -165,19 +173,21 @@ def is_cw(coords): return area2 < 0 -def rewind(coords): +def rewind(coords: Coords_T) -> list[Point_T]: """Returns the input coords in reversed order.""" return list(reversed(coords)) -def ring_bbox(coords): +def ring_bbox(coords: Coords_T) -> BBox_T: """Calculates and returns the bounding box of a ring.""" xs, ys = zip(*coords) bbox = min(xs), min(ys), max(xs), max(ys) return bbox -def bbox_overlap(bbox1, bbox2): +def bbox_overlap( + bbox1: BBox_T, bbox2: BBox_T +) -> bool: """Tests whether two bounding boxes overlap, returning a boolean""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 @@ -185,7 +195,9 @@ def bbox_overlap(bbox1, bbox2): return overlap -def bbox_contains(bbox1, bbox2): +def bbox_contains( + bbox1: BBox_T, bbox2: BBox_T +) -> bool: """Tests whether bbox1 fully contains bbox2, returning a boolean""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 @@ -193,7 +205,7 @@ def bbox_contains(bbox1, bbox2): return contains -def ring_contains_point(coords, p): +def ring_contains_point(coords: Coords_T, p: Point_T) -> bool: """Fast point-in-polygon crossings algorithm, MacMartin optimization. Adapted from code by Eric Haynes @@ -238,7 +250,9 @@ def ring_contains_point(coords, p): return inside_flag -def ring_sample(coords, ccw=False): +def ring_sample( + coords: Coords_T, ccw: bool = False +) -> Point_T: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. 
@@ -286,7 +300,7 @@ def itercoords(): raise Exception("Unexpected error: Unable to find a ring sample point.") -def ring_contains_ring(coords1, coords2): +def ring_contains_ring(coords1, coords2) -> bool: """Returns True if all vertexes in coords2 are fully inside coords1.""" return all((ring_contains_point(coords1, p2) for p2 in coords2)) From 4ff015b37ca0f8c6972dcfe8004366f5f28caa2f Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 00:54:10 +0100 Subject: [PATCH 16/40] Ruff reformat --- shapefile.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/shapefile.py b/shapefile.py index 84b22cc..dbdca0f 100644 --- a/shapefile.py +++ b/shapefile.py @@ -23,7 +23,7 @@ from urllib.parse import urlparse, urlunparse from urllib.request import urlopen, Request -from typing import Any, Union, Iterable, ByteString +from typing import Any, Union, ByteString from collections.abc import Sequence # Create named logger @@ -108,7 +108,9 @@ def b(v: Any, encoding="utf-8", encodingErrors="strict") -> bytes: def u( - v: Union[bytes, str, None, int, ByteString], encoding="utf-8", encodingErrors="strict" + v: Union[bytes, str, None, int, ByteString], + encoding="utf-8", + encodingErrors="strict", ) -> str: if isinstance(v, bytes): # For python 3 decode bytes to str. @@ -145,10 +147,12 @@ class _Array(array.array): def __repr__(self): return str(self.tolist()) + Point_T = Sequence[float] Coords_T = Sequence[Point_T] BBox_T = tuple[float, float, float, float] + def signed_area(coords: Coords_T, fast: bool = False) -> float: """Return the signed area enclosed by a ring using the linear time algorithm. A value >= 0 indicates a counter-clockwise oriented ring. 
@@ -185,9 +189,7 @@ def ring_bbox(coords: Coords_T) -> BBox_T: return bbox -def bbox_overlap( - bbox1: BBox_T, bbox2: BBox_T -) -> bool: +def bbox_overlap(bbox1: BBox_T, bbox2: BBox_T) -> bool: """Tests whether two bounding boxes overlap, returning a boolean""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 @@ -195,9 +197,7 @@ def bbox_overlap( return overlap -def bbox_contains( - bbox1: BBox_T, bbox2: BBox_T -) -> bool: +def bbox_contains(bbox1: BBox_T, bbox2: BBox_T) -> bool: """Tests whether bbox1 fully contains bbox2, returning a boolean""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 @@ -250,9 +250,7 @@ def ring_contains_point(coords: Coords_T, p: Point_T) -> bool: return inside_flag -def ring_sample( - coords: Coords_T, ccw: bool = False -) -> Point_T: +def ring_sample(coords: Coords_T, ccw: bool = False) -> Point_T: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. 
From d83f3919fd3ee152a85d02276d10e36c9a66f2b9 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:12:06 +0100 Subject: [PATCH 17/40] Create .gitattributes --- .gitattributes | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1fe7436 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +* text eol=lf +shapefiles/**/* binary \ No newline at end of file From 6a1a0efad653503ec29bc514cf63b2e668fce53e Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:14:18 +0100 Subject: [PATCH 18/40] Change line endings to LF --- LICENSE.TXT | 18 +- changelog.txt | 542 ++--- shapefile.py | 5520 ++++++++++++++++++++++++------------------------- 3 files changed, 3040 insertions(+), 3040 deletions(-) diff --git a/LICENSE.TXT b/LICENSE.TXT index b7d7276..3ab02f3 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -1,9 +1,9 @@ -The MIT License (MIT) - -Copyright � 2013 Joel Lawhead - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the �Software�), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +The MIT License (MIT) + +Copyright � 2013 Joel Lawhead + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the �Software�), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/changelog.txt b/changelog.txt index 80b88e2..533d704 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,271 +1,271 @@ - -VERSION 2.3.1 - -2022-07-28 - Bug fixes: - * Fix recently introduced issue where Reader/Writer closes file-like objects provided by user (#244) - -VERSION 2.3.0 - -2022-04-30 - New Features: - * Added support for pathlib and path-like shapefile filepaths (@mwtoews). - * Allow reading individual file extensions via filepaths. 
- - Improvements: - * Simplified setup and deployment (@mwtoews) - * Faster shape access when missing shx file - * Switch to named logger (see #240) - - Bug fixes: - * More robust handling of corrupt shapefiles (fixes #235) - * Fix errors when writing to individual file-handles (fixes #237) - * Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) - * Fix test issues in environments without network access (@sebastic, @musicinmybrain). - -VERSION 2.2.0 - -2022-02-02 - New Features: - * Read shapefiles directly from zipfiles. - * Read shapefiles directly from urls. - * Allow fast extraction of only a subset of dbf fields through a `fields` arg. - * Allow fast filtering which shapes to read from the file through a `bbox` arg. - - Improvements: - * More examples and restructuring of README. - * More informative Shape to geojson warnings (see #219). - * Add shapefile.VERBOSE flag to control warnings verbosity (default True). - * Shape object information when calling repr(). - * Faster ring orientation checks, enforce geojson output ring orientation. - - Bug fixes: - * Remove null-padding at end of some record character fields. - * Fix dbf writing error when the number of record list or dict entries didn't match the number of fields. 
- * Handle rare garbage collection issue after deepcopy (https://github.com/mattijn/topojson/issues/120) - * Fix bug where records and shapes would be assigned incorrect record number (@karanrn) - * Fix typos in docs (@timgates) - -VERSION 2.1.3 - -2021-01-14 - Bug fixes: - * Fix recent bug in geojson hole-in-polygon checking (see #205) - * Misc fixes to allow geo interface dump to json (eg dates as strings) - * Handle additional dbf date null values, and return faulty dates as unicode (see #187) - * Add writer target typecheck - * Fix bugs to allow reading shp/shx/dbf separately - * Allow delayed shapefile loading by passing no args - * Fix error with writing empty z/m shapefile (@mcuprjak) - * Fix signed_area() so ignores z/m coords - * Enforce writing the 11th field name character as null-terminator (only first 10 are used) - * Minor README fixes - * Added more tests - -VERSION 2.1.2 - -2020-09-10 - Bug fixes: - * Fix issue where `warnings.simplefilter('always')` changes global warning behavior [see #203] - -VERSION 2.1.1 - -2020-09-09 - Improvements: - * Handle shapes with no coords and represent as geojson with no coords (GeoJSON null-equivalent) - * Expand testing to Python 3.6, 3.7, 3.8 and PyPy; drop 3.3 and 3.4 [@mwtoews] - * Added pytest testing [@jmoujaes] - - Bug fixes: - * Fix incorrect geo interface handling of multipolygons with complex exterior-hole relations [see #202] - * Enforce shapefile requirement of at least one field, to avoid writing invalid shapefiles [@Jonty] - * Fix Reader geo interface including DeletionFlag field in feature properties [@nnseva] - * Fix polygons not being auto closed, which was accidentally dropped - * Fix error for null geometries in feature geojson - * Misc docstring cleanup [@fiveham] - -VERSION 2.1.0 - -2019-02-15 - New Features: - * Added back read/write support for unicode field names. 
- * Improved Record representation - * More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() - - Bug fixes: - * Fixed error when reading optional m-values - * Fixed Record attribute autocomplete in Python 3 - * Misc readme cleanup - -VERSION 2.0.1 - -2018-11-05 - * Fix pip install setup.py README decoding error. - -VERSION 2.0.0 - -2018-09-01 - (Note: Some contributor attributions may be missing.) - New Features: - * Full support for unicode text, with custom encoding, and exception handling. - - Means that the Reader returns unicode, and the Writer accepts unicode. - * PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. - * Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: - - Specify filepath/destination and text encoding when creating the Writer. - - The file is written incrementally with each call to shape/record. - - Adding shapes is now done using dedicated methods for each shapetype. - * Reading shapefiles is now more convenient: - - Shapefiles can be opened using the context manager, and files are properly closed. - - Shapefiles can be iterated, have a length, and supports the geo interface. - - New ways of inspecing shapefile metadata by printing. [@megies] - - More convenient accessing of Record values as attributes. [@philippkraft] - - More convenient shape type name checking. [@megies] - * Add more support and documentation for MultiPatch 3D shapes. - * The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. - * Better documentation of previously unclear aspects, such as field types. - - Bug Fixes: - * More reliable/robust: - - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] - - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. 
[@ShinNoNoir] - - Improved parsing of field value types, fixed errors and made more flexible. - - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] - * Fix some geo interface errors, including checking polygon directions. - * Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] - * Enforce maximum field limit. [@mwtoews] - -VERSION 1.2.12 - * ? - -VERSION 1.2.11 - -2017-04-29 Karim Bahgat - * Fixed bugs when reading and writing empty shapefiles. - * Fixed bug when writing null geometry. - * Fixed misc data type errors. - * Fixed error when reading files with wrong record length. - * Use max field precision when saving decimal numbers. - * Improved shapetype detection. - * Expanded docs on data types. - * General doc additions and travis icon. - -VERSION 1.2.10 - -2016-09-24 Karim Bahgat - * Bump version to fix pip install issue. - -VERSION 1.2.9 - -2016-09-22 Karim Bahgat - * Revert back to fix #66. - -VERSION 1.2.8 - -2016-08-17 Joel Lawhead - * Configured Travis-CI - -VERSION 1.2.5 - -2016-08-16 Joel Lawhead - * Reader speed up through batch unpacking bytes - * Merge README text into markdown file. Remove text version. 
- * Fixed parsing of number of points for some shapes (MULTIPOINTM, MULTIPOINTZ) - -VERSION 1.2.3 - -2015-06-21 Joel Lawhead - *shapefile.py (u) Bugfix for Python3 with Reader.iterShapeRecords() - -VERSION 1.2.2 - -### upcoming (2015/01/09 05:27 +00:00) -- [#11](https://github.com/geospatialpython/pyshp/pull/11) Merge pull request #11 from 7mp/master (@7mp) -- [#1](https://github.com/geospatialpython/pyshp/pull/1) Merge pull request #1 from rgbkrk/patch-1 (@rgbkrk) -- [#13](https://github.com/geospatialpython/pyshp/pull/13) Merge pull request #13 from jzmiller1/patch-1 (@jzmiller1) -- [#16](https://github.com/geospatialpython/pyshp/pull/16) Merge pull request #16 from riggsd/null-friendly (@riggsd) -- [#17](https://github.com/geospatialpython/pyshp/pull/17) Merge pull request #17 from riggsd/no-asserts (@riggsd) -- [#19](https://github.com/geospatialpython/pyshp/pull/19) Merge pull request #19 from razzius/master (@razzius) -- [#20](https://github.com/geospatialpython/pyshp/pull/20) Merge pull request #20 from Brideau/patch-1 (@Brideau) -- [12d69d4](https://github.com/GeospatialPython/pyshp/commit/12d69d47d8c90b445ea22bf5d9530b0c1c710de5) Updated to version 1.2.1 to match PyPI (@GeospatialPython) -- [05b69dc](https://github.com/GeospatialPython/pyshp/commit/05b69dc6b3d58c0dc9a822f6c4b8d45cf8dc9d94) Updated to version 1.2.1 to match PyPI (@GeospatialPython) -- [d2e9f1a](https://github.com/GeospatialPython/pyshp/commit/d2e9f1a41d02cf932484111f45c31781d1f7385a) Typo: recordsIter should be iterRecords (@Brideau) -- [a965aff](https://github.com/GeospatialPython/pyshp/commit/a965aff230aa3f3b85016f7b627609c7e53a2cf9) Format README code sample (@razzius) -- [66e1802](https://github.com/GeospatialPython/pyshp/commit/66e1802013fd3535baa505e15625afaa895ef819) Raise ShapefileException for runtime errors rather than using `assert()` -- [d72723c](https://github.com/GeospatialPython/pyshp/commit/d72723c9e38db8e859b79d95a65c00af1c2ba8ba) Return None when parsing (illegal) NULL 
attribute values in numeric and date fields, like those produced by QGIS -- [783e68c](https://github.com/GeospatialPython/pyshp/commit/783e68c75b8f20c7656ea470dbc5e9496a8ee0ac) Update link to "XBase File Format Description" (@jzmiller1) -- [79cc409](https://github.com/GeospatialPython/pyshp/commit/79cc409362a24caf4a21923419490ee95d557dc3) Added `Reader.iterShapeRecords` to help work with larger files -- [18c5521](https://github.com/GeospatialPython/pyshp/commit/18c5521b89cd1d7968dff8eb03c1ec37ab4307c5) URL Change (@rgbkrk) -- [202143c](https://github.com/GeospatialPython/pyshp/commit/202143c823407ffea07b5400e77b9ded7169f696) README.md TOC Take 2 -- [2cca75c](https://github.com/GeospatialPython/pyshp/commit/2cca75cd09b27bb19a77ffeb68afc535e3c33802) README.md TOC -- [8b5e994](https://github.com/GeospatialPython/pyshp/commit/8b5e994905fd4a70c0f7ce6d814346e6666b280c) README.md -- [f31a3d7](https://github.com/GeospatialPython/pyshp/commit/f31a3d773dd22e65d3e38ad8b034f186a05b4c4d) Update README.txt (@GeospatialPython) - -VERSION 1.2.1 - -2014-05-11 Joel Lawhead - *shapefile.py (u) fixed bug which failed to properly read some dbf fields in Python 3 - -VERSION 1.2.0 - -2013-09-05 Joel Lawhead - *README.txt add example/test for writing a 3D polygon - -VERSION 1.1.9 - -2013-07-27 Joel Lawhead - *shapefile.py (Writer.__shpRecords) fixed inconsistency between Reader and Writer - when referencing "z" and "m" values. This bug caused errors only when editing - 3D shapefiles. - -VERSION 1.1.8 - -2013-07-02 Joel Lawhead - *shapefile.py (Writer.poly()) fixed a bug that resulted in incorrect part indexes - *README.txt updated several errors in the documentation. - -2013-06-25 Joel Lawhead - *shapefile.py (Reader.shapes(),Reader.iterShapes()) Updated to verify the file length by - seeking to the end. A user reported shapefiles in the wild which had incorrect .shp file - lengths reported in the header which crashed when reading or iterating shapes. 
Most - insist on using the .shx file but there's no real reason to do so. - -VERSION 1.1.7 - -2013-06-22 Joel Lawhead - - *shapefile.py (_Shape.__geo_interface__) Added Python __geo_interface__ convention - to export shapefiles as GeoJSON. - - *shapefile.py (Reader.__init__) Used is_string() method to detect filenames passed - as unicode strings. - - *shapefile.py (Reader.iterShapes) Added iterShapes() method to iterate through - geometry records for parsing large files efficiently. - - *shapefile.py (Reader.iterRecords) Added iterRecords() method to iterate through - dbf records efficiently in large files. - - *shapefile.py (Reader.shape) Modified shape() method to use iterShapes() if shx - file is not available. - - *shapefile.py (main) Added __version__ attribute. - - *shapefile.py (Writer.record) Fixed bug which prevents writing the number 0 to - dbf fields. - - *shapefile.py (Reader.__shape) Updated to calculate and seek the start of the next record. The - shapefile spec does not require the content of a geometry record to be as long as the content - length defined in the header. The result is you can delete features without modifying the - record header allowing for empty space in records. - - *shapefile.py (Writer.poly) Added enforcement of closed polygons - - *shapefile.py (Writer.save) Added unique file name generator to use if no file names are passed - to a writer instance when saving (ex. w.save()). The unique file name is returned as a string. - - *README.txt (main) Added tests for iterShapes(), iterRecords(), __geo_interface__() - - *README.txt (main) Updated "bbox" property documentation to match Esri specification. - - - + +VERSION 2.3.1 + +2022-07-28 + Bug fixes: + * Fix recently introduced issue where Reader/Writer closes file-like objects provided by user (#244) + +VERSION 2.3.0 + +2022-04-30 + New Features: + * Added support for pathlib and path-like shapefile filepaths (@mwtoews). + * Allow reading individual file extensions via filepaths. 
+ + Improvements: + * Simplified setup and deployment (@mwtoews) + * Faster shape access when missing shx file + * Switch to named logger (see #240) + + Bug fixes: + * More robust handling of corrupt shapefiles (fixes #235) + * Fix errors when writing to individual file-handles (fixes #237) + * Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) + * Fix test issues in environments without network access (@sebastic, @musicinmybrain). + +VERSION 2.2.0 + +2022-02-02 + New Features: + * Read shapefiles directly from zipfiles. + * Read shapefiles directly from urls. + * Allow fast extraction of only a subset of dbf fields through a `fields` arg. + * Allow fast filtering which shapes to read from the file through a `bbox` arg. + + Improvements: + * More examples and restructuring of README. + * More informative Shape to geojson warnings (see #219). + * Add shapefile.VERBOSE flag to control warnings verbosity (default True). + * Shape object information when calling repr(). + * Faster ring orientation checks, enforce geojson output ring orientation. + + Bug fixes: + * Remove null-padding at end of some record character fields. + * Fix dbf writing error when the number of record list or dict entries didn't match the number of fields. 
+ * Handle rare garbage collection issue after deepcopy (https://github.com/mattijn/topojson/issues/120) + * Fix bug where records and shapes would be assigned incorrect record number (@karanrn) + * Fix typos in docs (@timgates) + +VERSION 2.1.3 + +2021-01-14 + Bug fixes: + * Fix recent bug in geojson hole-in-polygon checking (see #205) + * Misc fixes to allow geo interface dump to json (eg dates as strings) + * Handle additional dbf date null values, and return faulty dates as unicode (see #187) + * Add writer target typecheck + * Fix bugs to allow reading shp/shx/dbf separately + * Allow delayed shapefile loading by passing no args + * Fix error with writing empty z/m shapefile (@mcuprjak) + * Fix signed_area() so ignores z/m coords + * Enforce writing the 11th field name character as null-terminator (only first 10 are used) + * Minor README fixes + * Added more tests + +VERSION 2.1.2 + +2020-09-10 + Bug fixes: + * Fix issue where `warnings.simplefilter('always')` changes global warning behavior [see #203] + +VERSION 2.1.1 + +2020-09-09 + Improvements: + * Handle shapes with no coords and represent as geojson with no coords (GeoJSON null-equivalent) + * Expand testing to Python 3.6, 3.7, 3.8 and PyPy; drop 3.3 and 3.4 [@mwtoews] + * Added pytest testing [@jmoujaes] + + Bug fixes: + * Fix incorrect geo interface handling of multipolygons with complex exterior-hole relations [see #202] + * Enforce shapefile requirement of at least one field, to avoid writing invalid shapefiles [@Jonty] + * Fix Reader geo interface including DeletionFlag field in feature properties [@nnseva] + * Fix polygons not being auto closed, which was accidentally dropped + * Fix error for null geometries in feature geojson + * Misc docstring cleanup [@fiveham] + +VERSION 2.1.0 + +2019-02-15 + New Features: + * Added back read/write support for unicode field names. 
+ * Improved Record representation + * More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() + + Bug fixes: + * Fixed error when reading optional m-values + * Fixed Record attribute autocomplete in Python 3 + * Misc readme cleanup + +VERSION 2.0.1 + +2018-11-05 + * Fix pip install setup.py README decoding error. + +VERSION 2.0.0 + +2018-09-01 + (Note: Some contributor attributions may be missing.) + New Features: + * Full support for unicode text, with custom encoding, and exception handling. + - Means that the Reader returns unicode, and the Writer accepts unicode. + * PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. + * Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: + - Specify filepath/destination and text encoding when creating the Writer. + - The file is written incrementally with each call to shape/record. + - Adding shapes is now done using dedicated methods for each shapetype. + * Reading shapefiles is now more convenient: + - Shapefiles can be opened using the context manager, and files are properly closed. + - Shapefiles can be iterated, have a length, and supports the geo interface. + - New ways of inspecing shapefile metadata by printing. [@megies] + - More convenient accessing of Record values as attributes. [@philippkraft] + - More convenient shape type name checking. [@megies] + * Add more support and documentation for MultiPatch 3D shapes. + * The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. + * Better documentation of previously unclear aspects, such as field types. + + Bug Fixes: + * More reliable/robust: + - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] + - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. 
[@ShinNoNoir] + - Improved parsing of field value types, fixed errors and made more flexible. + - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] + * Fix some geo interface errors, including checking polygon directions. + * Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] + * Enforce maximum field limit. [@mwtoews] + +VERSION 1.2.12 + * ? + +VERSION 1.2.11 + +2017-04-29 Karim Bahgat + * Fixed bugs when reading and writing empty shapefiles. + * Fixed bug when writing null geometry. + * Fixed misc data type errors. + * Fixed error when reading files with wrong record length. + * Use max field precision when saving decimal numbers. + * Improved shapetype detection. + * Expanded docs on data types. + * General doc additions and travis icon. + +VERSION 1.2.10 + +2016-09-24 Karim Bahgat + * Bump version to fix pip install issue. + +VERSION 1.2.9 + +2016-09-22 Karim Bahgat + * Revert back to fix #66. + +VERSION 1.2.8 + +2016-08-17 Joel Lawhead + * Configured Travis-CI + +VERSION 1.2.5 + +2016-08-16 Joel Lawhead + * Reader speed up through batch unpacking bytes + * Merge README text into markdown file. Remove text version. 
+ * Fixed parsing of number of points for some shapes (MULTIPOINTM, MULTIPOINTZ) + +VERSION 1.2.3 + +2015-06-21 Joel Lawhead + *shapefile.py (u) Bugfix for Python3 with Reader.iterShapeRecords() + +VERSION 1.2.2 + +### upcoming (2015/01/09 05:27 +00:00) +- [#11](https://github.com/geospatialpython/pyshp/pull/11) Merge pull request #11 from 7mp/master (@7mp) +- [#1](https://github.com/geospatialpython/pyshp/pull/1) Merge pull request #1 from rgbkrk/patch-1 (@rgbkrk) +- [#13](https://github.com/geospatialpython/pyshp/pull/13) Merge pull request #13 from jzmiller1/patch-1 (@jzmiller1) +- [#16](https://github.com/geospatialpython/pyshp/pull/16) Merge pull request #16 from riggsd/null-friendly (@riggsd) +- [#17](https://github.com/geospatialpython/pyshp/pull/17) Merge pull request #17 from riggsd/no-asserts (@riggsd) +- [#19](https://github.com/geospatialpython/pyshp/pull/19) Merge pull request #19 from razzius/master (@razzius) +- [#20](https://github.com/geospatialpython/pyshp/pull/20) Merge pull request #20 from Brideau/patch-1 (@Brideau) +- [12d69d4](https://github.com/GeospatialPython/pyshp/commit/12d69d47d8c90b445ea22bf5d9530b0c1c710de5) Updated to version 1.2.1 to match PyPI (@GeospatialPython) +- [05b69dc](https://github.com/GeospatialPython/pyshp/commit/05b69dc6b3d58c0dc9a822f6c4b8d45cf8dc9d94) Updated to version 1.2.1 to match PyPI (@GeospatialPython) +- [d2e9f1a](https://github.com/GeospatialPython/pyshp/commit/d2e9f1a41d02cf932484111f45c31781d1f7385a) Typo: recordsIter should be iterRecords (@Brideau) +- [a965aff](https://github.com/GeospatialPython/pyshp/commit/a965aff230aa3f3b85016f7b627609c7e53a2cf9) Format README code sample (@razzius) +- [66e1802](https://github.com/GeospatialPython/pyshp/commit/66e1802013fd3535baa505e15625afaa895ef819) Raise ShapefileException for runtime errors rather than using `assert()` +- [d72723c](https://github.com/GeospatialPython/pyshp/commit/d72723c9e38db8e859b79d95a65c00af1c2ba8ba) Return None when parsing (illegal) NULL 
attribute values in numeric and date fields, like those produced by QGIS +- [783e68c](https://github.com/GeospatialPython/pyshp/commit/783e68c75b8f20c7656ea470dbc5e9496a8ee0ac) Update link to "XBase File Format Description" (@jzmiller1) +- [79cc409](https://github.com/GeospatialPython/pyshp/commit/79cc409362a24caf4a21923419490ee95d557dc3) Added `Reader.iterShapeRecords` to help work with larger files +- [18c5521](https://github.com/GeospatialPython/pyshp/commit/18c5521b89cd1d7968dff8eb03c1ec37ab4307c5) URL Change (@rgbkrk) +- [202143c](https://github.com/GeospatialPython/pyshp/commit/202143c823407ffea07b5400e77b9ded7169f696) README.md TOC Take 2 +- [2cca75c](https://github.com/GeospatialPython/pyshp/commit/2cca75cd09b27bb19a77ffeb68afc535e3c33802) README.md TOC +- [8b5e994](https://github.com/GeospatialPython/pyshp/commit/8b5e994905fd4a70c0f7ce6d814346e6666b280c) README.md +- [f31a3d7](https://github.com/GeospatialPython/pyshp/commit/f31a3d773dd22e65d3e38ad8b034f186a05b4c4d) Update README.txt (@GeospatialPython) + +VERSION 1.2.1 + +2014-05-11 Joel Lawhead + *shapefile.py (u) fixed bug which failed to properly read some dbf fields in Python 3 + +VERSION 1.2.0 + +2013-09-05 Joel Lawhead + *README.txt add example/test for writing a 3D polygon + +VERSION 1.1.9 + +2013-07-27 Joel Lawhead + *shapefile.py (Writer.__shpRecords) fixed inconsistency between Reader and Writer + when referencing "z" and "m" values. This bug caused errors only when editing + 3D shapefiles. + +VERSION 1.1.8 + +2013-07-02 Joel Lawhead + *shapefile.py (Writer.poly()) fixed a bug that resulted in incorrect part indexes + *README.txt updated several errors in the documentation. + +2013-06-25 Joel Lawhead + *shapefile.py (Reader.shapes(),Reader.iterShapes()) Updated to verify the file length by + seeking to the end. A user reported shapefiles in the wild which had incorrect .shp file + lengths reported in the header which crashed when reading or iterating shapes. 
Most + insist on using the .shx file but there's no real reason to do so. + +VERSION 1.1.7 + +2013-06-22 Joel Lawhead + + *shapefile.py (_Shape.__geo_interface__) Added Python __geo_interface__ convention + to export shapefiles as GeoJSON. + + *shapefile.py (Reader.__init__) Used is_string() method to detect filenames passed + as unicode strings. + + *shapefile.py (Reader.iterShapes) Added iterShapes() method to iterate through + geometry records for parsing large files efficiently. + + *shapefile.py (Reader.iterRecords) Added iterRecords() method to iterate through + dbf records efficiently in large files. + + *shapefile.py (Reader.shape) Modified shape() method to use iterShapes() if shx + file is not available. + + *shapefile.py (main) Added __version__ attribute. + + *shapefile.py (Writer.record) Fixed bug which prevents writing the number 0 to + dbf fields. + + *shapefile.py (Reader.__shape) Updated to calculate and seek the start of the next record. The + shapefile spec does not require the content of a geometry record to be as long as the content + length defined in the header. The result is you can delete features without modifying the + record header allowing for empty space in records. + + *shapefile.py (Writer.poly) Added enforcement of closed polygons + + *shapefile.py (Writer.save) Added unique file name generator to use if no file names are passed + to a writer instance when saving (ex. w.save()). The unique file name is returned as a string. + + *README.txt (main) Added tests for iterShapes(), iterRecords(), __geo_interface__() + + *README.txt (main) Updated "bbox" property documentation to match Esri specification. + + + diff --git a/shapefile.py b/shapefile.py index dbdca0f..ddd4a2e 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1,2760 +1,2760 @@ -""" -shapefile.py -Provides read and write support for ESRI Shapefiles. 
-authors: jlawheadgeospatialpython.com -maintainer: karim.bahgat.norwaygmail.com -Compatible with Python versions >= 3.9 -""" - -__version__ = "2.3.1" - -import array -from datetime import date -import io -import logging -import os -from struct import pack, unpack, calcsize, error, Struct -import sys -import tempfile -import time -import zipfile - -from urllib.error import HTTPError -from urllib.parse import urlparse, urlunparse -from urllib.request import urlopen, Request - -from typing import Any, Union, ByteString -from collections.abc import Sequence - -# Create named logger -logger = logging.getLogger(__name__) - - -# Module settings -VERBOSE = True - -# Constants for shape types -NULL = 0 -POINT = 1 -POLYLINE = 3 -POLYGON = 5 -MULTIPOINT = 8 -POINTZ = 11 -POLYLINEZ = 13 -POLYGONZ = 15 -MULTIPOINTZ = 18 -POINTM = 21 -POLYLINEM = 23 -POLYGONM = 25 -MULTIPOINTM = 28 -MULTIPATCH = 31 - -SHAPETYPE_LOOKUP = { - 0: "NULL", - 1: "POINT", - 3: "POLYLINE", - 5: "POLYGON", - 8: "MULTIPOINT", - 11: "POINTZ", - 13: "POLYLINEZ", - 15: "POLYGONZ", - 18: "MULTIPOINTZ", - 21: "POINTM", - 23: "POLYLINEM", - 25: "POLYGONM", - 28: "MULTIPOINTM", - 31: "MULTIPATCH", -} - -TRIANGLE_STRIP = 0 -TRIANGLE_FAN = 1 -OUTER_RING = 2 -INNER_RING = 3 -FIRST_RING = 4 -RING = 5 - -PARTTYPE_LOOKUP = { - 0: "TRIANGLE_STRIP", - 1: "TRIANGLE_FAN", - 2: "OUTER_RING", - 3: "INNER_RING", - 4: "FIRST_RING", - 5: "RING", -} - - -xrange = range -izip = zip - -# Helpers - -MISSING = [None, ""] -NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. - - -def b(v: Any, encoding="utf-8", encodingErrors="strict") -> bytes: - if isinstance(v, str): - # For python 3 encode str to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return b"" - else: - # Force string representation. 
- return str(v).encode(encoding, encodingErrors) - - -def u( - v: Union[bytes, str, None, int, ByteString], - encoding="utf-8", - encodingErrors="strict", -) -> str: - if isinstance(v, bytes): - # For python 3 decode bytes to str. - return v.decode(encoding, encodingErrors) - elif isinstance(v, str): - # Already str. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - -def is_string(v: Any) -> bool: - return isinstance(v, str) - - -def pathlike_obj(path: Any) -> Any: - if isinstance(path, os.PathLike): - return os.fsdecode(path) - else: - return path - - -# Begin - - -class _Array(array.array): - """Converts python tuples to lists of the appropriate type. - Used to unpack different shapefile header parts.""" - - def __repr__(self): - return str(self.tolist()) - - -Point_T = Sequence[float] -Coords_T = Sequence[Point_T] -BBox_T = tuple[float, float, float, float] - - -def signed_area(coords: Coords_T, fast: bool = False) -> float: - """Return the signed area enclosed by a ring using the linear time - algorithm. A value >= 0 indicates a counter-clockwise oriented ring. - A faster version is possible by setting 'fast' to True, which returns - 2x the area, e.g. if you're only interested in the sign of the area. - """ - xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values - xs.append(xs[1]) - ys.append(ys[1]) - area2 = sum(xs[i] * (ys[i + 1] - ys[i - 1]) for i in range(1, len(coords))) - if fast: - return area2 - else: - return area2 / 2.0 - - -def is_cw(coords: Coords_T) -> bool: - """Returns True if a polygon ring has clockwise orientation, determined - by a negatively signed area. 
- """ - area2 = signed_area(coords, fast=True) - return area2 < 0 - - -def rewind(coords: Coords_T) -> list[Point_T]: - """Returns the input coords in reversed order.""" - return list(reversed(coords)) - - -def ring_bbox(coords: Coords_T) -> BBox_T: - """Calculates and returns the bounding box of a ring.""" - xs, ys = zip(*coords) - bbox = min(xs), min(ys), max(xs), max(ys) - return bbox - - -def bbox_overlap(bbox1: BBox_T, bbox2: BBox_T) -> bool: - """Tests whether two bounding boxes overlap, returning a boolean""" - xmin1, ymin1, xmax1, ymax1 = bbox1 - xmin2, ymin2, xmax2, ymax2 = bbox2 - overlap = xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2 - return overlap - - -def bbox_contains(bbox1: BBox_T, bbox2: BBox_T) -> bool: - """Tests whether bbox1 fully contains bbox2, returning a boolean""" - xmin1, ymin1, xmax1, ymax1 = bbox1 - xmin2, ymin2, xmax2, ymax2 = bbox2 - contains = xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2 - return contains - - -def ring_contains_point(coords: Coords_T, p: Point_T) -> bool: - """Fast point-in-polygon crossings algorithm, MacMartin optimization. - - Adapted from code by Eric Haynes - http://www.realtimerendering.com/resources/GraphicsGems//gemsiv/ptpoly_haines/ptinpoly.c - - Original description: - Shoot a test ray along +X axis. The strategy, from MacMartin, is to - compare vertex Y values to the testing point's Y and quickly discard - edges which are entirely to one side of the test ray. - """ - tx, ty = p - - # get initial test bit for above/below X axis - vtx0 = coords[0] - yflag0 = vtx0[1] >= ty - - inside_flag = False - for vtx1 in coords[1:]: - yflag1 = vtx1[1] >= ty - # check if endpoints straddle (are on opposite sides) of X axis - # (i.e. the Y's differ); if so, +X ray could intersect this edge. - if yflag0 != yflag1: - xflag0 = vtx0[0] >= tx - # check if endpoints are on same side of the Y axis (i.e. X's - # are the same); if so, it's easy to test if edge hits or misses. 
- if xflag0 == (vtx1[0] >= tx): - # if edge's X values both right of the point, must hit - if xflag0: - inside_flag = not inside_flag - else: - # compute intersection of pgon segment with +X ray, note - # if >= point's X; if so, the ray hits it. - if ( - vtx1[0] - (vtx1[1] - ty) * (vtx0[0] - vtx1[0]) / (vtx0[1] - vtx1[1]) - ) >= tx: - inside_flag = not inside_flag - - # move to next pair of vertices, retaining info as possible - yflag0 = yflag1 - vtx0 = vtx1 - - return inside_flag - - -def ring_sample(coords: Coords_T, ccw: bool = False) -> Point_T: - """Return a sample point guaranteed to be within a ring, by efficiently - finding the first centroid of a coordinate triplet whose orientation - matches the orientation of the ring and passes the point-in-ring test. - The orientation of the ring is assumed to be clockwise, unless ccw - (counter-clockwise) is set to True. - """ - triplet = [] - - def itercoords(): - # iterate full closed ring - for p in coords: - yield p - # finally, yield the second coordinate to the end to allow checking the last triplet - yield coords[1] - - for p in itercoords(): - # add point to triplet (but not if duplicate) - if p not in triplet: - triplet.append(p) - - # new triplet, try to get sample - if len(triplet) == 3: - # check that triplet does not form a straight line (not a triangle) - is_straight_line = (triplet[0][1] - triplet[1][1]) * ( - triplet[0][0] - triplet[2][0] - ) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) - if not is_straight_line: - # get triplet orientation - closed_triplet = triplet + [triplet[0]] - triplet_ccw = not is_cw(closed_triplet) - # check that triplet has the same orientation as the ring (means triangle is inside the ring) - if ccw == triplet_ccw: - # get triplet centroid - xs, ys = zip(*triplet) - xmean, ymean = sum(xs) / 3.0, sum(ys) / 3.0 - # check that triplet centroid is truly inside the ring - if ring_contains_point(coords, (xmean, ymean)): - return xmean, ymean - - # failed to 
get sample point from this triplet - # remove oldest triplet coord to allow iterating to next triplet - triplet.pop(0) - - else: - raise Exception("Unexpected error: Unable to find a ring sample point.") - - -def ring_contains_ring(coords1, coords2) -> bool: - """Returns True if all vertexes in coords2 are fully inside coords1.""" - return all((ring_contains_point(coords1, p2) for p2 in coords2)) - - -def organize_polygon_rings(rings, return_errors=None): - """Organize a list of coordinate rings into one or more polygons with holes. - Returns a list of polygons, where each polygon is composed of a single exterior - ring, and one or more interior holes. If a return_errors dict is provided (optional), - any errors encountered will be added to it. - - Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). - Rings are determined as exteriors if they run in clockwise direction, or interior - holes if they run in counter-clockwise direction. This method is used to construct - GeoJSON (multi)polygons from the shapefile polygon shape type, which does not - explicitly store the structure of the polygons beyond exterior/interior ring orientation. 
- """ - # first iterate rings and classify as exterior or hole - exteriors = [] - holes = [] - for ring in rings: - # shapefile format defines a polygon as a sequence of rings - # where exterior rings are clockwise, and holes counterclockwise - if is_cw(ring): - # ring is exterior - exteriors.append(ring) - else: - # ring is a hole - holes.append(ring) - - # if only one exterior, then all holes belong to that exterior - if len(exteriors) == 1: - # exit early - poly = [exteriors[0]] + holes - polys = [poly] - return polys - - # multiple exteriors, ie multi-polygon, have to group holes with correct exterior - # shapefile format does not specify which holes belong to which exteriors - # so have to do efficient multi-stage checking of hole-to-exterior containment - elif len(exteriors) > 1: - # exit early if no holes - if not holes: - polys = [] - for ext in exteriors: - poly = [ext] - polys.append(poly) - return polys - - # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = dict([(hole_i, []) for hole_i in xrange(len(holes))]) - exterior_bboxes = [ring_bbox(ring) for ring in exteriors] - for hole_i in hole_exteriors.keys(): - hole_bbox = ring_bbox(holes[hole_i]) - for ext_i, ext_bbox in enumerate(exterior_bboxes): - if bbox_contains(ext_bbox, hole_bbox): - hole_exteriors[hole_i].append(ext_i) - - # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test - for hole_i, exterior_candidates in hole_exteriors.items(): - if len(exterior_candidates) > 1: - # get hole sample point - ccw = not is_cw(holes[hole_i]) - hole_sample = ring_sample(holes[hole_i], ccw=ccw) - # collect new exterior candidates - new_exterior_candidates = [] - for ext_i in exterior_candidates: - # check that hole sample point is inside exterior - hole_in_exterior = ring_contains_point( - exteriors[ext_i], hole_sample - ) - if hole_in_exterior: - new_exterior_candidates.append(ext_i) - - # set new exterior candidates 
- hole_exteriors[hole_i] = new_exterior_candidates - - # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole - for hole_i, exterior_candidates in hole_exteriors.items(): - if len(exterior_candidates) > 1: - # exterior candidate with the smallest area is the hole's most immediate parent - ext_i = sorted( - exterior_candidates, - key=lambda x: abs(signed_area(exteriors[x], fast=True)), - )[0] - hole_exteriors[hole_i] = [ext_i] - - # separate out holes that are orphaned (not contained by any exterior) - orphan_holes = [] - for hole_i, exterior_candidates in list(hole_exteriors.items()): - if not exterior_candidates: - orphan_holes.append(hole_i) - del hole_exteriors[hole_i] - continue - - # each hole should now only belong to one exterior, group into exterior-holes polygons - polys = [] - for ext_i, ext in enumerate(exteriors): - poly = [ext] - # find relevant holes - poly_holes = [] - for hole_i, exterior_candidates in list(hole_exteriors.items()): - # hole is relevant if previously matched with this exterior - if exterior_candidates[0] == ext_i: - poly_holes.append(holes[hole_i]) - poly += poly_holes - polys.append(poly) - - # add orphan holes as exteriors - for hole_i in orphan_holes: - ext = holes[hole_i] - # add as single exterior without any holes - poly = [ext] - polys.append(poly) - - if orphan_holes and return_errors is not None: - return_errors["polygon_orphaned_holes"] = len(orphan_holes) - - return polys - - # no exteriors, be nice and assume due to incorrect winding order - else: - if return_errors is not None: - return_errors["polygon_only_holes"] = len(holes) - exteriors = holes - # add as single exterior without any holes - polys = [[ext] for ext in exteriors] - return polys - - -class Shape: - def __init__( - self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None - ): - """Stores the geometry of the different shape types - specified in the Shapefile spec. 
Shape types are - usually point, polyline, or polygons. Every shape type - except the "Null" type contains points at some level for - example vertices in a polygon. If a shape type has - multiple shapes containing points within a single - geometry record then those shapes are called parts. Parts - are designated by their starting index in geometry record's - list of shapes. For MultiPatch geometry, partTypes designates - the patch type of each of the parts. - """ - self.shapeType = shapeType - self.points = points or [] - self.parts = parts or [] - if partTypes: - self.partTypes = partTypes - - # and a dict to silently record any errors encountered - self._errors = {} - - # add oid - if oid is not None: - self.__oid = oid - else: - self.__oid = -1 - - @property - def __geo_interface__(self): - if self.shapeType in [POINT, POINTM, POINTZ]: - # point - if len(self.points) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "Point", "coordinates": tuple()} - else: - return {"type": "Point", "coordinates": tuple(self.points[0])} - elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: - if len(self.points) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "MultiPoint", "coordinates": []} - else: - # multipoint - return { - "type": "MultiPoint", - "coordinates": [tuple(p) for p in self.points], - } - elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: - if len(self.parts) == 0: - # the shape has no coordinate information, i.e. 
is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "LineString", "coordinates": []} - elif len(self.parts) == 1: - # linestring - return { - "type": "LineString", - "coordinates": [tuple(p) for p in self.points], - } - else: - # multilinestring - ps = None - coordinates = [] - for part in self.parts: - if ps is None: - ps = part - continue - else: - coordinates.append([tuple(p) for p in self.points[ps:part]]) - ps = part - else: - coordinates.append([tuple(p) for p in self.points[part:]]) - return {"type": "MultiLineString", "coordinates": coordinates} - elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: - if len(self.parts) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "Polygon", "coordinates": []} - else: - # get all polygon rings - rings = [] - for i in xrange(len(self.parts)): - # get indexes of start and end points of the ring - start = self.parts[i] - try: - end = self.parts[i + 1] - except IndexError: - end = len(self.points) - - # extract the points that make up the ring - ring = [tuple(p) for p in self.points[start:end]] - rings.append(ring) - - # organize rings into list of polygons, where each polygon is defined as list of rings. - # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). 
- polys = organize_polygon_rings(rings, self._errors) - - # if VERBOSE is True, issue detailed warning about any shape errors - # encountered during the Shapefile to GeoJSON conversion - if VERBOSE and self._errors: - header = "Possible issue encountered when converting Shape #{} to GeoJSON: ".format( - self.oid - ) - orphans = self._errors.get("polygon_orphaned_holes", None) - if orphans: - msg = ( - header - + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ -but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ -orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ -encoded as GeoJSON exterior rings instead of holes." - ) - logger.warning(msg) - only_holes = self._errors.get("polygon_only_holes", None) - if only_holes: - msg = ( - header - + "Shapefile format requires that polygons contain at least one exterior ring, \ -but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ -still included but were encoded as GeoJSON exterior rings instead of holes." - ) - logger.warning(msg) - - # return as geojson - if len(polys) == 1: - return {"type": "Polygon", "coordinates": polys[0]} - else: - return {"type": "MultiPolygon", "coordinates": polys} - - else: - raise Exception( - 'Shape type "%s" cannot be represented as GeoJSON.' 
- % SHAPETYPE_LOOKUP[self.shapeType] - ) - - @staticmethod - def _from_geojson(geoj): - # create empty shape - shape = Shape() - # set shapeType - geojType = geoj["type"] if geoj else "Null" - if geojType == "Null": - shapeType = NULL - elif geojType == "Point": - shapeType = POINT - elif geojType == "LineString": - shapeType = POLYLINE - elif geojType == "Polygon": - shapeType = POLYGON - elif geojType == "MultiPoint": - shapeType = MULTIPOINT - elif geojType == "MultiLineString": - shapeType = POLYLINE - elif geojType == "MultiPolygon": - shapeType = POLYGON - else: - raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) - shape.shapeType = shapeType - - # set points and parts - if geojType == "Point": - shape.points = [geoj["coordinates"]] - shape.parts = [0] - elif geojType in ("MultiPoint", "LineString"): - shape.points = geoj["coordinates"] - shape.parts = [0] - elif geojType in ("Polygon"): - points = [] - parts = [] - index = 0 - for i, ext_or_hole in enumerate(geoj["coordinates"]): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. 
- if i == 0 and not is_cw(ext_or_hole): - # flip exterior direction - ext_or_hole = rewind(ext_or_hole) - elif i > 0 and is_cw(ext_or_hole): - # flip hole direction - ext_or_hole = rewind(ext_or_hole) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - shape.points = points - shape.parts = parts - elif geojType in ("MultiLineString"): - points = [] - parts = [] - index = 0 - for linestring in geoj["coordinates"]: - points.extend(linestring) - parts.append(index) - index += len(linestring) - shape.points = points - shape.parts = parts - elif geojType in ("MultiPolygon"): - points = [] - parts = [] - index = 0 - for polygon in geoj["coordinates"]: - for i, ext_or_hole in enumerate(polygon): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. - if i == 0 and not is_cw(ext_or_hole): - # flip exterior direction - ext_or_hole = rewind(ext_or_hole) - elif i > 0 and is_cw(ext_or_hole): - # flip hole direction - ext_or_hole = rewind(ext_or_hole) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - shape.points = points - shape.parts = parts - return shape - - @property - def oid(self): - """The index position of the shape in the original shapefile""" - return self.__oid - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def __repr__(self): - return "Shape #{}: {}".format(self.__oid, self.shapeTypeName) - - -class _Record(list): - """ - A class to hold a record. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. - In addition to the list interface, the values of the record - can also be retrieved using the field's name. For example if the dbf contains - a field ID at position 0, the ID can be retrieved with the position, the field name - as a key, or the field name as an attribute. 
- - >>> # Create a Record with one field, normally the record is created by the Reader class - >>> r = _Record({'ID': 0}, [0]) - >>> print(r[0]) - >>> print(r['ID']) - >>> print(r.ID) - """ - - def __init__(self, field_positions, values, oid=None): - """ - A Record should be created by the Reader class - - :param field_positions: A dict mapping field names to field positions - :param values: A sequence of values - :param oid: The object id, an int (optional) - """ - self.__field_positions = field_positions - if oid is not None: - self.__oid = oid - else: - self.__oid = -1 - list.__init__(self, values) - - def __getattr__(self, item): - """ - __getattr__ is called if an attribute is used that does - not exist in the normal sense. For example r=Record(...), r.ID - calls r.__getattr__('ID'), but r.index(5) calls list.index(r, 5) - :param item: The field name, used as attribute - :return: Value of the field - :raises: AttributeError, if item is not a field of the shapefile - and IndexError, if the field exists but the field's - corresponding value in the Record does not exist - """ - try: - if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() - raise AttributeError("_Record does not implement __setstate__") - index = self.__field_positions[item] - return list.__getitem__(self, index) - except KeyError: - raise AttributeError("{} is not a field name".format(item)) - except IndexError: - raise IndexError( - "{} found as a field but not enough values available.".format(item) - ) - - def __setattr__(self, key, value): - """ - Sets a value of a field attribute - :param key: The field name - :param value: the value of that field - :return: None - :raises: AttributeError, if key is not a field of the shapefile - """ - if key.startswith("_"): # Prevent infinite loop when setting mangled attribute - return list.__setattr__(self, key, value) - try: - index = self.__field_positions[key] - return list.__setitem__(self, index, value) - except KeyError: - raise 
AttributeError("{} is not a field name".format(key)) - - def __getitem__(self, item): - """ - Extends the normal list item access with - access using a fieldname - - For example r['ID'], r[0] - :param item: Either the position of the value or the name of a field - :return: the value of the field - """ - try: - return list.__getitem__(self, item) - except TypeError: - try: - index = self.__field_positions[item] - except KeyError: - index = None - if index is not None: - return list.__getitem__(self, index) - else: - raise IndexError('"{}" is not a field name and not an int'.format(item)) - - def __setitem__(self, key, value): - """ - Extends the normal list item access with - access using a fieldname - - For example r['ID']=2, r[0]=2 - :param key: Either the position of the value or the name of a field - :param value: the new value of the field - """ - try: - return list.__setitem__(self, key, value) - except TypeError: - index = self.__field_positions.get(key) - if index is not None: - return list.__setitem__(self, index, value) - else: - raise IndexError("{} is not a field name and not an int".format(key)) - - @property - def oid(self): - """The index position of the record in the original shapefile""" - return self.__oid - - def as_dict(self, date_strings=False): - """ - Returns this Record as a dictionary using the field names as keys - :return: dict - """ - dct = dict((f, self[i]) for f, i in self.__field_positions.items()) - if date_strings: - for k, v in dct.items(): - if isinstance(v, date): - dct[k] = "{:04d}{:02d}{:02d}".format(v.year, v.month, v.day) - return dct - - def __repr__(self): - return "Record #{}: {}".format(self.__oid, list(self)) - - def __dir__(self): - """ - Helps to show the field names in an interactive environment like IPython. 
- See: http://ipython.readthedocs.io/en/stable/config/integrating.html - - :return: List of method names and fields - """ - default = list( - dir(type(self)) - ) # default list methods and attributes of this class - fnames = list( - self.__field_positions.keys() - ) # plus field names (random order if Python version < 3.6) - return default + fnames - - -class ShapeRecord: - """A ShapeRecord object containing a shape along with its attributes. - Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" - - def __init__(self, shape=None, record=None): - self.shape = shape - self.record = record - - @property - def __geo_interface__(self): - return { - "type": "Feature", - "properties": self.record.as_dict(date_strings=True), - "geometry": None - if self.shape.shapeType == NULL - else self.shape.__geo_interface__, - } - - -class Shapes(list): - """A class to hold a list of Shape objects. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. - In addition to the list interface, this also provides the GeoJSON __geo_interface__ - to return a GeometryCollection dictionary.""" - - def __repr__(self): - return "Shapes: {}".format(list(self)) - - @property - def __geo_interface__(self): - # Note: currently this will fail if any of the shapes are null-geometries - # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords - collection = { - "type": "GeometryCollection", - "geometries": [shape.__geo_interface__ for shape in self], - } - return collection - - -class ShapeRecords(list): - """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. 
- In addition to the list interface, this also provides the GeoJSON __geo_interface__ - to return a FeatureCollection dictionary.""" - - def __repr__(self): - return "ShapeRecords: {}".format(list(self)) - - @property - def __geo_interface__(self): - collection = { - "type": "FeatureCollection", - "features": [shaperec.__geo_interface__ for shaperec in self], - } - return collection - - -class ShapefileException(Exception): - """An exception to handle shapefile specific problems.""" - - pass - - -class Reader: - """Reads the three files of a shapefile as a unit or - separately. If one of the three files (.shp, .shx, - .dbf) is missing no exception is thrown until you try - to call a method that depends on that particular file. - The .shx index file is used if available for efficiency - but is not required to read the geometry from the .shp - file. The "shapefile" argument in the constructor is the - name of the file you want to open, and can be the path - to a shapefile on a local filesystem, inside a zipfile, - or a url. - - You can instantiate a Reader without specifying a shapefile - and then specify one later with the load() method. - - Only the shapefile headers are read upon loading. Content - within each file is only accessed when required and as - efficiently as possible. Shapefiles are usually not large - but they can be. 
- """ - - def __init__(self, *args, **kwargs): - self.shp = None - self.shx = None - self.dbf = None - self._files_to_close = [] - self.shapeName = "Not specified" - self._offsets = [] - self.shpLength = None - self.numRecords = None - self.numShapes = None - self.fields = [] - self.__dbfHdrLength = 0 - self.__fieldLookup = {} - self.encoding = kwargs.pop("encoding", "utf-8") - self.encodingErrors = kwargs.pop("encodingErrors", "strict") - # See if a shapefile name was passed as the first argument - if len(args) > 0: - path = pathlike_obj(args[0]) - if is_string(path): - if ".zip" in path: - # Shapefile is inside a zipfile - if path.count(".zip") > 1: - # Multiple nested zipfiles - raise ShapefileException( - "Reading from multiple nested zipfiles is not supported: %s" - % path - ) - # Split into zipfile and shapefile paths - if path.endswith(".zip"): - zpath = path - shapefile = None - else: - zpath = path[: path.find(".zip") + 4] - shapefile = path[path.find(".zip") + 4 + 1 :] - # Create a zip file handle - if zpath.startswith("http"): - # Zipfile is from a url - # Download to a temporary url and treat as normal zipfile - req = Request( - zpath, - headers={ - "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" - }, - ) - resp = urlopen(req) - # write zipfile data to a read+write tempfile and use as source, gets deleted when garbage collected - zipfileobj = tempfile.NamedTemporaryFile( - mode="w+b", suffix=".zip", delete=True - ) - zipfileobj.write(resp.read()) - zipfileobj.seek(0) - else: - # Zipfile is from a file - zipfileobj = open(zpath, mode="rb") - # Open the zipfile archive - with zipfile.ZipFile(zipfileobj, "r") as archive: - if not shapefile: - # Only the zipfile path is given - # Inspect zipfile contents to find the full shapefile path - shapefiles = [ - name - for name in archive.namelist() - if (name.endswith(".SHP") or name.endswith(".shp")) - ] - # The zipfile must 
contain exactly one shapefile - if len(shapefiles) == 0: - raise ShapefileException( - "Zipfile does not contain any shapefiles" - ) - elif len(shapefiles) == 1: - shapefile = shapefiles[0] - else: - raise ShapefileException( - "Zipfile contains more than one shapefile: %s. Please specify the full \ - path to the shapefile you would like to open." - % shapefiles - ) - # Try to extract file-like objects from zipfile - shapefile = os.path.splitext(shapefile)[ - 0 - ] # root shapefile name - for ext in ["SHP", "SHX", "DBF", "shp", "shx", "dbf"]: - try: - member = archive.open(shapefile + "." + ext) - # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile( - mode="w+b", delete=True - ) - fileobj.write(member.read()) - fileobj.seek(0) - setattr(self, ext.lower(), fileobj) - self._files_to_close.append(fileobj) - - except BaseException: - pass - # Close and delete the temporary zipfile - try: - zipfileobj.close() - except OSError: - pass - # Try to load shapefile - if self.shp or self.dbf: - # Load and exit early - self.load() - return - else: - raise ShapefileException( - "No shp or dbf file found in zipfile: %s" % path - ) - - elif path.startswith("http"): - # Shapefile is from a url - # Download each file to temporary path and treat as normal shapefile path - urlinfo = urlparse(path) - urlpath = urlinfo[2] - urlpath, _ = os.path.splitext(urlpath) - shapefile = os.path.basename(urlpath) - for ext in ["shp", "shx", "dbf"]: - try: - _urlinfo = list(urlinfo) - _urlinfo[2] = urlpath + "." 
+ ext - _path = urlunparse(_urlinfo) - req = Request( - _path, - headers={ - "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" - }, - ) - resp = urlopen(req) - # write url data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile( - mode="w+b", delete=True - ) - fileobj.write(resp.read()) - fileobj.seek(0) - setattr(self, ext, fileobj) - self._files_to_close.append(fileobj) - except HTTPError: - pass - if self.shp or self.dbf: - # Load and exit early - self.load() - return - else: - raise ShapefileException( - "No shp or dbf file found at url: %s" % path - ) - - else: - # Local file path to a shapefile - # Load and exit early - self.load(path) - return - - # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) - if "shp" in kwargs.keys(): - if hasattr(kwargs["shp"], "read"): - self.shp = kwargs["shp"] - # Copy if required - try: - self.shp.seek(0) - except (NameError, io.UnsupportedOperation): - self.shp = io.BytesIO(self.shp.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shp"]) - self.load_shp(baseName) - - if "shx" in kwargs.keys(): - if hasattr(kwargs["shx"], "read"): - self.shx = kwargs["shx"] - # Copy if required - try: - self.shx.seek(0) - except (NameError, io.UnsupportedOperation): - self.shx = io.BytesIO(self.shx.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shx"]) - self.load_shx(baseName) - - if "dbf" in kwargs.keys(): - if hasattr(kwargs["dbf"], "read"): - self.dbf = kwargs["dbf"] - # Copy if required - try: - self.dbf.seek(0) - except (NameError, io.UnsupportedOperation): - self.dbf = io.BytesIO(self.dbf.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["dbf"]) - self.load_dbf(baseName) - - # Load the files - if self.shp or self.dbf: - self.load() - - def __str__(self): - """ - Use some general info on the shapefile as __str__ - """ - info = ["shapefile 
Reader"] - if self.shp: - info.append( - " {} shapes (type '{}')".format( - len(self), SHAPETYPE_LOOKUP[self.shapeType] - ) - ) - if self.dbf: - info.append( - " {} records ({} fields)".format(len(self), len(self.fields)) - ) - return "\n".join(info) - - def __enter__(self): - """ - Enter phase of context manager. - """ - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """ - Exit phase of context manager, close opened files. - """ - self.close() - - def __len__(self): - """Returns the number of shapes/records in the shapefile.""" - if self.dbf: - # Preferably use dbf record count - if self.numRecords is None: - self.__dbfHeader() - - return self.numRecords - - elif self.shp: - # Otherwise use shape count - if self.shx: - if self.numShapes is None: - self.__shxHeader() - - return self.numShapes - - else: - # Index file not available, iterate all shapes to get total count - if self.numShapes is None: - # Determine length of shp file - shp = self.shp - checkpoint = shp.tell() - shp.seek(0, 2) - shpLength = shp.tell() - shp.seek(100) - # Do a fast shape iteration until end of file. 
- unpack = Struct(">2i").unpack - offsets = [] - pos = shp.tell() - while pos < shpLength: - offsets.append(pos) - # Unpack the shape header only - (recNum, recLength) = unpack(shp.read(8)) - # Jump to next shape position - pos += 8 + (2 * recLength) - shp.seek(pos) - # Set numShapes and offset indices - self.numShapes = len(offsets) - self._offsets = offsets - # Return to previous file position - shp.seek(checkpoint) - - return self.numShapes - - else: - # No file loaded yet, treat as 'empty' shapefile - return 0 - - def __iter__(self): - """Iterates through the shapes/records in the shapefile.""" - for shaperec in self.iterShapeRecords(): - yield shaperec - - @property - def __geo_interface__(self): - shaperecords = self.shapeRecords() - fcollection = shaperecords.__geo_interface__ - fcollection["bbox"] = list(self.bbox) - return fcollection - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def load(self, shapefile=None): - """Opens a shapefile from a filename or file-like - object. Normally this method would be called by the - constructor with the file name as an argument.""" - if shapefile: - (shapeName, ext) = os.path.splitext(shapefile) - self.shapeName = shapeName - self.load_shp(shapeName) - self.load_shx(shapeName) - self.load_dbf(shapeName) - if not (self.shp or self.dbf): - raise ShapefileException( - "Unable to open %s.dbf or %s.shp." 
% (shapeName, shapeName) - ) - if self.shp: - self.__shpHeader() - if self.dbf: - self.__dbfHeader() - if self.shx: - self.__shxHeader() - - def load_shp(self, shapefile_name): - """ - Attempts to load file with .shp extension as both lower and upper case - """ - shp_ext = "shp" - try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext), "rb") - self._files_to_close.append(self.shp) - except IOError: - try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext.upper()), "rb") - self._files_to_close.append(self.shp) - except IOError: - pass - - def load_shx(self, shapefile_name): - """ - Attempts to load file with .shx extension as both lower and upper case - """ - shx_ext = "shx" - try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext), "rb") - self._files_to_close.append(self.shx) - except IOError: - try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext.upper()), "rb") - self._files_to_close.append(self.shx) - except IOError: - pass - - def load_dbf(self, shapefile_name): - """ - Attempts to load file with .dbf extension as both lower and upper case - """ - dbf_ext = "dbf" - try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext), "rb") - self._files_to_close.append(self.dbf) - except IOError: - try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext.upper()), "rb") - self._files_to_close.append(self.dbf) - except IOError: - pass - - def __del__(self): - self.close() - - def close(self): - # Close any files that the reader opened (but not those given by user) - for attribute in self._files_to_close: - if hasattr(attribute, "close"): - try: - attribute.close() - except IOError: - pass - self._files_to_close = [] - - def __getFileObj(self, f): - """Checks to see if the requested shapefile file object is - available. If not a ShapefileException is raised.""" - if not f: - raise ShapefileException( - "Shapefile Reader requires a shapefile or file-like object." 
- ) - if self.shp and self.shpLength is None: - self.load() - if self.dbf and len(self.fields) == 0: - self.load() - return f - - def __restrictIndex(self, i): - """Provides list-like handling of a record index with a clearer - error message if the index is out of bounds.""" - if self.numRecords: - rmax = self.numRecords - 1 - if abs(i) > rmax: - raise IndexError("Shape or Record index out of range.") - if i < 0: - i = range(self.numRecords)[i] - return i - - def __shpHeader(self): - """Reads the header information from a .shp file.""" - if not self.shp: - raise ShapefileException( - "Shapefile Reader requires a shapefile or file-like object. (no shp file found" - ) - shp = self.shp - # File length (16-bit word * 2 = bytes) - shp.seek(24) - self.shpLength = unpack(">i", shp.read(4))[0] * 2 - # Shape type - shp.seek(32) - self.shapeType = unpack(" NODATA: - self.mbox.append(m) - else: - self.mbox.append(None) - - def __shape(self, oid=None, bbox=None): - """Returns the header info and geometry for a single shape.""" - f = self.__getFileObj(self.shp) - record = Shape(oid=oid) - nParts = nPoints = zmin = zmax = mmin = mmax = None - (recNum, recLength) = unpack(">2i", f.read(8)) - # Determine the start of the next record - next = f.tell() + (2 * recLength) - shapeType = unpack("= 16: - (mmin, mmax) = unpack("<2d", f.read(16)) - # Measure values less than -10e38 are nodata values according to the spec - if next - f.tell() >= nPoints * 8: - record.m = [] - for m in _Array("d", unpack("<%sd" % nPoints, f.read(nPoints * 8))): - if m > NODATA: - record.m.append(m) - else: - record.m.append(None) - else: - record.m = [None for _ in range(nPoints)] - # Read a single point - if shapeType in (1, 11, 21): - record.points = [_Array("d", unpack("<2d", f.read(16)))] - if bbox is not None: - # create bounding box for Point by duplicating coordinates - point_bbox = list(record.points[0] + record.points[0]) - # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, 
point_bbox): - f.seek(next) - return None - # Read a single Z value - if shapeType == 11: - record.z = list(unpack("= 8: - (m,) = unpack(" NODATA: - record.m = [m] - else: - record.m = [None] - # Seek to the end of this record as defined by the record header because - # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. - f.seek(next) - return record - - def __shxHeader(self): - """Reads the header information from a .shx file.""" - shx = self.shx - if not shx: - raise ShapefileException( - "Shapefile Reader requires a shapefile or file-like object. (no shx file found" - ) - # File length (16-bit word * 2 = bytes) - header length - shx.seek(24) - shxRecordLength = (unpack(">i", shx.read(4))[0] * 2) - 100 - self.numShapes = shxRecordLength // 8 - - def __shxOffsets(self): - """Reads the shape offset positions from a .shx file""" - shx = self.shx - if not shx: - raise ShapefileException( - "Shapefile Reader requires a shapefile or file-like object. (no shx file found" - ) - # Jump to the first record. - shx.seek(100) - # Each index record consists of two nrs, we only want the first one - shxRecords = _Array("i", shx.read(2 * self.numShapes * 4)) - if sys.byteorder != "big": - shxRecords.byteswap() - self._offsets = [2 * el for el in shxRecords[::2]] - - def __shapeIndex(self, i=None): - """Returns the offset in a .shp file for a shape based on information - in the .shx index file.""" - shx = self.shx - # Return None if no shx or no index requested - if not shx or i is None: - return None - # At this point, we know the shx file exists - if not self._offsets: - self.__shxOffsets() - return self._offsets[i] - - def shape(self, i=0, bbox=None): - """Returns a shape object for a shape in the geometry - record file. - If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), - returns None if the shape is not within that region. 
- """ - shp = self.__getFileObj(self.shp) - i = self.__restrictIndex(i) - offset = self.__shapeIndex(i) - if not offset: - # Shx index not available. - # Determine length of shp file - shp.seek(0, 2) - shpLength = shp.tell() - shp.seek(100) - # Do a fast shape iteration until the requested index or end of file. - unpack = Struct(">2i").unpack - _i = 0 - offset = shp.tell() - while offset < shpLength: - if _i == i: - # Reached the requested index, exit loop with the offset value - break - # Unpack the shape header only - (recNum, recLength) = unpack(shp.read(8)) - # Jump to next shape position - offset += 8 + (2 * recLength) - shp.seek(offset) - _i += 1 - # If the index was not found, it likely means the .shp file is incomplete - if _i != i: - raise ShapefileException( - "Shape index {} is out of bounds; the .shp file only contains {} shapes".format( - i, _i - ) - ) - - # Seek to the offset and read the shape - shp.seek(offset) - return self.__shape(oid=i, bbox=bbox) - - def shapes(self, bbox=None): - """Returns all shapes in a shapefile. - To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. - """ - shapes = Shapes() - shapes.extend(self.iterShapes(bbox=bbox)) - return shapes - - def iterShapes(self, bbox=None): - """Returns a generator of shapes in a shapefile. Useful - for handling large shapefiles. - To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. - """ - shp = self.__getFileObj(self.shp) - # Found shapefiles which report incorrect - # shp file length in the header. Can't trust - # that so we seek to the end of the file - # and figure it out. - shp.seek(0, 2) - shpLength = shp.tell() - shp.seek(100) - - if self.numShapes: - # Iterate exactly the number of shapes from shx header - for i in xrange(self.numShapes): - # MAYBE: check if more left of file or exit early? 
- shape = self.__shape(oid=i, bbox=bbox) - if shape: - yield shape - else: - # No shx file, unknown nr of shapes - # Instead iterate until reach end of file - # Collect the offset indices during iteration - i = 0 - offsets = [] - pos = shp.tell() - while pos < shpLength: - offsets.append(pos) - shape = self.__shape(oid=i, bbox=bbox) - pos = shp.tell() - if shape: - yield shape - i += 1 - # Entire shp file consumed - # Update the number of shapes and list of offsets - assert i == len(offsets) - self.numShapes = i - self._offsets = offsets - - def __dbfHeader(self): - """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" - if not self.dbf: - raise ShapefileException( - "Shapefile Reader requires a shapefile or file-like object. (no dbf file found)" - ) - dbf = self.dbf - # read relevant header parts - dbf.seek(0) - self.numRecords, self.__dbfHdrLength, self.__recordLength = unpack( - " 0: - px, py = list(zip(*s.points))[:2] - x.extend(px) - y.extend(py) - else: - # this should not happen. - # any shape that is not null should have at least one point, and only those should be sent here. - # could also mean that earlier code failed to add points to a non-null shape. - raise Exception( - "Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." 
- % s.shapeType - ) - bbox = [min(x), min(y), max(x), max(y)] - # update global - if self._bbox: - # compare with existing - self._bbox = [ - min(bbox[0], self._bbox[0]), - min(bbox[1], self._bbox[1]), - max(bbox[2], self._bbox[2]), - max(bbox[3], self._bbox[3]), - ] - else: - # first time bbox is being set - self._bbox = bbox - return bbox - - def __zbox(self, s): - z = [] - for p in s.points: - try: - z.append(p[2]) - except IndexError: - # point did not have z value - # setting it to 0 is probably ok, since it means all are on the same elevation - z.append(0) - zbox = [min(z), max(z)] - # update global - if self._zbox: - # compare with existing - self._zbox = [min(zbox[0], self._zbox[0]), max(zbox[1], self._zbox[1])] - else: - # first time zbox is being set - self._zbox = zbox - return zbox - - def __mbox(self, s): - mpos = 3 if s.shapeType in (11, 13, 15, 18, 31) else 2 - m = [] - for p in s.points: - try: - if p[mpos] is not None: - # mbox should only be calculated on valid m values - m.append(p[mpos]) - except IndexError: - # point did not have m value so is missing - # mbox should only be calculated on valid m values - pass - if not m: - # only if none of the shapes had m values, should mbox be set to missing m values - m.append(NODATA) - mbox = [min(m), max(m)] - # update global - if self._mbox: - # compare with existing - self._mbox = [min(mbox[0], self._mbox[0]), max(mbox[1], self._mbox[1])] - else: - # first time mbox is being set - self._mbox = mbox - return mbox - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def bbox(self): - """Returns the current bounding box for the shapefile which is - the lower-left and upper-right corners. 
It does not contain the - elevation or measure extremes.""" - return self._bbox - - def zbox(self): - """Returns the current z extremes for the shapefile.""" - return self._zbox - - def mbox(self): - """Returns the current m extremes for the shapefile.""" - return self._mbox - - def __shapefileHeader(self, fileObj, headerType="shp"): - """Writes the specified header type to the specified file-like object. - Several of the shapefile formats are so similar that a single generic - method to read or write them is warranted.""" - f = self.__getFileObj(fileObj) - f.seek(0) - # File code, Unused bytes - f.write(pack(">6i", 9994, 0, 0, 0, 0, 0)) - # File length (Bytes / 2 = 16-bit words) - if headerType == "shp": - f.write(pack(">i", self.__shpFileLength())) - elif headerType == "shx": - f.write(pack(">i", ((100 + (self.shpNum * 8)) // 2))) - # Version, Shape type - if self.shapeType is None: - self.shapeType = NULL - f.write(pack("<2i", 1000, self.shapeType)) - # The shapefile's bounding box (lower left, upper right) - if self.shapeType != 0: - try: - bbox = self.bbox() - if bbox is None: - # The bbox is initialized with None, so this would mean the shapefile contains no valid geometries. - # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. - # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. - # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. - bbox = [0, 0, 0, 0] - f.write(pack("<4d", *bbox)) - except error: - raise ShapefileException( - "Failed to write shapefile bounding box. Floats required." 
- ) - else: - f.write(pack("<4d", 0, 0, 0, 0)) - # Elevation - if self.shapeType in (11, 13, 15, 18): - # Z values are present in Z type - zbox = self.zbox() - if zbox is None: - # means we have empty shapefile/only null geoms (see commentary on bbox above) - zbox = [0, 0] - else: - # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s - zbox = [0, 0] - # Measure - if self.shapeType in (11, 13, 15, 18, 21, 23, 25, 28, 31): - # M values are present in M or Z type - mbox = self.mbox() - if mbox is None: - # means we have empty shapefile/only null geoms (see commentary on bbox above) - mbox = [0, 0] - else: - # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s - mbox = [0, 0] - # Try writing - try: - f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) - except error: - raise ShapefileException( - "Failed to write shapefile elevation and measure values. Floats required." - ) - - def __dbfHeader(self): - """Writes the dbf header and field descriptors.""" - f = self.__getFileObj(self.dbf) - f.seek(0) - version = 3 - year, month, day = time.localtime()[:3] - year -= 1900 - # Get all fields, ignoring DeletionFlag if specified - fields = [field for field in self.fields if field[0] != "DeletionFlag"] - # Ensure has at least one field - if not fields: - raise ShapefileException( - "Shapefile dbf file must contain at least one field." - ) - numRecs = self.recNum - numFields = len(fields) - headerLength = numFields * 32 + 33 - if headerLength >= 65535: - raise ShapefileException( - "Shapefile dbf header length exceeds maximum length." - ) - recordLength = sum([int(field[2]) for field in fields]) + 1 - header = pack( - "2i", self.shpNum, 0)) - start = f.tell() - # Shape Type - if self.shapeType is None and s.shapeType != NULL: - self.shapeType = s.shapeType - if s.shapeType != NULL and s.shapeType != self.shapeType: - raise Exception( - "The shape's type (%s) must match the type of the shapefile (%s)." 
- % (s.shapeType, self.shapeType) - ) - f.write(pack(" 2 else 0)) for p in s.points] - except error: - raise ShapefileException( - "Failed to write elevation values for record %s. Expected floats." - % self.shpNum - ) - # Write m extremes and values - # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA - # Note: missing m values are autoset to NODATA. - if s.shapeType in (13, 15, 18, 23, 25, 28, 31): - try: - f.write(pack("<2d", *self.__mbox(s))) - except error: - raise ShapefileException( - "Failed to write measure extremes for record %s. Expected floats" - % self.shpNum - ) - try: - if hasattr(s, "m"): - # if m values are stored in attribute - f.write( - pack( - "<%sd" % len(s.m), - *[m if m is not None else NODATA for m in s.m], - ) - ) - else: - # if m values are stored as 3rd/4th dimension - # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) - mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2 - [ - f.write( - pack( - " mpos and p[mpos] is not None - else NODATA, - ) - ) - for p in s.points - ] - except error: - raise ShapefileException( - "Failed to write measure values for record %s. Expected floats" - % self.shpNum - ) - # Write a single point - if s.shapeType in (1, 11, 21): - try: - f.write(pack("<2d", s.points[0][0], s.points[0][1])) - except error: - raise ShapefileException( - "Failed to write point for record %s. Expected floats." - % self.shpNum - ) - # Write a single Z value - # Note: missing z values are autoset to 0, but not sure if this is ideal. 
- if s.shapeType == 11: - # update the global z box - self.__zbox(s) - # then write value - if hasattr(s, "z"): - # if z values are stored in attribute - try: - if not s.z: - s.z = (0,) - f.write(pack("i", length)) - f.seek(finish) - return offset, length - - def __shxRecord(self, offset, length): - """Writes the shx records.""" - f = self.__getFileObj(self.shx) - try: - f.write(pack(">i", offset // 2)) - except error: - raise ShapefileException( - "The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones." - ) - f.write(pack(">i", length)) - - def record(self, *recordList, **recordDict): - """Creates a dbf attribute record. You can submit either a sequence of - field values or keyword arguments of field names and values. Before - adding records you must add fields for the record values using the - field() method. If the record values exceed the number of fields the - extra ones won't be added. In the case of using keyword arguments to specify - field/value pairs only fields matching the already registered fields - will be added.""" - # Balance if already not balanced - if self.autoBalance and self.recNum > self.shpNum: - self.balance() - - fieldCount = sum((1 for field in self.fields if field[0] != "DeletionFlag")) - if recordList: - record = list(recordList) - while len(record) < fieldCount: - record.append("") - elif recordDict: - record = [] - for field in self.fields: - if field[0] == "DeletionFlag": - continue # ignore deletionflag field in case it was specified - if field[0] in recordDict: - val = recordDict[field[0]] - if val is None: - record.append("") - else: - record.append(val) - else: - record.append("") # need empty value for missing dict entries - else: - # Blank fields for empty record - record = ["" for _ in range(fieldCount)] - self.__dbfRecord(record) - - def __dbfRecord(self, record): - """Writes the dbf records.""" - f = self.__getFileObj(self.dbf) - if self.recNum 
== 0: - # first records, so all fields should be set - # allowing us to write the dbf header - # cannot change the fields after this point - self.__dbfHeader() - # first byte of the record is deletion flag, always disabled - f.write(b" ") - # begin - self.recNum += 1 - fields = ( - field for field in self.fields if field[0] != "DeletionFlag" - ) # ignore deletionflag field in case it was specified - for (fieldName, fieldType, size, deci), value in zip(fields, record): - # write - fieldType = fieldType.upper() - size = int(size) - if fieldType in ("N", "F"): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - if value in MISSING: - value = b"*" * size # QGIS NULL - elif not deci: - # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. - value = int(float(value)) - value = format(value, "d")[:size].rjust( - size - ) # caps the size if exceeds the field size - else: - value = float(value) - value = format(value, ".%sf" % deci)[:size].rjust( - size - ) # caps the size if exceeds the field size - elif fieldType == "D": - # date: 8 bytes - date stored as a string in the format YYYYMMDD. - if isinstance(value, date): - value = "{:04d}{:02d}{:02d}".format( - value.year, value.month, value.day - ) - elif isinstance(value, list) and len(value) == 3: - value = "{:04d}{:02d}{:02d}".format(*value) - elif value in MISSING: - value = b"0" * 8 # QGIS NULL for date type - elif is_string(value) and len(value) == 8: - pass # value is already a date string - else: - raise ShapefileException( - "Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value." - ) - elif fieldType == "L": - # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
- if value in MISSING: - value = b" " # missing is set to space - elif value in [True, 1]: - value = b"T" - elif value in [False, 0]: - value = b"F" - else: - value = b" " # unknown is set to space - else: - # anything else is forced to string, truncated to the length of the field - value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) - if not isinstance(value, bytes): - # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) - value = b( - value, "ascii", self.encodingErrors - ) # should be default ascii encoding - if len(value) != size: - raise ShapefileException( - "Shapefile Writer unable to pack incorrect sized value" - " (size %d) into field '%s' (size %d)." - % (len(value), fieldName, size) - ) - f.write(value) - - def balance(self): - """Adds corresponding empty attributes or null geometry records depending - on which type of record was created to make sure all three files - are in synch.""" - while self.recNum > self.shpNum: - self.null() - while self.recNum < self.shpNum: - self.record() - - def null(self): - """Creates a null shape.""" - self.shape(Shape(NULL)) - - def point(self, x, y): - """Creates a POINT shape.""" - shapeType = POINT - pointShape = Shape(shapeType) - pointShape.points.append([x, y]) - self.shape(pointShape) - - def pointm(self, x, y, m=None): - """Creates a POINTM shape. - If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTM - pointShape = Shape(shapeType) - pointShape.points.append([x, y, m]) - self.shape(pointShape) - - def pointz(self, x, y, z=0, m=None): - """Creates a POINTZ shape. - If the z (elevation) value is not set, it defaults to 0. - If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTZ - pointShape = Shape(shapeType) - pointShape.points.append([x, y, z, m]) - self.shape(pointShape) - - def multipoint(self, points): - """Creates a MULTIPOINT shape. 
- Points is a list of xy values.""" - shapeType = MULTIPOINT - points = [ - points - ] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def multipointm(self, points): - """Creates a MULTIPOINTM shape. - Points is a list of xym values. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTM - points = [ - points - ] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def multipointz(self, points): - """Creates a MULTIPOINTZ shape. - Points is a list of xyzm values. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTZ - points = [ - points - ] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def line(self, lines): - """Creates a POLYLINE shape. - Lines is a collection of lines, each made up of a list of xy values.""" - shapeType = POLYLINE - self._shapeparts(parts=lines, shapeType=shapeType) - - def linem(self, lines): - """Creates a POLYLINEM shape. - Lines is a collection of lines, each made up of a list of xym values. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEM - self._shapeparts(parts=lines, shapeType=shapeType) - - def linez(self, lines): - """Creates a POLYLINEZ shape. - Lines is a collection of lines, each made up of a list of xyzm values. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEZ - self._shapeparts(parts=lines, shapeType=shapeType) - - def poly(self, polys): - """Creates a POLYGON shape. 
- Polys is a collection of polygons, each made up of a list of xy values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction.""" - shapeType = POLYGON - self._shapeparts(parts=polys, shapeType=shapeType) - - def polym(self, polys): - """Creates a POLYGONM shape. - Polys is a collection of polygons, each made up of a list of xym values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONM - self._shapeparts(parts=polys, shapeType=shapeType) - - def polyz(self, polys): - """Creates a POLYGONZ shape. - Polys is a collection of polygons, each made up of a list of xyzm values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONZ - self._shapeparts(parts=polys, shapeType=shapeType) - - def multipatch(self, parts, partTypes): - """Creates a MULTIPATCH shape. - Parts is a collection of 3D surface patches, each made up of a list of xyzm values. - PartTypes is a list of types that define each of the surface patches. - The types can be any of the following module constants: TRIANGLE_STRIP, - TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. - If the z (elevation) value is not included, it defaults to 0. 
- If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPATCH - polyShape = Shape(shapeType) - polyShape.parts = [] - polyShape.points = [] - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - for point in part: - # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) - polyShape.partTypes = partTypes - # write the shape - self.shape(polyShape) - - def _shapeparts(self, parts, shapeType): - """Internal method for adding a shape that has multiple collections of points (parts): - lines, polygons, and multipoint shapes. - """ - polyShape = Shape(shapeType) - polyShape.parts = [] - polyShape.points = [] - # Make sure polygon rings (parts) are closed - if shapeType in (5, 15, 25, 31): - for part in parts: - if part[0] != part[-1]: - part.append(part[0]) - # Add points and part indexes - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - for point in part: - # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) - # write the shape - self.shape(polyShape) - - def field(self, name, fieldType="C", size="50", decimal=0): - """Adds a dbf field descriptor to the shapefile.""" - if fieldType == "D": - size = "8" - decimal = 0 - elif fieldType == "L": - size = "1" - decimal = 0 - if len(self.fields) >= 2046: - raise ShapefileException( - "Shapefile Writer reached maximum number of fields: 2046." 
- ) - self.fields.append((name, fieldType, size, decimal)) - - -# Begin Testing -def test(**kwargs): - import doctest - - doctest.NORMALIZE_WHITESPACE = 1 - verbosity = kwargs.get("verbose", 0) - if verbosity == 0: - print("Running doctests...") - - # ignore py2-3 unicode differences - import re - - class Py23DocChecker(doctest.OutputChecker): - def check_output(self, want, got, optionflags): - if sys.version_info[0] == 2: - got = re.sub("u'(.*?)'", "'\\1'", got) - got = re.sub('u"(.*?)"', '"\\1"', got) - res = doctest.OutputChecker.check_output(self, want, got, optionflags) - return res - - def summarize(self): - doctest.OutputChecker.summarize(True) - - # run tests - runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity) - with open("README.md", "rb") as fobj: - test = doctest.DocTestParser().get_doctest( - string=fobj.read().decode("utf8").replace("\r\n", "\n"), - globs={}, - name="README", - filename="README.md", - lineno=0, - ) - failure_count, test_count = runner.run(test) - - # print results - if verbosity: - runner.summarize(True) - else: - if failure_count == 0: - print("All test passed successfully") - elif failure_count > 0: - runner.summarize(verbosity) - - return failure_count - - -if __name__ == "__main__": - """ - Doctests are contained in the file 'README.md', and are tested using the built-in - testing libraries. - """ - failure_count = test() - sys.exit(failure_count) +""" +shapefile.py +Provides read and write support for ESRI Shapefiles. 
+authors: jlawheadgeospatialpython.com +maintainer: karim.bahgat.norwaygmail.com +Compatible with Python versions >= 3.9 +""" + +__version__ = "2.3.1" + +import array +from datetime import date +import io +import logging +import os +from struct import pack, unpack, calcsize, error, Struct +import sys +import tempfile +import time +import zipfile + +from urllib.error import HTTPError +from urllib.parse import urlparse, urlunparse +from urllib.request import urlopen, Request + +from typing import Any, Union, ByteString +from collections.abc import Sequence + +# Create named logger +logger = logging.getLogger(__name__) + + +# Module settings +VERBOSE = True + +# Constants for shape types +NULL = 0 +POINT = 1 +POLYLINE = 3 +POLYGON = 5 +MULTIPOINT = 8 +POINTZ = 11 +POLYLINEZ = 13 +POLYGONZ = 15 +MULTIPOINTZ = 18 +POINTM = 21 +POLYLINEM = 23 +POLYGONM = 25 +MULTIPOINTM = 28 +MULTIPATCH = 31 + +SHAPETYPE_LOOKUP = { + 0: "NULL", + 1: "POINT", + 3: "POLYLINE", + 5: "POLYGON", + 8: "MULTIPOINT", + 11: "POINTZ", + 13: "POLYLINEZ", + 15: "POLYGONZ", + 18: "MULTIPOINTZ", + 21: "POINTM", + 23: "POLYLINEM", + 25: "POLYGONM", + 28: "MULTIPOINTM", + 31: "MULTIPATCH", +} + +TRIANGLE_STRIP = 0 +TRIANGLE_FAN = 1 +OUTER_RING = 2 +INNER_RING = 3 +FIRST_RING = 4 +RING = 5 + +PARTTYPE_LOOKUP = { + 0: "TRIANGLE_STRIP", + 1: "TRIANGLE_FAN", + 2: "OUTER_RING", + 3: "INNER_RING", + 4: "FIRST_RING", + 5: "RING", +} + + +xrange = range +izip = zip + +# Helpers + +MISSING = [None, ""] +NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. + + +def b(v: Any, encoding="utf-8", encodingErrors="strict") -> bytes: + if isinstance(v, str): + # For python 3 encode str to bytes. + return v.encode(encoding, encodingErrors) + elif isinstance(v, bytes): + # Already bytes. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return b"" + else: + # Force string representation. 
+ return str(v).encode(encoding, encodingErrors) + + +def u( + v: Union[bytes, str, None, int, ByteString], + encoding="utf-8", + encodingErrors="strict", +) -> str: + if isinstance(v, bytes): + # For python 3 decode bytes to str. + return v.decode(encoding, encodingErrors) + elif isinstance(v, str): + # Already str. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return "" + else: + # Force string representation. + return bytes(v).decode(encoding, encodingErrors) + + +def is_string(v: Any) -> bool: + return isinstance(v, str) + + +def pathlike_obj(path: Any) -> Any: + if isinstance(path, os.PathLike): + return os.fsdecode(path) + else: + return path + + +# Begin + + +class _Array(array.array): + """Converts python tuples to lists of the appropriate type. + Used to unpack different shapefile header parts.""" + + def __repr__(self): + return str(self.tolist()) + + +Point_T = Sequence[float] +Coords_T = Sequence[Point_T] +BBox_T = tuple[float, float, float, float] + + +def signed_area(coords: Coords_T, fast: bool = False) -> float: + """Return the signed area enclosed by a ring using the linear time + algorithm. A value >= 0 indicates a counter-clockwise oriented ring. + A faster version is possible by setting 'fast' to True, which returns + 2x the area, e.g. if you're only interested in the sign of the area. + """ + xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values + xs.append(xs[1]) + ys.append(ys[1]) + area2 = sum(xs[i] * (ys[i + 1] - ys[i - 1]) for i in range(1, len(coords))) + if fast: + return area2 + else: + return area2 / 2.0 + + +def is_cw(coords: Coords_T) -> bool: + """Returns True if a polygon ring has clockwise orientation, determined + by a negatively signed area. 
+ """ + area2 = signed_area(coords, fast=True) + return area2 < 0 + + +def rewind(coords: Coords_T) -> list[Point_T]: + """Returns the input coords in reversed order.""" + return list(reversed(coords)) + + +def ring_bbox(coords: Coords_T) -> BBox_T: + """Calculates and returns the bounding box of a ring.""" + xs, ys = zip(*coords) + bbox = min(xs), min(ys), max(xs), max(ys) + return bbox + + +def bbox_overlap(bbox1: BBox_T, bbox2: BBox_T) -> bool: + """Tests whether two bounding boxes overlap, returning a boolean""" + xmin1, ymin1, xmax1, ymax1 = bbox1 + xmin2, ymin2, xmax2, ymax2 = bbox2 + overlap = xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2 + return overlap + + +def bbox_contains(bbox1: BBox_T, bbox2: BBox_T) -> bool: + """Tests whether bbox1 fully contains bbox2, returning a boolean""" + xmin1, ymin1, xmax1, ymax1 = bbox1 + xmin2, ymin2, xmax2, ymax2 = bbox2 + contains = xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2 + return contains + + +def ring_contains_point(coords: Coords_T, p: Point_T) -> bool: + """Fast point-in-polygon crossings algorithm, MacMartin optimization. + + Adapted from code by Eric Haynes + http://www.realtimerendering.com/resources/GraphicsGems//gemsiv/ptpoly_haines/ptinpoly.c + + Original description: + Shoot a test ray along +X axis. The strategy, from MacMartin, is to + compare vertex Y values to the testing point's Y and quickly discard + edges which are entirely to one side of the test ray. + """ + tx, ty = p + + # get initial test bit for above/below X axis + vtx0 = coords[0] + yflag0 = vtx0[1] >= ty + + inside_flag = False + for vtx1 in coords[1:]: + yflag1 = vtx1[1] >= ty + # check if endpoints straddle (are on opposite sides) of X axis + # (i.e. the Y's differ); if so, +X ray could intersect this edge. + if yflag0 != yflag1: + xflag0 = vtx0[0] >= tx + # check if endpoints are on same side of the Y axis (i.e. X's + # are the same); if so, it's easy to test if edge hits or misses. 
+ if xflag0 == (vtx1[0] >= tx): + # if edge's X values both right of the point, must hit + if xflag0: + inside_flag = not inside_flag + else: + # compute intersection of pgon segment with +X ray, note + # if >= point's X; if so, the ray hits it. + if ( + vtx1[0] - (vtx1[1] - ty) * (vtx0[0] - vtx1[0]) / (vtx0[1] - vtx1[1]) + ) >= tx: + inside_flag = not inside_flag + + # move to next pair of vertices, retaining info as possible + yflag0 = yflag1 + vtx0 = vtx1 + + return inside_flag + + +def ring_sample(coords: Coords_T, ccw: bool = False) -> Point_T: + """Return a sample point guaranteed to be within a ring, by efficiently + finding the first centroid of a coordinate triplet whose orientation + matches the orientation of the ring and passes the point-in-ring test. + The orientation of the ring is assumed to be clockwise, unless ccw + (counter-clockwise) is set to True. + """ + triplet = [] + + def itercoords(): + # iterate full closed ring + for p in coords: + yield p + # finally, yield the second coordinate to the end to allow checking the last triplet + yield coords[1] + + for p in itercoords(): + # add point to triplet (but not if duplicate) + if p not in triplet: + triplet.append(p) + + # new triplet, try to get sample + if len(triplet) == 3: + # check that triplet does not form a straight line (not a triangle) + is_straight_line = (triplet[0][1] - triplet[1][1]) * ( + triplet[0][0] - triplet[2][0] + ) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) + if not is_straight_line: + # get triplet orientation + closed_triplet = triplet + [triplet[0]] + triplet_ccw = not is_cw(closed_triplet) + # check that triplet has the same orientation as the ring (means triangle is inside the ring) + if ccw == triplet_ccw: + # get triplet centroid + xs, ys = zip(*triplet) + xmean, ymean = sum(xs) / 3.0, sum(ys) / 3.0 + # check that triplet centroid is truly inside the ring + if ring_contains_point(coords, (xmean, ymean)): + return xmean, ymean + + # failed to 
get sample point from this triplet + # remove oldest triplet coord to allow iterating to next triplet + triplet.pop(0) + + else: + raise Exception("Unexpected error: Unable to find a ring sample point.") + + +def ring_contains_ring(coords1, coords2) -> bool: + """Returns True if all vertexes in coords2 are fully inside coords1.""" + return all((ring_contains_point(coords1, p2) for p2 in coords2)) + + +def organize_polygon_rings(rings, return_errors=None): + """Organize a list of coordinate rings into one or more polygons with holes. + Returns a list of polygons, where each polygon is composed of a single exterior + ring, and one or more interior holes. If a return_errors dict is provided (optional), + any errors encountered will be added to it. + + Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). + Rings are determined as exteriors if they run in clockwise direction, or interior + holes if they run in counter-clockwise direction. This method is used to construct + GeoJSON (multi)polygons from the shapefile polygon shape type, which does not + explicitly store the structure of the polygons beyond exterior/interior ring orientation. 
+ """ + # first iterate rings and classify as exterior or hole + exteriors = [] + holes = [] + for ring in rings: + # shapefile format defines a polygon as a sequence of rings + # where exterior rings are clockwise, and holes counterclockwise + if is_cw(ring): + # ring is exterior + exteriors.append(ring) + else: + # ring is a hole + holes.append(ring) + + # if only one exterior, then all holes belong to that exterior + if len(exteriors) == 1: + # exit early + poly = [exteriors[0]] + holes + polys = [poly] + return polys + + # multiple exteriors, ie multi-polygon, have to group holes with correct exterior + # shapefile format does not specify which holes belong to which exteriors + # so have to do efficient multi-stage checking of hole-to-exterior containment + elif len(exteriors) > 1: + # exit early if no holes + if not holes: + polys = [] + for ext in exteriors: + poly = [ext] + polys.append(poly) + return polys + + # first determine each hole's candidate exteriors based on simple bbox contains test + hole_exteriors = dict([(hole_i, []) for hole_i in xrange(len(holes))]) + exterior_bboxes = [ring_bbox(ring) for ring in exteriors] + for hole_i in hole_exteriors.keys(): + hole_bbox = ring_bbox(holes[hole_i]) + for ext_i, ext_bbox in enumerate(exterior_bboxes): + if bbox_contains(ext_bbox, hole_bbox): + hole_exteriors[hole_i].append(ext_i) + + # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test + for hole_i, exterior_candidates in hole_exteriors.items(): + if len(exterior_candidates) > 1: + # get hole sample point + ccw = not is_cw(holes[hole_i]) + hole_sample = ring_sample(holes[hole_i], ccw=ccw) + # collect new exterior candidates + new_exterior_candidates = [] + for ext_i in exterior_candidates: + # check that hole sample point is inside exterior + hole_in_exterior = ring_contains_point( + exteriors[ext_i], hole_sample + ) + if hole_in_exterior: + new_exterior_candidates.append(ext_i) + + # set new exterior candidates 
+ hole_exteriors[hole_i] = new_exterior_candidates + + # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole + for hole_i, exterior_candidates in hole_exteriors.items(): + if len(exterior_candidates) > 1: + # exterior candidate with the smallest area is the hole's most immediate parent + ext_i = sorted( + exterior_candidates, + key=lambda x: abs(signed_area(exteriors[x], fast=True)), + )[0] + hole_exteriors[hole_i] = [ext_i] + + # separate out holes that are orphaned (not contained by any exterior) + orphan_holes = [] + for hole_i, exterior_candidates in list(hole_exteriors.items()): + if not exterior_candidates: + orphan_holes.append(hole_i) + del hole_exteriors[hole_i] + continue + + # each hole should now only belong to one exterior, group into exterior-holes polygons + polys = [] + for ext_i, ext in enumerate(exteriors): + poly = [ext] + # find relevant holes + poly_holes = [] + for hole_i, exterior_candidates in list(hole_exteriors.items()): + # hole is relevant if previously matched with this exterior + if exterior_candidates[0] == ext_i: + poly_holes.append(holes[hole_i]) + poly += poly_holes + polys.append(poly) + + # add orphan holes as exteriors + for hole_i in orphan_holes: + ext = holes[hole_i] + # add as single exterior without any holes + poly = [ext] + polys.append(poly) + + if orphan_holes and return_errors is not None: + return_errors["polygon_orphaned_holes"] = len(orphan_holes) + + return polys + + # no exteriors, be nice and assume due to incorrect winding order + else: + if return_errors is not None: + return_errors["polygon_only_holes"] = len(holes) + exteriors = holes + # add as single exterior without any holes + polys = [[ext] for ext in exteriors] + return polys + + +class Shape: + def __init__( + self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None + ): + """Stores the geometry of the different shape types + specified in the Shapefile spec. 
Shape types are + usually point, polyline, or polygons. Every shape type + except the "Null" type contains points at some level for + example vertices in a polygon. If a shape type has + multiple shapes containing points within a single + geometry record then those shapes are called parts. Parts + are designated by their starting index in geometry record's + list of shapes. For MultiPatch geometry, partTypes designates + the patch type of each of the parts. + """ + self.shapeType = shapeType + self.points = points or [] + self.parts = parts or [] + if partTypes: + self.partTypes = partTypes + + # and a dict to silently record any errors encountered + self._errors = {} + + # add oid + if oid is not None: + self.__oid = oid + else: + self.__oid = -1 + + @property + def __geo_interface__(self): + if self.shapeType in [POINT, POINTM, POINTZ]: + # point + if len(self.points) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "Point", "coordinates": tuple()} + else: + return {"type": "Point", "coordinates": tuple(self.points[0])} + elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: + if len(self.points) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "MultiPoint", "coordinates": []} + else: + # multipoint + return { + "type": "MultiPoint", + "coordinates": [tuple(p) for p in self.points], + } + elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: + if len(self.parts) == 0: + # the shape has no coordinate information, i.e. 
is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "LineString", "coordinates": []} + elif len(self.parts) == 1: + # linestring + return { + "type": "LineString", + "coordinates": [tuple(p) for p in self.points], + } + else: + # multilinestring + ps = None + coordinates = [] + for part in self.parts: + if ps is None: + ps = part + continue + else: + coordinates.append([tuple(p) for p in self.points[ps:part]]) + ps = part + else: + coordinates.append([tuple(p) for p in self.points[part:]]) + return {"type": "MultiLineString", "coordinates": coordinates} + elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: + if len(self.parts) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "Polygon", "coordinates": []} + else: + # get all polygon rings + rings = [] + for i in xrange(len(self.parts)): + # get indexes of start and end points of the ring + start = self.parts[i] + try: + end = self.parts[i + 1] + except IndexError: + end = len(self.points) + + # extract the points that make up the ring + ring = [tuple(p) for p in self.points[start:end]] + rings.append(ring) + + # organize rings into list of polygons, where each polygon is defined as list of rings. + # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). 
+ polys = organize_polygon_rings(rings, self._errors) + + # if VERBOSE is True, issue detailed warning about any shape errors + # encountered during the Shapefile to GeoJSON conversion + if VERBOSE and self._errors: + header = "Possible issue encountered when converting Shape #{} to GeoJSON: ".format( + self.oid + ) + orphans = self._errors.get("polygon_orphaned_holes", None) + if orphans: + msg = ( + header + + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ +but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ +orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ +encoded as GeoJSON exterior rings instead of holes." + ) + logger.warning(msg) + only_holes = self._errors.get("polygon_only_holes", None) + if only_holes: + msg = ( + header + + "Shapefile format requires that polygons contain at least one exterior ring, \ +but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ +still included but were encoded as GeoJSON exterior rings instead of holes." + ) + logger.warning(msg) + + # return as geojson + if len(polys) == 1: + return {"type": "Polygon", "coordinates": polys[0]} + else: + return {"type": "MultiPolygon", "coordinates": polys} + + else: + raise Exception( + 'Shape type "%s" cannot be represented as GeoJSON.' 
+ % SHAPETYPE_LOOKUP[self.shapeType] + ) + + @staticmethod + def _from_geojson(geoj): + # create empty shape + shape = Shape() + # set shapeType + geojType = geoj["type"] if geoj else "Null" + if geojType == "Null": + shapeType = NULL + elif geojType == "Point": + shapeType = POINT + elif geojType == "LineString": + shapeType = POLYLINE + elif geojType == "Polygon": + shapeType = POLYGON + elif geojType == "MultiPoint": + shapeType = MULTIPOINT + elif geojType == "MultiLineString": + shapeType = POLYLINE + elif geojType == "MultiPolygon": + shapeType = POLYGON + else: + raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) + shape.shapeType = shapeType + + # set points and parts + if geojType == "Point": + shape.points = [geoj["coordinates"]] + shape.parts = [0] + elif geojType in ("MultiPoint", "LineString"): + shape.points = geoj["coordinates"] + shape.parts = [0] + elif geojType in ("Polygon"): + points = [] + parts = [] + index = 0 + for i, ext_or_hole in enumerate(geoj["coordinates"]): + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. 
+ if i == 0 and not is_cw(ext_or_hole): + # flip exterior direction + ext_or_hole = rewind(ext_or_hole) + elif i > 0 and is_cw(ext_or_hole): + # flip hole direction + ext_or_hole = rewind(ext_or_hole) + points.extend(ext_or_hole) + parts.append(index) + index += len(ext_or_hole) + shape.points = points + shape.parts = parts + elif geojType in ("MultiLineString"): + points = [] + parts = [] + index = 0 + for linestring in geoj["coordinates"]: + points.extend(linestring) + parts.append(index) + index += len(linestring) + shape.points = points + shape.parts = parts + elif geojType in ("MultiPolygon"): + points = [] + parts = [] + index = 0 + for polygon in geoj["coordinates"]: + for i, ext_or_hole in enumerate(polygon): + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. + if i == 0 and not is_cw(ext_or_hole): + # flip exterior direction + ext_or_hole = rewind(ext_or_hole) + elif i > 0 and is_cw(ext_or_hole): + # flip hole direction + ext_or_hole = rewind(ext_or_hole) + points.extend(ext_or_hole) + parts.append(index) + index += len(ext_or_hole) + shape.points = points + shape.parts = parts + return shape + + @property + def oid(self): + """The index position of the shape in the original shapefile""" + return self.__oid + + @property + def shapeTypeName(self): + return SHAPETYPE_LOOKUP[self.shapeType] + + def __repr__(self): + return "Shape #{}: {}".format(self.__oid, self.shapeTypeName) + + +class _Record(list): + """ + A class to hold a record. Subclasses list to ensure compatibility with + former work and to reuse all the optimizations of the builtin list. + In addition to the list interface, the values of the record + can also be retrieved using the field's name. For example if the dbf contains + a field ID at position 0, the ID can be retrieved with the position, the field name + as a key, or the field name as an attribute. 
+ + >>> # Create a Record with one field, normally the record is created by the Reader class + >>> r = _Record({'ID': 0}, [0]) + >>> print(r[0]) + >>> print(r['ID']) + >>> print(r.ID) + """ + + def __init__(self, field_positions, values, oid=None): + """ + A Record should be created by the Reader class + + :param field_positions: A dict mapping field names to field positions + :param values: A sequence of values + :param oid: The object id, an int (optional) + """ + self.__field_positions = field_positions + if oid is not None: + self.__oid = oid + else: + self.__oid = -1 + list.__init__(self, values) + + def __getattr__(self, item): + """ + __getattr__ is called if an attribute is used that does + not exist in the normal sense. For example r=Record(...), r.ID + calls r.__getattr__('ID'), but r.index(5) calls list.index(r, 5) + :param item: The field name, used as attribute + :return: Value of the field + :raises: AttributeError, if item is not a field of the shapefile + and IndexError, if the field exists but the field's + corresponding value in the Record does not exist + """ + try: + if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() + raise AttributeError("_Record does not implement __setstate__") + index = self.__field_positions[item] + return list.__getitem__(self, index) + except KeyError: + raise AttributeError("{} is not a field name".format(item)) + except IndexError: + raise IndexError( + "{} found as a field but not enough values available.".format(item) + ) + + def __setattr__(self, key, value): + """ + Sets a value of a field attribute + :param key: The field name + :param value: the value of that field + :return: None + :raises: AttributeError, if key is not a field of the shapefile + """ + if key.startswith("_"): # Prevent infinite loop when setting mangled attribute + return list.__setattr__(self, key, value) + try: + index = self.__field_positions[key] + return list.__setitem__(self, index, value) + except KeyError: + raise 
AttributeError("{} is not a field name".format(key)) + + def __getitem__(self, item): + """ + Extends the normal list item access with + access using a fieldname + + For example r['ID'], r[0] + :param item: Either the position of the value or the name of a field + :return: the value of the field + """ + try: + return list.__getitem__(self, item) + except TypeError: + try: + index = self.__field_positions[item] + except KeyError: + index = None + if index is not None: + return list.__getitem__(self, index) + else: + raise IndexError('"{}" is not a field name and not an int'.format(item)) + + def __setitem__(self, key, value): + """ + Extends the normal list item access with + access using a fieldname + + For example r['ID']=2, r[0]=2 + :param key: Either the position of the value or the name of a field + :param value: the new value of the field + """ + try: + return list.__setitem__(self, key, value) + except TypeError: + index = self.__field_positions.get(key) + if index is not None: + return list.__setitem__(self, index, value) + else: + raise IndexError("{} is not a field name and not an int".format(key)) + + @property + def oid(self): + """The index position of the record in the original shapefile""" + return self.__oid + + def as_dict(self, date_strings=False): + """ + Returns this Record as a dictionary using the field names as keys + :return: dict + """ + dct = dict((f, self[i]) for f, i in self.__field_positions.items()) + if date_strings: + for k, v in dct.items(): + if isinstance(v, date): + dct[k] = "{:04d}{:02d}{:02d}".format(v.year, v.month, v.day) + return dct + + def __repr__(self): + return "Record #{}: {}".format(self.__oid, list(self)) + + def __dir__(self): + """ + Helps to show the field names in an interactive environment like IPython. 
+ See: http://ipython.readthedocs.io/en/stable/config/integrating.html + + :return: List of method names and fields + """ + default = list( + dir(type(self)) + ) # default list methods and attributes of this class + fnames = list( + self.__field_positions.keys() + ) # plus field names (random order if Python version < 3.6) + return default + fnames + + +class ShapeRecord: + """A ShapeRecord object containing a shape along with its attributes. + Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" + + def __init__(self, shape=None, record=None): + self.shape = shape + self.record = record + + @property + def __geo_interface__(self): + return { + "type": "Feature", + "properties": self.record.as_dict(date_strings=True), + "geometry": None + if self.shape.shapeType == NULL + else self.shape.__geo_interface__, + } + + +class Shapes(list): + """A class to hold a list of Shape objects. Subclasses list to ensure compatibility with + former work and to reuse all the optimizations of the builtin list. + In addition to the list interface, this also provides the GeoJSON __geo_interface__ + to return a GeometryCollection dictionary.""" + + def __repr__(self): + return "Shapes: {}".format(list(self)) + + @property + def __geo_interface__(self): + # Note: currently this will fail if any of the shapes are null-geometries + # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords + collection = { + "type": "GeometryCollection", + "geometries": [shape.__geo_interface__ for shape in self], + } + return collection + + +class ShapeRecords(list): + """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with + former work and to reuse all the optimizations of the builtin list. 
+ In addition to the list interface, this also provides the GeoJSON __geo_interface__ + to return a FeatureCollection dictionary.""" + + def __repr__(self): + return "ShapeRecords: {}".format(list(self)) + + @property + def __geo_interface__(self): + collection = { + "type": "FeatureCollection", + "features": [shaperec.__geo_interface__ for shaperec in self], + } + return collection + + +class ShapefileException(Exception): + """An exception to handle shapefile specific problems.""" + + pass + + +class Reader: + """Reads the three files of a shapefile as a unit or + separately. If one of the three files (.shp, .shx, + .dbf) is missing no exception is thrown until you try + to call a method that depends on that particular file. + The .shx index file is used if available for efficiency + but is not required to read the geometry from the .shp + file. The "shapefile" argument in the constructor is the + name of the file you want to open, and can be the path + to a shapefile on a local filesystem, inside a zipfile, + or a url. + + You can instantiate a Reader without specifying a shapefile + and then specify one later with the load() method. + + Only the shapefile headers are read upon loading. Content + within each file is only accessed when required and as + efficiently as possible. Shapefiles are usually not large + but they can be. 
+ """ + + def __init__(self, *args, **kwargs): + self.shp = None + self.shx = None + self.dbf = None + self._files_to_close = [] + self.shapeName = "Not specified" + self._offsets = [] + self.shpLength = None + self.numRecords = None + self.numShapes = None + self.fields = [] + self.__dbfHdrLength = 0 + self.__fieldLookup = {} + self.encoding = kwargs.pop("encoding", "utf-8") + self.encodingErrors = kwargs.pop("encodingErrors", "strict") + # See if a shapefile name was passed as the first argument + if len(args) > 0: + path = pathlike_obj(args[0]) + if is_string(path): + if ".zip" in path: + # Shapefile is inside a zipfile + if path.count(".zip") > 1: + # Multiple nested zipfiles + raise ShapefileException( + "Reading from multiple nested zipfiles is not supported: %s" + % path + ) + # Split into zipfile and shapefile paths + if path.endswith(".zip"): + zpath = path + shapefile = None + else: + zpath = path[: path.find(".zip") + 4] + shapefile = path[path.find(".zip") + 4 + 1 :] + # Create a zip file handle + if zpath.startswith("http"): + # Zipfile is from a url + # Download to a temporary url and treat as normal zipfile + req = Request( + zpath, + headers={ + "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" + }, + ) + resp = urlopen(req) + # write zipfile data to a read+write tempfile and use as source, gets deleted when garbage collected + zipfileobj = tempfile.NamedTemporaryFile( + mode="w+b", suffix=".zip", delete=True + ) + zipfileobj.write(resp.read()) + zipfileobj.seek(0) + else: + # Zipfile is from a file + zipfileobj = open(zpath, mode="rb") + # Open the zipfile archive + with zipfile.ZipFile(zipfileobj, "r") as archive: + if not shapefile: + # Only the zipfile path is given + # Inspect zipfile contents to find the full shapefile path + shapefiles = [ + name + for name in archive.namelist() + if (name.endswith(".SHP") or name.endswith(".shp")) + ] + # The zipfile must 
contain exactly one shapefile + if len(shapefiles) == 0: + raise ShapefileException( + "Zipfile does not contain any shapefiles" + ) + elif len(shapefiles) == 1: + shapefile = shapefiles[0] + else: + raise ShapefileException( + "Zipfile contains more than one shapefile: %s. Please specify the full \ + path to the shapefile you would like to open." + % shapefiles + ) + # Try to extract file-like objects from zipfile + shapefile = os.path.splitext(shapefile)[ + 0 + ] # root shapefile name + for ext in ["SHP", "SHX", "DBF", "shp", "shx", "dbf"]: + try: + member = archive.open(shapefile + "." + ext) + # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() + fileobj = tempfile.NamedTemporaryFile( + mode="w+b", delete=True + ) + fileobj.write(member.read()) + fileobj.seek(0) + setattr(self, ext.lower(), fileobj) + self._files_to_close.append(fileobj) + + except BaseException: + pass + # Close and delete the temporary zipfile + try: + zipfileobj.close() + except OSError: + pass + # Try to load shapefile + if self.shp or self.dbf: + # Load and exit early + self.load() + return + else: + raise ShapefileException( + "No shp or dbf file found in zipfile: %s" % path + ) + + elif path.startswith("http"): + # Shapefile is from a url + # Download each file to temporary path and treat as normal shapefile path + urlinfo = urlparse(path) + urlpath = urlinfo[2] + urlpath, _ = os.path.splitext(urlpath) + shapefile = os.path.basename(urlpath) + for ext in ["shp", "shx", "dbf"]: + try: + _urlinfo = list(urlinfo) + _urlinfo[2] = urlpath + "." 
+ ext + _path = urlunparse(_urlinfo) + req = Request( + _path, + headers={ + "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" + }, + ) + resp = urlopen(req) + # write url data to a read+write tempfile and use as source, gets deleted on close() + fileobj = tempfile.NamedTemporaryFile( + mode="w+b", delete=True + ) + fileobj.write(resp.read()) + fileobj.seek(0) + setattr(self, ext, fileobj) + self._files_to_close.append(fileobj) + except HTTPError: + pass + if self.shp or self.dbf: + # Load and exit early + self.load() + return + else: + raise ShapefileException( + "No shp or dbf file found at url: %s" % path + ) + + else: + # Local file path to a shapefile + # Load and exit early + self.load(path) + return + + # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) + if "shp" in kwargs.keys(): + if hasattr(kwargs["shp"], "read"): + self.shp = kwargs["shp"] + # Copy if required + try: + self.shp.seek(0) + except (NameError, io.UnsupportedOperation): + self.shp = io.BytesIO(self.shp.read()) + else: + (baseName, ext) = os.path.splitext(kwargs["shp"]) + self.load_shp(baseName) + + if "shx" in kwargs.keys(): + if hasattr(kwargs["shx"], "read"): + self.shx = kwargs["shx"] + # Copy if required + try: + self.shx.seek(0) + except (NameError, io.UnsupportedOperation): + self.shx = io.BytesIO(self.shx.read()) + else: + (baseName, ext) = os.path.splitext(kwargs["shx"]) + self.load_shx(baseName) + + if "dbf" in kwargs.keys(): + if hasattr(kwargs["dbf"], "read"): + self.dbf = kwargs["dbf"] + # Copy if required + try: + self.dbf.seek(0) + except (NameError, io.UnsupportedOperation): + self.dbf = io.BytesIO(self.dbf.read()) + else: + (baseName, ext) = os.path.splitext(kwargs["dbf"]) + self.load_dbf(baseName) + + # Load the files + if self.shp or self.dbf: + self.load() + + def __str__(self): + """ + Use some general info on the shapefile as __str__ + """ + info = ["shapefile 
Reader"] + if self.shp: + info.append( + " {} shapes (type '{}')".format( + len(self), SHAPETYPE_LOOKUP[self.shapeType] + ) + ) + if self.dbf: + info.append( + " {} records ({} fields)".format(len(self), len(self.fields)) + ) + return "\n".join(info) + + def __enter__(self): + """ + Enter phase of context manager. + """ + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """ + Exit phase of context manager, close opened files. + """ + self.close() + + def __len__(self): + """Returns the number of shapes/records in the shapefile.""" + if self.dbf: + # Preferably use dbf record count + if self.numRecords is None: + self.__dbfHeader() + + return self.numRecords + + elif self.shp: + # Otherwise use shape count + if self.shx: + if self.numShapes is None: + self.__shxHeader() + + return self.numShapes + + else: + # Index file not available, iterate all shapes to get total count + if self.numShapes is None: + # Determine length of shp file + shp = self.shp + checkpoint = shp.tell() + shp.seek(0, 2) + shpLength = shp.tell() + shp.seek(100) + # Do a fast shape iteration until end of file. 
+ unpack = Struct(">2i").unpack + offsets = [] + pos = shp.tell() + while pos < shpLength: + offsets.append(pos) + # Unpack the shape header only + (recNum, recLength) = unpack(shp.read(8)) + # Jump to next shape position + pos += 8 + (2 * recLength) + shp.seek(pos) + # Set numShapes and offset indices + self.numShapes = len(offsets) + self._offsets = offsets + # Return to previous file position + shp.seek(checkpoint) + + return self.numShapes + + else: + # No file loaded yet, treat as 'empty' shapefile + return 0 + + def __iter__(self): + """Iterates through the shapes/records in the shapefile.""" + for shaperec in self.iterShapeRecords(): + yield shaperec + + @property + def __geo_interface__(self): + shaperecords = self.shapeRecords() + fcollection = shaperecords.__geo_interface__ + fcollection["bbox"] = list(self.bbox) + return fcollection + + @property + def shapeTypeName(self): + return SHAPETYPE_LOOKUP[self.shapeType] + + def load(self, shapefile=None): + """Opens a shapefile from a filename or file-like + object. Normally this method would be called by the + constructor with the file name as an argument.""" + if shapefile: + (shapeName, ext) = os.path.splitext(shapefile) + self.shapeName = shapeName + self.load_shp(shapeName) + self.load_shx(shapeName) + self.load_dbf(shapeName) + if not (self.shp or self.dbf): + raise ShapefileException( + "Unable to open %s.dbf or %s.shp." 
% (shapeName, shapeName) + ) + if self.shp: + self.__shpHeader() + if self.dbf: + self.__dbfHeader() + if self.shx: + self.__shxHeader() + + def load_shp(self, shapefile_name): + """ + Attempts to load file with .shp extension as both lower and upper case + """ + shp_ext = "shp" + try: + self.shp = open("%s.%s" % (shapefile_name, shp_ext), "rb") + self._files_to_close.append(self.shp) + except IOError: + try: + self.shp = open("%s.%s" % (shapefile_name, shp_ext.upper()), "rb") + self._files_to_close.append(self.shp) + except IOError: + pass + + def load_shx(self, shapefile_name): + """ + Attempts to load file with .shx extension as both lower and upper case + """ + shx_ext = "shx" + try: + self.shx = open("%s.%s" % (shapefile_name, shx_ext), "rb") + self._files_to_close.append(self.shx) + except IOError: + try: + self.shx = open("%s.%s" % (shapefile_name, shx_ext.upper()), "rb") + self._files_to_close.append(self.shx) + except IOError: + pass + + def load_dbf(self, shapefile_name): + """ + Attempts to load file with .dbf extension as both lower and upper case + """ + dbf_ext = "dbf" + try: + self.dbf = open("%s.%s" % (shapefile_name, dbf_ext), "rb") + self._files_to_close.append(self.dbf) + except IOError: + try: + self.dbf = open("%s.%s" % (shapefile_name, dbf_ext.upper()), "rb") + self._files_to_close.append(self.dbf) + except IOError: + pass + + def __del__(self): + self.close() + + def close(self): + # Close any files that the reader opened (but not those given by user) + for attribute in self._files_to_close: + if hasattr(attribute, "close"): + try: + attribute.close() + except IOError: + pass + self._files_to_close = [] + + def __getFileObj(self, f): + """Checks to see if the requested shapefile file object is + available. If not a ShapefileException is raised.""" + if not f: + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object." 
+ ) + if self.shp and self.shpLength is None: + self.load() + if self.dbf and len(self.fields) == 0: + self.load() + return f + + def __restrictIndex(self, i): + """Provides list-like handling of a record index with a clearer + error message if the index is out of bounds.""" + if self.numRecords: + rmax = self.numRecords - 1 + if abs(i) > rmax: + raise IndexError("Shape or Record index out of range.") + if i < 0: + i = range(self.numRecords)[i] + return i + + def __shpHeader(self): + """Reads the header information from a .shp file.""" + if not self.shp: + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no shp file found" + ) + shp = self.shp + # File length (16-bit word * 2 = bytes) + shp.seek(24) + self.shpLength = unpack(">i", shp.read(4))[0] * 2 + # Shape type + shp.seek(32) + self.shapeType = unpack(" NODATA: + self.mbox.append(m) + else: + self.mbox.append(None) + + def __shape(self, oid=None, bbox=None): + """Returns the header info and geometry for a single shape.""" + f = self.__getFileObj(self.shp) + record = Shape(oid=oid) + nParts = nPoints = zmin = zmax = mmin = mmax = None + (recNum, recLength) = unpack(">2i", f.read(8)) + # Determine the start of the next record + next = f.tell() + (2 * recLength) + shapeType = unpack("= 16: + (mmin, mmax) = unpack("<2d", f.read(16)) + # Measure values less than -10e38 are nodata values according to the spec + if next - f.tell() >= nPoints * 8: + record.m = [] + for m in _Array("d", unpack("<%sd" % nPoints, f.read(nPoints * 8))): + if m > NODATA: + record.m.append(m) + else: + record.m.append(None) + else: + record.m = [None for _ in range(nPoints)] + # Read a single point + if shapeType in (1, 11, 21): + record.points = [_Array("d", unpack("<2d", f.read(16)))] + if bbox is not None: + # create bounding box for Point by duplicating coordinates + point_bbox = list(record.points[0] + record.points[0]) + # skip shape if no overlap with bounding box + if not bbox_overlap(bbox, 
point_bbox): + f.seek(next) + return None + # Read a single Z value + if shapeType == 11: + record.z = list(unpack("= 8: + (m,) = unpack(" NODATA: + record.m = [m] + else: + record.m = [None] + # Seek to the end of this record as defined by the record header because + # the shapefile spec doesn't require the actual content to meet the header + # definition. Probably allowed for lazy feature deletion. + f.seek(next) + return record + + def __shxHeader(self): + """Reads the header information from a .shx file.""" + shx = self.shx + if not shx: + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no shx file found" + ) + # File length (16-bit word * 2 = bytes) - header length + shx.seek(24) + shxRecordLength = (unpack(">i", shx.read(4))[0] * 2) - 100 + self.numShapes = shxRecordLength // 8 + + def __shxOffsets(self): + """Reads the shape offset positions from a .shx file""" + shx = self.shx + if not shx: + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no shx file found" + ) + # Jump to the first record. + shx.seek(100) + # Each index record consists of two nrs, we only want the first one + shxRecords = _Array("i", shx.read(2 * self.numShapes * 4)) + if sys.byteorder != "big": + shxRecords.byteswap() + self._offsets = [2 * el for el in shxRecords[::2]] + + def __shapeIndex(self, i=None): + """Returns the offset in a .shp file for a shape based on information + in the .shx index file.""" + shx = self.shx + # Return None if no shx or no index requested + if not shx or i is None: + return None + # At this point, we know the shx file exists + if not self._offsets: + self.__shxOffsets() + return self._offsets[i] + + def shape(self, i=0, bbox=None): + """Returns a shape object for a shape in the geometry + record file. + If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), + returns None if the shape is not within that region. 
+ """ + shp = self.__getFileObj(self.shp) + i = self.__restrictIndex(i) + offset = self.__shapeIndex(i) + if not offset: + # Shx index not available. + # Determine length of shp file + shp.seek(0, 2) + shpLength = shp.tell() + shp.seek(100) + # Do a fast shape iteration until the requested index or end of file. + unpack = Struct(">2i").unpack + _i = 0 + offset = shp.tell() + while offset < shpLength: + if _i == i: + # Reached the requested index, exit loop with the offset value + break + # Unpack the shape header only + (recNum, recLength) = unpack(shp.read(8)) + # Jump to next shape position + offset += 8 + (2 * recLength) + shp.seek(offset) + _i += 1 + # If the index was not found, it likely means the .shp file is incomplete + if _i != i: + raise ShapefileException( + "Shape index {} is out of bounds; the .shp file only contains {} shapes".format( + i, _i + ) + ) + + # Seek to the offset and read the shape + shp.seek(offset) + return self.__shape(oid=i, bbox=bbox) + + def shapes(self, bbox=None): + """Returns all shapes in a shapefile. + To only read shapes within a given spatial region, specify the 'bbox' + arg as a list or tuple of xmin,ymin,xmax,ymax. + """ + shapes = Shapes() + shapes.extend(self.iterShapes(bbox=bbox)) + return shapes + + def iterShapes(self, bbox=None): + """Returns a generator of shapes in a shapefile. Useful + for handling large shapefiles. + To only read shapes within a given spatial region, specify the 'bbox' + arg as a list or tuple of xmin,ymin,xmax,ymax. + """ + shp = self.__getFileObj(self.shp) + # Found shapefiles which report incorrect + # shp file length in the header. Can't trust + # that so we seek to the end of the file + # and figure it out. + shp.seek(0, 2) + shpLength = shp.tell() + shp.seek(100) + + if self.numShapes: + # Iterate exactly the number of shapes from shx header + for i in xrange(self.numShapes): + # MAYBE: check if more left of file or exit early? 
+ shape = self.__shape(oid=i, bbox=bbox) + if shape: + yield shape + else: + # No shx file, unknown nr of shapes + # Instead iterate until reach end of file + # Collect the offset indices during iteration + i = 0 + offsets = [] + pos = shp.tell() + while pos < shpLength: + offsets.append(pos) + shape = self.__shape(oid=i, bbox=bbox) + pos = shp.tell() + if shape: + yield shape + i += 1 + # Entire shp file consumed + # Update the number of shapes and list of offsets + assert i == len(offsets) + self.numShapes = i + self._offsets = offsets + + def __dbfHeader(self): + """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" + if not self.dbf: + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no dbf file found)" + ) + dbf = self.dbf + # read relevant header parts + dbf.seek(0) + self.numRecords, self.__dbfHdrLength, self.__recordLength = unpack( + " 0: + px, py = list(zip(*s.points))[:2] + x.extend(px) + y.extend(py) + else: + # this should not happen. + # any shape that is not null should have at least one point, and only those should be sent here. + # could also mean that earlier code failed to add points to a non-null shape. + raise Exception( + "Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." 
+ % s.shapeType + ) + bbox = [min(x), min(y), max(x), max(y)] + # update global + if self._bbox: + # compare with existing + self._bbox = [ + min(bbox[0], self._bbox[0]), + min(bbox[1], self._bbox[1]), + max(bbox[2], self._bbox[2]), + max(bbox[3], self._bbox[3]), + ] + else: + # first time bbox is being set + self._bbox = bbox + return bbox + + def __zbox(self, s): + z = [] + for p in s.points: + try: + z.append(p[2]) + except IndexError: + # point did not have z value + # setting it to 0 is probably ok, since it means all are on the same elevation + z.append(0) + zbox = [min(z), max(z)] + # update global + if self._zbox: + # compare with existing + self._zbox = [min(zbox[0], self._zbox[0]), max(zbox[1], self._zbox[1])] + else: + # first time zbox is being set + self._zbox = zbox + return zbox + + def __mbox(self, s): + mpos = 3 if s.shapeType in (11, 13, 15, 18, 31) else 2 + m = [] + for p in s.points: + try: + if p[mpos] is not None: + # mbox should only be calculated on valid m values + m.append(p[mpos]) + except IndexError: + # point did not have m value so is missing + # mbox should only be calculated on valid m values + pass + if not m: + # only if none of the shapes had m values, should mbox be set to missing m values + m.append(NODATA) + mbox = [min(m), max(m)] + # update global + if self._mbox: + # compare with existing + self._mbox = [min(mbox[0], self._mbox[0]), max(mbox[1], self._mbox[1])] + else: + # first time mbox is being set + self._mbox = mbox + return mbox + + @property + def shapeTypeName(self): + return SHAPETYPE_LOOKUP[self.shapeType] + + def bbox(self): + """Returns the current bounding box for the shapefile which is + the lower-left and upper-right corners. 
It does not contain the + elevation or measure extremes.""" + return self._bbox + + def zbox(self): + """Returns the current z extremes for the shapefile.""" + return self._zbox + + def mbox(self): + """Returns the current m extremes for the shapefile.""" + return self._mbox + + def __shapefileHeader(self, fileObj, headerType="shp"): + """Writes the specified header type to the specified file-like object. + Several of the shapefile formats are so similar that a single generic + method to read or write them is warranted.""" + f = self.__getFileObj(fileObj) + f.seek(0) + # File code, Unused bytes + f.write(pack(">6i", 9994, 0, 0, 0, 0, 0)) + # File length (Bytes / 2 = 16-bit words) + if headerType == "shp": + f.write(pack(">i", self.__shpFileLength())) + elif headerType == "shx": + f.write(pack(">i", ((100 + (self.shpNum * 8)) // 2))) + # Version, Shape type + if self.shapeType is None: + self.shapeType = NULL + f.write(pack("<2i", 1000, self.shapeType)) + # The shapefile's bounding box (lower left, upper right) + if self.shapeType != 0: + try: + bbox = self.bbox() + if bbox is None: + # The bbox is initialized with None, so this would mean the shapefile contains no valid geometries. + # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. + # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. + # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. + bbox = [0, 0, 0, 0] + f.write(pack("<4d", *bbox)) + except error: + raise ShapefileException( + "Failed to write shapefile bounding box. Floats required." 
+ ) + else: + f.write(pack("<4d", 0, 0, 0, 0)) + # Elevation + if self.shapeType in (11, 13, 15, 18): + # Z values are present in Z type + zbox = self.zbox() + if zbox is None: + # means we have empty shapefile/only null geoms (see commentary on bbox above) + zbox = [0, 0] + else: + # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s + zbox = [0, 0] + # Measure + if self.shapeType in (11, 13, 15, 18, 21, 23, 25, 28, 31): + # M values are present in M or Z type + mbox = self.mbox() + if mbox is None: + # means we have empty shapefile/only null geoms (see commentary on bbox above) + mbox = [0, 0] + else: + # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s + mbox = [0, 0] + # Try writing + try: + f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) + except error: + raise ShapefileException( + "Failed to write shapefile elevation and measure values. Floats required." + ) + + def __dbfHeader(self): + """Writes the dbf header and field descriptors.""" + f = self.__getFileObj(self.dbf) + f.seek(0) + version = 3 + year, month, day = time.localtime()[:3] + year -= 1900 + # Get all fields, ignoring DeletionFlag if specified + fields = [field for field in self.fields if field[0] != "DeletionFlag"] + # Ensure has at least one field + if not fields: + raise ShapefileException( + "Shapefile dbf file must contain at least one field." + ) + numRecs = self.recNum + numFields = len(fields) + headerLength = numFields * 32 + 33 + if headerLength >= 65535: + raise ShapefileException( + "Shapefile dbf header length exceeds maximum length." + ) + recordLength = sum([int(field[2]) for field in fields]) + 1 + header = pack( + "2i", self.shpNum, 0)) + start = f.tell() + # Shape Type + if self.shapeType is None and s.shapeType != NULL: + self.shapeType = s.shapeType + if s.shapeType != NULL and s.shapeType != self.shapeType: + raise Exception( + "The shape's type (%s) must match the type of the shapefile (%s)." 
+ % (s.shapeType, self.shapeType) + ) + f.write(pack(" 2 else 0)) for p in s.points] + except error: + raise ShapefileException( + "Failed to write elevation values for record %s. Expected floats." + % self.shpNum + ) + # Write m extremes and values + # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA + # Note: missing m values are autoset to NODATA. + if s.shapeType in (13, 15, 18, 23, 25, 28, 31): + try: + f.write(pack("<2d", *self.__mbox(s))) + except error: + raise ShapefileException( + "Failed to write measure extremes for record %s. Expected floats" + % self.shpNum + ) + try: + if hasattr(s, "m"): + # if m values are stored in attribute + f.write( + pack( + "<%sd" % len(s.m), + *[m if m is not None else NODATA for m in s.m], + ) + ) + else: + # if m values are stored as 3rd/4th dimension + # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) + mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2 + [ + f.write( + pack( + " mpos and p[mpos] is not None + else NODATA, + ) + ) + for p in s.points + ] + except error: + raise ShapefileException( + "Failed to write measure values for record %s. Expected floats" + % self.shpNum + ) + # Write a single point + if s.shapeType in (1, 11, 21): + try: + f.write(pack("<2d", s.points[0][0], s.points[0][1])) + except error: + raise ShapefileException( + "Failed to write point for record %s. Expected floats." + % self.shpNum + ) + # Write a single Z value + # Note: missing z values are autoset to 0, but not sure if this is ideal. 
+ if s.shapeType == 11: + # update the global z box + self.__zbox(s) + # then write value + if hasattr(s, "z"): + # if z values are stored in attribute + try: + if not s.z: + s.z = (0,) + f.write(pack("i", length)) + f.seek(finish) + return offset, length + + def __shxRecord(self, offset, length): + """Writes the shx records.""" + f = self.__getFileObj(self.shx) + try: + f.write(pack(">i", offset // 2)) + except error: + raise ShapefileException( + "The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones." + ) + f.write(pack(">i", length)) + + def record(self, *recordList, **recordDict): + """Creates a dbf attribute record. You can submit either a sequence of + field values or keyword arguments of field names and values. Before + adding records you must add fields for the record values using the + field() method. If the record values exceed the number of fields the + extra ones won't be added. In the case of using keyword arguments to specify + field/value pairs only fields matching the already registered fields + will be added.""" + # Balance if already not balanced + if self.autoBalance and self.recNum > self.shpNum: + self.balance() + + fieldCount = sum((1 for field in self.fields if field[0] != "DeletionFlag")) + if recordList: + record = list(recordList) + while len(record) < fieldCount: + record.append("") + elif recordDict: + record = [] + for field in self.fields: + if field[0] == "DeletionFlag": + continue # ignore deletionflag field in case it was specified + if field[0] in recordDict: + val = recordDict[field[0]] + if val is None: + record.append("") + else: + record.append(val) + else: + record.append("") # need empty value for missing dict entries + else: + # Blank fields for empty record + record = ["" for _ in range(fieldCount)] + self.__dbfRecord(record) + + def __dbfRecord(self, record): + """Writes the dbf records.""" + f = self.__getFileObj(self.dbf) + if self.recNum 
== 0: + # first records, so all fields should be set + # allowing us to write the dbf header + # cannot change the fields after this point + self.__dbfHeader() + # first byte of the record is deletion flag, always disabled + f.write(b" ") + # begin + self.recNum += 1 + fields = ( + field for field in self.fields if field[0] != "DeletionFlag" + ) # ignore deletionflag field in case it was specified + for (fieldName, fieldType, size, deci), value in zip(fields, record): + # write + fieldType = fieldType.upper() + size = int(size) + if fieldType in ("N", "F"): + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + if value in MISSING: + value = b"*" * size # QGIS NULL + elif not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + value = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. + value = int(float(value)) + value = format(value, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + else: + value = float(value) + value = format(value, ".%sf" % deci)[:size].rjust( + size + ) # caps the size if exceeds the field size + elif fieldType == "D": + # date: 8 bytes - date stored as a string in the format YYYYMMDD. + if isinstance(value, date): + value = "{:04d}{:02d}{:02d}".format( + value.year, value.month, value.day + ) + elif isinstance(value, list) and len(value) == 3: + value = "{:04d}{:02d}{:02d}".format(*value) + elif value in MISSING: + value = b"0" * 8 # QGIS NULL for date type + elif is_string(value) and len(value) == 8: + pass # value is already a date string + else: + raise ShapefileException( + "Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value." + ) + elif fieldType == "L": + # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
+ if value in MISSING: + value = b" " # missing is set to space + elif value in [True, 1]: + value = b"T" + elif value in [False, 0]: + value = b"F" + else: + value = b" " # unknown is set to space + else: + # anything else is forced to string, truncated to the length of the field + value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) + if not isinstance(value, bytes): + # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) + value = b( + value, "ascii", self.encodingErrors + ) # should be default ascii encoding + if len(value) != size: + raise ShapefileException( + "Shapefile Writer unable to pack incorrect sized value" + " (size %d) into field '%s' (size %d)." + % (len(value), fieldName, size) + ) + f.write(value) + + def balance(self): + """Adds corresponding empty attributes or null geometry records depending + on which type of record was created to make sure all three files + are in synch.""" + while self.recNum > self.shpNum: + self.null() + while self.recNum < self.shpNum: + self.record() + + def null(self): + """Creates a null shape.""" + self.shape(Shape(NULL)) + + def point(self, x, y): + """Creates a POINT shape.""" + shapeType = POINT + pointShape = Shape(shapeType) + pointShape.points.append([x, y]) + self.shape(pointShape) + + def pointm(self, x, y, m=None): + """Creates a POINTM shape. + If the m (measure) value is not set, it defaults to NoData.""" + shapeType = POINTM + pointShape = Shape(shapeType) + pointShape.points.append([x, y, m]) + self.shape(pointShape) + + def pointz(self, x, y, z=0, m=None): + """Creates a POINTZ shape. + If the z (elevation) value is not set, it defaults to 0. + If the m (measure) value is not set, it defaults to NoData.""" + shapeType = POINTZ + pointShape = Shape(shapeType) + pointShape.points.append([x, y, z, m]) + self.shape(pointShape) + + def multipoint(self, points): + """Creates a MULTIPOINT shape. 
+ Points is a list of xy values.""" + shapeType = MULTIPOINT + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=points, shapeType=shapeType) + + def multipointm(self, points): + """Creates a MULTIPOINTM shape. + Points is a list of xym values. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = MULTIPOINTM + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=points, shapeType=shapeType) + + def multipointz(self, points): + """Creates a MULTIPOINTZ shape. + Points is a list of xyzm values. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = MULTIPOINTZ + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=points, shapeType=shapeType) + + def line(self, lines): + """Creates a POLYLINE shape. + Lines is a collection of lines, each made up of a list of xy values.""" + shapeType = POLYLINE + self._shapeparts(parts=lines, shapeType=shapeType) + + def linem(self, lines): + """Creates a POLYLINEM shape. + Lines is a collection of lines, each made up of a list of xym values. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYLINEM + self._shapeparts(parts=lines, shapeType=shapeType) + + def linez(self, lines): + """Creates a POLYLINEZ shape. + Lines is a collection of lines, each made up of a list of xyzm values. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYLINEZ + self._shapeparts(parts=lines, shapeType=shapeType) + + def poly(self, polys): + """Creates a POLYGON shape. 
+ Polys is a collection of polygons, each made up of a list of xy values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction.""" + shapeType = POLYGON + self._shapeparts(parts=polys, shapeType=shapeType) + + def polym(self, polys): + """Creates a POLYGONM shape. + Polys is a collection of polygons, each made up of a list of xym values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYGONM + self._shapeparts(parts=polys, shapeType=shapeType) + + def polyz(self, polys): + """Creates a POLYGONZ shape. + Polys is a collection of polygons, each made up of a list of xyzm values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYGONZ + self._shapeparts(parts=polys, shapeType=shapeType) + + def multipatch(self, parts, partTypes): + """Creates a MULTIPATCH shape. + Parts is a collection of 3D surface patches, each made up of a list of xyzm values. + PartTypes is a list of types that define each of the surface patches. + The types can be any of the following module constants: TRIANGLE_STRIP, + TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. + If the z (elevation) value is not included, it defaults to 0. 
+ If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = MULTIPATCH + polyShape = Shape(shapeType) + polyShape.parts = [] + polyShape.points = [] + for part in parts: + # set part index position + polyShape.parts.append(len(polyShape.points)) + # add points + for point in part: + # Ensure point is list + if not isinstance(point, list): + point = list(point) + polyShape.points.append(point) + polyShape.partTypes = partTypes + # write the shape + self.shape(polyShape) + + def _shapeparts(self, parts, shapeType): + """Internal method for adding a shape that has multiple collections of points (parts): + lines, polygons, and multipoint shapes. + """ + polyShape = Shape(shapeType) + polyShape.parts = [] + polyShape.points = [] + # Make sure polygon rings (parts) are closed + if shapeType in (5, 15, 25, 31): + for part in parts: + if part[0] != part[-1]: + part.append(part[0]) + # Add points and part indexes + for part in parts: + # set part index position + polyShape.parts.append(len(polyShape.points)) + # add points + for point in part: + # Ensure point is list + if not isinstance(point, list): + point = list(point) + polyShape.points.append(point) + # write the shape + self.shape(polyShape) + + def field(self, name, fieldType="C", size="50", decimal=0): + """Adds a dbf field descriptor to the shapefile.""" + if fieldType == "D": + size = "8" + decimal = 0 + elif fieldType == "L": + size = "1" + decimal = 0 + if len(self.fields) >= 2046: + raise ShapefileException( + "Shapefile Writer reached maximum number of fields: 2046." 
+ ) + self.fields.append((name, fieldType, size, decimal)) + + +# Begin Testing +def test(**kwargs): + import doctest + + doctest.NORMALIZE_WHITESPACE = 1 + verbosity = kwargs.get("verbose", 0) + if verbosity == 0: + print("Running doctests...") + + # ignore py2-3 unicode differences + import re + + class Py23DocChecker(doctest.OutputChecker): + def check_output(self, want, got, optionflags): + if sys.version_info[0] == 2: + got = re.sub("u'(.*?)'", "'\\1'", got) + got = re.sub('u"(.*?)"', '"\\1"', got) + res = doctest.OutputChecker.check_output(self, want, got, optionflags) + return res + + def summarize(self): + doctest.OutputChecker.summarize(True) + + # run tests + runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity) + with open("README.md", "rb") as fobj: + test = doctest.DocTestParser().get_doctest( + string=fobj.read().decode("utf8").replace("\r\n", "\n"), + globs={}, + name="README", + filename="README.md", + lineno=0, + ) + failure_count, test_count = runner.run(test) + + # print results + if verbosity: + runner.summarize(True) + else: + if failure_count == 0: + print("All test passed successfully") + elif failure_count > 0: + runner.summarize(verbosity) + + return failure_count + + +if __name__ == "__main__": + """ + Doctests are contained in the file 'README.md', and are tested using the built-in + testing libraries. 
+ """ + failure_count = test() + sys.exit(failure_count) From cd35609c173694b5229044e073e13de007bc63e7 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:21:08 +0100 Subject: [PATCH 19/40] Try running the pre-commit hooks in CI (build.yml) --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5387f4b..ebe0dc7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,6 +29,7 @@ jobs: steps: - uses: actions/checkout@v4 + - uses: pre-commit/action@v3.0.1 - name: Install dependencies run: | python -m pip install --upgrade pip From 3089d36655dd849f93fefdd8bf26687608fd7c37 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:22:29 +0100 Subject: [PATCH 20/40] Revert "Try running the pre-commit hooks in CI (build.yml)" This reverts commit cd35609c173694b5229044e073e13de007bc63e7. --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ebe0dc7..5387f4b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,7 +29,6 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: pre-commit/action@v3.0.1 - name: Install dependencies run: | python -m pip install --upgrade pip From ecceae26498486be4ad2415eb52d96a2eb122d0d Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:27:03 +0100 Subject: [PATCH 21/40] Use requirements.test.txt to install Pytest. Install and run pre-commit hooks manually. 
--- .github/workflows/build.yml | 7 ++++++- requirements.test.txt | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5387f4b..3519d37 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -32,8 +32,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install pytest if [ -f requirements.test.txt ]; then pip install -r requirements.test.txt; fi + - name: Install pre-commit hooks + run: | + pre-commit install + - name: Run pre-commit hooks + run: | + pre-commit run --all-files - name: Test with doctest run: | python shapefile.py diff --git a/requirements.test.txt b/requirements.test.txt index 27472ef..0faff64 100644 --- a/requirements.test.txt +++ b/requirements.test.txt @@ -1,2 +1,3 @@ pytest setuptools +pre-commit \ No newline at end of file From 3194cd8b8d0c5b06c414356967a577f4c5272b45 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:35:19 +0100 Subject: [PATCH 22/40] Install Git --- .github/workflows/build.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3519d37..7950738 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,6 +29,12 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Install Git + run: | + apt-get update + apt-get install -y --no-install-recommends git + apt-get purge -y --auto-remove + rm -rf /var/lib/apt/lists/* - name: Install dependencies run: | python -m pip install --upgrade pip From c9918271646d5ec8bedabefa08e5fed5eb29cc10 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:36:44 +0100 Subject: [PATCH 23/40] Revert "Install Git" This reverts commit 3194cd8b8d0c5b06c414356967a577f4c5272b45. 
--- .github/workflows/build.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7950738..3519d37 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,12 +29,6 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install Git - run: | - apt-get update - apt-get install -y --no-install-recommends git - apt-get purge -y --auto-remove - rm -rf /var/lib/apt/lists/* - name: Install dependencies run: | python -m pip install --upgrade pip From 0d33f93f7bd1702137b1df4c9aca74131c7ec4ca Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:37:02 +0100 Subject: [PATCH 24/40] Revert "Use requirements.test.txt to install Pytest. Install and run pre-commit hooks manually." This reverts commit ecceae26498486be4ad2415eb52d96a2eb122d0d. --- .github/workflows/build.yml | 7 +------ requirements.test.txt | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3519d37..5387f4b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -32,13 +32,8 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + python -m pip install pytest if [ -f requirements.test.txt ]; then pip install -r requirements.test.txt; fi - - name: Install pre-commit hooks - run: | - pre-commit install - - name: Run pre-commit hooks - run: | - pre-commit run --all-files - name: Test with doctest run: | python shapefile.py diff --git a/requirements.test.txt b/requirements.test.txt index 0faff64..27472ef 100644 --- a/requirements.test.txt +++ b/requirements.test.txt @@ -1,3 +1,2 @@ pytest setuptools -pre-commit \ No newline at end of file From 2616d2367385a6ac26476f5e8ed837fbf7da1ae1 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:39:43 +0100 Subject: [PATCH 25/40] Run 
pre-commit legacy action in a separate job --- .github/workflows/build.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5387f4b..dc025f1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,8 +10,14 @@ on: workflow_dispatch: jobs: - build: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - uses: pre-commit/action@v3.0.1 + build: strategy: fail-fast: false matrix: From fc5d7261b49a5cdc2e3243613d46906d7e0f61c0 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:44:17 +0100 Subject: [PATCH 26/40] Disable end-of-file-fixer. Autofix locally. --- .dmypy.json | 2 +- .github/ISSUE_TEMPLATE/bug.yml | 8 +- .github/ISSUE_TEMPLATE/newfeature.yml | 2 +- .github/ISSUE_TEMPLATE/question.yml | 2 +- .github/ISSUE_TEMPLATE/unexpected.yml | 8 +- .pre-commit-config.yaml | 1 - LICENSE.TXT | 2 +- README.md | 408 +++++++++++++------------- 8 files changed, 216 insertions(+), 217 deletions(-) diff --git a/.dmypy.json b/.dmypy.json index 4eab44d..9267165 100644 --- a/.dmypy.json +++ b/.dmypy.json @@ -1 +1 @@ -{"pid": 7632, "connection_name": "\\\\.\\pipe\\dmypy-_CCjf3E3.pipe"} +{"pid": 1124, "connection_name": "\\\\.\\pipe\\dmypy-9O9HsdYf.pipe"} diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml index abd5383..aa7e47b 100644 --- a/.github/ISSUE_TEMPLATE/bug.yml +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -7,7 +7,7 @@ body: id: pyshp-version attributes: label: PyShp Version - description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. + description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. placeholder: ... 
validations: required: true @@ -15,7 +15,7 @@ body: id: python-version attributes: label: Python Version - description: Please input the version of the Python executable. + description: Please input the version of the Python executable. placeholder: ... validations: required: true @@ -23,7 +23,7 @@ body: id: your-code attributes: label: Your code - description: Please copy-paste the relevant parts of your code or script that triggered the error. + description: Please copy-paste the relevant parts of your code or script that triggered the error. placeholder: ... render: shell validations: @@ -41,7 +41,7 @@ body: id: notes attributes: label: Other notes - description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? + description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? placeholder: ... validations: required: false \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/newfeature.yml b/.github/ISSUE_TEMPLATE/newfeature.yml index f35326d..afb043a 100644 --- a/.github/ISSUE_TEMPLATE/newfeature.yml +++ b/.github/ISSUE_TEMPLATE/newfeature.yml @@ -15,7 +15,7 @@ body: id: contribute attributes: label: Contributions - description: Would you be interested to contribute code that adds this functionality through a Pull Request? We gladly accept PRs - it's much faster and you'll be added a contributor. + description: Would you be interested to contribute code that adds this functionality through a Pull Request? We gladly accept PRs - it's much faster and you'll be added a contributor. options: - label: I am interested in implementing the described feature request and submit as a PR. 
required: false \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml index 76dfb68..d8c0cd0 100644 --- a/.github/ISSUE_TEMPLATE/question.yml +++ b/.github/ISSUE_TEMPLATE/question.yml @@ -6,7 +6,7 @@ body: - type: textarea id: question attributes: - label: What's your question? + label: What's your question? description: Please describe what you would like to know about PyShp, e.g. how to do something. placeholder: ... validations: diff --git a/.github/ISSUE_TEMPLATE/unexpected.yml b/.github/ISSUE_TEMPLATE/unexpected.yml index 07ed85c..bf0a577 100644 --- a/.github/ISSUE_TEMPLATE/unexpected.yml +++ b/.github/ISSUE_TEMPLATE/unexpected.yml @@ -7,7 +7,7 @@ body: id: pyshp-version attributes: label: PyShp Version - description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. + description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. placeholder: ... validations: required: true @@ -15,7 +15,7 @@ body: id: python-version attributes: label: Python Version - description: Please input the version of the Python executable. + description: Please input the version of the Python executable. placeholder: ... validations: required: true @@ -23,7 +23,7 @@ body: id: your-code attributes: label: Your code - description: Please copy-paste the relevant parts of your code or script that you tried to run. + description: Please copy-paste the relevant parts of your code or script that you tried to run. placeholder: ... render: shell validations: @@ -48,7 +48,7 @@ body: id: notes attributes: label: Other notes - description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? + description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? placeholder: ... 
validations: required: false \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4ad9670..2c32b6e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,6 @@ repos: rev: v2.3.0 hooks: - id: check-yaml - - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.6.4 diff --git a/LICENSE.TXT b/LICENSE.TXT index 3ab02f3..eb66e40 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -1,5 +1,5 @@ The MIT License (MIT) - + Copyright � 2013 Joel Lawhead Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the �Software�), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: diff --git a/README.md b/README.md index f16f808..c679f0b 100644 --- a/README.md +++ b/README.md @@ -76,14 +76,14 @@ despite the numerous ways to store and exchange GIS data available today. Pyshp is compatible with Pythons >= 3.9. -This document provides examples for using PyShp to read and write shapefiles. However +This document provides examples for using PyShp to read and write shapefiles. However many more examples are continually added to the blog [http://GeospatialPython.com](http://GeospatialPython.com), -and by searching for PyShp on [https://gis.stackexchange.com](https://gis.stackexchange.com). +and by searching for PyShp on [https://gis.stackexchange.com](https://gis.stackexchange.com). Currently the sample census blockgroup shapefile referenced in the examples is available on the GitHub project site at [https://github.com/GeospatialPython/pyshp](https://github.com/GeospatialPython/pyshp). 
These examples are straight-forward and you can also easily run them against your -own shapefiles with minimal modification. +own shapefiles with minimal modification. Important: If you are new to GIS you should read about map projections. Please visit: [https://github.com/GeospatialPython/pyshp/wiki/Map-Projections](https://github.com/GeospatialPython/pyshp/wiki/Map-Projections) @@ -105,7 +105,7 @@ part of your geospatial project. ### New Features: -- Added support for pathlib and path-like shapefile filepaths (@mwtoews). +- Added support for pathlib and path-like shapefile filepaths (@mwtoews). - Allow reading individual file extensions via filepaths. ### Improvements: @@ -119,7 +119,7 @@ part of your geospatial project. - More robust handling of corrupt shapefiles (fixes #235) - Fix errors when writing to individual file-handles (fixes #237) - Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) -- Fix test issues in environments without network access (@sebastic, @musicinmybrain). +- Fix test issues in environments without network access (@sebastic, @musicinmybrain). ## 2.2.0 @@ -132,7 +132,7 @@ part of your geospatial project. ### Improvements: -- More examples and restructuring of README. +- More examples and restructuring of README. - More informative Shape to geojson warnings (see #219). - Add shapefile.VERBOSE flag to control warnings verbosity (default True). - Shape object information when calling repr(). @@ -189,7 +189,7 @@ part of your geospatial project. ### New Features: -- Added back read/write support for unicode field names. +- Added back read/write support for unicode field names. - Improved Record representation - More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() @@ -201,38 +201,38 @@ part of your geospatial project. ## 2.0.0 -The newest version of PyShp, version 2.0 introduced some major new improvements. 
+The newest version of PyShp, version 2.0 introduced some major new improvements. A great thanks to all who have contributed code and raised issues, and for everyone's -patience and understanding during the transition period. -Some of the new changes are incompatible with previous versions. +patience and understanding during the transition period. +Some of the new changes are incompatible with previous versions. Users of the previous version 1.x should therefore take note of the following changes -(Note: Some contributor attributions may be missing): +(Note: Some contributor attributions may be missing): ### Major Changes: -- Full support for unicode text, with custom encoding, and exception handling. - - Means that the Reader returns unicode, and the Writer accepts unicode. -- PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. +- Full support for unicode text, with custom encoding, and exception handling. + - Means that the Reader returns unicode, and the Writer accepts unicode. +- PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. - Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: - - Specify filepath/destination and text encoding when creating the Writer. - - The file is written incrementally with each call to shape/record. - - Adding shapes is now done using dedicated methods for each shapetype. + - Specify filepath/destination and text encoding when creating the Writer. + - The file is written incrementally with each call to shape/record. + - Adding shapes is now done using dedicated methods for each shapetype. - Reading shapefiles is now more convenient: - - Shapefiles can be opened using the context manager, and files are properly closed. - - Shapefiles can be iterated, have a length, and supports the geo interface. 
+ - Shapefiles can be opened using the context manager, and files are properly closed. + - Shapefiles can be iterated, have a length, and supports the geo interface. - New ways of inspecting shapefile metadata by printing. [@megies] - More convenient accessing of Record values as attributes. [@philippkraft] - - More convenient shape type name checking. [@megies] -- Add more support and documentation for MultiPatch 3D shapes. -- The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. -- Better documentation of previously unclear aspects, such as field types. + - More convenient shape type name checking. [@megies] +- Add more support and documentation for MultiPatch 3D shapes. +- The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. +- Better documentation of previously unclear aspects, such as field types. ### Important Fixes: - More reliable/robust: - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. [@ShinNoNoir] - - Improved parsing of field value types, fixed errors and made more flexible. + - Improved parsing of field value types, fixed errors and made more flexible. - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] - Fix some geo interface errors, including checking polygon directions. - Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] @@ -275,7 +275,7 @@ OR >>> sf = shapefile.Reader("shapefiles/blockgroups.dbf") OR any of the other 5+ formats which are potentially part of a shapefile. The -library does not care about file extensions. You can also specify that you only +library does not care about file extensions. 
You can also specify that you only want to read some of the file extensions through the use of keyword arguments: @@ -283,7 +283,7 @@ want to read some of the file extensions through the use of keyword arguments: #### Reading Shapefiles from Zip Files -If your shapefile is wrapped inside a zip file, the library is able to handle that too, meaning you don't have to worry about unzipping the contents: +If your shapefile is wrapped inside a zip file, the library is able to handle that too, meaning you don't have to worry about unzipping the contents: >>> sf = shapefile.Reader("shapefiles/blockgroups.zip") @@ -295,7 +295,7 @@ If the zip file contains multiple shapefiles, just specify which shapefile to re #### Reading Shapefiles from URLs -Finally, you can use all of the above methods to read shapefiles directly from the internet, by giving a url instead of a local path, e.g.: +Finally, you can use all of the above methods to read shapefiles directly from the internet, by giving a url instead of a local path, e.g.: >>> # from a zipped shapefile on website @@ -337,8 +337,8 @@ objects are properly closed when done reading the data: #### Reading Shapefile Meta-Data Shapefiles have a number of attributes for inspecting the file contents. -A shapefile is a container for a specific type of geometry, and this can be checked using the -shapeType attribute. +A shapefile is a container for a specific type of geometry, and this can be checked using the +shapeType attribute. >>> sf = shapefile.Reader("shapefiles/blockgroups.dbf") @@ -364,7 +364,7 @@ the existing shape types are not sequential: - POLYGONM = 25 - MULTIPOINTM = 28 - MULTIPATCH = 31 - + Based on this we can see that our blockgroups shapefile contains Polygon type shapes. 
The shape types are also defined as constants in the shapefile module, so that we can compare types more intuitively: @@ -378,8 +378,8 @@ For convenience, you can also get the name of the shape type as a string: >>> sf.shapeTypeName == 'POLYGON' True - -Other pieces of meta-data that we can check include the number of features + +Other pieces of meta-data that we can check include the number of features and the bounding box area the shapefile covers: @@ -387,10 +387,10 @@ and the bounding box area the shapefile covers: 663 >>> sf.bbox [-122.515048, 37.652916, -122.327622, 37.863433] - + Finally, if you would prefer to work with the entire shapefile in a different format, you can convert all of it to a GeoJSON dictionary, although you may lose -some information in the process, such as z- and m-values: +some information in the process, such as z- and m-values: >>> sf.__geo_interface__['type'] @@ -415,7 +415,7 @@ each shape record. >>> len(shapes) 663 - + To read a single shape by calling its index use the shape() method. The index is the shape's count from 0. So to read the 8th shape record you would use its index which is 7. @@ -457,12 +457,12 @@ shapeType Point do not have a bounding box 'bbox'. >>> shapes[3].shapeType 5 - * `shapeTypeName`: a string representation of the type of shape as defined by shapeType. Read-only. + * `shapeTypeName`: a string representation of the type of shape as defined by shapeType. Read-only. >>> shapes[3].shapeTypeName 'POLYGON' - + * `bbox`: If the shape type contains multiple points this tuple describes the lower left (x,y) coordinate and upper right corner coordinate creating a complete box around the points. If the shapeType is a @@ -496,7 +496,7 @@ shapeType Point do not have a bounding box 'bbox'. 
>>> ['%.3f' % coord for coord in shape] ['-122.471', '37.787'] -In most cases, however, if you need to do more than just type or bounds checking, you may want +In most cases, however, if you need to do more than just type or bounds checking, you may want to convert the geometry to the more human-readable [GeoJSON format](http://geojson.org), where lines and polygons are grouped for you: @@ -505,7 +505,7 @@ where lines and polygons are grouped for you: >>> geoj = s.__geo_interface__ >>> geoj["type"] 'MultiPolygon' - + The results from the shapes() method similarly supports converting to GeoJSON: @@ -514,12 +514,12 @@ The results from the shapes() method similarly supports converting to GeoJSON: Note: In some cases, if the conversion from shapefile geometry to GeoJSON encountered any problems or potential issues, a warning message will be displayed with information about the affected -geometry. To ignore or suppress these warnings, you can disable this behavior by setting the -module constant VERBOSE to False: +geometry. To ignore or suppress these warnings, you can disable this behavior by setting the +module constant VERBOSE to False: >>> shapefile.VERBOSE = False - + ### Reading Records @@ -534,12 +534,12 @@ You can call the "fields" attribute of the shapefile as a Python list. Each field is a Python list with the following information: * Field name: the name describing the data at this column index. - * Field type: the type of data at this column index. Types can be: + * Field type: the type of data at this column index. Types can be: * "C": Characters, text. * "N": Numbers, with or without decimals. * "F": Floats (same as "N"). - * "L": Logical, for boolean True/False values. - * "D": Dates. + * "L": Logical, for boolean True/False values. + * "D": Dates. * "M": Memo, has no meaning within a GIS and is part of the xbase spec instead. * Field length: the length of the data found at this column index. 
Older GIS software may truncate this length to 8 or 11 characters for "Character" @@ -571,11 +571,11 @@ attribute: ... ["UNITS3_9", "N", 8, 0], ["UNITS10_49", "N", 8, 0], ... ["UNITS50_UP", "N", 8, 0], ["MOBILEHOME", "N", 7, 0]] -The first field of a dbf file is always a 1-byte field called "DeletionFlag", -which indicates records that have been deleted but not removed. However, -since this flag is very rarely used, PyShp currently will return all records -regardless of their deletion flag, and the flag is also not included in the list of -record values. In other words, the DeletionFlag field has no real purpose, and +The first field of a dbf file is always a 1-byte field called "DeletionFlag", +which indicates records that have been deleted but not removed. However, +since this flag is very rarely used, PyShp currently will return all records +regardless of their deletion flag, and the flag is also not included in the list of +record values. In other words, the DeletionFlag field has no real purpose, and should in most cases be ignored. For instance, to get a list of all fieldnames: @@ -593,10 +593,10 @@ To read a single record call the record() method with the record's index: >>> rec = sf.record(3) - + Each record is a list-like Record object containing the values corresponding to each field in the field list (except the DeletionFlag). A record's values can be accessed by positional indexing or slicing. -For example in the blockgroups shapefile the 2nd and 3rd fields are the blockgroup id +For example in the blockgroups shapefile the 2nd and 3rd fields are the blockgroup id and the 1990 population count of that San Francisco blockgroup: @@ -604,7 +604,7 @@ and the 1990 population count of that San Francisco blockgroup: ['060750601001', 4715] For simpler access, the fields of a record can also accessed via the name of the field, -either as a key or as an attribute name. 
The blockgroup id (BKG_KEY) of the blockgroups shapefile +either as a key or as an attribute name. The blockgroup id (BKG_KEY) of the blockgroups shapefile can also be retrieved as: @@ -613,7 +613,7 @@ can also be retrieved as: >>> rec.BKG_KEY '060750601001' - + The record values can be easily integrated with other programs by converting it to a field-value dictionary: @@ -621,13 +621,13 @@ The record values can be easily integrated with other programs by converting it >>> sorted(dct.items()) [('AGE_18_29', 1467), ('AGE_30_49', 1681), ('AGE_50_64', 92), ('AGE_5_17', 848), ('AGE_65_UP', 30), ('AGE_UNDER5', 597), ('AMERI_ES', 6), ('AREA', 2.34385), ('ASIAN_PI', 452), ('BKG_KEY', '060750601001'), ('BLACK', 1007), ('DIVORCED', 149), ('FEMALES', 2095), ('FHH_CHILD', 16), ('HISPANIC', 416), ('HOUSEHOLDS', 1195), ('HSEHLD_1_F', 40), ('HSEHLD_1_M', 22), ('HSE_UNITS', 1258), ('MALES', 2620), ('MARHH_CHD', 79), ('MARHH_NO_C', 958), ('MARRIED', 2021), ('MEDIANRENT', 739), ('MEDIAN_VAL', 337500), ('MHH_CHILD', 0), ('MOBILEHOME', 0), ('NEVERMARRY', 703), ('OTHER', 288), ('OWNER_OCC', 66), ('POP1990', 4715), ('POP90_SQMI', 2011.6), ('RENTER_OCC', 3733), ('SEPARATED', 49), ('UNITS10_49', 49), ('UNITS2', 160), ('UNITS3_9', 672), ('UNITS50_UP', 0), ('UNITS_1ATT', 302), ('UNITS_1DET', 43), ('VACANT', 93), ('WHITE', 2962), ('WIDOWED', 37)] -If at a later point you need to check the record's index position in the original +If at a later point you need to check the record's index position in the original shapefile, you can do this through the "oid" attribute: >>> rec.oid 3 - + ### Reading Geometry and Records Simultaneously You may want to examine both the geometry and the attributes for a record at @@ -663,13 +663,13 @@ To get the 4th shape record from the blockgroups shapefile use the third index: >>> shapeRec = sf.shapeRecord(3) >>> shapeRec.record[1:3] ['060750601001', 4715] - + Each individual shape record also supports the _\_geo_interface\_\_ to convert it to a GeoJSON feature: 
>>> shapeRec.__geo_interface__['type'] 'Feature' - + ## Writing Shapefiles @@ -697,7 +697,7 @@ the file path and name to save to: >>> w = shapefile.Writer('shapefiles/test/testfile') >>> w.field('field1', 'C') - + File extensions are optional when reading or writing shapefiles. If you specify them PyShp ignores them anyway. When you save files you can specify a base file name that is used for all three file types. Or you can specify a name for @@ -706,9 +706,9 @@ one or more file types: >>> w = shapefile.Writer(dbf='shapefiles/test/onlydbf.dbf') >>> w.field('field1', 'C') - + In that case, any file types not assigned will not -save and only file types with file names will be saved. +save and only file types with file names will be saved. #### Writing Shapefiles to File-Like Objects @@ -738,14 +738,14 @@ write to them: >>> r = shapefile.Reader(shp=shp, shx=shx, dbf=dbf) >>> len(r) 1 - - + + #### Writing Shapefiles Using the Context Manager The "Writer" class automatically closes the open files and writes the final headers once it is garbage collected. -In case of a crash and to make the code more readable, it is nevertheless recommended -you do this manually by calling the "close()" method: +In case of a crash and to make the code more readable, it is nevertheless recommended +you do this manually by calling the "close()" method: >>> w.close() @@ -757,15 +757,15 @@ objects are properly closed and final headers written once you exit the with-cla >>> with shapefile.Writer("shapefiles/test/contextwriter") as w: ... w.field('field1', 'C') ... pass - + #### Setting the Shape Type The shape type defines the type of geometry contained in the shapefile. All of the shapes must match the shape type setting. -There are three ways to set the shape type: - * Set it when creating the class instance. - * Set it by assigning a value to an existing class instance. +There are three ways to set the shape type: + * Set it when creating the class instance. 
+ * Set it by assigning a value to an existing class instance. * Set it automatically to the type of the first non-null shape by saving the shapefile. To manually set the shape type for a Writer object when creating the Writer: @@ -784,14 +784,14 @@ OR you can set it after the Writer is created: >>> w.shapeType 1 - + ### Adding Records -Before you can add records you must first create the fields that define what types of -values will go into each attribute. +Before you can add records you must first create the fields that define what types of +values will go into each attribute. -There are several different field types, all of which support storing None values as NULL. +There are several different field types, all of which support storing None values as NULL. Text fields are created using the 'C' type, and the third 'size' argument can be customized to the expected length of text values to save space: @@ -804,12 +804,12 @@ length of text values to save space: >>> w.null() >>> w.record('Hello', 'World', 'World'*50) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == ['Hello', 'World', 'World'*50] -Date fields are created using the 'D' type, and can be created using either -date objects, lists, or a YYYYMMDD formatted string. +Date fields are created using the 'D' type, and can be created using either +date objects, lists, or a YYYYMMDD formatted string. Field length or decimal have no impact on this type: @@ -825,18 +825,18 @@ Field length or decimal have no impact on this type: >>> w.record('19980130') >>> w.record(None) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == [date(1898,1,30)] >>> assert r.record(1) == [date(1998,1,30)] >>> assert r.record(2) == [date(1998,1,30)] >>> assert r.record(3) == [None] -Numeric fields are created using the 'N' type (or the 'F' type, which is exactly the same). 
-By default the fourth decimal argument is set to zero, essentially creating an integer field. -To store floats you must set the decimal argument to the precision of your choice. -To store very large numbers you must increase the field length size to the total number of digits -(including comma and minus). +Numeric fields are created using the 'N' type (or the 'F' type, which is exactly the same). +By default the fourth decimal argument is set to zero, essentially creating an integer field. +To store floats you must set the decimal argument to the precision of your choice. +To store very large numbers you must increase the field length size to the total number of digits +(including comma and minus). >>> w = shapefile.Writer('shapefiles/test/dtype') @@ -852,15 +852,15 @@ To store very large numbers you must increase the field length size to the total >>> w.record(INT=nr, LOWPREC=nr, MEDPREC=nr, HIGHPREC=-3.2302e-25, FTYPE=nr, LARGENR=int(nr)*10**100) >>> w.record(None, None, None, None, None, None) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == [1, 1.32, 1.3217328, -3.2302e-25, 1.3217328, 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000] >>> assert r.record(1) == [None, None, None, None, None, None] - -Finally, we can create boolean fields by setting the type to 'L'. -This field can take True or False values, or 1 (True) or 0 (False). -None is interpreted as missing. + +Finally, we can create boolean fields by setting the type to 'L'. +This field can take True or False values, or 1 (True) or 0 (False). +None is interpreted as missing. >>> w = shapefile.Writer('shapefiles/test/dtype') @@ -878,7 +878,7 @@ None is interpreted as missing. >>> w.record(None) >>> w.record("Nonsense") >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> r.record(0) Record #0: [True] @@ -892,7 +892,7 @@ None is interpreted as missing. 
Record #4: [None] >>> r.record(5) Record #5: [None] - + You can also add attributes using keyword arguments where the keys are field names. @@ -909,12 +909,12 @@ You can also add attributes using keyword arguments where the keys are field nam Geometry is added using one of several convenience methods. The "null" method is used for null shapes, "point" is used for point shapes, "multipoint" is used for multipoint shapes, "line" for lines, -"poly" for polygons. +"poly" for polygons. **Adding a Null shape** -A shapefile may contain some records for which geometry is not available, and may be set using the "null" method. -Because Null shape types (shape type 0) have no geometry the "null" method is called without any arguments. +A shapefile may contain some records for which geometry is not available, and may be set using the "null" method. +Because Null shape types (shape type 0) have no geometry the "null" method is called without any arguments. >>> w = shapefile.Writer('shapefiles/test/null') @@ -928,59 +928,59 @@ Because Null shape types (shape type 0) have no geometry the "null" method is ca **Adding a Point shape** Point shapes are added using the "point" method. A point is specified by an x and -y value. +y value. >>> w = shapefile.Writer('shapefiles/test/point') >>> w.field('name', 'C') - - >>> w.point(122, 37) + + >>> w.point(122, 37) >>> w.record('point1') - + >>> w.close() **Adding a MultiPoint shape** -If your point data allows for the possibility of multiple points per feature, use "multipoint" instead. -These are specified as a list of xy point coordinates. +If your point data allows for the possibility of multiple points per feature, use "multipoint" instead. +These are specified as a list of xy point coordinates. 
>>> w = shapefile.Writer('shapefiles/test/multipoint') >>> w.field('name', 'C') - - >>> w.multipoint([[122,37], [124,32]]) + + >>> w.multipoint([[122,37], [124,32]]) >>> w.record('multipoint1') - + >>> w.close() - + **Adding a LineString shape** -For LineString shapefiles, each shape is given as a list of one or more linear features. -Each of the linear features must have at least two points. - - +For LineString shapefiles, each shape is given as a list of one or more linear features. +Each of the linear features must have at least two points. + + >>> w = shapefile.Writer('shapefiles/test/line') >>> w.field('name', 'C') - + >>> w.line([ ... [[1,5],[5,5],[5,1],[3,3],[1,1]], # line 1 ... [[3,2],[2,6]] # line 2 ... ]) - + >>> w.record('linestring1') - + >>> w.close() - + **Adding a Polygon shape** Similarly to LineString, Polygon shapes consist of multiple polygons, and must be given as a list of polygons. -The main difference is that polygons must have at least 4 points and the last point must be the same as the first. +The main difference is that polygons must have at least 4 points and the last point must be the same as the first. It's also okay if you forget to repeat the first point at the end; PyShp automatically checks and closes the polygons if you don't. It's important to note that for Polygon shapefiles, your polygon coordinates must be ordered in a clockwise direction. If any of the polygons have holes, then the hole polygon coordinates must be ordered in a counterclockwise direction. -The direction of your polygons determines how shapefile readers will distinguish between polygon outlines and holes. +The direction of your polygons determines how shapefile readers will distinguish between polygon outlines and holes. >>> w = shapefile.Writer('shapefiles/test/polygon') @@ -992,13 +992,13 @@ The direction of your polygons determines how shapefile readers will distinguish ... [[15,2], [17,6], [22,7]] # poly 2 ... 
]) >>> w.record('polygon1') - + >>> w.close() - + **Adding from an existing Shape object** Finally, geometry can be added by passing an existing "Shape" object to the "shape" method. -You can also pass it any GeoJSON dictionary or _\_geo_interface\_\_ compatible object. +You can also pass it any GeoJSON dictionary or _\_geo_interface\_\_ compatible object. This can be particularly useful for copying from one file to another: @@ -1011,14 +1011,14 @@ This can be particularly useful for copying from one file to another: >>> for shaperec in r.iterShapeRecords(): ... w.record(*shaperec.record) ... w.shape(shaperec.shape) - + >>> # or GeoJSON dicts >>> for shaperec in r.iterShapeRecords(): ... w.record(*shaperec.record) ... w.shape(shaperec.shape.__geo_interface__) - - >>> w.close() - + + >>> w.close() + ### Geometry and Record Balancing @@ -1027,17 +1027,17 @@ number of records equals the number of shapes to create a valid shapefile. You must take care to add records and shapes in the same order so that the record data lines up with the geometry data. For example: - + >>> w = shapefile.Writer('shapefiles/test/balancing', shapeType=shapefile.POINT) >>> w.field("field1", "C") >>> w.field("field2", "C") - + >>> w.record("row", "one") >>> w.point(1, 1) - + >>> w.record("row", "two") >>> w.point(2, 2) - + To help prevent accidental misalignment PyShp has an "auto balance" feature to make sure when you add either a shape or a record the two sides of the equation line up. This way if you forget to update an entry the @@ -1050,7 +1050,7 @@ the attribute autoBalance to 1 or True: >>> w.record("row", "three") >>> w.record("row", "four") >>> w.point(4, 4) - + >>> w.recNum == w.shpNum True @@ -1059,7 +1059,7 @@ to ensure the other side is up to date. When balancing is used null shapes are created on the geometry side or records with a value of "NULL" for each field is created on the attribute side. This gives you flexibility in how you build the shapefile. 
-You can create all of the shapes and then create all of the records or vice versa. +You can create all of the shapes and then create all of the records or vice versa. >>> w.autoBalance = 0 @@ -1069,16 +1069,16 @@ You can create all of the shapes and then create all of the records or vice vers >>> w.point(5, 5) >>> w.point(6, 6) >>> w.balance() - + >>> w.recNum == w.shpNum True If you do not use the autoBalance() or balance() method and forget to manually balance the geometry and attributes the shapefile will be viewed as corrupt by most shapefile software. - + ### Writing .prj files -A .prj file, or projection file, is a simple text file that stores a shapefile's map projection and coordinate reference system to help mapping software properly locate the geometry on a map. If you don't have one, you may get confusing errors when you try and use the shapefile you created. The GIS software may complain that it doesn't know the shapefile's projection and refuse to accept it, it may assume the shapefile is the same projection as the rest of your GIS project and put it in the wrong place, or it might assume the coordinates are an offset in meters from latitude and longitude 0,0 which will put your data in the middle of the ocean near Africa. The text in the .prj file is a [Well-Known-Text (WKT) projection string](https://en.wikipedia.org/wiki/Well-known_text_representation_of_coordinate_reference_systems). Projection strings can be quite long so they are often referenced using numeric codes call EPSG codes. The .prj file must have the same base name as your shapefile. So for example if you have a shapefile named "myPoints.shp", the .prj file must be named "myPoints.prj". +A .prj file, or projection file, is a simple text file that stores a shapefile's map projection and coordinate reference system to help mapping software properly locate the geometry on a map. If you don't have one, you may get confusing errors when you try and use the shapefile you created. 
The GIS software may complain that it doesn't know the shapefile's projection and refuse to accept it, it may assume the shapefile is the same projection as the rest of your GIS project and put it in the wrong place, or it might assume the coordinates are an offset in meters from latitude and longitude 0,0 which will put your data in the middle of the ocean near Africa. The text in the .prj file is a [Well-Known-Text (WKT) projection string](https://en.wikipedia.org/wiki/Well-known_text_representation_of_coordinate_reference_systems). Projection strings can be quite long so they are often referenced using numeric codes call EPSG codes. The .prj file must have the same base name as your shapefile. So for example if you have a shapefile named "myPoints.shp", the .prj file must be named "myPoints.prj". If you're using the same projection over and over, the following is a simple way to create the .prj file assuming your base filename is stored in a variable called "filename": @@ -1092,17 +1092,17 @@ If you're using the same projection over and over, the following is a simple way prj.write(wkt) ``` -If you need to dynamically fetch WKT projection strings, you can use the pure Python [PyCRS](https://github.com/karimbahgat/PyCRS) module which has a number of useful features. +If you need to dynamically fetch WKT projection strings, you can use the pure Python [PyCRS](https://github.com/karimbahgat/PyCRS) module which has a number of useful features. # Advanced Use ## Common Errors and Fixes -Below we list some commonly encountered errors and ways to fix them. +Below we list some commonly encountered errors and ways to fix them. ### Warnings and Logging -By default, PyShp chooses to be transparent and provide the user with logging information and warnings about non-critical issues when reading or writing shapefiles. This behavior is controlled by the module constant `VERBOSE` (which defaults to True). 
If you would rather suppress this information, you can simply set this to False: +By default, PyShp chooses to be transparent and provide the user with logging information and warnings about non-critical issues when reading or writing shapefiles. This behavior is controlled by the module constant `VERBOSE` (which defaults to True). If you would rather suppress this information, you can simply set this to False: >>> shapefile.VERBOSE = False @@ -1115,21 +1115,21 @@ All logging happens under the namespace `shapefile`. So another way to suppress ### Shapefile Encoding Errors -PyShp supports reading and writing shapefiles in any language or character encoding, and provides several options for decoding and encoding text. -Most shapefiles are written in UTF-8 encoding, PyShp's default encoding, so in most cases you don't have to specify the encoding. -If you encounter an encoding error when reading a shapefile, this means the shapefile was likely written in a non-utf8 encoding. +PyShp supports reading and writing shapefiles in any language or character encoding, and provides several options for decoding and encoding text. +Most shapefiles are written in UTF-8 encoding, PyShp's default encoding, so in most cases you don't have to specify the encoding. +If you encounter an encoding error when reading a shapefile, this means the shapefile was likely written in a non-utf8 encoding. For instance, when working with English language shapefiles, a common reason for encoding errors is that the shapefile was written in Latin-1 encoding. -For reading shapefiles in any non-utf8 encoding, such as Latin-1, just -supply the encoding option when creating the Reader class. +For reading shapefiles in any non-utf8 encoding, such as Latin-1, just +supply the encoding option when creating the Reader class. 
>>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="latin1") >>> r.record(0) == [2, u'Ñandú'] True - -Once you have loaded the shapefile, you may choose to save it using another more supportive encoding such -as UTF-8. Assuming the new encoding supports the characters you are trying to write, reading it back in -should give you the same unicode string you started with. + +Once you have loaded the shapefile, you may choose to save it using another more supportive encoding such +as UTF-8. Assuming the new encoding supports the characters you are trying to write, reading it back in +should give you the same unicode string you started with. >>> w = shapefile.Writer("shapefiles/test/latin_as_utf8.shp", encoding="utf8") @@ -1137,15 +1137,15 @@ should give you the same unicode string you started with. >>> w.record(*r.record(0)) >>> w.null() >>> w.close() - + >>> r = shapefile.Reader("shapefiles/test/latin_as_utf8.shp", encoding="utf8") >>> r.record(0) == [2, u'Ñandú'] True - + If you supply the wrong encoding and the string is unable to be decoded, PyShp will by default raise an exception. If however, on rare occasion, you are unable to find the correct encoding and want to ignore or replace encoding errors, you can specify the "encodingErrors" to be used by the decode method. This -applies to both reading and writing. +applies to both reading and writing. >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="ascii", encodingErrors="replace") @@ -1156,8 +1156,8 @@ applies to both reading and writing. ## Reading Large Shapefiles -Despite being a lightweight library, PyShp is designed to be able to read shapefiles of any size, allowing you to work with hundreds of thousands or even millions -of records and complex geometries. +Despite being a lightweight library, PyShp is designed to be able to read shapefiles of any size, allowing you to work with hundreds of thousands or even millions +of records and complex geometries. 
### Iterating through a shapefile @@ -1167,22 +1167,22 @@ As an example, let's load this Natural Earth shapefile of more than 4000 global >>> sf = shapefile.Reader("https://github.com/nvkelso/natural-earth-vector/blob/master/10m_cultural/ne_10m_admin_1_states_provinces?raw=true") When first creating the Reader class, the library only reads the header information -and leaves the rest of the file contents alone. Once you call the records() and shapes() -methods however, it will attempt to read the entire file into memory at once. +and leaves the rest of the file contents alone. Once you call the records() and shapes() +methods however, it will attempt to read the entire file into memory at once. For very large files this can result in MemoryError. So when working with large files it is recommended to use instead the iterShapes(), iterRecords(), or iterShapeRecords() -methods instead. These iterate through the file contents one at a time, enabling you to loop -through them while keeping memory usage at a minimum. +methods instead. These iterate through the file contents one at a time, enabling you to loop +through them while keeping memory usage at a minimum. >>> for shape in sf.iterShapes(): ... # do something here ... pass - + >>> for rec in sf.iterRecords(): ... # do something here ... pass - + >>> for shapeRec in sf.iterShapeRecords(): ... # do something here ... pass @@ -1202,7 +1202,7 @@ By default when reading the attribute records of a shapefile, pyshp unpacks and ... pass >>> rec Record #4595: ['Birgu', 'Malta'] - + ### Attribute filtering In many cases, we aren't interested in all entries of a shapefile, but rather only want to retrieve a small subset of records by filtering on some attribute. 
To avoid wasting time reading records and shapes that we don't need, we can start by iterating only the records and fields of interest, check if the record matches some condition as a way to filter the data, and finally load the full record and shape geometry for those that meet the condition: @@ -1222,7 +1222,7 @@ In many cases, we aren't interested in all entries of a shapefile, but rather on 'Maekel' 'Anseba' -Selectively reading only the necessary data in this way is particularly useful for efficiently processing a limited subset of data from very large files or when looping through a large number of files, especially if they contain large attribute tables or complex shape geometries. +Selectively reading only the necessary data in this way is particularly useful for efficiently processing a limited subset of data from very large files or when looping through a large number of files, especially if they contain large attribute tables or complex shape geometries. ### Spatial filtering @@ -1253,23 +1253,23 @@ Another common use-case is that we only want to read those records that are loca Record #2037: ['Al Hudaydah', 'Yemen'] Record #3741: ['Anseba', 'Eritrea'] -This functionality means that shapefiles can be used as a bare-bones spatially indexed database, with very fast bounding box queries for even the largest of shapefiles. Note that, as with all spatial indexing, this method does not guarantee that the *geometries* of the resulting matches overlap the queried region, only that their *bounding boxes* overlap. +This functionality means that shapefiles can be used as a bare-bones spatially indexed database, with very fast bounding box queries for even the largest of shapefiles. Note that, as with all spatial indexing, this method does not guarantee that the *geometries* of the resulting matches overlap the queried region, only that their *bounding boxes* overlap. 
## Writing large shapefiles -Similar to the Reader class, the shapefile Writer class uses a streaming approach to keep memory -usage at a minimum and allow writing shapefiles of arbitrarily large sizes. The library takes care of this under-the-hood by immediately -writing each geometry and record to disk the moment they -are added using shape() or record(). Once the writer is closed, exited, or garbage -collected, the final header information is calculated and written to the beginning of -the file. +Similar to the Reader class, the shapefile Writer class uses a streaming approach to keep memory +usage at a minimum and allow writing shapefiles of arbitrarily large sizes. The library takes care of this under-the-hood by immediately +writing each geometry and record to disk the moment they +are added using shape() or record(). Once the writer is closed, exited, or garbage +collected, the final header information is calculated and written to the beginning of +the file. ### Merging multiple shapefiles -This means that it's possible to merge hundreds or thousands of shapefiles, as -long as you iterate through the source files to avoid loading everything into +This means that it's possible to merge hundreds or thousands of shapefiles, as +long as you iterate through the source files to avoid loading everything into memory. The following example copies the contents of a shapefile to a new file 10 times: >>> # create writer @@ -1295,12 +1295,12 @@ memory. The following example copies the contents of a shapefile to a new file 1 >>> # close the writer >>> w.close() -In this trivial example, we knew that all files had the exact same field names, ordering, and types. In other scenarios, you will have to additionally make sure that all shapefiles have the exact same fields in the same order, and that they all contain the same geometry type. +In this trivial example, we knew that all files had the exact same field names, ordering, and types. 
In other scenarios, you will have to additionally make sure that all shapefiles have the exact same fields in the same order, and that they all contain the same geometry type. ### Editing shapefiles -If you need to edit a shapefile you would have to read the -file one record at a time, modify or filter the contents, and write it back out. For instance, to create a copy of a shapefile that only keeps a subset of relevant fields: +If you need to edit a shapefile you would have to read the +file one record at a time, modify or filter the contents, and write it back out. For instance, to create a copy of a shapefile that only keeps a subset of relevant fields: >>> # create writer >>> w = shapefile.Writer('shapefiles/test/edit') @@ -1325,7 +1325,7 @@ file one record at a time, modify or filter the contents, and write it back out. ## 3D and Other Geometry Types Most shapefiles store conventional 2D points, lines, or polygons. But the shapefile format is also capable -of storing various other types of geometries as well, including complex 3D surfaces and objects. +of storing various other types of geometries as well, including complex 3D surfaces and objects. ### Shapefiles with measurement (M) values @@ -1338,107 +1338,107 @@ or by simply omitting the third M-coordinate. >>> w = shapefile.Writer('shapefiles/test/linem') >>> w.field('name', 'C') - + >>> w.linem([ ... [[1,5,0],[5,5],[5,1,3],[3,3,None],[1,1,0]], # line with one omitted and one missing M-value ... [[3,2],[2,6]] # line without any M-values ... 
]) - + >>> w.record('linem1') - + >>> w.close() - + Shapefiles containing M-values can be examined in several ways: >>> r = shapefile.Reader('shapefiles/test/linem') - + >>> r.mbox # the lower and upper bound of M-values in the shapefile [0.0, 3.0] - + >>> r.shape(0).m # flat list of M-values [0.0, None, 3.0, None, 0.0, None, None] - + ### Shapefiles with elevation (Z) values -Elevation shape types are shapes that include an elevation value at each vertex, for instance elevation from a GPS device. -Shapes with elevation (Z) values are added with the following methods: "pointz", "multipointz", "linez", and "polyz". +Elevation shape types are shapes that include an elevation value at each vertex, for instance elevation from a GPS device. +Shapes with elevation (Z) values are added with the following methods: "pointz", "multipointz", "linez", and "polyz". The Z-values are specified by adding a third Z value to each XY coordinate. Z-values do not support the concept of missing data, but if you omit the third Z-coordinate it will default to 0. Note that Z-type shapes also support measurement (M) values added -as a fourth M-coordinate. This too is optional. - - +as a fourth M-coordinate. This too is optional. + + >>> w = shapefile.Writer('shapefiles/test/linez') >>> w.field('name', 'C') - + >>> w.linez([ ... [[1,5,18],[5,5,20],[5,1,22],[3,3],[1,1]], # line with some omitted Z-values ... [[3,2],[2,6]], # line without any Z-values ... [[3,2,15,0],[2,6,13,3],[1,9,14,2]] # line with both Z- and M-values ... ]) - + >>> w.record('linez1') - + >>> w.close() - + To examine a Z-type shapefile you can do: >>> r = shapefile.Reader('shapefiles/test/linez') - + >>> r.zbox # the lower and upper bound of Z-values in the shapefile [0.0, 22.0] - + >>> r.shape(0).z # flat list of Z-values [18.0, 20.0, 22.0, 0.0, 0.0, 0.0, 0.0, 15.0, 13.0, 14.0] ### 3D MultiPatch Shapefiles -Multipatch shapes are useful for storing composite 3-Dimensional objects. 
+Multipatch shapes are useful for storing composite 3-Dimensional objects. A MultiPatch shape represents a 3D object made up of one or more surface parts. Each surface in "parts" is defined by a list of XYZM values (Z and M values optional), and its corresponding type is -given in the "partTypes" argument. The part type decides how the coordinate sequence is to be interpreted, and can be one +given in the "partTypes" argument. The part type decides how the coordinate sequence is to be interpreted, and can be one of the following module constants: TRIANGLE_STRIP, TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. -For instance, a TRIANGLE_STRIP may be used to represent the walls of a building, combined with a TRIANGLE_FAN to represent -its roof: +For instance, a TRIANGLE_STRIP may be used to represent the walls of a building, combined with a TRIANGLE_FAN to represent +its roof: >>> from shapefile import TRIANGLE_STRIP, TRIANGLE_FAN - + >>> w = shapefile.Writer('shapefiles/test/multipatch') >>> w.field('name', 'C') - + >>> w.multipatch([ ... [[0,0,0],[0,0,3],[5,0,0],[5,0,3],[5,5,0],[5,5,3],[0,5,0],[0,5,3],[0,0,0],[0,0,3]], # TRIANGLE_STRIP for house walls ... [[2.5,2.5,5],[0,0,3],[5,0,3],[5,5,3],[0,5,3],[0,0,3]], # TRIANGLE_FAN for pointed house roof ... ], ... partTypes=[TRIANGLE_STRIP, TRIANGLE_FAN]) # one type for each part - + >>> w.record('house1') - + >>> w.close() - + For an introduction to the various multipatch part types and examples of how to create 3D MultiPatch objects see [this -ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiPatch_Geometry_Type.pdf). +ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiPatch_Geometry_Type.pdf). + - # Testing -The testing framework is pytest, and the tests are located in test_shapefile.py. -This includes an extensive set of unit tests of the various pyshp features, -and tests against various input data. 
Some of the tests that require -internet connectivity will be skipped in offline testing environments. -In the same folder as README.md and shapefile.py, from the command line run +The testing framework is pytest, and the tests are located in test_shapefile.py. +This includes an extensive set of unit tests of the various pyshp features, +and tests against various input data. Some of the tests that require +internet connectivity will be skipped in offline testing environments. +In the same folder as README.md and shapefile.py, from the command line run ``` $ python -m pytest -``` +``` -Additionally, all the code and examples located in this file, README.md, +Additionally, all the code and examples located in this file, README.md, is tested and verified with the builtin doctest framework. A special routine for invoking the doctest is run when calling directly on shapefile.py. -In the same folder as README.md and shapefile.py, from the command line run +In the same folder as README.md and shapefile.py, from the command line run ``` $ python shapefile.py -``` +``` Linux/Mac and similar platforms will need to run `$ dos2unix README.md` in order to correct line endings in README.md. 
From f9b9e2786b036890dd68f5d022b9b48a66d72c2f Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:47:32 +0100 Subject: [PATCH 27/40] Start mypy daemon and run in same step for CI --- .pre-commit-config.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2c32b6e..f16b8e9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,9 @@ repos: hooks: - id: mypy name: mypy - entry: dmypy + entry: | + dmypy start + dmypy files: \.py$ language: python require_serial: true From a544ffc4ed0c0a5056108c481e17c86c07e8029d Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:53:12 +0100 Subject: [PATCH 28/40] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f16b8e9..94eb6cd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: name: mypy entry: | dmypy start - dmypy + dmypy check files: \.py$ language: python require_serial: true From e403c016940f785fe2c215a19732a38fdcf9b721 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:59:01 +0100 Subject: [PATCH 29/40] Just run dmypy --- .pre-commit-config.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 94eb6cd..2c32b6e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,9 +15,7 @@ repos: hooks: - id: mypy name: mypy - entry: | - dmypy start - dmypy check + entry: dmypy files: \.py$ language: python require_serial: true From da6efc72c7593a4be59a1d9b9101656f1fbe3886 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 
2024 11:02:18 +0100 Subject: [PATCH 30/40] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 03827eb..cc48439 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ dist/ *.egg-info/ *.py[cod] .vscode +.dmpy.json From e05c719ca6e0dd636da88e68ca01449bf42dae3c Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 11:05:35 +0100 Subject: [PATCH 31/40] Delete .dmypy.json --- .dmypy.json | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .dmypy.json diff --git a/.dmypy.json b/.dmypy.json deleted file mode 100644 index 9267165..0000000 --- a/.dmypy.json +++ /dev/null @@ -1 +0,0 @@ -{"pid": 1124, "connection_name": "\\\\.\\pipe\\dmypy-9O9HsdYf.pipe"} From 97d3954faac758fa7a44fee75320ba5c805aca3e Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 11:05:46 +0100 Subject: [PATCH 32/40] Delete .dmypy.json --- .dmypy.json | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .dmypy.json diff --git a/.dmypy.json b/.dmypy.json deleted file mode 100644 index 9267165..0000000 --- a/.dmypy.json +++ /dev/null @@ -1 +0,0 @@ -{"pid": 1124, "connection_name": "\\\\.\\pipe\\dmypy-9O9HsdYf.pipe"} From 8522bafa66766edb9870577b985170a9e860dce6 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 11:24:48 +0100 Subject: [PATCH 33/40] Describe changes on this branch and set up of pre-commit --- README.md | 15 +++++++++++++++ changelog.txt | 8 ++++++++ shapefile.py | 2 +- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c679f0b..b828e28 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,15 @@ part of your geospatial project. 
# Version Changes +## 3.0.0-alpha +### Breaking changes +- Dropped Pythons <= 3.8 including Python 2 +### Dev tooling +- Add pre-commit framework, and run in CI +- Runs mypy (via dmypy) in CI +- Runs Ruff and Ruff-format in CI + + ## 2.3.1 ### Bug fixes: @@ -1419,7 +1428,11 @@ its roof: For an introduction to the various multipatch part types and examples of how to create 3D MultiPatch objects see [this ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiPatch_Geometry_Type.pdf). +# Development environment +Pre-commit is used in CI to run mypy, Ruff and Ruff-format hooks. To run these hooks locally +as part of your Git workflow, install pre-commit (via pip or globally via pipx), clone PyShp, +and run `pre-commit install` in the repo's root dir. # Testing @@ -1459,10 +1472,12 @@ fiveham geospatialpython Hannes Ignacio Martinez Vazquez +James Parrott Jason Moujaes Jonty Wareing Karim Bahgat karanrn +Kurt Schwehr Kyle Kelley Louis Tiao Marcin Cuprjak diff --git a/changelog.txt b/changelog.txt index 533d704..b096672 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,4 +1,12 @@ +VERSION 3.0.0-alpha + +2024-09-15 + * Dropped Pythons <= 3.8 including Python 2 + * Add pre-commit framework, and run in CI + * Run mypy (via dmypy) in CI + * Run Ruff and Ruff-format in CI + VERSION 2.3.1 2022-07-28 diff --git a/shapefile.py b/shapefile.py index ddd4a2e..b91f24a 100644 --- a/shapefile.py +++ b/shapefile.py @@ -6,7 +6,7 @@ Compatible with Python versions >= 3.9 """ -__version__ = "2.3.1" +__version__ = "3.0.0-alpha" import array from datetime import date From 708d669802382799110251524287e407521e0371 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 11:25:26 +0100 Subject: [PATCH 34/40] Update .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index cc48439..f6d6332 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,4 @@
dist/ *.egg-info/ *.py[cod] .vscode -.dmpy.json +.dmypy.json From c6ef589d3f0beece94b6827abc6db9433f96e7b2 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 10:33:21 +0100 Subject: [PATCH 35/40] Added isort to pre-commit config , and ran it --- .pre-commit-config.yaml | 37 +++++++++++++++++++++---------------- shapefile.py | 12 +++++------- test_shapefile.py | 1 - 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2c32b6e..654613c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,23 +1,28 @@ repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.3.0 - hooks: - - id: check-yaml - - id: trailing-whitespace +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: check-yaml + - id: trailing-whitespace +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + name: isort (python) - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.6.4 hooks: - id: ruff args: [ --fix ] - id: ruff-format -- repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.11.2 - hooks: - - id: mypy - name: mypy - entry: dmypy - files: \.py$ - language: python - require_serial: true - args: ["run", "--", "--implicit-reexport", "--warn-unused-ignores", "--cache-fine-grained", "--ignore-missing-imports"] - additional_dependencies: [tokenize-rt==6.0.0] +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.11.2 + hooks: + - id: mypy + name: mypy + entry: dmypy + files: \.py$ + language: python + require_serial: true + args: ["run", "--", "--implicit-reexport", "--warn-unused-ignores", "--cache-fine-grained", "--ignore-missing-imports"] + additional_dependencies: [tokenize-rt==6.0.0] diff --git a/shapefile.py b/shapefile.py index b91f24a..b1c1360 100644 --- a/shapefile.py +++ b/shapefile.py @@ -9,22 +9,20 @@ __version__ = "3.0.0-alpha" import array -from 
datetime import date import io import logging import os -from struct import pack, unpack, calcsize, error, Struct import sys import tempfile import time import zipfile - +from collections.abc import Sequence +from datetime import date +from struct import Struct, calcsize, error, pack, unpack +from typing import Any, ByteString, Union from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse -from urllib.request import urlopen, Request - -from typing import Any, Union, ByteString -from collections.abc import Sequence +from urllib.request import Request, urlopen # Create named logger logger = logging.getLogger(__name__) diff --git a/test_shapefile.py b/test_shapefile.py index a90d4d1..5245119 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -14,7 +14,6 @@ # our imports import shapefile - # define various test shape tuples of (type, points, parts indexes, and expected geo interface output) geo_interface_tests = [ ( From 376c9e076c0a362104888f31f3602903e1f4966a Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 20:13:36 +0100 Subject: [PATCH 36/40] Update workflow files, to test on Windows and MacOS etc. --- .github/actions/test/action.yml | 33 +++++++++++++++ .github/workflows/deploy.yml | 28 ++++++++++++- .github/workflows/run_tests_and_hooks.yml | 51 +++++++++++++++++++++++ 3 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 .github/actions/test/action.yml create mode 100644 .github/workflows/run_tests_and_hooks.yml diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml new file mode 100644 index 0000000..86ec93f --- /dev/null +++ b/.github/actions/test/action.yml @@ -0,0 +1,33 @@ +name: + Test + +description: + Run pytest, and run the doctest runner (shapefile.py as a script). + +runs: + using: "composite" + steps: + # The Repo is required to already be checked out, e.g. 
by the calling workflow + + # The Python to be tested with is required to already be set up, with "python" and "pip" on the system PATH + + - name: Doctests + shell: bash + run: python shapefile.py + + - name: Install test dependencies. + shell: bash + run: | + python -m pip install --upgrade pip + pip install -r requirements.test.txt + + - name: Pytest + shell: bash + run: | + pytest + + - name: Show versions for logs. + shell: bash + run: | + python --version + python -m pytest --version \ No newline at end of file diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 70db5f7..9a1fa30 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,23 +13,47 @@ on: types: [published] jobs: - deploy: + test: + # In general, tests should be run after building a distribution, to test that distribution. + # However, as long as PyShp is a pure Python library, with pure Python deps (or no deps) + # then this would only test the packaging process, not so much the code as there are + # no binaries. runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.x' + + - name: Run tests and hooks + uses: ./.github/workflows/run_tests_and_hooks.yml + + deploy: + # Prevent deployment of releases that fail any hooks (e.g. linting) or that fail any tests.
+ needs: test + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + + - name: Install dependencies run: | python -m pip install --upgrade pip pip install build - name: Build package run: python -m build + - name: Publish package + if: github.repository == 'GeospatialPython/pyshp' uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 with: user: __token__ diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_and_hooks.yml new file mode 100644 index 0000000..14c4f81 --- /dev/null +++ b/.github/workflows/run_tests_and_hooks.yml @@ -0,0 +1,51 @@ +# This workflow will run the pre-commit hooks (including linters), and the tests with a variety of Python versions + +name: Run pre-commit hooks and tests + +on: + push: + pull_request: + branches: [ master ] + workflow_call: + workflow_dispatch: + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - uses: pre-commit/action@v3.0.1 + + + run_tests: + strategy: + fail-fast: false + matrix: + python-version: [ + "3.9", + "3.10", + "3.11", + "3.12", + "3.13.0-rc.2", + ] + os: [ + "macos-latest", + "ubuntu-latest", + "ubuntu-24.04", + "windows-latest", + ] + include: + - os: ubuntu-24.04 + python-version: "3.14.0-alpha.0" + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Run tests + uses: ./.github/actions/test \ No newline at end of file From 3eabda9a759f9edc2162046f785b948748e3ad09 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 20:14:36 +0100 Subject: [PATCH 37/40] Test Python 3.14 on Ubuntu 22.04 too --- .github/workflows/run_tests_and_hooks.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git
a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_and_hooks.yml index 14c4f81..7ad3d75 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -38,6 +38,8 @@ jobs: include: - os: ubuntu-24.04 python-version: "3.14.0-alpha.0" + - os: ubuntu-22.04 + python-version: "3.14.0-alpha.0" runs-on: ${{ matrix.os }} steps: From 94191e28f4e933be6660da1ffe0075f21593da80 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 20:33:03 +0100 Subject: [PATCH 38/40] Update shapefile.py --- shapefile.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/shapefile.py b/shapefile.py index b1c1360..295020d 100644 --- a/shapefile.py +++ b/shapefile.py @@ -19,11 +19,12 @@ from collections.abc import Sequence from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import Any, ByteString, Union +from typing import Any, Union from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen + # Create named logger logger = logging.getLogger(__name__) @@ -106,7 +107,7 @@ def b(v: Any, encoding="utf-8", encodingErrors="strict") -> bytes: def u( - v: Union[bytes, str, None, int, ByteString], + v: Union[bytes, str, None, int], encoding="utf-8", encodingErrors="strict", ) -> str: From 2778fc0de8e755109f5e6fd5a912d6dd08d01f44 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 20:35:23 +0100 Subject: [PATCH 39/40] Delete build.yml (renamed) and run isort --- .github/workflows/build.yml | 48 ------------------------------------- shapefile.py | 1 - 2 files changed, 49 deletions(-) delete mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index dc025f1..0000000 --- a/.github/workflows/build.yml +++ 
/dev/null @@ -1,48 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: -# https://docs.github.com/en/actions/use-cases-and-examples/building-and-testing/building-and-testing-python - -name: build - -on: - push: - pull_request: - workflow_dispatch: - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - - uses: pre-commit/action@v3.0.1 - - build: - strategy: - fail-fast: false - matrix: - python-version: [ - "3.9", - "3.10", - "3.11", - "3.12", - "3.13.0a2", - ] - - runs-on: ubuntu-latest - container: - image: python:${{ matrix.python-version }}-slim - - steps: - - uses: actions/checkout@v4 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install pytest - if [ -f requirements.test.txt ]; then pip install -r requirements.test.txt; fi - - name: Test with doctest - run: | - python shapefile.py - - name: Test with pytest - run: | - pytest diff --git a/shapefile.py b/shapefile.py index 295020d..e9212c8 100644 --- a/shapefile.py +++ b/shapefile.py @@ -24,7 +24,6 @@ from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen - # Create named logger logger = logging.getLogger(__name__) From 30f18e3a765c85ade1ff01fb03de0d06f0b69b77 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 19 Sep 2024 16:01:02 +0100 Subject: [PATCH 40/40] Repeat updates to main (Py2/3) branch. 
--- ...ooks.yml => run_tests_hooks_and_tools.yml} | 12 ++++++++++ README.md | 2 +- pyproject.toml | 22 +++++++++++++++++++ requirements.test.txt | 2 +- test_shapefile.py | 18 +++++++-------- 5 files changed, 45 insertions(+), 11 deletions(-) rename .github/workflows/{run_tests_and_hooks.yml => run_tests_hooks_and_tools.yml} (73%) diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_hooks_and_tools.yml similarity index 73% rename from .github/workflows/run_tests_and_hooks.yml rename to .github/workflows/run_tests_hooks_and_tools.yml index 7ad3d75..9a71aac 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -17,6 +17,18 @@ jobs: - uses: actions/setup-python@v5 - uses: pre-commit/action@v3.0.1 + pylint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: install Pylint and plugin + run: | + python -m pip install --upgrade pip + pip install pytest pylint pylint-per-file-ignores + - name: run Pylint for errors and warnings only, on test_shapefile.py + run: | + pylint --disable=R,C test_shapefile.py run_tests: strategy: diff --git a/README.md b/README.md index b828e28..8ef4994 100644 --- a/README.md +++ b/README.md @@ -1453,7 +1453,7 @@ In the same folder as README.md and shapefile.py, from the command line run $ python shapefile.py ``` -Linux/Mac and similar platforms will need to run `$ dos2unix README.md` in order +Linux/Mac and similar platforms may need to run `$ dos2unix README.md` in order to correct line endings in README.md. # Contributors diff --git a/pyproject.toml b/pyproject.toml index e6c43f4..9e09da1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,3 +64,25 @@ skip-magic-trailing-comma = false # Like Black, automatically detect the appropriate line ending. 
line-ending = "auto" + + + +[tool.pylint.MASTER] +load-plugins=[ + "pylint_per_file_ignores", +] + +[tool.pylint.'MESSAGES CONTROL'] +# Silence warning: shapefile.py:2076:20: W0212: Access to a protected +# member _from_geojson of a client class (protected-access) +# +# Silence warnings: test_shapefile.py:{783,786,799,803,06,1195}:19: +# W0212: Access to a protected member _offsets of a +# client class (protected-access) +# +# Toml multi-line string used instead of array due to: +# https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 +per-file-ignores = """ + shapefile.py:W0212 + test_shapefile.py:W0212 +""" \ No newline at end of file diff --git a/requirements.test.txt b/requirements.test.txt index 27472ef..1114173 100644 --- a/requirements.test.txt +++ b/requirements.test.txt @@ -1,2 +1,2 @@ -pytest +pytest >= 3.7 setuptools diff --git a/test_shapefile.py b/test_shapefile.py index 5245119..f560479 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -436,7 +436,7 @@ def test_empty_shape_geo_interface(): """ shape = shapefile.Shape() with pytest.raises(Exception): - shape.__geo_interface__ + getattr(shape, "__geo_interface__") @pytest.mark.parametrize("typ,points,parts,expected", geo_interface_tests) @@ -486,14 +486,14 @@ def test_reader_url(): # test with extension url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries.shp?raw=true" with shapefile.Reader(url) as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test without extension url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries?raw=true" with shapefile.Reader(url) as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True @@ 
-507,7 +507,7 @@ def test_reader_url(): # test reading zipfile from url url = "https://github.com/JamesParrott/PyShp_test_shapefile/raw/main/gis_osm_natural_a_free_1.zip" with shapefile.Reader(url) as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True @@ -519,7 +519,7 @@ def test_reader_zip(): """ # test reading zipfile only with shapefile.Reader("shapefiles/blockgroups.zip") as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True @@ -533,7 +533,7 @@ def test_reader_zip(): with shapefile.Reader( "shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp" ) as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True @@ -542,7 +542,7 @@ def test_reader_zip(): with shapefile.Reader( "shapefiles/blockgroups_multishapefile.zip/blockgroups2" ) as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True @@ -1032,7 +1032,7 @@ def test_reader_offsets(): # shx offsets should not be read during loading assert not sf._offsets # reading a shape index should trigger reading offsets from shx file - __shape = sf.shape(3) + sf.shape(3) assert len(sf._offsets) == len(sf.shapes()) @@ -1049,7 +1049,7 @@ def test_reader_offsets_no_shx(): assert not sf._offsets # reading a shape index should iterate to the shape # but the list of offsets should remain empty - __shape = sf.shape(3) + sf.shape(3) assert not sf._offsets # reading all the shapes should build the list of offsets shapes = sf.shapes()