Skip to content

Commit ca6cf56

Browse files
authored
Add pathlib._abc.PathModuleBase (#113893)
Path modules provide a subset of the `os.path` API, specifically those functions needed to provide `PurePathBase` functionality. Each `PurePathBase` subclass references its path module via a `pathmod` class attribute. This commit adds a new `PathModuleBase` class, which provides abstract methods that unconditionally raise `UnsupportedOperation`. An instance of this class is assigned to `PurePathBase.pathmod`, replacing `posixpath`. As a result, `PurePathBase` is no longer POSIX-y by default, and all its methods raise `UnsupportedOperation` courtesy of `pathmod`. Users who subclass `PurePathBase` or `PathBase` should choose the path syntax by setting `pathmod` to `posixpath`, `ntpath`, `os.path`, or their own subclass of `PathModuleBase`, as circumstances demand.
1 parent c280843 commit ca6cf56

File tree

4 files changed

+182
-59
lines changed

4 files changed

+182
-59
lines changed

Lib/pathlib/__init__.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,15 @@
3333
]
3434

3535

36+
# Reference for Windows paths can be found at
37+
# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
38+
_WIN_RESERVED_NAMES = frozenset(
39+
{'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
40+
{f'COM{c}' for c in '123456789\xb9\xb2\xb3'} |
41+
{f'LPT{c}' for c in '123456789\xb9\xb2\xb3'}
42+
)
43+
44+
3645
class _PathParents(Sequence):
3746
"""This object provides sequence-like access to the logical ancestors
3847
of a path. Don't try to construct it yourself."""
@@ -76,6 +85,10 @@ class PurePath(_abc.PurePathBase):
7685
"""
7786

7887
__slots__ = (
88+
# The `_raw_paths` slot stores unnormalized string paths. This is set
89+
# in the `__init__()` method.
90+
'_raw_paths',
91+
7992
# The `_drv`, `_root` and `_tail_cached` slots store parsed and
8093
# normalized parts of the path. They are set when any of the `drive`,
8194
# `root` or `_tail` properties are accessed for the first time. The
@@ -141,6 +154,26 @@ def __init__(self, *args):
141154
# Avoid calling super().__init__, as an optimisation
142155
self._raw_paths = paths
143156

157+
def joinpath(self, *pathsegments):
158+
"""Combine this path with one or several arguments, and return a
159+
new path representing either a subpath (if all arguments are relative
160+
paths) or a totally different path (if one of the arguments is
161+
anchored).
162+
"""
163+
return self.with_segments(self, *pathsegments)
164+
165+
def __truediv__(self, key):
166+
try:
167+
return self.with_segments(self, key)
168+
except TypeError:
169+
return NotImplemented
170+
171+
def __rtruediv__(self, key):
172+
try:
173+
return self.with_segments(key, self)
174+
except TypeError:
175+
return NotImplemented
176+
144177
def __reduce__(self):
145178
# Using the parts tuple helps share interned path parts
146179
# when pickling related paths.
@@ -386,6 +419,33 @@ def is_relative_to(self, other, /, *_deprecated):
386419
other = self.with_segments(other)
387420
return other == self or other in self.parents
388421

422+
def is_absolute(self):
423+
"""True if the path is absolute (has both a root and, if applicable,
424+
a drive)."""
425+
if self.pathmod is posixpath:
426+
# Optimization: work with raw paths on POSIX.
427+
for path in self._raw_paths:
428+
if path.startswith('/'):
429+
return True
430+
return False
431+
return self.pathmod.isabs(self)
432+
433+
def is_reserved(self):
434+
"""Return True if the path contains one of the special names reserved
435+
by the system, if any."""
436+
if self.pathmod is not ntpath or not self.name:
437+
return False
438+
439+
# NOTE: the rules for reserved names seem somewhat complicated
440+
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
441+
# exist). We err on the side of caution and return True for paths
442+
# which may not actually be considered reserved by Windows.
443+
if self.drive.startswith('\\\\'):
444+
# UNC paths are never reserved.
445+
return False
446+
name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
447+
return name.upper() in _WIN_RESERVED_NAMES
448+
389449
def as_uri(self):
390450
"""Return the path as a URI."""
391451
if not self.is_absolute():

Lib/pathlib/_abc.py

Lines changed: 67 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,13 @@
1212
"""
1313

1414
import functools
15-
import posixpath
1615
from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
1716
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
1817

1918
#
2019
# Internals
2120
#
2221

23-
# Reference for Windows paths can be found at
24-
# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
25-
_WIN_RESERVED_NAMES = frozenset(
26-
{'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
27-
{f'COM{c}' for c in '123456789\xb9\xb2\xb3'} |
28-
{f'LPT{c}' for c in '123456789\xb9\xb2\xb3'}
29-
)
30-
3122
_WINERROR_NOT_READY = 21 # drive exists but is not accessible
3223
_WINERROR_INVALID_NAME = 123 # fix for bpo-35306
3324
_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself
@@ -144,6 +135,53 @@ class UnsupportedOperation(NotImplementedError):
144135
pass
145136

146137

138+
class PathModuleBase:
139+
"""Base class for path modules, which do low-level path manipulation.
140+
141+
Path modules provide a subset of the os.path API, specifically those
142+
functions needed to provide PurePathBase functionality. Each PurePathBase
143+
subclass references its path module via a 'pathmod' class attribute.
144+
145+
Every method in this base class raises an UnsupportedOperation exception.
146+
"""
147+
148+
@classmethod
149+
def _unsupported(cls, attr):
150+
raise UnsupportedOperation(f"{cls.__name__}.{attr} is unsupported")
151+
152+
@property
153+
def sep(self):
154+
"""The character used to separate path components."""
155+
self._unsupported('sep')
156+
157+
def join(self, path, *paths):
158+
"""Join path segments."""
159+
self._unsupported('join()')
160+
161+
def split(self, path):
162+
"""Split the path into a pair (head, tail), where *head* is everything
163+
before the final path separator, and *tail* is everything after.
164+
Either part may be empty.
165+
"""
166+
self._unsupported('split()')
167+
168+
def splitroot(self, path):
169+
"""Split the pathname path into a 3-item tuple (drive, root, tail),
170+
where *drive* is a device name or mount point, *root* is a string of
171+
separators after the drive, and *tail* is everything after the root.
172+
Any part may be empty."""
173+
self._unsupported('splitroot()')
174+
175+
def normcase(self, path):
176+
"""Normalize the case of the path."""
177+
self._unsupported('normcase()')
178+
179+
def isabs(self, path):
180+
"""Returns whether the path is absolute, i.e. unaffected by the
181+
current directory or drive."""
182+
self._unsupported('isabs()')
183+
184+
147185
class PurePathBase:
148186
"""Base class for pure path objects.
149187
@@ -154,19 +192,19 @@ class PurePathBase:
154192
"""
155193

156194
__slots__ = (
157-
# The `_raw_paths` slot stores unnormalized string paths. This is set
158-
# in the `__init__()` method.
159-
'_raw_paths',
195+
# The `_raw_path` slot stores a joined string path. This is set in the
196+
# `__init__()` method.
197+
'_raw_path',
160198

161199
# The '_resolving' slot stores a boolean indicating whether the path
162200
# is being processed by `PathBase.resolve()`. This prevents duplicate
163201
# work from occurring when `resolve()` calls `stat()` or `readlink()`.
164202
'_resolving',
165203
)
166-
pathmod = posixpath
204+
pathmod = PathModuleBase()
167205

168-
def __init__(self, *paths):
169-
self._raw_paths = paths
206+
def __init__(self, path, *paths):
207+
self._raw_path = self.pathmod.join(path, *paths) if paths else path
170208
self._resolving = False
171209

172210
def with_segments(self, *pathsegments):
@@ -176,11 +214,6 @@ def with_segments(self, *pathsegments):
176214
"""
177215
return type(self)(*pathsegments)
178216

179-
@property
180-
def _raw_path(self):
181-
"""The joined but unnormalized path."""
182-
return self.pathmod.join(*self._raw_paths)
183-
184217
def __str__(self):
185218
"""Return the string representation of the path, suitable for
186219
passing to system calls."""
@@ -194,7 +227,7 @@ def as_posix(self):
194227
@property
195228
def drive(self):
196229
"""The drive prefix (letter or UNC path), if any."""
197-
return self.pathmod.splitdrive(self._raw_path)[0]
230+
return self.pathmod.splitroot(self._raw_path)[0]
198231

199232
@property
200233
def root(self):
@@ -210,7 +243,7 @@ def anchor(self):
210243
@property
211244
def name(self):
212245
"""The final path component, if any."""
213-
return self.pathmod.basename(self._raw_path)
246+
return self.pathmod.split(self._raw_path)[1]
214247

215248
@property
216249
def suffix(self):
@@ -251,10 +284,10 @@ def stem(self):
251284

252285
def with_name(self, name):
253286
"""Return a new path with the file name changed."""
254-
dirname = self.pathmod.dirname
255-
if dirname(name):
287+
split = self.pathmod.split
288+
if split(name)[0]:
256289
raise ValueError(f"Invalid name {name!r}")
257-
return self.with_segments(dirname(self._raw_path), name)
290+
return self.with_segments(split(self._raw_path)[0], name)
258291

259292
def with_stem(self, stem):
260293
"""Return a new path with the stem changed."""
@@ -336,17 +369,17 @@ def joinpath(self, *pathsegments):
336369
paths) or a totally different path (if one of the arguments is
337370
anchored).
338371
"""
339-
return self.with_segments(*self._raw_paths, *pathsegments)
372+
return self.with_segments(self._raw_path, *pathsegments)
340373

341374
def __truediv__(self, key):
342375
try:
343-
return self.joinpath(key)
376+
return self.with_segments(self._raw_path, key)
344377
except TypeError:
345378
return NotImplemented
346379

347380
def __rtruediv__(self, key):
348381
try:
349-
return self.with_segments(key, *self._raw_paths)
382+
return self.with_segments(key, self._raw_path)
350383
except TypeError:
351384
return NotImplemented
352385

@@ -371,7 +404,7 @@ def _stack(self):
371404
def parent(self):
372405
"""The logical parent of the path."""
373406
path = self._raw_path
374-
parent = self.pathmod.dirname(path)
407+
parent = self.pathmod.split(path)[0]
375408
if path != parent:
376409
parent = self.with_segments(parent)
377410
parent._resolving = self._resolving
@@ -381,43 +414,20 @@ def parent(self):
381414
@property
382415
def parents(self):
383416
"""A sequence of this path's logical parents."""
384-
dirname = self.pathmod.dirname
417+
split = self.pathmod.split
385418
path = self._raw_path
386-
parent = dirname(path)
419+
parent = split(path)[0]
387420
parents = []
388421
while path != parent:
389422
parents.append(self.with_segments(parent))
390423
path = parent
391-
parent = dirname(path)
424+
parent = split(path)[0]
392425
return tuple(parents)
393426

394427
def is_absolute(self):
395428
"""True if the path is absolute (has both a root and, if applicable,
396429
a drive)."""
397-
if self.pathmod is posixpath:
398-
# Optimization: work with raw paths on POSIX.
399-
for path in self._raw_paths:
400-
if path.startswith('/'):
401-
return True
402-
return False
403-
else:
404-
return self.pathmod.isabs(self._raw_path)
405-
406-
def is_reserved(self):
407-
"""Return True if the path contains one of the special names reserved
408-
by the system, if any."""
409-
if self.pathmod is posixpath or not self.name:
410-
return False
411-
412-
# NOTE: the rules for reserved names seem somewhat complicated
413-
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
414-
# exist). We err on the side of caution and return True for paths
415-
# which are not considered reserved by Windows.
416-
if self.drive.startswith('\\\\'):
417-
# UNC paths are never reserved.
418-
return False
419-
name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
420-
return name.upper() in _WIN_RESERVED_NAMES
430+
return self.pathmod.isabs(self._raw_path)
421431

422432
def match(self, path_pattern, *, case_sensitive=None):
423433
"""
@@ -726,7 +736,7 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
726736
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
727737

728738
pattern_parts = list(path_pattern.parts)
729-
if not self.pathmod.basename(pattern):
739+
if not self.pathmod.split(pattern)[1]:
730740
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
731741
pattern_parts.append('')
732742

Lib/test/test_pathlib/test_pathlib.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,7 @@ def tempdir(self):
11511151

11521152
def test_matches_pathbase_api(self):
11531153
our_names = {name for name in dir(self.cls) if name[0] != '_'}
1154+
our_names.remove('is_reserved') # only present in PurePath
11541155
path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'}
11551156
self.assertEqual(our_names, path_names)
11561157
for attr_name in our_names:

0 commit comments

Comments
 (0)