Skip to content

Commit 474e419

Browse files
srinivasreddy, merwok, and serhiy-storchaka
authored
gh-41872: Fix quick extraction of module docstrings from a file in pydoc (GH-127520)
It now supports docstrings with single quotes, escape sequences, raw string literals, and other Python syntax.

Co-authored-by: Éric <[email protected]>
Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent: cdfb8bc; commit: 474e419

File tree

3 files changed

+104
-15
lines changed

3 files changed

+104
-15
lines changed

Lib/pydoc.py

Lines changed: 24 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ class or function within a module or module in a package. If the
5353
# the current directory is changed with os.chdir(), an incorrect
5454
# path will be displayed.
5555

56+
import ast
5657
import __future__
5758
import builtins
5859
import importlib._bootstrap
@@ -384,21 +385,29 @@ def ispackage(path):
384385
return False
385386

386387
def source_synopsis(file):
387-
line = file.readline()
388-
while line[:1] == '#' or not line.strip():
389-
line = file.readline()
390-
if not line: break
391-
line = line.strip()
392-
if line[:4] == 'r"""': line = line[1:]
393-
if line[:3] == '"""':
394-
line = line[3:]
395-
if line[-1:] == '\\': line = line[:-1]
396-
while not line.strip():
397-
line = file.readline()
398-
if not line: break
399-
result = line.split('"""')[0].strip()
400-
else: result = None
401-
return result
388+
"""Return the one-line summary of a file object, if present"""
389+
390+
string = ''
391+
try:
392+
tokens = tokenize.generate_tokens(file.readline)
393+
for tok_type, tok_string, _, _, _ in tokens:
394+
if tok_type == tokenize.STRING:
395+
string += tok_string
396+
elif tok_type == tokenize.NEWLINE:
397+
with warnings.catch_warnings():
398+
# Ignore the "invalid escape sequence" warning.
399+
warnings.simplefilter("ignore", SyntaxWarning)
400+
docstring = ast.literal_eval(string)
401+
if not isinstance(docstring, str):
402+
return None
403+
return docstring.strip().split('\n')[0].strip()
404+
elif tok_type == tokenize.OP and tok_string in ('(', ')'):
405+
string += tok_string
406+
elif tok_type not in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
407+
return None
408+
except (tokenize.TokenError, UnicodeDecodeError, SyntaxError):
409+
return None
410+
return None
402411

403412
def synopsis(filename, cache={}):
404413
"""Get the one-line summary out of a module file."""

Lib/test/test_pydoc/test_pydoc.py

Lines changed: 77 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import contextlib
55
import importlib.util
66
import inspect
7+
import io
78
import pydoc
89
import py_compile
910
import keyword
@@ -899,6 +900,82 @@ def test_synopsis(self):
899900
synopsis = pydoc.synopsis(TESTFN, {})
900901
self.assertEqual(synopsis, 'line 1: h\xe9')
901902

903+
def test_source_synopsis(self):
904+
def check(source, expected, encoding=None):
905+
if isinstance(source, str):
906+
source_file = StringIO(source)
907+
else:
908+
source_file = io.TextIOWrapper(io.BytesIO(source), encoding=encoding)
909+
with source_file:
910+
result = pydoc.source_synopsis(source_file)
911+
self.assertEqual(result, expected)
912+
913+
check('"""Single line docstring."""',
914+
'Single line docstring.')
915+
check('"""First line of docstring.\nSecond line.\nThird line."""',
916+
'First line of docstring.')
917+
check('"""First line of docstring.\\nSecond line.\\nThird line."""',
918+
'First line of docstring.')
919+
check('""" Whitespace around docstring. """',
920+
'Whitespace around docstring.')
921+
check('import sys\n"""No docstring"""',
922+
None)
923+
check(' \n"""Docstring after empty line."""',
924+
'Docstring after empty line.')
925+
check('# Comment\n"""Docstring after comment."""',
926+
'Docstring after comment.')
927+
check(' # Indented comment\n"""Docstring after comment."""',
928+
'Docstring after comment.')
929+
check('""""""', # Empty docstring
930+
'')
931+
check('', # Empty file
932+
None)
933+
check('"""Embedded\0null byte"""',
934+
None)
935+
check('"""Embedded null byte"""\0',
936+
None)
937+
check('"""Café and résumé."""',
938+
'Café and résumé.')
939+
check("'''Triple single quotes'''",
940+
'Triple single quotes')
941+
check('"Single double quotes"',
942+
'Single double quotes')
943+
check("'Single single quotes'",
944+
'Single single quotes')
945+
check('"""split\\\nline"""',
946+
'splitline')
947+
check('"""Unrecognized escape \\sequence"""',
948+
'Unrecognized escape \\sequence')
949+
check('"""Invalid escape seq\\uence"""',
950+
None)
951+
check('r"""Raw \\stri\\ng"""',
952+
'Raw \\stri\\ng')
953+
check('b"""Bytes literal"""',
954+
None)
955+
check('f"""f-string"""',
956+
None)
957+
check('"""Concatenated""" \\\n"string" \'literals\'',
958+
'Concatenatedstringliterals')
959+
check('"""String""" + """expression"""',
960+
None)
961+
check('("""In parentheses""")',
962+
'In parentheses')
963+
check('("""Multiple lines """\n"""in parentheses""")',
964+
'Multiple lines in parentheses')
965+
check('()', # tuple
966+
None)
967+
check(b'# coding: iso-8859-15\n"""\xa4uro sign"""',
968+
'€uro sign', encoding='iso-8859-15')
969+
check(b'"""\xa4"""', # Decoding error
970+
None, encoding='utf-8')
971+
972+
with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as temp_file:
973+
temp_file.write('"""Real file test."""\n')
974+
temp_file.flush()
975+
temp_file.seek(0)
976+
result = pydoc.source_synopsis(temp_file)
977+
self.assertEqual(result, "Real file test.")
978+
902979
@requires_docstrings
903980
def test_synopsis_sourceless(self):
904981
os = import_helper.import_fresh_module('os')
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Fix quick extraction of module docstrings from a file in :mod:`pydoc`.
2+
It now supports docstrings with single quotes, escape sequences,
3+
raw string literals, and other Python syntax.

0 commit comments

Comments
 (0)