Skip to content

Commit e13f0bd

Browse files
aitikgupta and jkseppan authored
Type42 subsetting in PS/PDF (matplotlib#20391)
* Proof of concept: Type42 subsetting in pdf * flake8 * Filter out just the py23 warning * More flake8 * Implement subsetting for PS backend * Move getSubset to common pdf/ps backend * Handle file-like objects instead of saving * Fix doc and warning * Change function doc and context * Log the correct way * Add fonttools min version for testing * Add fonttools in test workflow * Use ASCII characters for logging * Add unit test for get_glyphs_subset * Remove seek() * Add prefix to subsetted font names according to PDF spec * Use charmap for prefix * Update fonttools requirements * Drop PfEd table * flush before reading the contents back from tmp file * Fix testing for subsetting * Add whatsnew entry for Type42 subsetting * Fix subset tests * Add PS test for multiple fonttypes * Use TemporaryDirectory instead of NamedTemporaryFile * Add fontTools in dependencies.rst * Add API changenote for new dependency * Rebase tests.yml for packaging * Keep a reference to non-subsetted font for XObjects Co-authored-by: Jouni K. Seppänen <[email protected]>
1 parent 0f767cf commit e13f0bd

File tree

13 files changed

+199
-22
lines changed

13 files changed

+199
-22
lines changed

.github/workflows/tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ jobs:
145145
146146
# Install dependencies from PyPI.
147147
python -m pip install --upgrade $PRE \
148-
cycler kiwisolver numpy packaging pillow pyparsing python-dateutil \
149-
setuptools-scm \
148+
cycler fonttools kiwisolver numpy packaging pillow pyparsing \
149+
python-dateutil setuptools-scm \
150150
-r requirements/testing/all.txt \
151151
${{ matrix.extra-requirements }}
152152
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
fontTools for type 42 subsetting
2+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3+
4+
A new dependency known as `fontTools <https://fonttools.readthedocs.io/>`_
5+
is integrated with Matplotlib 3.5.
6+
7+
It is designed to be used with PS/EPS and PDF documents, and handles
8+
Type 42 font subsetting.

doc/devel/dependencies.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ reference.
2222
* `kiwisolver <https://github.com/nucleic/kiwi>`_ (>= 1.0.1)
2323
* `Pillow <https://pillow.readthedocs.io/en/latest/>`_ (>= 6.2)
2424
* `pyparsing <https://pypi.org/project/pyparsing/>`_ (>=2.2.1)
25+
* `fontTools <https://fonttools.readthedocs.io/en/latest/>`_ (>=4.22.0)
2526

2627

2728
.. _optional_dependencies:
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
Type 42 Subsetting is now enabled for PDF/PS backends
2+
-----------------------------------------------------
3+
4+
`~matplotlib.backends.backend_pdf` and `~matplotlib.backends.backend_ps` now use
5+
a unified Type 42 font subsetting interface, with the help of `fontTools <https://fonttools.readthedocs.io/en/latest/>`_.
6+
7+
Set `~matplotlib.RcParams`'s *fonttype* value as ``42`` to trigger this workflow:
8+
9+
.. code-block::
10+
11+
# for PDF backend
12+
plt.rcParams['pdf.fonttype'] = 42
13+
14+
# for PS backend
15+
plt.rcParams['ps.fonttype'] = 42
16+
17+
18+
fig, ax = plt.subplots()
19+
ax.text(0.4, 0.5, 'subsetted document is smaller in size!')
20+
21+
fig.savefig("document.pdf")
22+
fig.savefig("document.ps")

lib/matplotlib/backends/_backend_pdf_ps.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22
Common functionality between the PDF and PS backends.
33
"""
44

5+
from io import BytesIO
56
import functools
67

8+
from fontTools import subset
9+
710
import matplotlib as mpl
811
from .. import font_manager, ft2font
912
from ..afm import AFM
@@ -16,6 +19,35 @@ def _cached_get_afm_from_fname(fname):
1619
return AFM(fh)
1720

1821

22+
def get_glyphs_subset(fontfile, characters):
    """
    Subset a TTF font.

    Load the font at *fontfile*, restrict it to the glyphs needed for
    *characters*, and serialize the result into an in-memory buffer.

    Parameters
    ----------
    fontfile : str
        Path to the font file.
    characters : str
        Continuous set of characters to include in the subset.

    Returns
    -------
    io.BytesIO
        File-like object holding the serialized subset font.
    """
    subset_options = subset.Options(
        glyph_names=True, recommended_glyphs=True)
    # Drop the FontForge timestamp ('FFTM') and 'PfEd' tables instead of
    # subsetting them.
    subset_options.drop_tables += ['FFTM', 'PfEd']

    buffer = BytesIO()
    with subset.load_font(fontfile, subset_options) as ttfont:
        glyph_subsetter = subset.Subsetter(options=subset_options)
        glyph_subsetter.populate(text=characters)
        glyph_subsetter.subset(ttfont)
        ttfont.save(buffer, reorderTables=False)
    return buffer
49+
50+
1951
class CharacterTracker:
2052
"""
2153
Helper for font subsetting by the pdf and ps backends.

lib/matplotlib/backends/backend_pdf.py

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
import math
1515
import os
1616
import re
17+
import string
1718
import struct
19+
import sys
1820
import time
1921
import types
2022
import warnings
@@ -36,7 +38,7 @@
3638
import matplotlib.type1font as type1font
3739
import matplotlib.dviread as dviread
3840
from matplotlib.ft2font import (FIXED_WIDTH, ITALIC, LOAD_NO_SCALE,
39-
LOAD_NO_HINTING, KERNING_UNFITTED)
41+
LOAD_NO_HINTING, KERNING_UNFITTED, FT2Font)
4042
from matplotlib.mathtext import MathTextParser
4143
from matplotlib.transforms import Affine2D, BboxBase
4244
from matplotlib.path import Path
@@ -768,6 +770,22 @@ def newTextnote(self, text, positionRect=[-100, -100, 0, 0]):
768770
}
769771
self.pageAnnotations.append(theNote)
770772

773+
def _get_subsetted_psname(self, ps_name, charmap):
774+
def toStr(n, base):
775+
if n < base:
776+
return string.ascii_uppercase[n]
777+
else:
778+
return (
779+
toStr(n // base, base) + string.ascii_uppercase[n % base]
780+
)
781+
782+
# encode to string using base 26
783+
hashed = hash(frozenset(charmap.keys())) % ((sys.maxsize + 1) * 2)
784+
prefix = toStr(hashed, 26)
785+
786+
# get first 6 characters from prefix
787+
return prefix[:6] + "+" + ps_name
788+
771789
def finalize(self):
772790
"""Write out the various deferred objects and the pdf end matter."""
773791

@@ -1209,6 +1227,26 @@ def embedTTFType42(font, characters, descriptor):
12091227
wObject = self.reserveObject('Type 0 widths')
12101228
toUnicodeMapObject = self.reserveObject('ToUnicode map')
12111229

1230+
_log.debug(
1231+
"SUBSET %s characters: %s",
1232+
filename, "".join(chr(c) for c in characters)
1233+
)
1234+
fontdata = _backend_pdf_ps.get_glyphs_subset(
1235+
filename, "".join(chr(c) for c in characters)
1236+
)
1237+
_log.debug(
1238+
"SUBSET %s %d -> %d", filename,
1239+
os.stat(filename).st_size, fontdata.getbuffer().nbytes
1240+
)
1241+
1242+
# We need this ref for XObjects
1243+
full_font = font
1244+
1245+
# reload the font object from the subset
1246+
# (all the necessary data could probably be obtained directly
1247+
# using fontTools.ttLib)
1248+
font = FT2Font(fontdata)
1249+
12121250
cidFontDict = {
12131251
'Type': Name('Font'),
12141252
'Subtype': Name('CIDFontType2'),
@@ -1233,21 +1271,12 @@ def embedTTFType42(font, characters, descriptor):
12331271

12341272
# Make fontfile stream
12351273
descriptor['FontFile2'] = fontfileObject
1236-
length1Object = self.reserveObject('decoded length of a font')
12371274
self.beginStream(
12381275
fontfileObject.id,
12391276
self.reserveObject('length of font stream'),
1240-
{'Length1': length1Object})
1241-
with open(filename, 'rb') as fontfile:
1242-
length1 = 0
1243-
while True:
1244-
data = fontfile.read(4096)
1245-
if not data:
1246-
break
1247-
length1 += len(data)
1248-
self.currentstream.write(data)
1277+
{'Length1': fontdata.getbuffer().nbytes})
1278+
self.currentstream.write(fontdata.getvalue())
12491279
self.endStream()
1250-
self.writeObject(length1Object, length1)
12511280

12521281
# Make the 'W' (Widths) array, CidToGidMap and ToUnicode CMap
12531282
# at the same time
@@ -1299,10 +1328,10 @@ def embedTTFType42(font, characters, descriptor):
12991328
glyph_ids = []
13001329
for ccode in characters:
13011330
if not _font_supports_char(fonttype, chr(ccode)):
1302-
gind = font.get_char_index(ccode)
1331+
gind = full_font.get_char_index(ccode)
13031332
glyph_ids.append(gind)
13041333

1305-
bbox = [cvt(x, nearest=False) for x in font.bbox]
1334+
bbox = [cvt(x, nearest=False) for x in full_font.bbox]
13061335
rawcharprocs = _get_pdf_charprocs(filename, glyph_ids)
13071336
for charname in sorted(rawcharprocs):
13081337
stream = rawcharprocs[charname]
@@ -1352,7 +1381,11 @@ def embedTTFType42(font, characters, descriptor):
13521381

13531382
# Beginning of main embedTTF function...
13541383

1355-
ps_name = font.postscript_name.encode('ascii', 'replace')
1384+
ps_name = self._get_subsetted_psname(
1385+
font.postscript_name,
1386+
font.get_charmap()
1387+
)
1388+
ps_name = ps_name.encode('ascii', 'replace')
13561389
ps_name = Name(ps_name)
13571390
pclt = font.get_sfnt_table('pclt') or {'capHeight': 0, 'xHeight': 0}
13581391
post = font.get_sfnt_table('post') or {'italicAngle': (0, 0)}

lib/matplotlib/backends/backend_ps.py

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@
77
from enum import Enum
88
import functools
99
import glob
10-
from io import StringIO
10+
from io import StringIO, TextIOWrapper
1111
import logging
1212
import math
1313
import os
1414
import pathlib
15+
import tempfile
1516
import re
1617
import shutil
1718
from tempfile import TemporaryDirectory
@@ -27,7 +28,7 @@
2728
GraphicsContextBase, RendererBase)
2829
from matplotlib.cbook import is_writable_file_like, file_requires_unicode
2930
from matplotlib.font_manager import get_font
30-
from matplotlib.ft2font import LOAD_NO_HINTING, LOAD_NO_SCALE
31+
from matplotlib.ft2font import LOAD_NO_HINTING, LOAD_NO_SCALE, FT2Font
3132
from matplotlib._ttconv import convert_ttf_to_ps
3233
from matplotlib.mathtext import MathTextParser
3334
from matplotlib._mathtext_data import uni2type1
@@ -954,8 +955,40 @@ def print_figure_impl(fh):
954955
fh.write(_font_to_ps_type3(font_path, glyph_ids))
955956
else:
956957
try:
957-
convert_ttf_to_ps(os.fsencode(font_path),
958-
fh, fonttype, glyph_ids)
958+
_log.debug(
959+
"SUBSET %s characters: %s", font_path,
960+
''.join(chr(c) for c in chars)
961+
)
962+
fontdata = _backend_pdf_ps.get_glyphs_subset(
963+
font_path, "".join(chr(c) for c in chars)
964+
)
965+
_log.debug(
966+
"SUBSET %s %d -> %d", font_path,
967+
os.stat(font_path).st_size,
968+
fontdata.getbuffer().nbytes
969+
)
970+
971+
# give ttconv a subsetted font
972+
# along with updated glyph_ids
973+
with TemporaryDirectory() as tmpdir:
974+
tmpfile = os.path.join(tmpdir, "tmp.ttf")
975+
font = FT2Font(fontdata)
976+
glyph_ids = [
977+
font.get_char_index(c) for c in chars
978+
]
979+
980+
with open(tmpfile, 'wb') as tmp:
981+
tmp.write(fontdata.getvalue())
982+
tmp.flush()
983+
984+
# TODO: allow convert_ttf_to_ps
985+
# to input file objects (BytesIO)
986+
convert_ttf_to_ps(
987+
os.fsencode(tmpfile),
988+
fh,
989+
fonttype,
990+
glyph_ids,
991+
)
959992
except RuntimeError:
960993
_log.warning(
961994
"The PostScript backend does not currently "

lib/matplotlib/testing/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ def pytest_configure(config):
1919
("markers", "pytz: Tests that require pytz to be installed."),
2020
("markers", "network: Tests that reach out to the network."),
2121
("filterwarnings", "error"),
22+
("filterwarnings",
23+
"ignore:.*The py23 module has been deprecated:DeprecationWarning"),
2224
]:
2325
config.addinivalue_line(key, value)
2426

lib/matplotlib/tests/test_backend_pdf.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,11 @@
1010

1111
import matplotlib as mpl
1212
from matplotlib import dviread, pyplot as plt, checkdep_usetex, rcParams
13+
from matplotlib.cbook import _get_data_path
14+
from matplotlib.ft2font import FT2Font
15+
from matplotlib.backends._backend_pdf_ps import get_glyphs_subset
1316
from matplotlib.backends.backend_pdf import PdfPages
17+
1418
from matplotlib.testing.decorators import check_figures_equal, image_comparison
1519

1620

@@ -339,3 +343,28 @@ def test_kerning():
339343
s = "AVAVAVAVAVAVAVAV€AAVV"
340344
fig.text(0, .25, s, size=5)
341345
fig.text(0, .75, s, size=20)
346+
347+
348+
def test_glyphs_subset():
    """Check that a subsetted font maps exactly the requested characters."""
    text = "these should be subsetted! 1234567890"
    font_path = str(_get_data_path("fonts/ttf/DejaVuSerif.ttf"))

    # Reference font loaded straight from disk, without subsetting.
    full_font = FT2Font(font_path)
    full_font.set_text(text)
    full_charmap = full_font.get_charmap()

    # Font restricted to the characters that occur in the text.
    subset_font = FT2Font(get_glyphs_subset(font_path, text))
    subset_font.set_text(text)
    subset_charmap = subset_font.get_charmap()

    # Every distinct character of the text, and nothing else, is mapped.
    assert set(text) == {chr(code) for code in subset_charmap}

    # The subsetted font's charmap should have fewer entries.
    assert len(subset_charmap) < len(full_charmap)

    # Both fonts were given the same text, so the glyph counts are
    # expected to match.
    assert subset_font.get_num_glyphs() == full_font.get_num_glyphs()

lib/matplotlib/tests/test_backend_ps.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,18 @@ def test_type42_font_without_prep():
207207
mpl.rcParams["mathtext.fontset"] = "stix"
208208

209209
plt.figtext(0.5, 0.5, "Mass $m$")
210+
211+
212+
@pytest.mark.parametrize('fonttype', ["3", "42"])
def test_fonttype(fonttype):
    """Check that the PS output declares the requested ``/FontType``."""
    mpl.rcParams["ps.fonttype"] = fonttype

    fig, ax = plt.subplots()
    ax.text(0.25, 0.5, "Forty-two is the answer to everything!")

    # Render the figure to PostScript in memory.
    output = io.BytesIO()
    fig.savefig(output, format="ps")

    pattern = b'/FontType ' + fonttype.encode('utf-8') + b' def'
    assert re.search(pattern, output.getvalue(), re.MULTILINE)

0 commit comments

Comments
 (0)