@@ -321,6 +321,21 @@ def pdfRepr(obj):
321321 .format (type (obj )))
322322
323323
324+ def _font_supports_char (fonttype , char ):
325+ """
326+ Returns True if the font is able to provide *char* in a PDF.
327+
328+ For a Type 3 font, this method returns True only for single-byte
329+ chars. For Type 42 fonts this method return True if the char is from
330+ the Basic Multilingual Plane.
331+ """
332+ if fonttype == 3 :
333+ return ord (char ) <= 255
334+ if fonttype == 42 :
335+ return ord (char ) <= 65535
336+ raise NotImplementedError ()
337+
338+
324339class Reference :
325340 """
326341 PDF reference object.
@@ -1268,13 +1283,48 @@ def embedTTFType42(font, characters, descriptor):
12681283
12691284 unicode_bfrange = []
12701285 for start , end in unicode_groups :
1286+ # Ensure the CID map contains only chars from BMP
1287+ if start > 65535 :
1288+ continue
1289+ end = min (65535 , end )
1290+
12711291 unicode_bfrange .append (
12721292 b"<%04x> <%04x> [%s]" %
12731293 (start , end ,
12741294 b" " .join (b"<%04x>" % x for x in range (start , end + 1 ))))
12751295 unicode_cmap = (self ._identityToUnicodeCMap %
12761296 (len (unicode_groups ), b"\n " .join (unicode_bfrange )))
12771297
1298+ # Add XObjects for unsupported chars
1299+ glyph_ids = []
1300+ for ccode in characters :
1301+ if not _font_supports_char (fonttype , chr (ccode )):
1302+ gind = font .get_char_index (ccode )
1303+ glyph_ids .append (gind )
1304+
1305+ bbox = [cvt (x , nearest = False ) for x in font .bbox ]
1306+ rawcharprocs = _get_pdf_charprocs (filename , glyph_ids )
1307+ for charname in sorted (rawcharprocs ):
1308+ stream = rawcharprocs [charname ]
1309+ charprocDict = {'Length' : len (stream )}
1310+ charprocDict ['Type' ] = Name ('XObject' )
1311+ charprocDict ['Subtype' ] = Name ('Form' )
1312+ charprocDict ['BBox' ] = bbox
1313+ # Each glyph includes bounding box information,
1314+ # but xpdf and ghostscript can't handle it in a
1315+ # Form XObject (they segfault!!!), so we remove it
1316+ # from the stream here. It's not needed anyway,
1317+ # since the Form XObject includes it in its BBox
1318+ # value.
1319+ stream = stream [stream .find (b"d1" ) + 2 :]
1320+ charprocObject = self .reserveObject ('charProc' )
1321+ self .beginStream (charprocObject .id , None , charprocDict )
1322+ self .currentstream .write (stream )
1323+ self .endStream ()
1324+
1325+ name = self ._get_xobject_symbol_name (filename , charname )
1326+ self .multi_byte_charprocs [name ] = charprocObject
1327+
12781328 # CIDToGIDMap stream
12791329 cid_to_gid_map = "" .join (cid_to_gid_map ).encode ("utf-16be" )
12801330 self .beginStream (cidToGidMapObject .id ,
@@ -2106,16 +2156,17 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21062156 self .check_gc (gc , gc ._rgb )
21072157 prev_font = None , None
21082158 oldx , oldy = 0 , 0
2109- type3_multibytes = []
2159+ unsupported_chars = []
21102160
21112161 self .file .output (Op .begin_text )
21122162 for font , fontsize , num , ox , oy in glyphs :
2113- self .file ._character_tracker .track (font , chr (num ))
2163+ char = chr (num )
2164+ self .file ._character_tracker .track (font , char )
21142165 fontname = font .fname
2115- if fonttype == 3 and num > 255 :
2116- # For Type3 fonts, multibyte characters must be emitted
2117- # separately (below).
2118- type3_multibytes .append ((font , fontsize , ox , oy , num ))
2166+ if not _font_supports_char ( fonttype , char ) :
2167+ # Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
2168+ # Type 42) must be emitted separately (below).
2169+ unsupported_chars .append ((font , fontsize , ox , oy , num ))
21192170 else :
21202171 self ._setup_textpos (ox , oy , 0 , oldx , oldy )
21212172 oldx , oldy = ox , oy
@@ -2127,7 +2178,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21272178 Op .show )
21282179 self .file .output (Op .end_text )
21292180
2130- for font , fontsize , ox , oy , num in type3_multibytes :
2181+ for font , fontsize , ox , oy , num in unsupported_chars :
21312182 self ._draw_xobject_glyph (
21322183 font , fontsize , font .get_char_index (num ), ox , oy )
21332184
@@ -2236,20 +2287,6 @@ def encode_string(self, s, fonttype):
22362287 return s .encode ('cp1252' , 'replace' )
22372288 return s .encode ('utf-16be' , 'replace' )
22382289
2239- @staticmethod
2240- def _font_supports_char (fonttype , char ):
2241- """
2242- Returns True if the font is able to provided the char in a PDF
2243-
2244- For a Type 3 font, this method returns True only for single-byte
2245- chars. For Type 42 fonts this method always returns True.
2246- """
2247- if fonttype == 3 :
2248- return ord (char ) <= 255
2249- if fonttype == 42 :
2250- return True
2251- raise NotImplementedError ()
2252-
22532290 def draw_text (self , gc , x , y , s , prop , angle , ismath = False , mtext = None ):
22542291 # docstring inherited
22552292
@@ -2313,7 +2350,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
23132350 prev_was_multibyte = True
23142351 for item in _text_helpers .layout (
23152352 s , font , kern_mode = KERNING_UNFITTED ):
2316- if self . _font_supports_char (fonttype , item .char ):
2353+ if _font_supports_char (fonttype , item .char ):
23172354 if prev_was_multibyte :
23182355 singlebyte_chunks .append ((item .x , []))
23192356 if item .prev_kern :
0 commit comments