218 lines
5.6 KiB
Python
218 lines
5.6 KiB
Python
#!/usr/bin/env python3
|
|
|
|
# Parse a CMap file and dump it as a C struct.
|
|
|
|
import sys
|
|
|
|
# Decode a subset of CMap syntax (only what is needed for our built-in resources)
|
|
# We require that tokens are whitespace separated.
|
|
|
|
def dumpcmap(filename):
    """Parse the CMap file *filename* and print it to stdout as C source.

    The output consists of the static lookup arrays (ranges, xranges,
    mranges and the one-to-many table, each only when non-empty) followed
    by a ``pdf_cmap`` struct initializer that references them.

    Only a subset of CMap syntax is decoded (what the built-in resources
    need), and tokens must be separated by whitespace.

    Raises whatever ``open`` raises if the file cannot be read, and
    ``ValueError`` / ``IndexError`` on tokens that cannot be decoded.
    """
    cmapname, usecmap, wmode, codespacerange, mapping = _parse_cmap(filename)
    ranges, xranges, mranges, mdata = _build_ranges(mapping)
    _print_cmap(cmapname, usecmap, wmode, codespacerange,
                ranges, xranges, mranges, mdata)


def _parse_cmap(filename):
    """Read a CMap file and collect its header fields and code mappings.

    Returns ``(cmapname, usecmap, wmode, codespacerange, mapping)`` where
    ``codespacerange`` is a list of ``(nbytes, lo, hi)`` tuples and
    ``mapping`` maps each source code to either a single int or a list of
    ints (a one-to-many mapping).
    """
    # Section keyword -> name of the section whose data lines follow it.
    sections = {
        'begincodespacerange': 'codespacerange',
        'begincidrange': 'cidrange',
        'beginbfrange': 'bfrange',
        'begincidchar': 'cidchar',
        'beginbfchar': 'bfchar',
    }

    codespacerange = []
    usecmap = ""
    cmapname = ""
    wmode = 0
    mapping = {}

    def tocode(s):
        # "<...>" tokens are hexadecimal; anything else is a decimal integer.
        if s[0] == '<' and s[-1] == '>':
            return int(s[1:-1], 16)
        return int(s, 10)

    def bf_values(bf):
        # Split "<hhhh...>" into a list of values, four hex digits each.
        digits = bf[1:-1]  # drop < >
        return [int(digits[i:i + 4], 16) for i in range(0, len(digits), 4)]

    def add_bf(lo, v):
        # Decode unicode surrogate pairs
        if len(v) == 2 and 0xd800 <= v[0] <= 0xdbff and 0xdc00 <= v[1] <= 0xdfff:
            mapping[lo] = ((v[0] - 0xd800) << 10) + (v[1] - 0xdc00) + 0x10000
        elif len(v) == 1:
            mapping[lo] = v[0]
        elif len(v) <= 8:
            # Store a copy: the bfrange loop keeps mutating the same list.
            mapping[lo] = v[:]
        else:
            print("/* warning: too long one-to-many mapping: %s */" % (v,))

    current = None
    # The context manager closes the file even if a token fails to decode.
    with open(filename, "r") as cmap_file:
        for raw in cmap_file:
            # Only lines whose very first character is '%' count as comments.
            if raw[0] == '%':
                continue
            line = raw.split()
            if not line:
                continue
            head = line[0]
            if head == '/CMapName':
                cmapname = line[1][1:]  # drop the leading '/'
            elif head == '/WMode':
                wmode = int(line[1])
            elif len(line) > 1 and line[1] == 'usecmap':
                usecmap = head[1:]  # drop the leading '/'
            elif len(line) > 1 and line[1] in sections:
                # "<count> beginXXX" form
                current = sections[line[1]]
            elif head in sections:
                current = sections[head]
            elif head.startswith("end"):
                current = None
            elif current == 'codespacerange' and len(line) == 2:
                # Two hex digits per byte between the angle brackets.
                # Floor division keeps the byte count an int on Python 3.
                nbytes = (len(head) - 2) // 2
                codespacerange.append((nbytes, tocode(head), tocode(line[1])))
            elif current == 'cidrange' and len(line) == 3:
                lo, hi, cid = tocode(head), tocode(line[1]), tocode(line[2])
                # Consecutive codes map to consecutive values.
                for step in range(hi - lo + 1):
                    mapping[lo + step] = cid + step
            elif current == 'cidchar' and len(line) == 2:
                lo, cid = tocode(head), tocode(line[1])
                mapping[lo] = cid
            elif current == 'bfchar' and len(line) == 2:
                lo = tocode(head)
                add_bf(lo, bf_values(line[1]))
            elif current == 'bfrange' and len(line) == 3:
                lo, hi = tocode(head), tocode(line[1])
                v = bf_values(line[2])
                # Only the last value is incremented along the range.
                while lo <= hi:
                    add_bf(lo, v)
                    lo += 1
                    v[-1] += 1

    return cmapname, usecmap, wmode, codespacerange, mapping


def _build_ranges(mapping):
    """Compress the code mapping into the tables emitted in the C file.

    Returns ``(ranges, xranges, mranges, mdata)``:

    * ``ranges`` / ``xranges`` hold ``(lo, hi, first_value)`` for runs of
      consecutive codes mapping to consecutive ints; a run goes to
      ``xranges`` when lo, hi or first_value exceeds 0xffff.
    * ``mranges`` holds ``(code, offset)`` for one-to-many mappings, with
      ``offset`` indexing into ``mdata``.
    * ``mdata`` is a flat list of records, each a length followed by that
      many values.
    """
    ranges = []
    xranges = []
    mranges = []
    mdata = []

    def flush(lo, hi, v_lo):
        # A negative lo is the "no run in progress" sentinel.
        if lo < 0:
            return
        if lo > 0xffff or hi > 0xffff or v_lo > 0xffff:
            xranges.append((lo, hi, v_lo))
        else:
            ranges.append((lo, hi, v_lo))

    out_lo = out_hi = -100
    out_v_lo = out_v_hi = 0
    for code in sorted(mapping):
        v = mapping[code]
        if not isinstance(v, int):
            # One-to-many mapping: it ends any run in progress.
            flush(out_lo, out_hi, out_v_lo)
            out_lo = out_hi = -100
            mranges.append((code, len(mdata)))
            mdata.append(len(v))
            mdata.extend(v)
        elif code != out_hi + 1 or v != out_v_hi + 1:
            # Not a continuation of the current run: start a new one.
            flush(out_lo, out_hi, out_v_lo)
            out_lo = out_hi = code
            out_v_lo = out_v_hi = v
        else:
            out_hi += 1
            out_v_hi += 1
    flush(out_lo, out_hi, out_v_lo)

    return ranges, xranges, mranges, mdata


def _print_cmap(cmapname, usecmap, wmode, codespacerange,
                ranges, xranges, mranges, mdata):
    """Print the C arrays and the pdf_cmap struct initializer for one CMap."""
    # '-' is replaced so the CMap name can be used inside a C identifier.
    cname = cmapname.replace('-', '_')

    def print_array(ctype, suffix, row_format, rows):
        print("static const %s cmap_%s_%s[] = {" % (ctype, cname, suffix))
        for row in rows:
            print(row_format % row)
        print("};")
        print()

    print()
    print("/*", cmapname, "*/")
    print()

    if ranges:
        print_array("pdf_range", "ranges", "{%d,%d,%d},", ranges)
    if xranges:
        print_array("pdf_xrange", "xranges", "{%d,%d,%d},", xranges)
    if mranges:
        print_array("pdf_mrange", "mranges", "{%d,%d},", mranges)
        print("static const int cmap_%s_table[] = {" % cname)
        # One record (length followed by its values) per output line.
        start = 0
        while start < len(mdata):
            end = start + mdata[start] + 1
            print("".join("%d," % value for value in mdata[start:end]))
            start = end
        print("};")
        print()

    print("static pdf_cmap cmap_%s = {" % cname)
    print("\t{ -1, pdf_drop_cmap_imp },")
    print("\t/* cmapname */ \"%s\"," % cmapname)
    print("\t/* usecmap */ \"%s\", NULL," % usecmap)
    print("\t/* wmode */ %d," % wmode)
    print("\t/* codespaces */ %d, {" % len(codespacerange))
    if codespacerange:
        for codespace in codespacerange:
            # Zero-pad the bounds to two hex digits per codespace byte.
            width = codespace[0] * 2
            fmt = "\t\t{ %%d, 0x%%0%dx, 0x%%0%dx }," % (width, width)
            print(fmt % codespace)
    else:
        print("\t\t{ 0, 0, 0 },")
    print("\t},")

    members = (
        ("pdf_range", "ranges", ranges),
        ("pdf_xrange", "xranges", xranges),
        ("pdf_mrange", "mranges", mranges),
        ("int", "table", mdata),
    )
    for ctype, suffix, rows in members:
        if rows:
            print("\t%d, %d, (%s*)cmap_%s_%s,"
                  % (len(rows), len(rows), ctype, cname, suffix))
        else:
            print("\t0, 0, NULL, /* %s */" % suffix)

    print("\t0, 0, 0, NULL /* splay tree */")
    print("};")
|
|
|
|
# Emit the generated-file banner once, then dump every CMap file named on
# the command line, in the order given.
print("/* This is an automatically generated file. Do not edit. */")

for cmap_path in sys.argv[1:]:
    dumpcmap(cmap_path)
|