#!/usr/bin/python
from BeautifulSoup import BeautifulStoneSoup, Tag
import sys
import re
#Split an inkscape style="..." attribute into separate attributes on that tag
def splitInkscapeStyle(tag) :
    """Expand a CSS-like ``style`` attribute into individual attributes.

    Every ``name:value`` declaration found in the tag's ``style`` value is
    written back onto the tag as ``tag[name] = value`` (both sides stripped
    of surrounding whitespace); the ``style`` attribute is then deleted.

    tag -- object exposing ``get``, ``__setitem__`` and ``__delitem__`` for
           its attributes (a BeautifulSoup tag; a plain dict also works).

    Returns False when the tag has no style attribute or an empty one,
    None otherwise.
    """
    #get() rather than [] so a tag without a style attribute is skipped
    #instead of raising KeyError
    strStyle=tag.get("style")
    if not strStyle :
        return False
    print("style is" + str(strStyle))
    for declaration in strStyle.split(";") :
        print("I is : " + declaration)
        #Cut at the FIRST colon only: values such as url(data:...) carry
        #colons of their own
        name, colon, value = declaration.partition(":")
        name = name.strip()
        if not colon or not name :
            #Empty segment (e.g. after a trailing ";") or no "name:" part
            continue
        tag[name] = value.strip()
    del tag['style']
#By Peter Waller, BS: Replacing a tag with its contents, BeautifulSoup mailing list
def tagRemove(tag, tagname):
    """Replace ``tag`` with its own children, in place.

    The children of ``tag`` are moved into ``tag.parent`` starting at the
    position ``tag`` occupied, keeping their order; ``tag`` itself is then
    detached from the tree.

    tag     -- the element to dissolve; it must currently have a parent.
    tagname -- unused; kept so existing callers keep working.
    """
    container = tag.parent
    # Locate the tag's position among its siblings
    origIndex = container.contents.index(tag)
    # Walk a snapshot of the children: tag.contents shrinks as elements are
    # disconnected from 'tag' and inserted into its parent, so iterating the
    # live list would skip every other child.
    for offset, content in enumerate(list(tag.contents)):
        container.insert(origIndex + offset, content)
    # Excise the now empty tag
    tag.extract()
def epsilon():
    """Return the machine epsilon of Python floats.

    The result is the smallest power of two ``eps`` for which
    ``1.0 + eps`` still compares greater than ``1.0``.
    """
    eps=1.0
    #Halve only while the NEXT candidate is still visible next to 1.0.
    #This must be a true division: floor division would turn 1.0 into 0.0
    #on the very first step.
    while (eps / 2) + 1.0 > 1.0 :
        eps /= 2
    return eps
def hasFontFace(tag):
    """Tell whether the text of ``tag`` contains an ``@font-face`` rule.

    tag -- object whose ``string`` attribute is the tag's text, or a falsy
           value when there is no single text child.

    Returns True when "@font-face" occurs anywhere in that text, False
    otherwise (including when there is no text at all).
    """
    text = tag.string
    if not text:
        return False
    #Check for an @font-face rule (presumably how the encoded/base64 font
    #is embedded). A membership test is used because str.find() yields -1
    #(truthy) for "absent" and 0 (falsy) for "found at the very start".
    return "@font-face" in text
#Takes a stone-soup tag and applies various
#workaround fixes of dubious effectiveness
def fontFix(tag):
    """Move weight/style hints out of ``font-family`` into real attributes.

    Looks at the tag's ``font-family`` value and, case-insensitively:
      * if it contains "-bold": makes sure ``font-weight`` mentions bold
        (setting it to "Bold" otherwise) and strips "-Bold" from the family;
      * if it contains "-italic": makes sure ``font-style`` mentions italic
        (setting it to "Italic" otherwise) and strips "-Italic" from the
        family;
      * if it starts with "dejavusans" (optionally preceded by a single
        quote): replaces the family with "DejaVu Sans".

    A line describing the problem and the tag itself are printed before any
    change is made. Tags without ``font-family``, or whose family matches
    none of the above, are left untouched.

    tag -- object exposing ``get``, ``__getitem__`` and ``__setitem__`` for
           its attributes (a BeautifulSoup tag; a plain dict also works).

    Returns None.
    """
    family = tag.get("font-family")
    if family is None:
        return
    #Inline flags are kept at the very start of each pattern; newer Python
    #versions reject them anywhere else.
    bold = re.match("(?i).*-bold.*", family)
    italic = re.match("(?i).*-italic.*", family)
    dejavu = re.match("(?i)'?dejavusans.*", family)
    #if none of the above apply we can skip
    if not bold and not italic and not dejavu:
        return
    problems = []
    if bold:
        problems.append("bad bolding method ")
    if italic:
        problems.append("bad italicising method ")
    if dejavu:
        problems.append("wrong font name")
    print("Fixing tag : " + "".join(problems))
    print(tag)
    #Otherwise we have work to do!
    #Check for bold
    if bold:
        weight = tag.get("font-weight")
        #Inspect the font-weight value itself; an existing non-bold value is
        #replaced outright, since a ";"-joined list is not a usable value.
        if weight is None or not re.search("(?i)bold", weight):
            tag["font-weight"] = "Bold"
        tag["font-family"] = re.sub("(?i)-Bold", "", tag["font-family"])
    #Check for italics
    if italic:
        style = tag.get("font-style")
        if style is None or not re.search("(?i)italic", style):
            tag["font-style"] = "Italic"
        #Strip the italic marker too, mirroring the bold handling
        tag["font-family"] = re.sub("(?i)-Italic", "", tag["font-family"])
    #Fix dejavu vs Deja Vu
    if dejavu:
        tag["font-family"] = "DejaVu Sans"
#Parse a transform="..." value consisting of exactly one matrix() or scale()
def _transformComponents(transform):
    """Return the six ``matrix(a b c d e f)`` components as strings.

    ``scale(sx sy)`` is expanded to ``[sx, "0", "0", sy, "0", "0"]`` and
    ``scale(s)`` is treated as ``scale(s s)``. None is returned for any
    other transform, for a list of several transforms, or for a wrong
    number of arguments.
    """
    found = re.match(r"(?i)\s*(matrix|scale)\s*\(([^()]*)\)\s*$", transform)
    if not found:
        return None
    parts = [p for p in re.split(r"[\s,]+", found.group(2)) if p]
    if found.group(1).lower() == "matrix":
        if len(parts) != 6:
            return None
        return parts
    if len(parts) == 1:
        #a single scale argument applies to both axes
        parts = parts * 2
    if len(parts) != 2:
        return None
    #construct m as a list in Mx+b form
    return [parts[0], "0", "0", parts[1], "0", "0"]

#Check to see if a small font is being used in conjunction with
#an enlarging matrix()/scale() transform, and fold the scale into the font
def fontSizeFix(tag):
    """Fold an enlarging diagonal transform into the font size.

    When ``tag`` has a ``transform`` that is a single ``matrix(...)`` or
    ``scale(...)`` with (near) zero off-diagonal terms, the diagonal term
    with the larger magnitude is taken as the factor. If that factor is
    greater than 1, the ``font-size`` of the nearest tag carrying one
    (``tag`` itself, then its ancestors) is multiplied by it, both diagonal
    terms are divided by it, and ``transform`` is rewritten as
    ``matrix(...)``. The numeric font size is written back as a string with
    its original unit suffix, if any.

    tag -- object exposing ``get``, ``__getitem__``, ``__setitem__`` and a
           ``parent`` attribute (None at the top of the tree).

    Returns True when the tag was rewritten, False when nothing was done.
    """
    #without a transformation there is nothing we can do
    #(the attribute is looked up with get(); "in" on a soup tag tests its
    #children, not its attributes)
    transform = tag.get("transform")
    if transform is None:
        return False
    #Find the nearest tag (this one or an ancestor) with a font-size
    #NOTE(review): when that is a shared ancestor, siblings of 'tag' are
    #resized as well - same as before, confirm this is intended
    parentTag = tag
    while parentTag is not None and parentTag.get("font-size") is None:
        parentTag = parentTag.parent
    if parentTag is None:
        return False
    #Transform should be of the form y=Mx+b
    m = _transformComponents(transform)
    if m is None:
        return False
    print(m)
    try:
        m = [float(c) for c in m]
    except ValueError:
        return False
    print(m)
    EPSILON=0.001
    if abs(m[1]) >= EPSILON or abs(m[2]) >= EPSILON:
        #M is not diagonal (rotation/skew): leave it alone
        return False
    #OK, so M is a diagonal matrix
    print("so far so good")
    if abs(m[0]) > abs(m[3]) :
        factor = m[0]
    else:
        factor = m[3]
    if not factor > 1:
        return False
    #Split "12.5px" into number and unit; keyword sizes such as "medium"
    #cannot be scaled and are left untouched
    sizeMatch = re.match(
        r"\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*([A-Za-z%]*)\s*$",
        parentTag["font-size"])
    if not sizeMatch:
        return False
    #Pump up the font size by factor, then reduce the matrix
    fontSize = float(sizeMatch.group(1))
    parentTag["font-size"] = str(fontSize*factor) + sizeMatch.group(2)
    m[0] = m[0]/factor
    m[3] = m[3]/factor
    tag["transform"] = "matrix(" + " ".join([str(c) for c in m]) + ")"
    return True
#Crappy font substitution routine
#Table of (pattern matched at the start of font-family, replacement family)
_PREFERRED_FONTS = (
    (re.compile("(?i)'?Arial.*"), "DejaVu Sans"),
    (re.compile("(?i)'?Times new roman.*"), "Times"),
)

def fontSub(tag):
    """Replace the tag's ``font-family`` using the preferred font table.

    The first table entry whose pattern matches the start of the current
    ``font-family`` value (case-insensitively, optionally after a single
    quote) wins and its replacement is stored on the tag. Tags without a
    ``font-family``, or with a family not in the table, are left untouched.

    tag -- object exposing ``get`` and ``__setitem__`` for its attributes
           (a BeautifulSoup tag; a plain dict also works).

    Returns None.
    """
    family = tag.get("font-family")
    if family is None:
        return
    #Substitute fonts from our preferred font table
    for pattern, replacement in _PREFERRED_FONTS:
        if pattern.match(family):
            tag["font-family"] = replacement
            break
def main():
    """Command-line entry point: ``svgTinker.py inputFile outputFile``.

    Reads the input file, parses it with BeautifulStoneSoup and then:
      1. splits every style="..." attribute into separate attributes;
      2. runs the font fixes on <text> tags (family fixes when a
         font-family attribute is present, otherwise the font-size fix when
         a transform attribute is present);
      3. runs the family fixes on <g> tags carrying a font-family;
      4. dissolves every <tspan>, keeping its children;
      5. removes every <style> tag for which hasFontFace() is true;
    and finally writes the document to the output file.

    Exits with status 1 on a wrong argument count or when either file
    cannot be opened.
    """
    if len(sys.argv) != 3:
        print("Usage: svgTinker.py inputFile outputFile")
        sys.exit(1)
    #open() signals failure by raising, never by returning a false value
    try:
        f = open(sys.argv[1])
    except IOError:
        print("File does not exist or could not be read")
        sys.exit(1)
    try:
        xmlText = f.read()
    finally:
        f.close()
    soup=BeautifulStoneSoup(xmlText)
    #find all style="..." tags
    for styled in soup.findAll(style=True):
        splitInkscapeStyle(styled)
    #Correct all font tags
    for textTag in soup.findAll("text"):
        #Check to see what attrs this guy has. Both flags are recomputed for
        #every tag so nothing leaks over from the previous one.
        attrNames = [attr[0] for attr in (textTag.attrs or [])]
        fontFamilyTag = any(re.match("(?i)font-family", n) for n in attrNames)
        fontTransformTag = any(re.match("(?i)transform", n) for n in attrNames)
        if fontFamilyTag :
            fontFix(textTag)
            fontSub(textTag)
        elif fontTransformTag :
            #only reached for tags without a font-family, as before
            fontSizeFix(textTag)
    #Fonts can also be stored in g elements.
    for group in soup.findAll("g"):
        attrNames = [attr[0] for attr in (group.attrs or [])]
        if any(re.match("(?i)font-family", n) for n in attrNames):
            fontFix(group)
            fontSub(group)
    #Nuke the tspans, preserving children
    for tspan in soup.findAll("tspan"):
        tagRemove(tspan,"tspans")
    #Find base64 encoded data and destroy it
    #extract() detaches the tag outright, so no placeholder element is needed
    for styleTag in soup.findAll("style"):
        if hasFontFace(styleTag):
            styleTag.extract()
    try:
        f=open(sys.argv[2],'w')
    except IOError:
        print('Unable to open file for writing. aborting')
        sys.exit(1)
    #save modified svg data
    #The document is written exactly as str(soup) renders it. prettify()
    #only returns a new string (its result used to be thrown away) and is
    #deliberately not used: re-indenting could change whitespace inside
    #text content.
    try:
        f.write(str(soup))
    finally:
        f.close()
    print("Wrote file : " + sys.argv[2])
#Script entry point: run the conversion only when this file is executed
#directly, not when it is imported as a module
if __name__ == "__main__":
    main()