|
Server : Apache/2.2.17 (Unix) mod_ssl/2.2.17 OpenSSL/0.9.8e-fips-rhel5 DAV/2 PHP/5.2.17 System : Linux localhost 2.6.18-419.el5 #1 SMP Fri Feb 24 22:47:42 UTC 2017 x86_64 User : nobody ( 99) PHP Version : 5.2.17 Disable Function : NONE Directory : /proc/21572/root/usr/share/xml2po/ |
Upload File : |
# -*- coding: utf-8 -*-
# Copyright (c) 2004 Danilo Segan <danilo@kvota.net>.
#
# This file is part of xml2po.
#
# xml2po is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# xml2po is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with xml2po; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# This implements special instructions for handling DocBook XML documents
# in a better way.
#
# This means:
# — better handling of nested complicated tags (i.e. definitions of
# ignored-tags and final-tags)
# — support for merging translator-credits back into DocBook articles
# — support for setting a language
#
# We use "currentXmlMode" class name for all modes
# -- it might be better to have it named docbookXmlMode, but it will make loading harder;
# it is also not necessary until we start supporting extracting strings from more
# than one document type at the same time
#
import re
import libxml2
import os
import md5
import sys
class docbookXmlMode:
    """Class for special handling of DocBook document types.

    It sets the lang attribute on article/book elements, and adds
    translators to articleinfo/copyright.
    """

    def __init__(self):
        # List-like container elements.
        self.lists = ['itemizedlist', 'orderedlist', 'variablelist',
                      'segmentedlist', 'simplelist', 'calloutlist',
                      'varlistentry']
        # Figure/media container elements.
        self.objects = ['figure', 'textobject', 'imageobject', 'mediaobject',
                        'screenshot']

    def getIgnoredTags(self):
        "Returns array of tags to be ignored."
        return self.objects + self.lists

    def getFinalTags(self):
        "Returns array of tags to be considered 'final'."
        return ['para', 'formalpara', 'simpara',
                'releaseinfo', 'revnumber', 'title',
                'date', 'term', 'programlisting'] + self.objects + self.lists

    def getSpacePreserveTags(self):
        "Returns array of tags in which spaces are to be preserved."
        return [
            'classsynopsisinfo',
            'computeroutput',
            'funcsynopsisinfo',
            'literallayout',
            'programlisting',
            'screen',
            'synopsis',
            'userinput'
        ]

    def getStringForTranslators(self):
        """Returns string which will be used to credit translators."""
        return "translator-credits"

    def getCommentForTranslators(self):
        """Returns a comment to be added next to string for crediting translators."""
        return """Put one translator per line, in the form of NAME <EMAIL>, YEAR1, YEAR2."""

    def _find_articleinfo(self, node):
        """Depth-first search for the first 'articleinfo' or 'bookinfo' node.

        Starts at `node` itself, then walks its children in document order.
        Returns the matching node, or None when the subtree has none.
        """
        if node.name == 'articleinfo' or node.name == 'bookinfo':
            return node
        child = node.children
        while child:
            found = self._find_articleinfo(child)
            if found:
                return found
            child = child.next
        return None

    def _find_lastcopyright(self, node):
        """Return the node after which a new copyright should be inserted.

        That is the last direct child of `node` named "copyright"; when no
        child has that name, the very last child is returned instead.
        Returns None when `node` has no children at all.
        """
        if not node.children:
            return None
        last = node.lastChild()
        candidate = last
        # Walk backwards so the first hit is the last copyright element.
        while candidate:
            if candidate.name == "copyright":
                return candidate
            candidate = candidate.prev
        return last

    def _md5_for_file(self, filename):
        """Return the hexadecimal MD5 digest of the contents of `filename`.

        The file is read in binary mode in 4096-byte chunks, and is closed
        even when reading fails.
        """
        # Prefer hashlib (the md5 module is deprecated since Python 2.5 and
        # gone in Python 3); fall back to the module-level md5 import on
        # interpreters that predate hashlib.
        try:
            import hashlib
        except ImportError:
            digest = md5.new()
        else:
            digest = hashlib.md5()
        stream = open(filename, "rb")
        try:
            chunk = stream.read(4096)
            while chunk:
                digest.update(chunk)
                chunk = stream.read(4096)
        finally:
            stream.close()
        return digest.hexdigest()

    def _output_images(self, node, msg):
        """Emit one "@@image" message for every 'imagedata' element under `node`.

        For each 'imagedata' element with a 'fileref' attribute, the
        referenced path is resolved relative to the directory of
        `msg.filename`, and a message holding the reference and the MD5 of
        the file is passed to `msg.outputMessage`. A missing file produces a
        placeholder digest and a warning on stderr. Other nodes are searched
        recursively through their children.
        """
        if node and node.type == 'element' and node.name == 'imagedata':
            # Use .fileref to construct new message
            fileref = node.prop("fileref")
            if fileref:
                basedir = os.path.dirname(msg.filename)
                fullpath = os.path.join(basedir, fileref)
                if os.path.exists(fullpath):
                    digest = self._md5_for_file(fullpath)
                else:
                    digest = "THIS FILE DOESN'T EXIST"
                    sys.stderr.write(
                        "Warning: image file '%s' not found.\n" % fullpath)

                msg.outputMessage(
                    "@@image: '%s'; md5=%s" % (fileref, digest),
                    node.lineNo(),
                    "When image changes, this message will be marked fuzzy or untranslated for you.\n" +
                    "It doesn't matter what you translate it to: it's not used at all.")
        elif node and node.children:
            child = node.children
            while child:
                self._output_images(child, msg)
                child = child.next

    def preProcessXml(self, doc, msg):
        """Add additional messages of interest here."""
        root = doc.getRootElement()
        self._output_images(root, msg)

    def postProcessXmlTranslation(self, doc, language, translators):
        """Sets a language and translators in "doc" tree.

        "translators" is a string consisted of "Name <email>, years" pairs
        of each translator, separated by newlines. Nothing is added when it
        is empty or still equal to the untranslated credits string.
        """
        root = doc.getRootElement()
        # DocBook documents can be something other than article, handle that
        # as well in the future
        while root and root.name != 'article' and root.name != 'book':
            root = root.next
        if not (root and root.name in ('article', 'book')):
            return
        root.setProp('lang', language)

        # An untranslated credits string means there is nobody to credit.
        if not translators or translators == self.getStringForTranslators():
            return

        # Now, let's find 'articleinfo' (it can be something else, but this
        # goes along with 'article')
        ai = self._find_articleinfo(root)
        if not ai:
            return

        # Compiled once; applied to every line below.
        credit_re = re.compile(r"^([^<,]+)\s*(?:<([^>,]+)>)?,\s*(.*)$")

        # Now, let's do one translator at a time
        for line in translators.split("\n"):
            match = credit_re.match(line.strip())
            if not match:
                continue
            name, email, years = match.groups()

            # Insert after the last existing copyright (or last child), so
            # credits keep the order in which they are listed.
            last = self._find_lastcopyright(ai)
            copy = libxml2.newNode("copyright")
            if last:
                copy = last.addNextSibling(copy)
            else:
                ai.addChild(copy)

            if years:
                copy.newChild(None, "year", years.encode('utf-8'))

            # The pattern requires a non-empty name, so the last two
            # branches are only a defensive fallback.
            if name and email:
                holder = name + "(%s)" % email
            elif name:
                holder = name
            elif email:
                holder = email
            else:
                holder = "???"
            copy.newChild(None, "holder", holder.encode('utf-8'))
# Quick self-check when the module is executed directly: print what the
# DocBook mode reports for each of its tag lists and credit strings.
if __name__ == '__main__':
    mode = docbookXmlMode()
    ignored = repr(mode.getIgnoredTags())
    final = repr(mode.getFinalTags())
    preserved = repr(mode.getSpacePreserveTags())
    print("Ignored tags       : " + ignored)
    print("Final tags         : " + final)
    print("Space-preserve tags: " + preserved)
    print("Credits from string: '%s'" % mode.getStringForTranslators())
    print("Explanation for credits:\n\t'%s'" % mode.getCommentForTranslators())