Changeset 4197
- Timestamp:
- 08/29/08 07:26:37 (3 months ago)
- Files:
-
- pagetodocintegration/0.11/pagetodoc/pagetodoc.py (modified) (10 diffs)
- pagetodocintegration/0.11/setup.py (modified) (1 diff)
- pagetodocintegration/html_filtered.xslt (modified) (7 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pagetodocintegration/0.11/pagetodoc/pagetodoc.py
r4026 r4197 11 11 import zipfile 12 12 import base64 13 import subprocess 14 import popen2 13 15 14 16 … … 23 25 images = [] 24 26 imagesubdir = 'img/' 27 logsubdir = 'log/' 28 img_max_x = '0' 29 img_max_y = '0' 30 31 verbose = False 25 32 26 33 # IContentConverter methods … … 29 36 30 37 31 def convert_content(self, req, input_type, source, output_type): 38 def convert_content(self, req, input_type, source, output_type): 39 40 # get parameters from trac ini file 41 self.img_max_x = self.env.config.get('pagetodoc', 'img_max_x', self.img_max_x) 42 self.img_max_y = self.env.config.get('pagetodoc', 'img_max_y', self.img_max_y) 43 44 # XSL-Transformation 45 xsltfilepath = self.env.config.get('pagetodoc', 'xsltfile', '') 46 # TBD: Fehler ausgeben, wenn xsltfile nicht gelesen werden kann 47 # TBD: Parameter aus der trac.ini an zentraler Stelle auslesen 48 if False: 49 if xsltfilepath is '': 50 pass 32 51 33 52 # maybe for later use … … 47 66 # temporary files and folders 48 67 self.tempdir = mkdtemp(prefix="page2doc") 49 htmlfilehandle, htmlfilepath = mkstemp( dir=self.tempdir)50 wordfilehandle, wordfilepath = mkstemp( dir=self.tempdir)68 htmlfilehandle, htmlfilepath = mkstemp(prefix='trac_', dir=self.tempdir) 69 wordfilehandle, wordfilepath = mkstemp(prefix='word_', dir=self.tempdir) 51 70 zipfilepath = os.path.join(self.tempdir, os.path.basename(str(req.path_info) + '.zip')) 52 71 … … 70 89 71 90 # clean up the HTML page using HTML Tidy 72 # maybe for later use73 #tidy_input_enc = self.env.config.get('pagetodoc', 'input-encoding', 'utf8')74 #tidy_output_enc = self.env.config.get('pagetodoc', 'output-encoding', 'latin1')75 #args = '-m -asxhtml --doctype omit --input-encoding %s --output-encoding %s' % (tidy_input_enc, tidy_output_enc)76 91 args = '-m -asxhtml -latin1 --doctype omit' 77 92 cmd = 'tidy %s %s' % (args, htmlfilepath) 78 os.system(cmd) 79 80 # Workaround 81 self.xmlns_workaround(htmlfilepath) 82 83 # XSL-Transformation 84 xsltfilepath = 
self.env.config.get('pagetodoc', 'xsltfile', '') 85 cmd = 'xsltproc -o %s %s %s' % (wordfilepath, xsltfilepath, htmlfilepath) 86 os.system(cmd) 93 self.execute_external_program(cmd) 94 95 # workaround namespace 96 self.perform_workarounds(htmlfilepath, 'html') 97 98 cmd = 'xsltproc %s -o %s %s %s' % (('-v' if self.verbose else ''), wordfilepath, xsltfilepath, htmlfilepath) 99 self.execute_external_program(cmd) 100 101 # workaround pre-tags 102 self.perform_workarounds(wordfilepath, 'pre') 103 87 104 88 105 # create a zip file and store all files into it 89 106 zipfilehandle = zipfile.ZipFile(zipfilepath, "w") 90 zipfilehandle.write(wordfilepath, os.path.basename(str(req.path_info) + '.htm')) 107 zipfilehandle.write(wordfilepath, os.path.basename(str(req.path_info) + '.htm')) 91 108 for image in self.images: 92 109 zipfilehandle.write(image, self.imagesubdir + os.path.basename(image)) … … 95 112 96 113 # delete temporary folders and files 114 self.remove_dir(os.path.join(self.tempdir, self.logsubdir)) 97 115 self.remove_dir(os.path.join(self.tempdir, self.imagesubdir)) 98 116 self.remove_dir(self.tempdir) … … 102 120 103 121 return (zip, 'application/zip') 122 123 124 def execute_external_program(self, command): 125 logdir = os.path.join(self.tempdir, self.logsubdir) 126 self.create_dir(logdir) 127 128 # Create output and error log files 129 outptr, outFile = mkstemp(dir=logdir) 130 errptr, errFile = mkstemp(dir=logdir) 131 132 # Call the subprocess using convenience method 133 retval = subprocess.call(command, shell=True, stderr=errptr, stdout=outptr, close_fds=True) 134 135 # read stdout and stderr 136 # its strange that all output goes to stderr instead of stdout, in both cases (error and no error) 137 # so always use stderr 138 errptr = file(errFile, "r") 139 errData = errptr.read() 140 errptr.close() 141 142 # log to trac.log 143 if self.verbose: 144 self.env.log.info('--------- EXTERNAL PROGRAM OUTPUT, command is ' + command) 145 self.env.log.info(errData) 146 
147 # not needed right now 148 #outptr = file(outFile, "r") 149 #outData = outptr.read() 150 #outptr.close() 151 152 # Check the process exit code 153 if retval > 1: 154 raise Exception("Error executing command (return code = %s): %s" % (retval, errData)) 104 155 105 156 # remove the xml namespace from the file 106 157 # to be removed once I find out how to override this 107 def xmlns_workaround(self, htmlfilepath):158 def perform_workarounds(self, htmlfilepath, which=''): 108 159 # Workaround: Entferne die Namespace-Angabe in der HTML-Datei 109 160 htmlfilehandle = open(htmlfilepath, "r") … … 111 162 htmlfilehandle.close() 112 163 113 html = re.sub('(<html xmlns="http://www.w3.org/1999/xhtml">)', '<html>', html) 164 # replace namespace 165 if which is 'html': 166 html = re.sub('(<html xmlns="http://www.w3.org/1999/xhtml">)', '<html>', html) 167 168 # remove line feeds in <pre>-tags 169 if which is 'pre': 170 html = re.sub(r'<pre[^>]*>\n([^<]*)</pre>', self.remove_line_feeds, html) 114 171 115 172 htmlfilehandle = open(htmlfilepath, "w") 116 173 htmlfilehandle.write(html) 117 174 htmlfilehandle.close() 175 176 def remove_line_feeds(self, matchObj): 177 return '<pre>' + re.sub(r'\n', '<br />', matchObj.group(1)) + '<br /></pre>' 118 178 119 179 … … 121 181 imgdir = os.path.join(self.tempdir, self.imagesubdir) 122 182 123 if not os.path.isdir(imgdir): 124 os.mkdir(imgdir) 125 126 fh, fn = mkstemp(dir = imgdir) 183 # create path to imagedir, if not existing 184 self.create_dir(imgdir) 185 186 # save image to disk 187 (filename, fileext) = os.path.splitext(os.path.basename(matchObj.group(1))) 188 # remove any trailing GET-Parameters from the file extension e.g. '.jpg?format=raw') 189 # fileext = fileext[:fileext.find('?')] 190 191 # line above does not work due to encoding issues ('?' 
is something like '%3f' then) 192 # therefore just remove the file extention, and let ImageMagick detect the file type by itself 193 # this has been tested with ImageMagick 6.4.2 and PNG, JPG, TIFF and GIF files 194 fileext = '' 195 196 # create temporary file 197 fh, fn = mkstemp(prefix=filename, suffix=fileext, dir=imgdir) 127 198 os.close(fh) 128 129 199 urlretrieve(matchObj.group(1), fn) 200 201 # resize images, if wanted, using ImageMagick 202 if int(self.img_max_x) > 0 and int(self.img_max_y) > 0: 203 args = "-resize '%sx%s>'" % (self.img_max_x, self.img_max_y) 204 cmd = 'convert %s %s %s' % (fn, args, fn) 205 self.execute_external_program(cmd) 206 207 # add image to image list 130 208 self.images.append(fn) 131 209 … … 140 218 os.chmod(f, 0777) 141 219 142 143 #removes a directory including all files 220 # directory functions 221 def create_dir(self, dir): 222 if not os.path.isdir(dir): 223 os.mkdir(dir) 224 144 225 def remove_dir(self, dir): 145 for f in glob.glob(os.path.join(dir, '*')): 146 os.unlink(f) 147 os.rmdir(dir) 148 149 150 151 152 153 226 if os.path.isdir(dir): 227 for f in glob.glob(os.path.join(dir, '*')): 228 os.unlink(f) 229 os.rmdir(dir) 230 231 232 233 234 235 pagetodocintegration/0.11/setup.py
r4026 r4197 3 3 4 4 setup(name='PageToDoc', 5 version='0. 1.5',5 version='0.2', 6 6 packages=['pagetodoc'], 7 7 author='Lucas Eisenzimmer', pagetodocintegration/html_filtered.xslt
r4027 r4197 6 6 <xsl:variable name="bezeichner_bildbeschriftung">Abbildung</xsl:variable> 7 7 <xsl:variable name="bezeichner_tabellenbeschriftung">Tabelle</xsl:variable> 8 <xsl:variable name="quelle_tabellenbeschriftung"> Title</xsl:variable>8 <xsl:variable name="quelle_tabellenbeschriftung">title</xsl:variable> 9 9 10 10 <!-- Einstiegspunkt --> … … 36 36 37 37 38 <!-- Überschriften -->38 <!-- Ueberschriften --> 39 39 <xsl:template match="h1|h2|h3|h4|h5|h6|h7"> 40 40 <xsl:element name = "{name()}" > … … 72 72 <!-- Tabellendefinition --> 73 73 <xsl:template match="table"> 74 <table> 75 <xsl:apply-templates/> 76 </table> 74 77 <!-- Tabellenbeschriftung --> 75 76 <xsl:if test="tbody/tr/td[substring-before(text()[1],':')=$quelle_tabellenbeschriftung] or tr/td[substring-before(text()[1],':')=$quelle_tabellenbeschriftung]"> 78 <xsl:if test="substring-before(tbody/tr[last()]/td[1],':')=$quelle_tabellenbeschriftung or substring-before(tr[last()]/td[1],':')=$quelle_tabellenbeschriftung"> 77 79 <p class="MsoCaption"> 78 80 <xsl:value-of select="$bezeichner_tabellenbeschriftung"/> … … 85 87 <!-- Bezeichnung --> 86 88 <xsl:if test="tbody"> 87 <xsl:value-of select="substring-after(tbody/tr[ 1]/td[text()],':')"/>89 <xsl:value-of select="substring-after(tbody/tr[last()]/td[1],':')"/> 88 90 </xsl:if> 89 91 <xsl:if test="tr"> 90 <xsl:value-of select="substring-after(tr[ 1]/td[text()],':')"/>92 <xsl:value-of select="substring-after(tr[last()]/td[1],':')"/> 91 93 </xsl:if> 92 94 93 95 </p> 94 96 </xsl:if> 95 <table> 96 <xsl:apply-templates/> 97 </table> 97 98 98 </xsl:template> 99 99 … … 144 144 145 145 <!-- praeformatierter Text --> 146 <xsl:template match="pre|tt"> 147 <!-- WORKAROUND. 
Um den Urzustand herzustellen, in der folgenden Zeile tt durch {name()} ersetzen --> 148 <xsl:element name="tt"> 149 <xsl:apply-templates/> 150 </xsl:element> 146 <xsl:template match="tt"> 147 <tt><xsl:apply-templates/></tt> 148 </xsl:template> 149 150 <xsl:template match="pre"> 151 <xsl:variable name="pre_string"> 152 <xsl:call-template name="replace-string"> <!-- imported template --> 153 <xsl:with-param name="text" select="."/> 154 <xsl:with-param name="replace" select="'a'"/> 155 <xsl:with-param name="with" select="'a'"/> 156 </xsl:call-template> 157 </xsl:variable> 158 <pre> 159 <xsl:value-of select="$pre_string"/> 160 </pre> 151 161 </xsl:template> 152 162 … … 161 171 <!-- Links --> 162 172 <xsl:template match="a"> 163 <!-- Fuer interne Referenzen -->164 173 <!-- Fuer interne Referenzen: Links in Text konvertieren --> 174 <!-- normale Wiki-Links --> 165 175 <xsl:if test="@class='wiki'"> 166 <!-- Erzeugung Link Word-Stil-->167 <!--<xsl:text disable-output-escaping="yes">168 <![CDATA[<span style='mso-field-code:" REF ]]>169 </xsl:text>170 <xsl:value-of select="substring-after(@href,'#')"/>171 <xsl:text disable-output-escaping="yes">172 <![CDATA[ \\h "'>]]>173 </xsl:text>174 <xsl:value-of select="text()"/>175 <xsl:text disable-output-escaping="yes">176 <![CDATA[</span>]]>177 </xsl:text>-->178 179 <!-- Erzeugung Link HTML-Stil -->180 <!--<a href="#{substring-after(@href,'#')}">181 <xsl:value-of select="text()"/>182 </a>-->183 176 <xsl:apply-templates/> 177 </xsl:if> 178 <!-- fehlende Wiki-Links --> 179 <xsl:if test="@class='missing wiki'"> 180 <xsl:apply-templates /> 181 </xsl:if> 182 <!-- Links ins SVN --> 183 <xsl:if test="@class='source'"> 184 <xsl:apply-templates /> 184 185 </xsl:if> 185 186 … … 198 199 199 200 </xsl:template> 201 202 <!-- string replacement function http://www.dpawson.co.uk/xsl/sect2/replace.html#d9550e61 --> 203 <xsl:template name="replace-string"> 204 <xsl:param name="text"/> 205 <xsl:param name="replace"/> 206 <xsl:param name="with"/> 207 
<xsl:choose> 208 <xsl:when test="contains($text,$replace)"> 209 <xsl:value-of select="substring-before($text,$replace)"/> 210 <xsl:value-of select="$with"/> 211 <xsl:call-template name="replace-string"> 212 <xsl:with-param name="text" 213 select="substring-after($text,$replace)"/> 214 <xsl:with-param name="replace" select="$replace"/> 215 <xsl:with-param name="with" select="$with"/> 216 </xsl:call-template> 217 </xsl:when> 218 <xsl:otherwise> 219 <xsl:value-of select="$text"/> 220 </xsl:otherwise> 221 </xsl:choose> 222 </xsl:template> 200 223 201 224 </xsl:stylesheet>
