root/page2docbookplugin/page-retriever/getDocbookDocuments.py

Revision 4131, 3.9 kB (checked in by FilipeCorreia, 5 months ago)
  • Committed the companion script.
Line 
# Companion script: downloads a manual kept in a Trac wiki as DocBook.
#
# It fetches the 'table of contents' wiki page in DocBook format, follows
# every <ulink url="..."> found in it, and for each linked chapter:
#   * downloads the images referenced by <imagedata fileref="/..."/>,
#   * rewrites the image references to point at the local copies,
#   * prefixes every section id with the chapter slug so ids stay unique
#     across chapters,
#   * writes the result to <chapters_base_path>chapter_<slug>.xml.
#
# This is Python 2 code (urllib2).

import SocketServer
import re
import sys
import urllib2
from os import makedirs
from os.path import isdir

# libxslt and SocketServer are not referenced anywhere below; the imports
# are kept exactly as the script originally had them.
import libxml2
import libxslt

base_url = "http://trac.server.com/"
project_wiki_url = "myproject/wiki/"
project_attachments_url = "myproject/attachment/wiki/"
# 'Table of contents' wiki page; it contains the links to each chapter.
contents_url = "manuals/usermanual"

docbook_url_suffix = "?format=docbook"
raw_url_suffix = "?format=raw"
images_base_path = "figure/"
chapters_base_path = "chapter/"
useSVGsIfAvailable = False

# Captures the server-relative URL (without the leading "/") of every image.
# "[^>]*" keeps each match inside its own tag, so several <imagedata>
# elements on one line are all found.
getOriginalUrl_re = re.compile(r'<imagedata fileref="/([^"]*)"[^>]*/>')


def _fetch(url):
    """Return the body served at url; the connection is always closed.

    Raises whatever urllib2.urlopen raises (callers handle urllib2.HTTPError).
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _ensure_dir(path):
    """Create directory path (and its parents) unless it already exists."""
    if not path or isdir(path):
        return
    try:
        makedirs(path)
    except OSError:
        # Tolerate only "already exists" (e.g. created concurrently); any
        # other failure, such as missing permissions, must not be hidden.
        if not isdir(path):
            raise


def _write_file(path, data, mode):
    """Write data to path using the given open mode, always closing the file."""
    out = open(path, mode)
    try:
        out.write(data)
    finally:
        out.close()


def _local_image_path(image_url):
    """Map an attachment URL to the local path the image is saved under.

    Example:
      myproject/attachment/wiki/manuals/usermanual/section1/untitled.png?format=raw
      -> figure/manuals/usermanual/section1/untitled.png
    Assumes image_url starts with project_attachments_url and ends with
    raw_url_suffix; the slice below is positional and does not check this.
    """
    return images_base_path + image_url[len(project_attachments_url):-len(raw_url_suffix)]


def _save_image(png_url):
    """Download one image, store it locally and return the local path.

    When useSVGsIfAvailable is set, the ".svg" variant of the URL is tried
    first and the original URL is only used as a fallback. If the original
    URL cannot be retrieved either, the script exits with status 1.
    """
    image_stream = None
    local_path = None

    if useSVGsIfAvailable:
        svg_url = png_url.replace(".png", ".svg")
        try:
            image_stream = _fetch(base_url + svg_url)
            local_path = _local_image_path(svg_url)
        except urllib2.HTTPError:
            print("Could not retrieve image resource: " + svg_url)

    if image_stream is None:
        try:
            image_stream = _fetch(base_url + png_url)
            local_path = _local_image_path(png_url)
        except urllib2.HTTPError:
            print("Could not retrieve image resource: " + png_url)
            sys.exit(1)

    _ensure_dir(local_path[:local_path.rfind("/") + 1])
    _write_file(local_path, image_stream, "wb")
    return local_path


def _save_chapter(link_target):
    """Download the chapter behind link_target and write it as DocBook.

    link_target is the server-relative link ("/myproject/wiki/...") taken
    from the table of contents. A chapter that cannot be retrieved is
    reported and skipped.
    """
    chapter_url = base_url + link_target[1:]  # base_url already ends in "/"
    chapter_slug = link_target[link_target.rfind("/") + 1:]
    try:
        chapterDocbook_stream = _fetch(chapter_url + docbook_url_suffix)
    except urllib2.HTTPError:
        print("Could not retrieve chapter: " + chapter_url)
        return

    # Find the image URLs, download the images, and point the document at
    # the local copies.
    images_original_urls = getOriginalUrl_re.findall(chapterDocbook_stream)
    images_newpath_filenames = [_save_image(url) for url in images_original_urls]
    for original_url, new_path in zip(images_original_urls, images_newpath_filenames):
        chapterDocbook_stream = chapterDocbook_stream.replace("/" + original_url, new_path)

    # Make section ids unique across chapters.
    chapterDocbook_stream = chapterDocbook_stream.replace(
        "<section id=\"", "<section id=\"" + chapter_slug + "_")

    _write_file(chapters_base_path + "chapter_" + chapter_slug + ".xml",
                chapterDocbook_stream, "w")


contentsDocbook_stream = _fetch(base_url + project_wiki_url + contents_url + docbook_url_suffix)
# NOTE(review): the original author assumed parseDoc always expects UTF-8
# input; this has not been verified here.
contentsDocbook_doc = libxml2.parseDoc(contentsDocbook_stream)
try:
    contentsDocbook_xpc = contentsDocbook_doc.xpathNewContext()
    try:
        nodes = contentsDocbook_xpc.xpathEval("//ulink/@url")
        if not nodes:
            print("no results")
            sys.exit(1)
        # Copy the attribute values out so the libxml2 objects can be released
        # before the downloads start. ".content" is the attribute value
        # itself, not its serialized ' url="..."' form.
        chapter_links = [node.content for node in nodes]
    finally:
        contentsDocbook_xpc.xpathFreeContext()
finally:
    contentsDocbook_doc.freeDoc()

_ensure_dir(chapters_base_path)
for link_target in chapter_links:
    if not link_target.startswith("/"):
        # Only server-relative links can be appended to base_url.
        print("Skipping non-local link: " + link_target)
        continue
    _save_chapter(link_target)
Note: See TracBrowser for help on using the browser.