| 1 |
import libxslt, urllib2, libxml2, sys, SocketServer, re |
|---|
| 2 |
from os import makedirs |
|---|
| 3 |
|
|---|
| 4 |
base_url = "http://trac.server.com/" |
|---|
| 5 |
project_wiki_url = "myproject/wiki/" |
|---|
| 6 |
project_attachments_url = "myproject/attachment/wiki/" |
|---|
| 7 |
contents_url = "manuals/usermanual" # 'table of contents' wiki page. contains the links for each chapters |
|---|
| 8 |
|
|---|
| 9 |
docbook_url_suffix = "?format=docbook" |
|---|
| 10 |
raw_url_suffix = "?format=raw" |
|---|
| 11 |
images_base_path = "figure/" |
|---|
| 12 |
chapters_base_path = "chapter/" |
|---|
| 13 |
useSVGsIfAvailable = False |
|---|
| 14 |
|
|---|
| 15 |
contentsDocbook_stream = urllib2.urlopen(base_url + project_wiki_url + contents_url + docbook_url_suffix).read() |
|---|
| 16 |
contentsDocbook_doc = libxml2.parseDoc(contentsDocbook_stream) #parseDoc always receives utf8, i think |
|---|
| 17 |
|
|---|
| 18 |
contentsDocbook_xpc = contentsDocbook_doc.xpathNewContext() |
|---|
| 19 |
nodes = contentsDocbook_xpc.xpathEval("//ulink/@url") |
|---|
| 20 |
|
|---|
| 21 |
if len(nodes) == 0: |
|---|
| 22 |
print "no results" |
|---|
| 23 |
sys.exit(1) |
|---|
| 24 |
else: |
|---|
| 25 |
|
|---|
| 26 |
getOriginalUrl_re = re.compile("""<imagedata fileref="/(.*?)".*/>""") |
|---|
| 27 |
|
|---|
| 28 |
for result in nodes: |
|---|
| 29 |
lastSlashIdx = str(result).rfind("/") |
|---|
| 30 |
chapter_url = base_url + str(result)[7:-1] |
|---|
| 31 |
chapter_slug = str(result)[lastSlashIdx+1:-1] |
|---|
| 32 |
try: |
|---|
| 33 |
chapterDocbook_stream = urllib2.urlopen(chapter_url + docbook_url_suffix).read() |
|---|
| 34 |
except urllib2.HTTPError: |
|---|
| 35 |
continue |
|---|
| 36 |
|
|---|
| 37 |
#find image urls, change them, and determine the new paths |
|---|
| 38 |
images_original_urls = getOriginalUrl_re.findall(chapterDocbook_stream) #myproject/attachment/wiki/manuals/usermanual/section1/untitled.png?format=raw |
|---|
| 39 |
images_modified_urls = [url.replace(".png", ".svg") for url in images_original_urls] #myproject/attachment/wiki/manuals/usermanual/section1/untitled.svg?format=raw |
|---|
| 40 |
images_newpath_pngfilenames = [images_base_path + url[len(project_attachments_url):-len(raw_url_suffix)] for url in images_original_urls] #figures/manuals/usermanual/untitled.png |
|---|
| 41 |
images_newpath_svgfilenames = [images_base_path + url[len(project_attachments_url):-len(raw_url_suffix)] for url in images_modified_urls] #figures/manuals/usermanual/untitled.svg |
|---|
| 42 |
images_newpath_filenames = [] |
|---|
| 43 |
|
|---|
| 44 |
#save images |
|---|
| 45 |
for i in range(len(images_modified_urls)): |
|---|
| 46 |
image_stream = None |
|---|
| 47 |
if useSVGsIfAvailable: |
|---|
| 48 |
try: |
|---|
| 49 |
image_stream = urllib2.urlopen(base_url + images_modified_urls[i]).read() |
|---|
| 50 |
images_newpath_filenames.append(images_newpath_svgfilenames[i]) |
|---|
| 51 |
except urllib2.HTTPError: |
|---|
| 52 |
print "Could not retrieve image resource: " + images_modified_urls[i] |
|---|
| 53 |
|
|---|
| 54 |
if image_stream==None: |
|---|
| 55 |
try: |
|---|
| 56 |
image_stream = urllib2.urlopen(base_url + images_original_urls[i]).read() |
|---|
| 57 |
images_newpath_filenames.append(images_newpath_pngfilenames[i]) |
|---|
| 58 |
except urllib2.HTTPError: |
|---|
| 59 |
print "Could not retrieve image resource: " + images_original_urls[i] |
|---|
| 60 |
sys.exit(1) |
|---|
| 61 |
|
|---|
| 62 |
dirEndIdx = images_newpath_filenames[i].rfind("/")+1 |
|---|
| 63 |
try: |
|---|
| 64 |
makedirs(images_newpath_filenames[i][0:dirEndIdx]) |
|---|
| 65 |
except OSError, x: |
|---|
| 66 |
#[Errno 17] File exists: |
|---|
| 67 |
pass |
|---|
| 68 |
image_file = file(images_newpath_filenames[i], "wb") |
|---|
| 69 |
image_file.write(image_stream) |
|---|
| 70 |
image_file.close() |
|---|
| 71 |
|
|---|
| 72 |
#save docbook files, changing them to have the right image paths, and unique ids |
|---|
| 73 |
for i in range(len(images_original_urls)): |
|---|
| 74 |
chapterDocbook_stream = chapterDocbook_stream.replace("/" + images_original_urls[i], images_newpath_filenames[i]) |
|---|
| 75 |
chapterDocbook_stream = chapterDocbook_stream.replace("<section id=\"", "<section id=\"" + chapter_slug + "_") |
|---|
| 76 |
dbfile = file(chapters_base_path + "chapter_" + chapter_slug + ".xml", "w") |
|---|
| 77 |
dbfile.write(chapterDocbook_stream) |
|---|
| 78 |
dbfile.close() |
|---|
| 79 |
|
|---|
| 80 |
contentsDocbook_doc.freeDoc() |
|---|
| 81 |
contentsDocbook_xpc.xpathFreeContext() |
|---|