1
|
#! /usr/bin/env python
|
2
|
|
3
|
import sys
|
4
|
import urllib2
|
5
|
from optparse import OptionParser
|
6
|
import re
|
7
|
import random
|
8
|
import os
|
9
|
|
10
|
# Publishable documents, keyed by the doc-id given on the command line.
# Each value is a 4-tuple:
#   (language code, wiki page URL, generated HTML file name, rsync destination)
docs = {
    'wcs-fr': (
        'fr',
        'https://wiki.entrouvert.org/wcs/Guide_de_l%27utilisateur',
        'user-guide.html',
        'labs.libre-entreprise.org:/home/groups/wcs/htdocs/doc/fr/',
    ),
    'wcs-admin-fr': (
        'fr',
        'https://wiki.entrouvert.org/wcs/Guide_de_l%27administrateur',
        'admin-guide.html',
        'labs.libre-entreprise.org:/home/groups/wcs/htdocs/doc/fr/',
    ),
    'authentic-fr': (
        'fr',
        'https://wiki.entrouvert.org/Authentic/Guide_de_l%27administrateur',
        'authentic-admin.html',
        'labs.libre-entreprise.org:/home/groups/authentic/htdocs/doc/fr/',
    ),
    'authentic-en': (
        'en',
        'https://wiki.entrouvert.org/Authentic/Administrator_Guide',
        'authentic-admin.html',
        'labs.libre-entreprise.org:/home/groups/authentic/htdocs/doc/en/',
    ),
}
|
28
|
|
29
|
|
30
|
# Command line handling: optional --user / --css switches and exactly one
# positional argument, the id of the document to publish (a key of ``docs``).
usage = 'usage: %prog [options] doc-id'
parser = OptionParser(usage)
parser.add_option('-u', '--user', dest='scpuser',
                  help='user name on labs, for file upload via scp')
parser.add_option('--css', dest='css', default='default.css',
                  help='css file to use')

(options, args) = parser.parse_args()
if len(args) != 1:
    # parser.error() reports the problem together with the usage and exits.
    parser.error('incorrect number of arguments')

doc_id = args[0]
css = options.css

# Membership test with ``in`` rather than the deprecated dict.has_key(),
# which no longer exists in Python 3.
if doc_id not in docs:
    parser.error('invalid doc id')
|
46
|
|
47
|
# Scratch directory in which the HTML page and its figures/attachments are
# assembled before being uploaded.
# tempfile.mkdtemp() chooses an unused name and creates the directory in one
# step, accessible only to the current user.  The previous
# '/tmp/.ud.<random.random()>' name was guessable in a world-writable
# directory and os.mkdir() simply failed if the name was already taken.
import tempfile

tmpdir = tempfile.mkdtemp(prefix='.ud.', dir='/tmp')
|
49
|
|
50
|
# Settings of the selected document: page language, wiki URL, name of the
# HTML file to generate and rsync destination.
lang, url, filename, dest = docs[doc_id]

# The raw wiki markup is only fetched to look for a "#pragma title" line.
raw = urllib2.urlopen(url + '?action=raw').read()
t = re.findall('#pragma title (.*)', raw)
if t:
    title = t[0]
    # "---" in the wiki title stands for an em dash.  It is emitted as an
    # HTML character reference: the title only ends up inside <title> and
    # <h1>, and a literal non-ASCII character in this source file (which has
    # no encoding declaration) is a SyntaxError under Python 2.
    title = title.replace('---', '&#8212;')
else:
    # No explicit title: fall back to the last component of the page URL,
    # with underscores turned into spaces.
    title = url.rsplit('/')[-1].replace('_', ' ')

# HTML rendering of the page as produced by the wiki.
body = urllib2.urlopen(url + '?action=format&mimetype=text/html').read()
|
61
|
|
62
|
# (original src attribute, local file name) for every image encountered by
# sub_img(); filled as a side effect of the substitution.
figures = []


def sub_img(matchobj):
    """Rewrite one ``<img ...>`` tag so that it points into ``figures/``.

    ``matchobj`` is a regular expression match whose whole text is the tag.
    The tag's ``src`` value and the local file name derived from it are
    appended to the module-level ``figures`` list.

    Returns the replacement ``<img>`` tag; every attribute other than
    ``src`` is dropped.  Raises IndexError when the tag has no
    ``src="..."`` attribute.
    """
    tag = matchobj.group(0)
    wiki_src = re.findall('src="(.*?)"', tag)[0]
    # The local name is whatever follows the last "=" of the src value
    # (the whole value when it contains no "=").
    local_name = wiki_src.rsplit('=', 1)[-1]
    figures.append((wiki_src, local_name))
    return '<img src="figures/%s" />' % local_name
|
70
|
|
71
|
# (download URL or path, local file name) for every attachment link
# encountered by sub_attach(); filled as a side effect of the substitution.
attachments = []


def sub_attach(matchobj):
    """Rewrite one attachment ``href`` so that it points into ``attachments/``.

    ``matchobj`` must provide two groups: group(1) is the start of the
    original href, up to and including ``target=``, and group(2) is the
    attachment file name.  The original location and the file name are
    appended to the module-level ``attachments`` list.

    Returns the replacement ``href="attachments/<name>"`` attribute.
    """
    target = matchobj.group(2)
    # The href is taken from HTML, where "&" is written "&amp;"; undo that
    # escaping so the recorded location is a usable URL.
    src = (matchobj.group(1) + target).replace('&amp;', '&')
    attachments.append((src, target))
    return 'href="attachments/%s"' % target
|
77
|
|
78
|
# Keep a copy of the HTML exactly as fetched, before any rewriting.
open('/tmp/aze.html', 'w').write(body)

# Per-document link fixes, applied in order: references to the other w.c.s.
# guide are pointed at the corresponding generated file.
_link_rewrites = {
    'wcs-fr': [
        ('/wcs/Guide_de_l%27administrateur', 'admin-guide.html'),
        ('../Guide', 'Guide'),
    ],
    'wcs-admin-fr': [
        ("http://wiki.entrouvert.org/wcs/Guide_de_l'utilisateur",
         'user-guide.html'),
    ],
}
for _old, _new in _link_rewrites.get(doc_id, []):
    body = body.replace(_old, _new)

# Images and .wcs attachments are redirected to local copies; the callbacks
# record what has to be downloaded.
body = re.compile('<img.*?>').sub(sub_img, body)
body = re.compile(
    r'class="attachment" href="(.*?AttachFile.*?target=)(.*?\.wcs)"'
).sub(sub_attach, body)

# Strip the trailing rule and the "Cat..." link that follows it on the
# same line.
body = re.compile('<hr />.*Cat.*</a>').sub('', body)
|
88
|
|
89
|
# Wrap the converted wiki body in a standalone XHTML page and store it in
# the scratch directory under the document's file name.
_page_template = '''<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="%(lang)s" lang="%(lang)s">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<link rel="stylesheet" type="text/css" href="%(css)s" />
<title>%(title)s</title>
</head>
<body>
<div id="title">
<h1>%(title)s</h1>
</div>
%(body)s
</body>
</html>
'''

_page_values = {
    'lang': lang,
    'css': css,
    'title': title,
    'body': body,
}

out = open(os.path.join(tmpdir, filename), 'w')
out.write(_page_template % _page_values)
# The print statement used previously terminated its output with a newline.
out.write('\n')
out.close()
|
108
|
|
109
|
# Download every image referenced by the page into <tmpdir>/figures.
# A figure the wiki refuses to serve is reported on stderr and skipped.
if figures:
    figdir = os.path.join(tmpdir, 'figures')
    os.mkdir(figdir)
    for (src, idest) in figures:
        url = 'http://wiki.entrouvert.org' + src
        # Fetch first and only then create the local file, so a failed
        # download never leaves an empty file behind to be removed.
        try:
            data = urllib2.urlopen(url).read()
        except urllib2.HTTPError:
            sys.stderr.write('failed to download %s\n' % idest)
            continue
        # Binary mode: the downloaded bytes are stored untouched.
        figfile = open(os.path.join(figdir, idest), 'wb')
        try:
            figfile.write(data)
        finally:
            figfile.close()
|
119
|
|
120
|
# Download every attachment referenced by the page into <tmpdir>/attachments.
# Links to other http:// hosts are left alone; an HTTP error on a wiki
# attachment is reported on stderr and then aborts the script.
if attachments:
    attdir = os.path.join(tmpdir, 'attachments')
    os.mkdir(attdir)
    for (src, idest) in attachments:
        if src.startswith('http://wiki.entrouvert.org'):
            # Already an absolute wiki URL: use it as is (this branch used
            # to keep whatever value ``url`` had from an earlier step).
            url = src
        elif src.startswith('http://'):
            continue
        else:
            url = 'http://wiki.entrouvert.org' + src
        sys.stdout.write('url: %s\n' % url)
        # Fetch first and only then create the local file, so a failed
        # download never leaves an empty file behind.
        try:
            data = urllib2.urlopen(url).read()
        except urllib2.HTTPError:
            # Name the attachment before propagating the error; the message
            # used to sit, unreachable, after the ``raise``.
            sys.stderr.write('failed to download %s\n' % idest)
            raise
        # Binary mode: the downloaded bytes are stored untouched.
        attfile = open(os.path.join(attdir, idest), 'wb')
        try:
            attfile.write(data)
        finally:
            attfile.close()
|
137
|
|
138
|
# Upload the content of the scratch directory with rsync.  The remote user
# name is optional; without it the destination is used as is.
user = ''
if options.scpuser:
    user = '%s@' % options.scpuser

scp_command = 'rsync -ravz %s/* %s%s' % (tmpdir, user, dest)

# Run through the shell so that the "*" glob is expanded.
os.system(scp_command)
|
146
|
|