Other #851 » update_doc.py

Frédéric Péters, 12 November 2011 00:34

 
#! /usr/bin/env python
# Fetch a wiki page, turn it into a standalone HTML document and upload it,
# together with its figures and attachments, to the doc/ directory on labs.

import sys
import urllib2
from optparse import OptionParser
import re
import random
import os

# doc-id -> (language, wiki page URL, output file name, rsync destination)
docs = {
    'wcs-fr': ('fr',
        'https://wiki.entrouvert.org/wcs/Guide_de_l%27utilisateur',
        'user-guide.html',
        'labs.libre-entreprise.org:/home/groups/wcs/htdocs/doc/fr/'),
    'wcs-admin-fr': ('fr',
        'https://wiki.entrouvert.org/wcs/Guide_de_l%27administrateur',
        'admin-guide.html',
        'labs.libre-entreprise.org:/home/groups/wcs/htdocs/doc/fr/'),
    'authentic-fr': ('fr',
        'https://wiki.entrouvert.org/Authentic/Guide_de_l%27administrateur',
        'authentic-admin.html',
        'labs.libre-entreprise.org:/home/groups/authentic/htdocs/doc/fr/'),
    'authentic-en': ('en',
        'https://wiki.entrouvert.org/Authentic/Administrator_Guide',
        'authentic-admin.html',
        'labs.libre-entreprise.org:/home/groups/authentic/htdocs/doc/en/'),
}


usage = 'usage: %prog [options] doc-id'
parser = OptionParser(usage)
parser.add_option('-u', '--user', dest='scpuser',
        help='user name on labs, for file upload via scp')
parser.add_option('--css', dest='css', default='default.css',
        help='css file to use')

(options, args) = parser.parse_args()
if len(args) != 1:
    parser.error('incorrect number of arguments')

doc_id = args[0]
css = options.css

if doc_id not in docs:
    parser.error('invalid doc id')

# build everything in a throwaway temporary directory
tmpdir = '/tmp/.ud.%s' % random.random()
os.mkdir(tmpdir)

lang, url, filename, dest = docs[doc_id]

# the page title comes from the '#pragma title' line of the raw wiki markup,
# falling back to the last component of the URL
raw = urllib2.urlopen(url + '?action=raw').read()
t = re.findall('#pragma title (.*)', raw)
if t:
    title = t[0]
    title = title.replace('---', '—')
else:
    title = url.rsplit('/')[-1].replace('_', ' ')

# get the page rendered as HTML by the wiki
body = urllib2.urlopen(url + '?action=format&mimetype=text/html').read()

figures = []

def sub_img(matchobj):
    # point <img> tags at local copies and remember the original sources,
    # so the files can be downloaded below
    img = matchobj.group(0)
    src = re.findall('src="(.*?)"', img)[0]
    filename = src.split('=')[-1]
    figures.append((src, filename))
    return '<img src="figures/%s" />' % filename

attachments = []

def sub_attach(matchobj):
    # same thing for links to attached .wcs files
    target = matchobj.group(2)
    src = (matchobj.group(1) + matchobj.group(2)).replace('&amp;', '&')
    attachments.append((src, target))
    return 'href="attachments/%s"' % target

file('/tmp/aze.html', 'w').write(body)  # debug copy of the HTML returned by the wiki

# fix cross-references between the two wcs guides
if doc_id == 'wcs-fr':
    body = body.replace('/wcs/Guide_de_l%27administrateur', 'admin-guide.html')
    body = body.replace('../Guide', 'Guide')
if doc_id == 'wcs-admin-fr':
    body = body.replace("http://wiki.entrouvert.org/wcs/Guide_de_l'utilisateur",
            'user-guide.html')
body = re.sub('<img.*?>', sub_img, body)
body = re.sub(r'class="attachment" href="(.*?AttachFile.*?target=)(.*?\.wcs)"', sub_attach, body)
# drop the trailing horizontal rule and category links added by the wiki
body = re.sub('<hr />.*Cat.*</a>', '', body)

out = file(os.path.join(tmpdir, filename), 'w')
print >> out, '''<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="%(lang)s" lang="%(lang)s">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<link rel="stylesheet" type="text/css" href="%(css)s" />
<title>%(title)s</title>
</head>
<body>
<div id="title">
<h1>%(title)s</h1>
</div>
%(body)s
</body>
</html>
''' % locals()

out.close()

# download the figures referenced from the page
if figures:
    figdir = os.path.join(tmpdir, 'figures')
    os.mkdir(figdir)
    for (src, idest) in figures:
        url = 'http://wiki.entrouvert.org' + src
        try:
            open(os.path.join(figdir, idest), 'w').write(urllib2.urlopen(url).read())
        except urllib2.HTTPError:
            os.unlink(os.path.join(figdir, idest))
            print >> sys.stderr, 'failed to download', idest

# download the attachments; links to other http:// hosts are skipped
if attachments:
    attdir = os.path.join(tmpdir, 'attachments')
    os.mkdir(attdir)
    for (src, idest) in attachments:
        if src.startswith('http://wiki.entrouvert.org'):
            url = src
        elif src.startswith('http://'):
            continue
        else:
            url = 'http://wiki.entrouvert.org' + src
        print 'url:', url
        try:
            open(os.path.join(attdir, idest), 'w').write(urllib2.urlopen(url).read())
        except urllib2.HTTPError:
            os.unlink(os.path.join(attdir, idest))
            print >> sys.stderr, 'failed to download', idest

if options.scpuser:
    user = '%s@' % options.scpuser
else:
    user = ''

# upload the generated files (rsync over ssh)
scp_command = 'rsync -ravz %s/* %s%s' % (tmpdir, user, dest)

os.system(scp_command)
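For reference, a hypothetical invocation (the user name below is a placeholder; the doc id must be one of the keys of the docs dictionary above):

    python update_doc.py --user=jdoe wcs-fr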