Projet

Général

Profil

0001-workflows-add-pdf-filing-in-export-to-model-action-2.patch

Benjamin Dauvergne, 07 juin 2018 18:09

Télécharger (16,8 ko)

Voir les différences:

Subject: [PATCH] workflows: add PDF form filling in export-to-model action (#24364)

Uses pdftk, and vendors the small pypdftk interface library.
 debian/control               |   3 +-
 wcs/qommon/vendor/pypdftk.py | 261 +++++++++++++++++++++++++++++++++++
 wcs/wf/export_to_model.py    |  93 ++++++++++++-
 3 files changed, 354 insertions(+), 3 deletions(-)
 create mode 100644 wcs/qommon/vendor/pypdftk.py
debian/control
24 24
    python-xlwt,
25 25
    python-qrcode,
26 26
    libjs-leaflet,
27
    python-magic
27
    python-magic,
28
    pdftk
28 29
Suggests: python-libxml2,
29 30
    python-lasso,
30 31
    python-psycopg2
wcs/qommon/vendor/pypdftk.py
1
# -*- encoding: UTF-8 -*-
2

  
3
''' pypdftk
4

  
5
Python module to drive the awesome pdftk binary.
6
See http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/
7

  
8
'''
9

  
10
import logging
11
import os
12
import shutil
13
import subprocess
14
import tempfile
15
import itertools
16

  
17
log = logging.getLogger(__name__)

# Location of the pdftk binary: a non-empty PDFTK_PATH environment variable
# wins; otherwise /usr/bin/pdftk is used when that file exists, and the bare
# command name 'pdftk' as a last resort.
PDFTK_PATH = os.getenv('PDFTK_PATH')
if not PDFTK_PATH:
    PDFTK_PATH = '/usr/bin/pdftk' if os.path.isfile('/usr/bin/pdftk') else 'pdftk'
25

  
26

  
27
def check_output(*popenargs, **kwargs):
    ''' Run a command and return what it wrote on its standard output.

        Positional and keyword arguments are handed over to subprocess.Popen;
        stdout may not be given since it is always captured through a pipe.
        Raise subprocess.CalledProcessError, carrying the captured output,
        when the command exits with a non-zero status.
    '''
    if 'stdout' in kwargs:
        raise ValueError('stdout argument not allowed, it will be overridden.')
    child = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
    captured = child.communicate()[0]
    status = child.poll()
    if not status:
        return captured
    # report the command as it was given, by keyword or positionally
    failed_cmd = kwargs.get("args")
    if failed_cmd is None:
        failed_cmd = popenargs[0]
    raise subprocess.CalledProcessError(status, failed_cmd, output=captured)
39

  
40

  
41
def run_command(command, shell=False):
    ''' Run a system command and return its standard output as a list of lines.

        :param command: argument list, or a single command string when shell is true
        :param shell: handed over to subprocess.Popen through check_output
        :return: the output split on newlines (the last item is an empty
                 string when the output ends with a newline)
        :raise subprocess.CalledProcessError: if the command exits with a non-zero status
    '''
    output = check_output(command, shell=shell)
    if not isinstance(output, str):
        # Python 3 pipes yield bytes, which cannot be split on a text
        # separator; on Python 2 the output already is a str and is kept as is.
        output = output.decode('utf-8', 'replace')
    return output.split('\n')
45

  
46
def check_pdftk():
    ''' Tell whether the pdftk binary designated by PDFTK_PATH can be started.

        :return: True when the test call completed, False when starting the
                 binary raised OSError (a warning is logged in that case).

        A non-zero exit status is not handled here: the
        subprocess.CalledProcessError raised by run_command propagates.
    '''
    try:
        run_command([PDFTK_PATH])
    except OSError:
        # use the module logger rather than the root logger, so the record
        # carries this module's name
        log.warning('pdftk test call failed (PDFTK_PATH=%r).', PDFTK_PATH)
        return False
    return True
53

  
54

  
55
def get_num_pages(pdf_path):
    ''' Count the pages of a PDF file, as reported by "pdftk dump_data".

        Return 0 when the report has no line starting with "NumberOfPages"
        (compared case-insensitively).
    '''
    report = run_command([PDFTK_PATH, pdf_path, 'dump_data'])
    for entry in report:
        if not entry.lower().startswith('numberofpages'):
            continue
        return int(entry.split(':')[1])
    return 0
61

  
62

  
63
def fill_form(pdf_path, datas=None, out_file=None, flatten=True):
    '''
        Fills a PDF form with given dict input data.
        Return temp file if no out_file provided.

        :param pdf_path: PDF file holding the form
        :param datas: dict mapping field names to values (default: no value)
        :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
        :param flatten: (default=True) : flatten the final PDF
        :return: name of the output file.
        :raise subprocess.CalledProcessError: if pdftk exits with a non-zero status
    '''
    cleanOnFail = False
    handle = None
    succeeded = False
    tmp_fdf = gen_xfdf(datas or {})
    try:
        if not out_file:
            cleanOnFail = True
            handle, out_file = tempfile.mkstemp()
        # An argument list run without a shell: paths holding spaces or shell
        # metacharacters reach pdftk unchanged instead of being interpreted.
        cmd = [PDFTK_PATH, pdf_path, 'fill_form', tmp_fdf, 'output', out_file]
        if flatten:
            cmd.append('flatten')
        run_command(cmd)
        succeeded = True
    finally:
        if handle is not None:
            os.close(handle)
        # the XFDF file is only an intermediate, it is never left behind
        os.remove(tmp_fdf)
        if cleanOnFail and not succeeded:
            # drop the temporary output created here, it holds no usable result
            os.remove(out_file)
    return out_file
88

  
89
def dump_data_fields(pdf_path):
    '''
        Return list of dicts of all fields in a PDF.

        The output of "pdftk dump_data_fields" is read as "Key: value" lines;
        any line without a ": " separator ends the current field.  When a key
        is repeated inside one field, the last value is kept.

        :param pdf_path: input PDF file
        :return: a list holding one dict per field
        :raise subprocess.CalledProcessError: if pdftk exits with a non-zero status
    '''
    # An argument list run without a shell, so that the path is passed to
    # pdftk untouched whatever characters it holds.
    cmd = [PDFTK_PATH, pdf_path, 'dump_data_fields']
    pairs = [line.split(': ', 1) for line in run_command(cmd)]
    groups = itertools.groupby(pairs, lambda pair: len(pair) == 1)
    # a real list on Python 2 and Python 3 alike
    return [dict(group) for is_separator, group in groups if not is_separator]
99

  
100
def concat(files, out_file=None):
    '''
        Merge multiples PDF files.
        Return temp file if no out_file provided.

        :param files: sequence of paths of the PDF files to merge, in order
        :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
        :return: name of the output file.

        A single input file is copied as is, without calling pdftk.  When the
        operation fails, a temp file created here is removed before the
        exception propagates.
    '''
    cleanOnFail = False
    if not out_file:
        cleanOnFail = True
        handle, out_file = tempfile.mkstemp()
        # only the path is used; close the descriptor so that it does not leak
        os.close(handle)
    succeeded = False
    try:
        if len(files) == 1:
            # nothing to merge
            shutil.copyfile(files[0], out_file)
        else:
            args = [PDFTK_PATH]
            args.extend(files)
            args.extend(['cat', 'output', out_file])
            run_command(args)
        succeeded = True
    finally:
        if cleanOnFail and not succeeded:
            os.remove(out_file)
    return out_file
121

  
122

  
123
def split(pdf_path, out_dir=None):
    '''
        Burst a PDF file into one file per page, named page_NNNNNN.pdf.
        A temp directory is created when no out_dir is given; it is removed
        again if pdftk fails.
        Return the sorted paths of all the entries found in out_dir.
    '''
    created_dir = not out_dir
    if created_dir:
        out_dir = tempfile.mkdtemp()
    page_pattern = '%s/page_%%06d.pdf' % out_dir
    burst_cmd = (PDFTK_PATH, pdf_path, 'burst', 'output', page_pattern)
    try:
        run_command(burst_cmd)
    except:
        if created_dir:
            shutil.rmtree(out_dir)
        raise
    return [os.path.join(out_dir, entry) for entry in sorted(os.listdir(out_dir))]
142

  
143

  
144
def gen_xfdf(datas=None):
    ''' Generates a temp XFDF file suited for fill_form function, based on dict input data

        Field names and values are XML-escaped, so that characters such as
        "<", "&" or a double quote in a name cannot break the document.

        :param datas: dict mapping field names to values (default: no field)
        :return: path of the temp file, UTF-8 encoded; removing it is up to the caller
    '''
    from xml.sax.saxutils import escape

    fields = []
    for key, value in (datas or {}).items():
        # u'%s' % x turns any value into text before escaping it
        name = escape(u'%s' % key, {u'"': u'&quot;'})
        content = escape(u'%s' % value)
        fields.append(u"""        <field name="%s"><value>%s</value></field>""" % (name, content))
    tpl = u"""<?xml version="1.0" encoding="UTF-8"?>
<xfdf xmlns="http://ns.adobe.com/xfdf/" xml:space="preserve">
    <fields>
%s
    </fields>
</xfdf>""" % "\n".join(fields)
    handle, out_file = tempfile.mkstemp()
    # write through the descriptor returned by mkstemp, in binary mode since
    # the document is encoded here; this also closes the descriptor
    with os.fdopen(handle, 'wb') as f:
        f.write(tpl.encode('UTF-8'))
    return out_file
160

  
161
def replace_page(pdf_path, page_number, pdf_to_insert_path):
    '''
    Replace a page in a PDF (pdf_path) by the PDF pointed by pdf_to_insert_path.
    page_number is the number of the page in pdf_path to be replaced. It is 1-based.

    pdf_path is overwritten with the result; nothing is returned.
    '''
    A = 'A=' + pdf_path
    B = 'B=' + pdf_to_insert_path

    # Pages of A kept before and after the inserted document.  A range is left
    # out when it would be empty, which also covers a one-page document (the
    # result is then B alone, where "A2-end" would be an invalid range).
    page_ranges = []
    if page_number != 1:
        page_ranges.append('A1-' + str(page_number - 1))
    page_ranges.append('B')
    if page_number != get_num_pages(pdf_path):
        page_ranges.append('A' + str(page_number + 1) + '-end')

    # pdftk writes into a private temp directory: unlike tempfile.mktemp(),
    # nobody else can create the output path in the meantime
    work_dir = tempfile.mkdtemp()
    output_temp = os.path.join(work_dir, 'output.pdf')
    try:
        run_command([PDFTK_PATH, A, B, 'cat'] + page_ranges + ['output', output_temp])
        shutil.copy(output_temp, pdf_path)
    finally:
        # removed on failure as well
        shutil.rmtree(work_dir)
187

  
188
def stamp(pdf_path, stamp_pdf_path, output_pdf_path=None):
    '''
    Applies a stamp (from stamp_pdf_path) to the PDF file in pdf_path. Useful for watermark purposes.
    If not output_pdf_path is provided, it returns a temporary file with the result PDF.

    :param pdf_path: input PDF file
    :param stamp_pdf_path: PDF file used as stamp (pdftk "multistamp" operation)
    :param output_pdf_path: (default=auto) : output PDF path. will use tempfile if not provided
    :return: name of the output file.
    '''
    output = output_pdf_path
    created = False
    if not output:
        # mkstemp() creates the file atomically, where the deprecated mktemp()
        # only picks a name that someone else may create first
        handle, output = tempfile.mkstemp(suffix='.pdf')
        os.close(handle)
        created = True
    args = [PDFTK_PATH, pdf_path, 'multistamp', stamp_pdf_path, 'output', output]
    succeeded = False
    try:
        run_command(args)
        succeeded = True
    finally:
        if created and not succeeded:
            # do not leave the empty placeholder behind
            os.remove(output)
    return output
197

  
198
def pdftk_cmd_util(pdf_path, action="compress", out_file=None, flatten=True):
    '''
    Run pdftk on a PDF with one of its output filters.

    :type action: should valid action, in string format. Eg: "uncompress"
    :param pdf_path: input PDF file
    :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
    :param flatten: (default=True) : flatten the final PDF
    :return: name of the output file.
    :raise AssertionError: if action is neither "compress" nor "uncompress"
    '''
    actions = ["compress", "uncompress"]
    if action not in actions:
        # raised explicitly so that the check is not dropped by "python -O";
        # the exception type is the one an assert statement would raise
        raise AssertionError("Unknown action. Failed to perform given action '%s'." % action)

    handle = None
    cleanOnFail = False
    if not out_file:
        cleanOnFail = True
        handle, out_file = tempfile.mkstemp()

    # An argument list run without a shell, so that paths holding spaces or
    # shell metacharacters are passed to pdftk untouched.
    cmd = [PDFTK_PATH, pdf_path, 'output', out_file, action]
    if flatten:
        cmd.append('flatten')

    succeeded = False
    try:
        run_command(cmd)
        succeeded = True
    finally:
        if handle is not None:
            os.close(handle)
        if cleanOnFail and not succeeded:
            os.remove(out_file)
    return out_file
229

  
230

  
231

  
232
def compress(pdf_path, out_file=None, flatten=True):
    '''
    Apply the pdftk "compress" filter, which restores the page stream
    compression removed by the "uncompress" filter.

    :param pdf_path: input PDF file
    :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
    :param flatten: (default=True) : flatten the final PDF
    :return: name of the output file.
    '''
    return pdftk_cmd_util(pdf_path, action="compress", out_file=out_file, flatten=flatten)
246

  
247

  
248
def uncompress(pdf_path, out_file=None, flatten=True):
    '''
    Apply the pdftk "uncompress" filter, which removes PDF page stream
    compression; only useful to edit PDF code in a text editor like vim or
    emacs.  The "compress" filter restores compression.

    :param pdf_path: input PDF file
    :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
    :param flatten: (default=True) : flatten the final PDF
    :return: name of the output file.
    '''
    return pdftk_cmd_util(pdf_path, action="uncompress", out_file=out_file, flatten=flatten)
wcs/wf/export_to_model.py
14 14
# You should have received a copy of the GNU General Public License
15 15
# along with this program; if not, see <http://www.gnu.org/licenses/>.
16 16

  
17
import os
17 18
import base64
18 19
import collections
19 20
from StringIO import StringIO
......
33 34
from qommon.form import (SingleSelectWidget, WidgetList, CheckboxWidget,
34 35
                         StringWidget, UploadWidget, WysiwygTextWidget, Upload,
35 36
                         UploadedFile, UploadValidationError, VarnameWidget,
36
                         RadiobuttonsWidget, PicklableUpload, ComputedExpressionWidget)
37
                         RadiobuttonsWidget, PicklableUpload, ComputedExpressionWidget, HtmlWidget)
37 38
from qommon.errors import PublishError
38 39
from qommon.template import TemplateError
39 40
import qommon
41
from qommon.vendor import pypdftk
42

  
40 43

  
41 44
from wcs.fields import SubtitleField, TitleField, CommentField, PageField
42 45
from wcs.workflows import (WorkflowStatusItem, AttachmentEvolutionPart,
......
44 47
                           get_formdata_template_context, template_on_context)
45 48
from wcs.portfolio import has_portfolio, push_document
46 49

  
50
# Probe for the pdftk binary once, when this module is imported;
# model_file_validation() only reports a model as 'pdf' when this is true.
has_pdftk = pypdftk.check_pdftk()
51

  
47 52
OO_TEXT_NS = 'urn:oasis:names:tc:opendocument:xmlns:text:1.0'
48 53
OO_OFFICE_NS = 'urn:oasis:names:tc:opendocument:xmlns:office:1.0'
49 54
OO_DRAW_NS = 'urn:oasis:names:tc:opendocument:xmlns:drawing:1.0'
......
193 198
    return str2rtf(unicode(str(value), get_publisher().site_charset))
194 199

  
195 200

  
201
class PDFFormDirectory(Directory):
    '''Serve a sample of the PDF model of a workflow item, where every form
    field is filled with its own name.'''
    _q_exports = ['']

    def __init__(self, item):
        self.item = item

    def _q_index(self):
        '''Return the content of the filled PDF, served as application/pdf.'''
        response = get_response()
        response.content_type = 'application/pdf'
        response.set_header('location', '..')
        field_names = list(self.item.get_pdf_fields())
        sample = self.item.fill_pdf_form(dict(zip(field_names, field_names)))
        return sample.read()
213

  
214

  
196 215
class ExportToModel(WorkflowStatusItem):
197 216
    description = N_('Document Creation')
198 217
    key = 'export_to_model'
......
249 268
            base_url = formdata.get_url(backoffice=in_backoffice)
250 269
            return base_url + self.get_directory_name()
251 270

  
252
    def model_file_validation(self, upload):
271
    def model_file_validation(self, upload, ignore_error=False):
253 272
        if hasattr(upload, 'fp'):
254 273
            fp = upload.fp
255 274
        elif hasattr(upload, 'get_file'):
256 275
            fp = upload.get_file()
257 276
        else:
277
            if ignore_error:
278
                return None
258 279
            raise UploadValidationError('unknown upload object %r' % upload)
280
        if upload.content_type and upload.content_type == 'application/pdf' and has_pdftk:
281
            return 'pdf'
259 282
        if upload.content_type and upload.content_type == 'application/rtf':
260 283
            return 'rtf'
261 284
        if (upload.content_type and upload.content_type == 'application/octet-stream') or \
......
274 297
                return 'opendocument'
275 298
        if is_opendocument(fp):
276 299
            return 'opendocument'
300
        if ignore_error:
301
            return None
277 302
        raise UploadValidationError(_('Only RTF and OpenDocument files can be used'))
278 303

  
279 304
    def get_parameters(self):
......
284 309
        if has_portfolio():
285 310
            parameters += ('push_to_portfolio',)
286 311
        parameters += ('method', 'by', 'label', 'backoffice_info_text', 'filename', 'condition')
312
        if self.has_pdf_model():
313
            parameters += ('pdffield',)
314
            for field in self.get_pdf_fields():
315
                parameters += ('pdffield_%s' % field,)
287 316
        return parameters
288 317

  
289 318
    def add_parameters_widgets(self, form, parameters, prefix='',
......
388 417
        if 'filename' in parameters:
389 418
            form.add(ComputedExpressionWidget, name='%sfilename' % prefix, title=_('File name'),
390 419
                     value=self.filename)
420
        if 'pdffield' in parameters:
421
            form.add(HtmlWidget, htmltext('<label><a href="pdf-form/">PDF form example</a></label>'))
422
        pdf_fields_expressions = getattr(self, 'pdf_fields', {})
423
        pdf_fields = self.get_pdf_fields()
424
        for parameter in parameters:
425
            if parameter.startswith('pdffield_'):
426
                name = parameter[9:]
427
                form.add(ComputedExpressionWidget,
428
                         name=parameter,
429
                         title=_('PDF Field %s (%s)') % (name, pdf_fields.get(name, '')),
430
                         value=pdf_fields_expressions.get(name, ''))
431

  
432
    def has_pdf_model(self):
        '''Tell whether a model file is set and is recognised as a PDF.

        A false model_file value is returned as is; otherwise the result is
        a boolean.
        '''
        if not self.model_file:
            return self.model_file
        kind = self.model_file_validation(self.model_file, ignore_error=True)
        return kind == 'pdf'
434

  
435
    def submit_admin_form(self, form):
        '''Store the submitted parameters, then, for a PDF model, collect the
        expressions typed in the per-field "pdffield_<name>" widgets into
        self.pdf_fields.'''
        super(ExportToModel, self).submit_admin_form(form)
        if not self.has_pdf_model():
            return
        expressions = {}
        for name in self.get_pdf_fields():
            widget = form.get_widget('pdffield_%s' % name)
            if not widget:
                # no widget for this field in the submitted form
                continue
            expressions[name] = widget.parse()
        self.pdf_fields = expressions
444

  
445
    def get_pdf_fields(self):
        '''Return a dict mapping the field names of the PDF model to their
        types ('Text' when pdftk reports none); empty if the model is not a PDF.

        The result is kept on the model file object, in _pdf_fields, and
        reused on later calls.
        '''
        model = self.model_file
        if self.model_file_validation(model, ignore_error=True) != 'pdf':
            return {}
        if hasattr(model, '_pdf_fields'):
            return model._pdf_fields
        found = {}
        for description in pypdftk.dump_data_fields(model.build_file_path()):
            found[description.get('FieldName')] = description.get('FieldType', 'Text')
        model._pdf_fields = found
        return found
391 454

  
392 455
    def get_filename(self):
393 456
        filename = None
......
408 471
            outstream = self.apply_rtf_template_to_formdata(formdata)
409 472
        elif kind == 'opendocument':
410 473
            outstream = self.apply_od_template_to_formdata(formdata)
474
        elif kind == 'pdf':
475
            outstream = self.apply_pdf_template_to_formdata(formdata)
411 476
        else:
412 477
            raise Exception('unsupported model kind %r' % kind)
413 478
        if self.convert_to_pdf:
......
416 481
            return transform_to_pdf(outstream)
417 482
        return outstream
418 483

  
484
    def fill_pdf_form(self, data):
        '''Fill the PDF model with data and return the result as a StringIO.

        data maps PDF field names to values.  The temporary file produced by
        pypdftk.fill_form is removed before returning, also when reading it
        fails.
        '''
        outfile = pypdftk.fill_form(self.model_file.build_file_path(), data)
        try:
            # a PDF is binary content, it must not be read in text mode
            with open(outfile, 'rb') as f:
                return StringIO(f.read())
        finally:
            os.unlink(outfile)
491

  
492
    def apply_pdf_template_to_formdata(self, formdata):
        '''Fill the PDF model with the values of the configured expressions.

        Each PDF field that has a non-empty expression in self.pdf_fields gets
        the result of computing that expression; other fields are left alone.
        Return the filled PDF as produced by fill_pdf_form().
        '''
        expressions = getattr(self, 'pdf_fields', None) or {}
        data = {}
        for field in self.get_pdf_fields():
            expression = expressions.get(field)
            if not expression:
                continue
            # evaluate the expression configured for this field (the result
            # dict itself used to be passed to compute() by mistake)
            data[field] = self.compute(expression)
        return self.fill_pdf_form(data)
501

  
419 502
    def apply_rtf_template_to_formdata(self, formdata):
420 503
        try:
421 504
            # force ezt_only=True because an RTF file may contain {{ characters
......
554 637
                content_type,
555 638
                outstream.read())
556 639

  
640
    def q_admin_lookup(self, workflow, status, component, html_top):
        '''Return the directory serving the sample PDF for the "pdf-form"
        component when the model is a PDF, None for anything else.'''
        if component != 'pdf-form' or not self.has_pdf_model():
            return None
        directory = PDFFormDirectory(self)
        directory.html_top = html_top
        return directory
645

  
557 646
register_item_class(ExportToModel)
558
-