0001-workflows-add-pdf-filing-in-export-to-model-action-2.patch

Benjamin Dauvergne, 07 juin 2018 18:09

Voir les différences: en ligne côte à côte

Subject: [PATCH] workflows: add PDF form filling in export-to-model action (#24364)

Using pdftk and vendoring small interface lib pypdftk.

 debian/control               |   3 +-
 wcs/qommon/vendor/pypdftk.py | 261 +++++++++++++++++++++++++++++++++++
 wcs/wf/export_to_model.py    |  93 ++++++++++++-
 3 files changed, 354 insertions(+), 3 deletions(-)
 create mode 100644 wcs/qommon/vendor/pypdftk.py

         python-xlwt,
         python-qrcode,
         libjs-leaflet,
         python-magic
         python-magic,
         pdftk
     Suggests: python-libxml2,
         python-lasso,
         python-psycopg2

     # -*- encoding: UTF-8 -*-
     ''' pypdftk
     Python module to drive the awesome pdftk binary.
     See http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/
     '''
     import logging
     import os
     import shutil
     import subprocess
     import tempfile
     import itertools
     log = logging.getLogger(__name__)
     # Location of the pdftk executable: a non-empty PDFTK_PATH environment
     # variable wins, then the usual /usr/bin location if it is a file, and
     # finally the bare command name.
     PDFTK_PATH = os.getenv('PDFTK_PATH')
     if not PDFTK_PATH:
         PDFTK_PATH = '/usr/bin/pdftk' if os.path.isfile('/usr/bin/pdftk') else 'pdftk'
     def check_output(*popenargs, **kwargs):
         '''Run a command with subprocess.Popen and return what it wrote to stdout.

         Positional and keyword arguments are forwarded to subprocess.Popen,
         except ``stdout`` which is always a pipe and therefore refused.

         Raises ValueError when ``stdout`` is passed, and
         subprocess.CalledProcessError (carrying the captured output) when the
         command exits with a non-zero status.
         '''
         if 'stdout' in kwargs:
             raise ValueError('stdout argument not allowed, it will be overridden.')
         kwargs['stdout'] = subprocess.PIPE
         child = subprocess.Popen(*popenargs, **kwargs)
         captured = child.communicate()[0]
         status = child.poll()
         if not status:
             return captured
         # Report the command the same way Popen received it: the ``args``
         # keyword if given, the first positional argument otherwise.
         failed_cmd = kwargs.get("args")
         if failed_cmd is None:
             failed_cmd = popenargs[0]
         raise subprocess.CalledProcessError(status, failed_cmd, output=captured)
     def run_command(command, shell=False):
         '''Run a system command and return its standard output as a list of lines.

         The command is executed through check_output(), so a non-zero exit
         status raises subprocess.CalledProcessError. The captured output is
         split on newline characters.
         '''
         captured = check_output(command, shell=shell)
         lines = captured.split('\n')
         return lines
     def check_pdftk():
         '''Tell whether the pdftk executable at PDFTK_PATH can be started.

         Returns True when the test call goes through, False (after logging a
         warning) when starting it raises OSError.
         '''
         try:
             run_command([PDFTK_PATH])
         except OSError:
             logging.warning('pdftk test call failed (PDFTK_PATH=%r).', PDFTK_PATH)
             return False
         return True
     def get_num_pages(pdf_path):
         '''Return the page count reported by ``pdftk <pdf_path> dump_data``.

         The first output line starting with "numberofpages" (case-insensitive)
         provides the value; 0 is returned when no such line exists.
         '''
         dump = run_command([PDFTK_PATH, pdf_path, 'dump_data'])
         matching = (entry for entry in dump if entry.lower().startswith('numberofpages'))
         for entry in matching:
             return int(entry.split(':')[1])
         return 0
     def fill_form(pdf_path, datas={}, out_file=None, flatten=True):
         '''
             Fills a PDF form with given dict input data.
             Return temp file if no out_file provided.

             :param pdf_path: path of the PDF form to fill
             :param datas: dict mapping field names to values; it is only read
             :param out_file: output path; a temporary file is created when empty
             :param flatten: when true, ask pdftk to flatten the resulting form
             :return: path of the filled PDF
             :raises subprocess.CalledProcessError: when pdftk exits with an error
         '''
         tmp_fdf = gen_xfdf(datas)
         clean_on_fail = False
         try:
             if not out_file:
                 clean_on_fail = True
                 handle, out_file = tempfile.mkstemp()
                 # pdftk reopens the file by name, our descriptor is not needed.
                 os.close(handle)
             # An argument list (no shell) keeps paths containing spaces or shell
             # metacharacters intact, like concat(), split() and stamp() do.
             cmd = [PDFTK_PATH, pdf_path, 'fill_form', tmp_fdf, 'output', out_file]
             if flatten:
                 cmd.append('flatten')
             try:
                 run_command(cmd)
             except BaseException:
                 # Only remove the output when we created it ourselves.
                 if clean_on_fail:
                     os.remove(out_file)
                 raise
         finally:
             # The XFDF file is an intermediate artefact, never leave it behind.
             os.remove(tmp_fdf)
         return out_file
     def dump_data_fields(pdf_path):
         '''
             Return list of dicts of all fields in a PDF.

             Each output line of ``pdftk <pdf_path> dump_data_fields`` is split
             once on ': '. Lines that do not split into two parts act as
             separators between fields, and each run of key/value lines
             becomes one dict.
         '''
         # Argument list instead of a shell string, so that the path is passed
         # verbatim to pdftk.
         lines = run_command([PDFTK_PATH, pdf_path, 'dump_data_fields'])
         field_data = [line.split(': ', 1) for line in lines]
         groups = itertools.groupby(field_data, lambda item: len(item) == 1)
         return [dict(group) for is_separator, group in groups if not is_separator]
     def concat(files, out_file=None):
         '''
             Merge multiples PDF files.
             Return temp file if no out_file provided.

             :param files: sequence of paths of the PDF files to merge, in order
             :param out_file: output path; a temporary file is created when empty
             :return: path of the merged PDF
             :raises subprocess.CalledProcessError: when pdftk exits with an error
         '''
         clean_on_fail = False
         if not out_file:
             clean_on_fail = True
             handle, out_file = tempfile.mkstemp()
             # Only the name is used afterwards; close the descriptor so it
             # does not leak.
             os.close(handle)
         try:
             if len(files) == 1:
                 # Nothing to merge: a plain copy is the result, no need to run
                 # pdftk on top of it.
                 shutil.copyfile(files[0], out_file)
                 return out_file
             run_command([PDFTK_PATH] + list(files) + ['cat', 'output', out_file])
         except BaseException:
             if clean_on_fail:
                 os.remove(out_file)
             raise
         return out_file
     def split(pdf_path, out_dir=None):
         '''
             Burst a PDF file into one file per page with ``pdftk burst``.

             Pages are written as page_NNNNNN.pdf into out_dir, or into a fresh
             temporary directory when out_dir is empty. That temporary directory
             is removed again if pdftk fails. Returns the sorted list of paths of
             everything found in the output directory.
         '''
         created_here = not out_dir
         if created_here:
             out_dir = tempfile.mkdtemp()
         out_pattern = '%s/page_%%06d.pdf' % out_dir
         try:
             run_command((PDFTK_PATH, pdf_path, 'burst', 'output', out_pattern))
         except:
             if created_here:
                 shutil.rmtree(out_dir)
             raise
         return [os.path.join(out_dir, name) for name in sorted(os.listdir(out_dir))]
     def gen_xfdf(datas={}):
         ''' Generates a temp XFDF file suited for fill_form function, based on dict input data

         Field names and values are XML-escaped so that characters such as
         ``&``, ``<`` or ``"`` cannot produce a malformed document.

         :param datas: dict mapping field names to values; it is only read
         :return: path of the temporary XFDF file; removing it is up to the caller
         '''
         from xml.sax.saxutils import escape

         fields = []
         for key, value in datas.items():
             # u'%s' applies the same text coercion as the template formatting,
             # so non-string values are still accepted.
             fields.append(u"""        <field name="%s"><value>%s</value></field>""" % (
                 escape(u'%s' % key, {u'"': u'&quot;'}), escape(u'%s' % value)))
         tpl = u"""<?xml version="1.0" encoding="UTF-8"?>
     <xfdf xmlns="http://ns.adobe.com/xfdf/" xml:space="preserve">
         <fields>
     %s
         </fields>
     </xfdf>""" % "\n".join(fields)
         handle, out_file = tempfile.mkstemp()
         # Write through the descriptor returned by mkstemp (so it gets closed)
         # and in binary mode, as the content is already UTF-8 encoded.
         with os.fdopen(handle, 'wb') as f:
             f.write(tpl.encode('UTF-8'))
         return out_file
     def replace_page(pdf_path, page_number, pdf_to_insert_path):
         '''
         Replace a page in a PDF (pdf_path) by the PDF pointed by pdf_to_insert_path.
         page_number is the number of the page in pdf_path to be replaced. It is 1-based.

         The file at pdf_path is overwritten with the result. Nothing is returned.
         '''
         A = 'A=' + pdf_path
         B = 'B=' + pdf_to_insert_path
         lower_bound = 'A1-' + str(page_number - 1)
         upper_bound = 'A' + str(page_number + 1) + '-end'
         num_pages = get_num_pages(pdf_path)
         if page_number == 1:  # At begin
             # A one-page document has no "A2-end" range: the inserted PDF
             # alone is the result.
             ranges = ['B'] if num_pages == 1 else ['B', upper_bound]
         elif page_number == num_pages:  # At end
             ranges = [lower_bound, 'B']
         else:  # At middle
             ranges = [lower_bound, 'B', upper_bound]
         # mkstemp creates the file atomically, unlike the race-prone mktemp.
         handle, output_temp = tempfile.mkstemp(suffix='.pdf')
         os.close(handle)
         try:
             run_command([PDFTK_PATH, A, B, 'cat'] + ranges + ['output', output_temp])
             shutil.copy(output_temp, pdf_path)
         finally:
             # Remove the intermediate file on success and on failure alike.
             if os.path.exists(output_temp):
                 os.remove(output_temp)
     def stamp(pdf_path, stamp_pdf_path, output_pdf_path=None):
         '''
         Applies a stamp (from stamp_pdf_path) to the PDF file in pdf_path. Useful for watermark purposes.
         If not output_pdf_path is provided, it returns a temporary file with the result PDF.

         :return: path of the stamped PDF
         :raises subprocess.CalledProcessError: when pdftk exits with an error
         '''
         created_here = not output_pdf_path
         if created_here:
             # mkstemp creates the file atomically, unlike the race-prone mktemp.
             handle, output = tempfile.mkstemp(suffix='.pdf')
             os.close(handle)
         else:
             output = output_pdf_path
         args = [PDFTK_PATH, pdf_path, 'multistamp', stamp_pdf_path, 'output', output]
         try:
             run_command(args)
         except BaseException:
             # Do not leave our own temporary file behind on failure.
             if created_here and os.path.exists(output):
                 os.remove(output)
             raise
         return output
     def pdftk_cmd_util(pdf_path, action="compress",out_file=None, flatten=True):
         '''
         Run ``pdftk <pdf_path> output <out_file> <action> [flatten]``.

         :type action: should valid action, in string format. Eg: "uncompress"
         :param pdf_path: input PDF file
         :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
         :param flatten: (default=True) : flatten the final PDF
         :return: name of the output file.
         :raises AssertionError: when action is neither "compress" nor "uncompress"
         '''
         actions = ["compress", "uncompress"]
         # Raised explicitly (rather than with an assert statement) so the check
         # is still performed when Python runs with -O.
         if action not in actions:
             raise AssertionError(
                 "Unknown action. Failed to perform given action '%s'." % action)
         clean_on_fail = False
         if not out_file:
             clean_on_fail = True
             handle, out_file = tempfile.mkstemp()
             # pdftk reopens the file by name, our descriptor is not needed.
             os.close(handle)
         # Argument list instead of a shell string, so that paths are passed
         # verbatim to pdftk.
         cmd = [PDFTK_PATH, pdf_path, 'output', out_file, action]
         if flatten:
             cmd.append('flatten')
         try:
             run_command(cmd)
         except BaseException:
             if clean_on_fail:
                 os.remove(out_file)
             raise
         return out_file
     def compress(pdf_path, out_file=None, flatten=True):
         '''
         Apply the pdftk "compress" filter to a PDF, restoring page stream
         compression (typically after the file was uncompressed to be edited
         in a text editor).
         :param pdf_path: input PDF file
         :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
         :param flatten: (default=True) : flatten the final PDF
         :return: name of the output file.
         '''
         return pdftk_cmd_util(pdf_path, action="compress", out_file=out_file, flatten=flatten)
     def uncompress(pdf_path, out_file=None, flatten=True):
         '''
         Apply the pdftk "uncompress" filter to a PDF, removing page stream
         compression. This is mostly useful to edit the PDF code in a text
         editor; compress() is the reverse operation.
         :param pdf_path: input PDF file
         :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
         :param flatten: (default=True) : flatten the final PDF
         :return: name of the output file.
         '''
         return pdftk_cmd_util(pdf_path, action="uncompress", out_file=out_file, flatten=flatten)

     # You should have received a copy of the GNU General Public License
     # along with this program; if not, see <http://www.gnu.org/licenses/>.
     import os
     import base64
     import collections
     from StringIO import StringIO
-...
     from qommon.form import (SingleSelectWidget, WidgetList, CheckboxWidget,
                              StringWidget, UploadWidget, WysiwygTextWidget, Upload,
                              UploadedFile, UploadValidationError, VarnameWidget,
                              RadiobuttonsWidget, PicklableUpload, ComputedExpressionWidget)
                              RadiobuttonsWidget, PicklableUpload, ComputedExpressionWidget, HtmlWidget)
     from qommon.errors import PublishError
     from qommon.template import TemplateError
     import qommon
     from qommon.vendor import pypdftk
     from wcs.fields import SubtitleField, TitleField, CommentField, PageField
     from wcs.workflows import (WorkflowStatusItem, AttachmentEvolutionPart,
-...
                                get_formdata_template_context, template_on_context)
     from wcs.portfolio import has_portfolio, push_document
     # Probed once, when this module is imported: True when the pdftk binary could
     # be started. Uploaded PDF models are only recognised as such when it is set.
     has_pdftk = pypdftk.check_pdftk()
     # OpenDocument XML namespace URIs (text, office and drawing).
     OO_TEXT_NS = 'urn:oasis:names:tc:opendocument:xmlns:text:1.0'
     OO_OFFICE_NS = 'urn:oasis:names:tc:opendocument:xmlns:office:1.0'
     OO_DRAW_NS = 'urn:oasis:names:tc:opendocument:xmlns:drawing:1.0'
-...
         return str2rtf(unicode(str(value), get_publisher().site_charset))
     class PDFFormDirectory(Directory):
         '''Directory serving a preview of the PDF model of a workflow item.

         Its index page returns the PDF model with every form field filled with
         the name of that field.
         '''
         _q_exports = ['']

         def __init__(self, item):
             # item: the workflow item providing get_pdf_fields() and fill_pdf_form()
             self.item = item

         def _q_index(self):
             '''Return the content of the PDF model filled with its own field names.'''
             response = get_response()
             response.content_type = 'application/pdf'
             response.set_header('location', '..')
             sample_values = dict((name, name) for name in self.item.get_pdf_fields())
             filled = self.item.fill_pdf_form(sample_values)
             return filled.read()
     class ExportToModel(WorkflowStatusItem):
         description = N_('Document Creation')
         key = 'export_to_model'
-...
                 base_url = formdata.get_url(backoffice=in_backoffice)
                 return base_url + self.get_directory_name()
         def model_file_validation(self, upload):
         def model_file_validation(self, upload, ignore_error=False):
             if hasattr(upload, 'fp'):
                 fp = upload.fp
             elif hasattr(upload, 'get_file'):
                 fp = upload.get_file()
             else:
                 if ignore_error:
                     return None
                 raise UploadValidationError('unknown upload object %r' % upload)
             if upload.content_type and upload.content_type == 'application/pdf' and has_pdftk:
                 return 'pdf'
             if upload.content_type and upload.content_type == 'application/rtf':
                 return 'rtf'
             if (upload.content_type and upload.content_type == 'application/octet-stream') or \
-...
                     return 'opendocument'
             if is_opendocument(fp):
                 return 'opendocument'
             if ignore_error:
                 return None
             raise UploadValidationError(_('Only RTF and OpenDocument files can be used'))
         def get_parameters(self):
-...
             if has_portfolio():
                 parameters += ('push_to_portfolio',)
             parameters += ('method', 'by', 'label', 'backoffice_info_text', 'filename', 'condition')
             if self.has_pdf_model():
                 parameters += ('pdffield',)
                 for field in self.get_pdf_fields():
                     parameters += ('pdffield_%s' % field,)
             return parameters
         def add_parameters_widgets(self, form, parameters, prefix='',
-...
             if 'filename' in parameters:
                 form.add(ComputedExpressionWidget, name='%sfilename' % prefix, title=_('File name'),
                          value=self.filename)
             if 'pdffield' in parameters:
                 form.add(HtmlWidget, htmltext('<label><a href="pdf-form/">PDF form example</a></label>'))
             pdf_fields_expressions = getattr(self, 'pdf_fields', {})
             pdf_fields = self.get_pdf_fields()
             for parameter in parameters:
                 if parameter.startswith('pdffield_'):
                     name = parameter[9:]
                     form.add(ComputedExpressionWidget,
                              name=parameter,
                              title=_('PDF Field %s (%s)') % (name, pdf_fields.get(name, '')),
                              value=pdf_fields_expressions.get(name, ''))
         def has_pdf_model(self):
             '''Tell whether a model file is set and is recognised as a PDF.

             When no model file is set, the (falsy) model_file value itself is
             returned; otherwise a boolean.
             '''
             if not self.model_file:
                 return self.model_file
             kind = self.model_file_validation(self.model_file, ignore_error=True)
             return kind == 'pdf'
         def submit_admin_form(self, form):
             '''Save the admin form, then store the expressions of the PDF fields.

             After the parent class processing, and only when the model is a PDF,
             self.pdf_fields is replaced by a dict mapping each PDF field name to
             the parsed value of its 'pdffield_<name>' widget. Fields without a
             widget in the form are left out.
             '''
             super(ExportToModel, self).submit_admin_form(form)
             if not self.has_pdf_model():
                 return
             expressions = {}
             for name in self.get_pdf_fields():
                 widget = form.get_widget('pdffield_%s' % name)
                 if widget:
                     expressions[name] = widget.parse()
             self.pdf_fields = expressions
         def get_pdf_fields(self):
             '''Return a dict mapping the PDF model field names to their types.

             An empty dict is returned when the model is not recognised as a PDF.
             The result is cached on the model file object (_pdf_fields attribute)
             so that pdftk is run only once per model file object.
             '''
             if self.model_file_validation(self.model_file, ignore_error=True) != 'pdf':
                 return {}
             if hasattr(self.model_file, '_pdf_fields'):
                 return self.model_file._pdf_fields
             field_types = {}
             for description in pypdftk.dump_data_fields(self.model_file.build_file_path()):
                 # fields without an explicit type are considered text fields
                 field_types[description.get('FieldName')] = description.get('FieldType', 'Text')
             self.model_file._pdf_fields = field_types
             return field_types
         def get_filename(self):
             filename = None
-...
                 outstream = self.apply_rtf_template_to_formdata(formdata)
             elif kind == 'opendocument':
                 outstream = self.apply_od_template_to_formdata(formdata)
             elif kind == 'pdf':
                 outstream = self.apply_pdf_template_to_formdata(formdata)
             else:
                 raise Exception('unsupported model kind %r' % kind)
             if self.convert_to_pdf:
-...
                 return transform_to_pdf(outstream)
             return outstream
         def fill_pdf_form(self, data):
             '''Fill the PDF model with data and return the result as a StringIO.

             data maps PDF field names to values. The temporary file produced by
             pypdftk is always removed, whether reading it succeeds or not.
             '''
             outfile = pypdftk.fill_form(self.model_file.build_file_path(), data)
             try:
                 # a PDF is binary content, it must not be read in text mode
                 with open(outfile, 'rb') as f:
                     return StringIO(f.read())
             finally:
                 os.unlink(outfile)
         def apply_pdf_template_to_formdata(self, formdata):
             '''Fill the PDF model with the values computed from the configured expressions.

             For every field of the PDF model that has a non-empty expression in
             self.pdf_fields, the expression is evaluated with self.compute() and
             the result used as the field value. Fields without an expression are
             left untouched. Returns what fill_pdf_form() returns.
             '''
             # tolerate items that have no pdf_fields attribute yet, or an empty one
             expressions = getattr(self, 'pdf_fields', None) or {}
             data = {}
             for field in self.get_pdf_fields():
                 expression = expressions.get(field)
                 if not expression:
                     continue
                 # evaluate the expression of the field, not the result dict
                 data[field] = self.compute(expression)
             return self.fill_pdf_form(data)
         def apply_rtf_template_to_formdata(self, formdata):
             try:
                 # force ezt_only=True because an RTF file may contain {{ characters
-...
                     content_type,
                     outstream.read())
         def q_admin_lookup(self, workflow, status, component, html_top):
             '''Resolve the 'pdf-form' sub-path of the item in the admin pages.

             Returns a PDFFormDirectory (with html_top attached) when component is
             'pdf-form' and the model is a PDF, None in every other case.
             '''
             if component != 'pdf-form' or not self.has_pdf_model():
                 return None
             preview = PDFFormDirectory(self)
             preview.html_top = html_top
             return preview
     register_item_class(ExportToModel)
+    -

Projet

Général

Profil

Produits Entr'ouvert » Passerelle

0001-workflows-add-pdf-filing-in-export-to-model-action-2.patch