Projet

Général

Profil

0001-misc-switch-html-cleanup-to-bleach-58808.patch

Frédéric Péters, 27 novembre 2021 10:48

Télécharger (11,4 ko)

Voir les différences:

Subject: [PATCH] misc: switch html cleanup to bleach (#58808)

 debian/control        |   2 +-
 setup.py              |   1 +
 tests/test_widgets.py |  52 +++----
 tox.ini               |   1 +
 wcs/qommon/form.py    | 310 +++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 336 insertions(+), 30 deletions(-)
debian/control
17 17
Package: wcs
18 18
Architecture: all
19 19
Depends: graphviz,
20
         python3-bleach,
20 21
         python3-django (>= 1:1.11),
21 22
         python3-django-ckeditor,
22 23
         python3-django-ratelimit,
23
         python3-feedparser,
24 24
         python3-hobo,
25 25
         python3-lasso,
26 26
         python3-pil,
setup.py
168 168
    maintainer_email="fpeters@entrouvert.com",
169 169
    url="http://wcs.labs.libre-entreprise.org",
170 170
    install_requires=[
171
        'bleach',
171 172
        'gadjo>=0.53',
172 173
        'django-ckeditor<4.5.4',
173 174
        'django-ratelimit<3',
tests/test_widgets.py
420 420
    assert not widget.has_error()
421 421
    assert widget.parse() == 'bla bla bla'
422 422

  
423
    import wcs.qommon.form
423
    widget = WysiwygTextWidget('test')
424
    mock_form_submission(req, widget, {'test': '<p>bla bla bla</p>'})
425
    assert not widget.has_error()
426
    assert widget.parse() == '<p>bla bla bla</p>'
424 427

  
425
    sanitize_html = wcs.qommon.form._sanitizeHTML
426
    if sanitize_html:
427
        widget = WysiwygTextWidget('test')
428
        mock_form_submission(req, widget, {'test': '<p>bla bla bla</p>'})
429
        assert not widget.has_error()
430
        assert widget.parse() == '<p>bla bla bla</p>'
428
    widget = WysiwygTextWidget('test')
429
    mock_form_submission(req, widget, {'test': '<a href="#">a</a>'})
430
    assert not widget.has_error()
431
    assert widget.parse() == '<a href="#">a</a>'
431 432

  
432
        widget = WysiwygTextWidget('test')
433
        mock_form_submission(req, widget, {'test': '<a href="#">a</a>'})
434
        assert not widget.has_error()
435
        assert widget.parse() == '<a href="#">a</a>'
433
    widget = WysiwygTextWidget('test')
434
    mock_form_submission(req, widget, {'test': '<a href="javascript:alert()">a</a>'})
435
    assert not widget.has_error()
436
    assert widget.parse() == '<a>a</a>'  # javascript: got filtered
436 437

  
437
        widget = WysiwygTextWidget('test')
438
        mock_form_submission(req, widget, {'test': '<a href="javascript:alert()">a</a>'})
439
        assert not widget.has_error()
440
        assert widget.parse() == '<a href="">a</a>'  # javascript: got filtered
438
    # check comments are kept
439
    widget = WysiwygTextWidget('test')
440
    mock_form_submission(req, widget, {'test': '<p>hello</p><!-- world --><p>.</p>'})
441
    assert not widget.has_error()
442
    assert widget.parse() == '<p>hello</p><!-- world --><p>.</p>'
443

  
444
    # check <script> tags are kept
445
    widget = WysiwygTextWidget('test')
446
    mock_form_submission(req, widget, {'test': '<p>hello</p><script>alert("test")</script>'})
447
    assert not widget.has_error()
448
    assert widget.parse() == '<p>hello</p><script>alert("test")</script>'
449

  
450
    # check <style> tags are kept
451
    widget = WysiwygTextWidget('test')
452
    mock_form_submission(req, widget, {'test': '<p>hello</p><style>p { color: blue; }</style>'})
453
    assert not widget.has_error()
454
    assert widget.parse() == '<p>hello</p><style>p { color: blue; }</style>'
441 455

  
442 456
    # check django syntax is kept intact
443 457
    widget = WysiwygTextWidget('test')
......
467 481
        == '<a href="{% if 1 > 2 %}héllo{% endif %}">{% if 2 > 1 %}{{plop|date:"Y"}}{% endif %}</a>'
468 482
    )
469 483

  
470
    # check we don't escape HTML if feedparser _sanitizeHTML is missing
471
    wcs.qommon.form._sanitizeHTML = None
472
    widget = WysiwygTextWidget('test')
473
    mock_form_submission(req, widget, {'test': '<p>bla bla bla</p>'})
474
    assert not widget.has_error()
475
    assert widget.parse() == '<p>bla bla bla</p>'
476
    wcs.qommon.form._sanitizeHTML = sanitize_html
477

  
478 484

  
479 485
def test_select_hint_widget():
480 486
    widget = SingleSelectHintWidget(
tox.ini
44 44
    Quixote>=3.0,<3.2
45 45
    pre-commit
46 46
    pyzbar
47
    bleach
47 48
commands =
48 49
    py.test -v {env:COVERAGE:} --junitxml=junit-{envname}.xml {posargs:tests/}
49 50
    pylint: ./pylint.sh wcs/ tests/
wcs/qommon/form.py
36 36
except ImportError:
37 37
    Image = None
38 38

  
39
try:
40
    from feedparser import _sanitizeHTML
41
except ImportError:
42
    _sanitizeHTML = None
39
import bleach
43 40

  
44 41
try:
45 42
    import DNS
......
1904 1901

  
1905 1902

  
1906 1903
class WysiwygTextWidget(TextWidget):
1904
    ALL_TAGS = [
1905
        'a',
1906
        'abbr',
1907
        'acronym',
1908
        'address',
1909
        'area',
1910
        'article',
1911
        'aside',
1912
        'audio',
1913
        'b',
1914
        'big',
1915
        'blockquote',
1916
        'br',
1917
        'button',
1918
        'canvas',
1919
        'caption',
1920
        'center',
1921
        'cite',
1922
        'code',
1923
        'col',
1924
        'colgroup',
1925
        'command',
1926
        'datagrid',
1927
        'datalist',
1928
        'dd',
1929
        'del',
1930
        'details',
1931
        'dfn',
1932
        'dialog',
1933
        'dir',
1934
        'div',
1935
        'dl',
1936
        'dt',
1937
        'em',
1938
        'event-source',
1939
        'fieldset',
1940
        'figcaption',
1941
        'figure',
1942
        'font',
1943
        'footer',
1944
        'form',
1945
        'h1',
1946
        'h2',
1947
        'h3',
1948
        'h4',
1949
        'h5',
1950
        'h6',
1951
        'header',
1952
        'hr',
1953
        'i',
1954
        'img',
1955
        'input',
1956
        'ins',
1957
        'kbd',
1958
        'keygen',
1959
        'label',
1960
        'legend',
1961
        'li',
1962
        'm',
1963
        'map',
1964
        'menu',
1965
        'meter',
1966
        'multicol',
1967
        'nav',
1968
        'nextid',
1969
        'noscript',
1970
        'ol',
1971
        'optgroup',
1972
        'option',
1973
        'output',
1974
        'p',
1975
        'pre',
1976
        'progress',
1977
        'q',
1978
        's',
1979
        'samp',
1980
        'script',
1981
        'section',
1982
        'select',
1983
        'small',
1984
        'sound',
1985
        'source',
1986
        'spacer',
1987
        'span',
1988
        'strike',
1989
        'strong',
1990
        'style',
1991
        'sub',
1992
        'sup',
1993
        'table',
1994
        'tbody',
1995
        'td',
1996
        'textarea',
1997
        'tfoot',
1998
        'th',
1999
        'thead',
2000
        'time',
2001
        'tr',
2002
        'tt',
2003
        'u',
2004
        'ul',
2005
        'var',
2006
        'video',
2007
    ]
2008
    ALL_ATTRS = [
2009
        'abbr',
2010
        'accept',
2011
        'accept-charset',
2012
        'accesskey',
2013
        'action',
2014
        'align',
2015
        'alt',
2016
        'autocomplete',
2017
        'autofocus',
2018
        'axis',
2019
        'background',
2020
        'balance',
2021
        'bgcolor',
2022
        'bgproperties',
2023
        'border',
2024
        'bordercolor',
2025
        'bordercolordark',
2026
        'bordercolorlight',
2027
        'bottompadding',
2028
        'cellpadding',
2029
        'cellspacing',
2030
        'ch',
2031
        'challenge',
2032
        'char',
2033
        'charoff',
2034
        'charset',
2035
        'checked',
2036
        'choff',
2037
        'cite',
2038
        'class',
2039
        'clear',
2040
        'color',
2041
        'cols',
2042
        'colspan',
2043
        'compact',
2044
        'contenteditable',
2045
        'controls',
2046
        'coords',
2047
        'data',
2048
        'datafld',
2049
        'datapagesize',
2050
        'datasrc',
2051
        'datetime',
2052
        'default',
2053
        'delay',
2054
        'dir',
2055
        'disabled',
2056
        'draggable',
2057
        'dynsrc',
2058
        'enctype',
2059
        'end',
2060
        'face',
2061
        'for',
2062
        'form',
2063
        'frame',
2064
        'galleryimg',
2065
        'gutter',
2066
        'headers',
2067
        'height',
2068
        'hidden',
2069
        'hidefocus',
2070
        'high',
2071
        'href',
2072
        'hreflang',
2073
        'hspace',
2074
        'icon',
2075
        'id',
2076
        'inputmode',
2077
        'ismap',
2078
        'keytype',
2079
        'label',
2080
        'lang',
2081
        'leftspacing',
2082
        'list',
2083
        'longdesc',
2084
        'loop',
2085
        'loopcount',
2086
        'loopend',
2087
        'loopstart',
2088
        'low',
2089
        'lowsrc',
2090
        'max',
2091
        'maxlength',
2092
        'media',
2093
        'method',
2094
        'min',
2095
        'multiple',
2096
        'name',
2097
        'nohref',
2098
        'noshade',
2099
        'nowrap',
2100
        'open',
2101
        'optimum',
2102
        'pattern',
2103
        'ping',
2104
        'point-size',
2105
        'poster',
2106
        'pqg',
2107
        'preload',
2108
        'prompt',
2109
        'radiogroup',
2110
        'readonly',
2111
        'rel',
2112
        'repeat-max',
2113
        'repeat-min',
2114
        'replace',
2115
        'required',
2116
        'rev',
2117
        'rightspacing',
2118
        'rows',
2119
        'rowspan',
2120
        'rules',
2121
        'scope',
2122
        'selected',
2123
        'shape',
2124
        'size',
2125
        'span',
2126
        'src',
2127
        'start',
2128
        'step',
2129
        'style',
2130
        'summary',
2131
        'suppress',
2132
        'tabindex',
2133
        'target',
2134
        'template',
2135
        'title',
2136
        'toppadding',
2137
        'type',
2138
        'unselectable',
2139
        'urn',
2140
        'usemap',
2141
        'valign',
2142
        'value',
2143
        'variable',
2144
        'volume',
2145
        'vrml',
2146
        'vspace',
2147
        'width',
2148
        'wrap',
2149
        'xml:lang',
2150
    ]
2151
    ALL_STYLES = [
2152
        'azimuth',
2153
        'background-color',
2154
        'border-bottom-color',
2155
        'border-collapse',
2156
        'border-color',
2157
        'border-left-color',
2158
        'border-right-color',
2159
        'border-top-color',
2160
        'clear',
2161
        'color',
2162
        'cursor',
2163
        'direction',
2164
        'display',
2165
        'elevation',
2166
        'float',
2167
        'font',
2168
        'font-family',
2169
        'font-size',
2170
        'font-style',
2171
        'font-variant',
2172
        'font-weight',
2173
        'height',
2174
        'letter-spacing',
2175
        'line-height',
2176
        'overflow',
2177
        'pause',
2178
        'pause-after',
2179
        'pause-before',
2180
        'pitch',
2181
        'pitch-range',
2182
        'richness',
2183
        'speak',
2184
        'speak-header',
2185
        'speak-numeral',
2186
        'speak-punctuation',
2187
        'speech-rate',
2188
        'stress',
2189
        'text-align',
2190
        'text-decoration',
2191
        'text-indent',
2192
        'unicode-bidi',
2193
        'vertical-align',
2194
        'voice-family',
2195
        'volume',
2196
        'white-space',
2197
        'width',
2198
    ]
2199

  
1907 2200
    def _parse(self, request):
1908 2201
        TextWidget._parse(self, request, use_validation_function=False)
1909 2202
        if self.value:
1910
            if _sanitizeHTML:
1911
                self.value = _sanitizeHTML(self.value, get_request().charset, 'text/html')
2203
            self.value = bleach.clean(
2204
                self.value,
2205
                tags=self.ALL_TAGS,
2206
                attributes=self.ALL_ATTRS,
2207
                styles=self.ALL_STYLES,
2208
                strip_comments=False,
2209
            )
1912 2210
            if self.value.startswith('<br />'):
1913 2211
                self.value = self.value[6:]
1914 2212
            if self.value.endswith('<br />'):
1915
-