Skip to content

Commit e4c0a9f

Browse files
committed
Apply proper HTML escaping on StreamField block comparisons
An individual StreamField block in the comparison view may be rendered as a plain value (for blocks that are unchanged, added or deleted) or a diff (for blocks that are changed). In both cases, the output is returned as HTML, but must not contain any unescaped editor-supplied HTML. For CharBlock, TextBlock and RawHTMLBlock, the block value is escaped so that any HTML tags in the content are shown verbatim. For RichTextBlock and any other block types that do not override the default comparison behaviour, we take the basic (non-templated) HTML rendering of the block and extract text-only content from it. This is then returned in HTML-escaped form for the plain view, and run through diff_text().to_html() for the diff view (which handles escaping itself).
1 parent 0b1485f commit e4c0a9f

4 files changed

Lines changed: 176 additions & 20 deletions

File tree

wagtail/admin/compare.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010
from wagtail.core import blocks
1111

1212

13+
def text_from_html(val):
14+
# Return the unescaped text content of an HTML string
15+
return BeautifulSoup(force_str(val), 'html5lib').getText()
16+
17+
1318
class FieldComparison:
1419
is_field = True
1520
is_child_relation = False
@@ -52,15 +57,18 @@ def htmldiff(self):
5257
class RichTextFieldComparison(TextFieldComparison):
5358
def htmldiff(self):
5459
return diff_text(
55-
BeautifulSoup(force_str(self.val_a), 'html5lib').getText(),
56-
BeautifulSoup(force_str(self.val_b), 'html5lib').getText()
60+
text_from_html(self.val_a),
61+
text_from_html(self.val_b)
5762
).to_html()
5863

5964

6065
def get_comparison_class_for_block(block):
6166
if hasattr(block, 'get_comparison_class'):
6267
return block.get_comparison_class()
63-
elif isinstance(block, blocks.CharBlock):
68+
elif isinstance(block, (blocks.CharBlock, blocks.TextBlock)):
69+
return CharBlockComparison
70+
elif isinstance(block, blocks.RawHTMLBlock):
71+
# Compare raw HTML blocks as if they were plain text, so that tags are shown explicitly
6472
return CharBlockComparison
6573
elif isinstance(block, blocks.RichTextBlock):
6674
return RichTextBlockComparison
@@ -89,7 +97,19 @@ def has_changed(self):
8997
return self.val_a != self.val_b
9098

9199
def htmlvalue(self, val):
92-
return self.block.render_basic(val)
100+
"""
101+
Return an HTML representation of this block that is safe to be included
102+
in comparison views
103+
"""
104+
return escape(text_from_html(self.block.render_basic(val)))
105+
106+
def htmldiff(self):
107+
html_val_a = self.block.render_basic(self.val_a)
108+
html_val_b = self.block.render_basic(self.val_b)
109+
return diff_text(
110+
text_from_html(html_val_a),
111+
text_from_html(html_val_b)
112+
).to_html()
93113

94114

95115
class CharBlockComparison(BlockComparison):
@@ -99,13 +119,12 @@ def htmldiff(self):
99119
force_str(self.val_b)
100120
).to_html()
101121

122+
def htmlvalue(self, val):
123+
return escape(val)
124+
102125

103126
class RichTextBlockComparison(BlockComparison):
104-
def htmldiff(self):
105-
return diff_text(
106-
BeautifulSoup(force_str(self.val_a), 'html5lib').getText(),
107-
BeautifulSoup(force_str(self.val_b), 'html5lib').getText()
108-
).to_html()
127+
pass
109128

110129

111130
class StructBlockComparison(BlockComparison):
@@ -219,8 +238,8 @@ def htmldiff(self):
219238
else:
220239
# Fall back to diffing the HTML representation
221240
return diff_text(
222-
BeautifulSoup(force_str(self.val_a), 'html5lib').getText(),
223-
BeautifulSoup(force_str(self.val_b), 'html5lib').getText()
241+
text_from_html(self.val_a),
242+
text_from_html(self.val_b)
224243
).to_html()
225244

226245

wagtail/admin/tests/test_compare.py

Lines changed: 123 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -211,36 +211,151 @@ def test_has_changed_richtext(self):
211211
self.assertIsInstance(comparison.htmldiff(), SafeString)
212212
self.assertTrue(comparison.has_changed())
213213

214-
def test_htmldiff_escapes_value(self):
214+
def test_htmldiff_escapes_value_on_change(self):
215215
field = StreamPage._meta.get_field('body')
216216

217217
comparison = self.comparison_class(
218218
field,
219219
StreamPage(body=StreamValue(field.stream_block, [
220-
('text', "Original content", '1'),
220+
('text', "I <b>really</b> like original<i>ish</i> content", '1'),
221+
])),
222+
StreamPage(body=StreamValue(field.stream_block, [
223+
('text', 'I <b>really</b> like evil code <script type="text/javascript">doSomethingBad();</script>', '1'),
224+
])),
225+
)
226+
227+
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">I &lt;b&gt;really&lt;/b&gt; like <span class="deletion">original&lt;i&gt;ish&lt;/i&gt; content</span><span class="addition">evil code &lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</span></div>')
228+
self.assertIsInstance(comparison.htmldiff(), SafeString)
229+
230+
def test_htmldiff_escapes_value_on_addition(self):
231+
field = StreamPage._meta.get_field('body')
232+
233+
comparison = self.comparison_class(
234+
field,
235+
StreamPage(body=StreamValue(field.stream_block, [
236+
('text', "Original <em>and unchanged</em> content", '1'),
237+
])),
238+
StreamPage(body=StreamValue(field.stream_block, [
239+
('text', "Original <em>and unchanged</em> content", '1'),
240+
('text', '<script type="text/javascript">doSomethingBad();</script>', '2'),
241+
])),
242+
)
243+
244+
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original &lt;em&gt;and unchanged&lt;/em&gt; content</div>\n<div class="comparison__child-object addition">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</div>')
245+
self.assertIsInstance(comparison.htmldiff(), SafeString)
246+
247+
def test_htmldiff_escapes_value_on_deletion(self):
248+
field = StreamPage._meta.get_field('body')
249+
250+
comparison = self.comparison_class(
251+
field,
252+
StreamPage(body=StreamValue(field.stream_block, [
253+
('text', "Original <em>and unchanged</em> content", '1'),
254+
('text', '<script type="text/javascript">doSomethingBad();</script>', '2'),
221255
])),
222256
StreamPage(body=StreamValue(field.stream_block, [
223-
('text', '<script type="text/javascript">doSomethingBad();</script>', '1'),
257+
('text', "Original <em>and unchanged</em> content", '1'),
224258
])),
225259
)
226260

227-
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object"><span class="deletion">Original content</span><span class="addition">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</span></div>')
261+
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original &lt;em&gt;and unchanged&lt;/em&gt; content</div>\n<div class="comparison__child-object deletion">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</div>')
228262
self.assertIsInstance(comparison.htmldiff(), SafeString)
229263

230-
def test_htmldiff_escapes_value_richtext(self):
264+
def test_htmldiff_richtext_strips_tags_on_change(self):
231265
field = StreamPage._meta.get_field('body')
232266

233267
comparison = self.comparison_class(
234268
field,
235269
StreamPage(body=StreamValue(field.stream_block, [
236-
('rich_text', "Original content", '1'),
270+
('rich_text', "I <b>really</b> like Wagtail &lt;3", '1'),
237271
])),
238272
StreamPage(body=StreamValue(field.stream_block, [
239-
('rich_text', '<script type="text/javascript">doSomethingBad();</script>', '1'),
273+
('rich_text', 'I <b>really</b> like evil code &gt;_&lt; <script type="text/javascript">doSomethingBad();</script>', '1'),
240274
])),
241275
)
242276

243-
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object"><span class="deletion">Original content</span><span class="addition">doSomethingBad();</span></div>')
277+
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">I really like <span class="deletion">Wagtail &lt;3</span><span class="addition">evil code &gt;_&lt; doSomethingBad();</span></div>')
278+
self.assertIsInstance(comparison.htmldiff(), SafeString)
279+
280+
def test_htmldiff_richtext_strips_tags_on_addition(self):
281+
field = StreamPage._meta.get_field('body')
282+
283+
comparison = self.comparison_class(
284+
field,
285+
StreamPage(body=StreamValue(field.stream_block, [
286+
('rich_text', "Original <em>and unchanged</em> content", '1'),
287+
])),
288+
StreamPage(body=StreamValue(field.stream_block, [
289+
('rich_text', "Original <em>and unchanged</em> content", '1'),
290+
('rich_text', 'I <b>really</b> like evil code &gt;_&lt; <script type="text/javascript">doSomethingBad();</script>', '2'),
291+
])),
292+
)
293+
294+
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original and unchanged content</div>\n<div class="comparison__child-object addition">I really like evil code &gt;_&lt; doSomethingBad();</div>')
295+
self.assertIsInstance(comparison.htmldiff(), SafeString)
296+
297+
def test_htmldiff_richtext_strips_tags_on_deletion(self):
298+
field = StreamPage._meta.get_field('body')
299+
300+
comparison = self.comparison_class(
301+
field,
302+
StreamPage(body=StreamValue(field.stream_block, [
303+
('rich_text', "Original <em>and unchanged</em> content", '1'),
304+
('rich_text', 'I <b>really</b> like evil code &gt;_&lt; <script type="text/javascript">doSomethingBad();</script>', '2'),
305+
])),
306+
StreamPage(body=StreamValue(field.stream_block, [
307+
('rich_text', "Original <em>and unchanged</em> content", '1'),
308+
])),
309+
)
310+
311+
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original and unchanged content</div>\n<div class="comparison__child-object deletion">I really like evil code &gt;_&lt; doSomethingBad();</div>')
312+
self.assertIsInstance(comparison.htmldiff(), SafeString)
313+
314+
def test_htmldiff_raw_html_escapes_value_on_change(self):
315+
field = StreamPage._meta.get_field('body')
316+
317+
comparison = self.comparison_class(
318+
field,
319+
StreamPage(body=StreamValue(field.stream_block, [
320+
('raw_html', "Original<i>ish</i> content", '1'),
321+
])),
322+
StreamPage(body=StreamValue(field.stream_block, [
323+
('raw_html', '<script type="text/javascript">doSomethingBad();</script>', '1'),
324+
])),
325+
)
326+
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object"><span class="deletion">Original&lt;i&gt;ish&lt;/i&gt; content</span><span class="addition">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</span></div>')
327+
self.assertIsInstance(comparison.htmldiff(), SafeString)
328+
329+
def test_htmldiff_raw_html_escapes_value_on_addition(self):
330+
field = StreamPage._meta.get_field('body')
331+
332+
comparison = self.comparison_class(
333+
field,
334+
StreamPage(body=StreamValue(field.stream_block, [
335+
('raw_html', "Original <em>and unchanged</em> content", '1'),
336+
])),
337+
StreamPage(body=StreamValue(field.stream_block, [
338+
('raw_html', "Original <em>and unchanged</em> content", '1'),
339+
('raw_html', '<script type="text/javascript">doSomethingBad();</script>', '2'),
340+
])),
341+
)
342+
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original &lt;em&gt;and unchanged&lt;/em&gt; content</div>\n<div class="comparison__child-object addition">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</div>')
343+
self.assertIsInstance(comparison.htmldiff(), SafeString)
344+
345+
def test_htmldiff_raw_html_escapes_value_on_deletion(self):
346+
field = StreamPage._meta.get_field('body')
347+
348+
comparison = self.comparison_class(
349+
field,
350+
StreamPage(body=StreamValue(field.stream_block, [
351+
('raw_html', "Original <em>and unchanged</em> content", '1'),
352+
('raw_html', '<script type="text/javascript">doSomethingBad();</script>', '2'),
353+
])),
354+
StreamPage(body=StreamValue(field.stream_block, [
355+
('raw_html', "Original <em>and unchanged</em> content", '1'),
356+
])),
357+
)
358+
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original &lt;em&gt;and unchanged&lt;/em&gt; content</div>\n<div class="comparison__child-object deletion">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</div>')
244359
self.assertIsInstance(comparison.htmldiff(), SafeString)
245360

246361
def test_compare_structblock(self):
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Generated by Django 3.0.4 on 2020-04-06 09:46
2+
3+
from django.db import migrations
4+
import wagtail.core.blocks
5+
import wagtail.core.fields
6+
import wagtail.tests.testapp.models
7+
8+
9+
class Migration(migrations.Migration):
10+
11+
dependencies = [
12+
('tests', '0043_customdocument_fancy_description'),
13+
]
14+
15+
operations = [
16+
migrations.AlterField(
17+
model_name='streampage',
18+
name='body',
19+
field=wagtail.core.fields.StreamField([('text', wagtail.core.blocks.CharBlock()), ('rich_text', wagtail.core.blocks.RichTextBlock()), ('image', wagtail.tests.testapp.models.ExtendedImageChooserBlock()), ('product', wagtail.core.blocks.StructBlock([('name', wagtail.core.blocks.CharBlock()), ('price', wagtail.core.blocks.CharBlock())])), ('raw_html', wagtail.core.blocks.RawHTMLBlock())]),
20+
),
21+
]

wagtail/tests/testapp/models.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from wagtail.contrib.settings.models import BaseSetting, register_setting
3030
from wagtail.contrib.sitemaps import Sitemap
3131
from wagtail.contrib.table_block.blocks import TableBlock
32-
from wagtail.core.blocks import CharBlock, RichTextBlock, StructBlock
32+
from wagtail.core.blocks import CharBlock, RawHTMLBlock, RichTextBlock, StructBlock
3333
from wagtail.core.fields import RichTextField, StreamField
3434
from wagtail.core.models import Orderable, Page, PageManager, PageQuerySet
3535
from wagtail.documents.edit_handlers import DocumentChooserPanel
@@ -960,6 +960,7 @@ class StreamPage(Page):
960960
('name', CharBlock()),
961961
('price', CharBlock()),
962962
])),
963+
('raw_html', RawHTMLBlock()),
963964
])
964965

965966
api_fields = ('body',)

0 commit comments

Comments
 (0)