From d8e3b841f8a50c16e394e5e2726a896fdcff2f5a Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Thu, 3 Dec 2020 10:31:43 -0500 Subject: [PATCH 1/5] [Composer] Add xemlock/htmlpurifier-html5 dependency --- composer.json | 5 +++-- composer.lock | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/composer.json b/composer.json index efd4bd081..c24292454 100644 --- a/composer.json +++ b/composer.json @@ -34,6 +34,7 @@ "league/html-to-markdown": "^4.8", "level-2/dice": "^4", "lightopenid/lightopenid": "dev-master", + "matriphe/iso-639": "^1.2", "michelf/php-markdown": "^1.7", "mobiledetect/mobiledetectlib": "^2.8", "monolog/monolog": "^1.25", @@ -47,6 +48,7 @@ "psr/container": "^1.0", "seld/cli-prompt": "^1.0", "smarty/smarty": "^3.1", + "xemlock/htmlpurifier-html5": "^0.1.11", "fxp/composer-asset-plugin": "^1.4", "bower-asset/base64": "^1.0", "bower-asset/chart-js": "^2.8", @@ -64,8 +66,7 @@ "npm-asset/moment": "^2.24", "npm-asset/perfect-scrollbar": "0.6.16", "npm-asset/textcomplete": "^0.18.2", - "npm-asset/typeahead.js": "^0.11.1", - "matriphe/iso-639": "^1.2" + "npm-asset/typeahead.js": "^0.11.1" }, "repositories": [ { diff --git a/composer.lock b/composer.lock index 8ba31ecb0..779c3b51b 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "fd22bd8c29dcea3d6b6eeb117d79af52", + "content-hash": "7d8031c9b95fd94d8872804759a26509", "packages": [ { "name": "asika/simple-console", @@ -3431,6 +3431,57 @@ "shim" ], "time": "2020-05-12T16:14:59+00:00" + }, + { + "name": "xemlock/htmlpurifier-html5", + "version": "v0.1.11", + "source": { + "type": "git", + "url": "https://github.com/xemlock/htmlpurifier-html5.git", + "reference": "f0d563f9fd4a82a3d759043483f9a94c0d8c2255" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/xemlock/htmlpurifier-html5/zipball/f0d563f9fd4a82a3d759043483f9a94c0d8c2255", + "reference": "f0d563f9fd4a82a3d759043483f9a94c0d8c2255", + "shasum": "" + }, + "require": { + "ezyang/htmlpurifier": "^4.8", + "php": ">=5.2" + }, + "require-dev": { + "php-coveralls/php-coveralls": "^1.1|^2.1", + "phpunit/phpunit": ">=4.7 <8.0" + }, + "type": "library", + "autoload": { + "classmap": [ + "library/HTMLPurifier/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "xemlock", + "email": "xemlock@gmail.com" + } + ], + "description": "HTML5 element definitions for HTML Purifier", + "keywords": [ + "HTML5", + "Purifier", + "html", + "htmlpurifier", + "security", + "tidy", + "validator", + "xss" + ], + "time": "2019-08-07T17:19:21+00:00" } ], "packages-dev": [ From a0f6d678c4450b559c0be87533def7357f06665f Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 4 Dec 2020 07:27:13 -0500 Subject: [PATCH 2/5] Add HTML Purify to BBCode::convert - Replace previous HTML cleanup using DOMDocument - Remove obsolete HTML::sanitizeCSS method - Use Strings::autoLinkRegEx for PageInfo::getRelevantUrlFromBody - Update previous tests expectations - Add new specific XSS tests --- src/Content/PageInfo.php | 2 +- src/Content/Text/BBCode.php | 125 ++++++++++---------------- src/Content/Text/HTML.php | 21 ----- tests/src/Content/Text/BBCodeTest.php | 46 ++++++++-- 4 files changed, 83 insertions(+), 111 deletions(-) diff --git a/src/Content/PageInfo.php b/src/Content/PageInfo.php index 786385c3e..5396bc1bb 100644 --- a/src/Content/PageInfo.php +++ b/src/Content/PageInfo.php @@ -265,7 +265,7 @@ class PageInfo } if (!$matches && $searchNakedUrls) { - preg_match('~(?<=\W|^)(?', $data['url'], self::proxyUrl($data['image'], $simplehtml), $data['title']); } else { if (!empty($data['image'])) { - $return .= sprintf('
', $data['url'], self::proxyUrl($data['image'], $simplehtml), $data['title']); + $return .= sprintf('
', $data['url'], self::proxyUrl($data['image'], $simplehtml), $data['title']); } elseif (!empty($data['preview'])) { - $return .= sprintf('
', $data['url'], self::proxyUrl($data['preview'], $simplehtml), $data['title']); + $return .= sprintf('
', $data['url'], self::proxyUrl($data['preview'], $simplehtml), $data['title']); } $return .= sprintf('

%s

', $data['url'], $data['title']); } @@ -1033,7 +1033,7 @@ class BBCode switch ($simplehtml) { case self::API: - $text = ($is_quote_share? '
' : '') . '

' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' ' . $author_contact['addr'] . ':

' . "\n" . $content; + $text = ($is_quote_share? '
' : '') . '

' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' ' . $author_contact['addr'] . ':

' . "\n" . $content; break; case self::DIASPORA: if (stripos(Strings::normaliseLink($attributes['link']), 'http://twitter.com/') === 0) { @@ -1062,7 +1062,7 @@ class BBCode break; case self::OSTATUS: - $text = ($is_quote_share? '
' : '') . '

' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' @' . $author_contact['addr'] . ': ' . $content . '

' . "\n"; + $text = ($is_quote_share? '
' : '') . '

' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' @' . $author_contact['addr'] . ': ' . $content . '

' . "\n"; break; case self::ACTIVITYPUB: $author = '@' . $author_contact['addr'] . ':'; @@ -1275,6 +1275,8 @@ class BBCode return ''; } + Hook::callAll('bbcode', $text); + $a = DI::app(); $text = self::performWithEscapedTags($text, ['code'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $a) { @@ -1300,11 +1302,12 @@ class BBCode return $return; }; - - // Remove the abstract element. It is a non visible element. $text = self::stripAbstract($text); + // Line ending normalisation + $text = str_replace("\r\n", "\n", $text); + // Move new lines outside of tags $text = preg_replace("#\[(\w*)](\n*)#ism", '$2[$1]', $text); $text = preg_replace("#(\n*)\[/(\w*)]#ism", '[/$2]$1', $text); @@ -1338,16 +1341,6 @@ class BBCode $text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text); } - // Convert new line chars to html
tags - - // nlbr seems to be hopelessly messed up - // $Text = nl2br($Text); - - // We'll emulate it. - - $text = trim($text); - $text = str_replace("\r\n", "\n", $text); - // Remove linefeeds inside of the table elements. See issue #6799 $search = ["\n[th]", "[th]\n", " [th]", "\n[/th]", "[/th]\n", "[/th] ", "\n[td]", "[td]\n", " [td]", "\n[/td]", "[/td]\n", "[/td] ", @@ -1367,11 +1360,14 @@ class BBCode $replace = ["[table]", "[/table]"]; $text = str_replace($search, $replace, $text); + // Trim new lines regardless of the system.remove_multiplicated_lines config value + $text = trim($text, "\n"); + // removing multiplicated newlines if (DI::config()->get('system', 'remove_multiplicated_lines')) { - $search = ["\n\n\n", "\n ", " \n", "[/quote]\n\n", "\n[/quote]", "[/li]\n", "\n[li]", "\n[ul]", "[/ul]\n", "\n\n[share ", "[/attachment]\n", + $search = ["\n\n\n", "\n ", " \n", "[/quote]\n\n", "\n[/quote]", "[/li]\n", "\n[li]", "\n[*]", "\n[ul]", "[/ul]\n", "\n\n[share ", "[/attachment]\n", "\n[h1]", "[/h1]\n", "\n[h2]", "[/h2]\n", "\n[h3]", "[/h3]\n", "\n[h4]", "[/h4]\n", "\n[h5]", "[/h5]\n", "\n[h6]", "[/h6]\n"]; - $replace = ["\n\n", "\n", "\n", "[/quote]\n", "[/quote]", "[/li]", "[li]", "[ul]", "[/ul]", "\n[share ", "[/attachment]", + $replace = ["\n\n", "\n", "\n", "[/quote]\n", "[/quote]", "[/li]", "[li]", "[*]", "[ul]", "[/ul]", "\n[share ", "[/attachment]", "[h1]", "[/h1]", "[h2]", "[/h2]", "[h3]", "[/h3]", "[h4]", "[/h4]", "[h5]", "[/h5]", "[h6]", "[/h6]"]; do { $oldtext = $text; @@ -1447,8 +1443,8 @@ class BBCode // Check for sized text // [size=50] --> font-size: 50px (with the unit). if ($simple_html != self::DIASPORA) { - $text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", "$2", $text); - $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); + $text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", '$2', $text); + $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", '$2', $text); } else { // Issue 2199: Diaspora doesn't interpret the construct above, nor the or element $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); @@ -1456,28 +1452,16 @@ class BBCode // Check for centered text - $text = preg_replace("(\[center\](.*?)\[\/center\])ism", "
$1
", $text); + $text = preg_replace("(\[center\](.*?)\[\/center\])ism", '
$1
', $text); // Check for list text $text = str_replace("[*]", "
  • ", $text); // Check for style sheet commands - $text = preg_replace_callback( - "(\[style=(.*?)\](.*?)\[\/style\])ism", - function ($match) { - return "" . $match[2] . ""; - }, - $text - ); + $text = preg_replace("(\[style=(.*?)\](.*?)\[\/style\])ism", '$2', $text); // Check for CSS classes - $text = preg_replace_callback( - "(\[class=(.*?)\](.*?)\[\/class\])ism", - function ($match) { - return "" . $match[2] . ""; - }, - $text - ); + $text = preg_replace("(\[class=(.*?)\](.*?)\[\/class\])ism", '$2', $text); // handle nested lists $endlessloop = 0; @@ -1608,20 +1592,20 @@ class BBCode $text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); $text = preg_replace("/\[zmg\](.*?)\[\/zmg\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); - $text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); - $text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); - //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $Text); + $text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); + $text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); + //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $Text); // Simplify "video" element $text = preg_replace('(\[video.*?\ssrc\s?=\s?([^\s\]]+).*?\].*?\[/video\])ism', '[video]$1[/video]', $text); - // Try to Oembed if ($try_oembed) { + // html5 video and audio $text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism", - '', $text); + '', $text); $text = preg_replace("/\[video\](.*?)\[\/video\]/ism", '$1', $text); - $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '', $text); + $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '', $text); $text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text); $text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text); @@ -1632,9 +1616,6 @@ class BBCode '$1', $text); } - // html5 video and audio - - if ($try_oembed) { $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '', $text); } else { @@ -1678,7 +1659,7 @@ class BBCode $text = OEmbed::BBCode2HTML($text); // Avoid triple linefeeds through oembed - $text = str_replace("


    ", "

    ", $text); + $text = str_replace("


    ", "

    ", $text); // If we found an event earlier, strip out all the event code and replace with a reformatted version. // Replace the event-start section with the entire formatted event. The other bbcode is stripped. @@ -1708,7 +1689,7 @@ class BBCode $conv = html_entity_decode(str_replace([' ', "\n", "\r"], '', $text)); // Emojis are always 4 byte Unicode characters if (!empty($conv) && (strlen($conv) / mb_strlen($conv) == 4)) { - $text = '' . $text . ''; + $text = '' . $text . ''; } } @@ -1722,8 +1703,6 @@ class BBCode $text = preg_replace_callback("&\[url=([^\[\]]*)\]\[img\](.*)\[\/img\]\[\/url\]&Usi", 'self::removePictureLinksCallback', $text); } - $text = str_replace(["\r","\n"], ['
    ', '
    '], $text); - // Remove all hashtag addresses if ($simple_html && !in_array($simple_html, [self::DIASPORA, self::OSTATUS, self::ACTIVITYPUB])) { $text = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '$1$3', $text); @@ -1872,18 +1851,21 @@ class BBCode // Additionally, [pre] tags preserve spaces $text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", function ($match) { - return str_replace(' ', ' ', $match[1]); + return str_replace(' ', ' ', htmlentities($match[1], ENT_NOQUOTES,'UTF-8')); }, $text); + // Add HTML new lines + $text = str_replace("\n", '
    ', $text); + return $text; }); // Escaped code $text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#ism", function ($matches) { if (strpos($matches[2], "\n") !== false) { - $return = '
    ' . htmlspecialchars(trim($matches[2], "\n\r"), ENT_NOQUOTES, 'UTF-8') . '
    '; + $return = '
    ' . htmlentities(trim($matches[2], "\n\r"), ENT_NOQUOTES, 'UTF-8') . '
    '; } else { - $return = '' . htmlspecialchars($matches[2], ENT_NOQUOTES, 'UTF-8') . ''; + $return = '' . htmlentities($matches[2], ENT_NOQUOTES, 'UTF-8') . ''; } return $return; @@ -1891,37 +1873,20 @@ class BBCode $text ); - // Clean up the HTML by loading and saving the HTML with the DOM. - // Bad structured html can break a whole page. - // For performance reasons do it only with activated item cache or at export. - if (!$try_oembed || (get_itemcachepath() != '')) { - $doc = new DOMDocument(); - $doc->preserveWhiteSpace = false; + $config = \HTMLPurifier_HTML5Config::createDefault(); + $config->set('HTML.Doctype', 'HTML5'); + $config->set('Attr.AllowedRel', [ + 'noreferrer' => true, + 'noopener' => true, + ]); + $config->set('Attr.AllowedFrameTargets', [ + '_blank' => true, + ]); - $text = mb_convert_encoding($text, 'HTML-ENTITIES', "UTF-8"); + $HTMLPurifier = new \HTMLPurifier($config); + $text = $HTMLPurifier->purify($text); - $doctype = ''; - $encoding = ''; - @$doc->loadHTML($encoding . $doctype . '' . $text . ''); - $doc->encoding = 'UTF-8'; - $text = $doc->saveHTML(); - $text = str_replace(['', '', $doctype, $encoding], ['', '', '', ''], $text); - - $text = str_replace('
  • ', '', $text); - - //$Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES'); - } - - // Clean up some useless linebreaks in lists - //$Text = str_replace('

    ', '', $Text); - //$Text = str_replace('
    ', '', $Text); - //$Text = str_replace('
  • ', '
  • ', $Text); - //$Text = str_replace('
    = "a") && ($char <= "z")) { - $cleaned .= $char; - } - - if (!(strpos(" #;:0123456789-_.%", $char) === false)) { - $cleaned .= $char; - } - } - - return $cleaned; - } - /** * Search all instances of a specific HTML tag node in the provided DOM document and replaces them with BBCode text nodes. * diff --git a/tests/src/Content/Text/BBCodeTest.php b/tests/src/Content/Text/BBCodeTest.php index 1a1e05f45..1769e6fd7 100644 --- a/tests/src/Content/Text/BBCodeTest.php +++ b/tests/src/Content/Text/BBCodeTest.php @@ -75,6 +75,18 @@ class BBCodeTest extends MockedTest $this->dice->shouldReceive('create') ->with(BaseURL::class) ->andReturn($baseUrlMock); + + $config = \HTMLPurifier_HTML5Config::createDefault(); + $config->set('HTML.Doctype', 'HTML5'); + $config->set('Attr.AllowedRel', [ + 'noreferrer' => true, + 'noopener' => true, + ]); + $config->set('Attr.AllowedFrameTargets', [ + '_blank' => true, + ]); + + $this->HTMLPurifier = new \HTMLPurifier($config); } public function dataLinks() @@ -171,7 +183,7 @@ class BBCodeTest extends MockedTest public function testAutoLinking(string $data, bool $assertHTML) { $output = BBCode::convert($data); - $assert = '' . $data . ''; + $assert = $this->HTMLPurifier->purify('' . $data . ''); if ($assertHTML) { self::assertEquals($assert, $output); } else { @@ -183,31 +195,31 @@ class BBCodeTest extends MockedTest { return [ 'bug-7271-condensed-space' => [ - 'expectedHtml' => '', + 'expectedHtml' => '', 'text' => '[ol][*] http://example.com/[/ol]', ], 'bug-7271-condensed-nospace' => [ - 'expectedHtml' => '', + 'expectedHtml' => '', 'text' => '[ol][*]http://example.com/[/ol]', ], 'bug-7271-indented-space' => [ - 'expectedHtml' => '', + 'expectedHtml' => '', 'text' => '[ul] [*] http://example.com/ [/ul]', ], 'bug-7271-indented-nospace' => [ - 'expectedHtml' => '', + 'expectedHtml' => '', 'text' => '[ul] [*]http://example.com/ [/ul]', ], 'bug-2199-named-size' => [ - 'expectedHtml' => 'Test text', + 'expectedHtml' => 'Test text', 'text' => '[size=xx-large]Test text[/size]', ], 'bug-2199-numeric-size' => [ - 'expectedHtml' => 'Test text', + 'expectedHtml' => 'Test text', 'text' => '[size=24]Test text[/size]', ], 'bug-2199-diaspora-no-named-size' => [ @@ -225,7 +237,7 @@ class BBCodeTest extends MockedTest 'simpleHtml' => 3, ], 'bug-7665-audio-tag' => [ - 'expectedHtml' => '', + 'expectedHtml' => '', 'text' => '[audio]http://www.cendrones.fr/colloque2017/jonathanbocquet.mp3[/audio]', 'try_oembed' => true, ], @@ -246,9 +258,25 @@ class BBCodeTest extends MockedTest 'text' => '[test] Space', ], 'task-8800-pre-spaces' => [ - 'expectedHtml' => '    Spaces', + 'expectedHtml' => '    Spaces', 'text' => '[pre] Spaces[/pre]', ], + 'bug-9611-purify-xss-nobb' => [ + 'expectedHTML' => 'dare to move your mouse here', + 'text' => '[nobb]dare to move your mouse here[/nobb]' + ], + 'bug-9611-purify-xss-noparse' => [ + 'expectedHTML' => 'dare to move your mouse here', + 'text' => '[noparse]dare to move your mouse here[/noparse]' + ], + 'bug-9611-purify-xss-attributes' => [ + 'expectedHTML' => 'dare to move your mouse here', + 'text' => '[color="onmouseover=alert(0) style="]dare to move your mouse here[/color]' + ], + 'bug-9611-purify-attributes-correct' => [ + 'expectedHTML' => 'dare to move your mouse here', + 'text' => '[color=FFFFFF]dare to move your mouse here[/color]' + ], ]; } From a382798999c2b0ab6667d8d7c8e187b7b4c5b5b0 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 4 Dec 2020 07:28:18 -0500 Subject: [PATCH 3/5] Add some more result panels to Babel --- src/Module/Debug/Babel.php | 26 +++++++++++++++++++++++++- view/templates/babel.tpl | 4 +--- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/Module/Debug/Babel.php b/src/Module/Debug/Babel.php index 5b89c5301..e33f03214 100644 --- a/src/Module/Debug/Babel.php +++ b/src/Module/Debug/Babel.php @@ -49,7 +49,7 @@ class Babel extends BaseModule if (!empty($_REQUEST['text'])) { switch (($_REQUEST['type'] ?? '') ?: 'bbcode') { case 'bbcode': - $bbcode = trim($_REQUEST['text']); + $bbcode = $_REQUEST['text']; $results[] = [ 'title' => DI::l10n()->t('Source input'), 'content' => visible_whitespace($bbcode) @@ -67,6 +67,11 @@ class Babel extends BaseModule 'content' => visible_whitespace($html) ]; + $results[] = [ + 'title' => DI::l10n()->t('BBCode::convert (hex)'), + 'content' => visible_whitespace(bin2hex($html)), + ]; + $results[] = [ 'title' => DI::l10n()->t('BBCode::convert'), 'content' => $html @@ -178,6 +183,25 @@ class Babel extends BaseModule 'content' => $html ]; + $config = \HTMLPurifier_Config::createDefault(); + $HTMLPurifier = new \HTMLPurifier($config); + $purified = $HTMLPurifier->purify($html); + + $results[] = [ + 'title' => DI::l10n()->t('HTML Purified (raw)'), + 'content' => visible_whitespace($purified), + ]; + + $results[] = [ + 'title' => DI::l10n()->t('HTML Purified (hex)'), + 'content' => visible_whitespace(bin2hex($purified)), + ]; + + $results[] = [ + 'title' => DI::l10n()->t('HTML Purified'), + 'content' => $purified, + ]; + $bbcode = Text\HTML::toBBCode($html); $results[] = [ 'title' => DI::l10n()->t('HTML::toBBCode'), diff --git a/view/templates/babel.tpl b/view/templates/babel.tpl index 57d17fea9..9b5a3d62a 100644 --- a/view/templates/babel.tpl +++ b/view/templates/babel.tpl @@ -24,9 +24,7 @@

    {{$result.title}}

    -
    - {{$result.content nofilter}} -
    +
    {{$result.content nofilter}}
    {{/foreach}} From e71270630227a43811cb8005153666724af50dda Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 4 Dec 2020 07:29:08 -0500 Subject: [PATCH 4/5] Add BBCode versioning - This will trigger the re-conversion of displayed items on version update --- src/Content/Text/BBCode.php | 3 +++ src/Model/Item.php | 29 +++++++++++++++-------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index 5f7727854..38171d295 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -49,6 +49,9 @@ use Friendica\Util\XML; class BBCode { + // Update this value to the current date whenever changes are made to BBCode::convert + const VERSION = '2020-12-03'; + const INTERNAL = 0; const API = 2; const DIASPORA = 3; diff --git a/src/Model/Item.php b/src/Model/Item.php index a8402a283..779f5b83c 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -3529,49 +3529,50 @@ class Item */ public static function putInCache(&$item, $update = false) { - $body = $item["body"]; + // Save original body to prevent addons to modify it + $body = $item['body']; $rendered_hash = $item['rendered-hash'] ?? ''; $rendered_html = $item['rendered-html'] ?? ''; if ($rendered_hash == '' - || $rendered_html == "" - || $rendered_hash != hash("md5", $item["body"]) - || DI::config()->get("system", "ignore_cache") + || $rendered_html == '' + || $rendered_hash != BBCode::VERSION . '::' . hash('md5', $body) + || DI::config()->get('system', 'ignore_cache') ) { self::addRedirToImageTags($item); - $item["rendered-html"] = BBCode::convert($item["body"]); - $item["rendered-hash"] = hash("md5", $item["body"]); + $item['rendered-html'] = BBCode::convert($item['body']); + $item['rendered-hash'] = hash('md5', $body); $hook_data = ['item' => $item, 'rendered-html' => $item['rendered-html'], 'rendered-hash' => $item['rendered-hash']]; Hook::callAll('put_item_in_cache', $hook_data); $item['rendered-html'] = $hook_data['rendered-html']; - $item['rendered-hash'] = $hook_data['rendered-hash']; + $item['rendered-hash'] = BBCode::VERSION . '::' . $hook_data['rendered-hash']; unset($hook_data); // Force an update if the generated values differ from the existing ones - if ($rendered_hash != $item["rendered-hash"]) { + if ($rendered_hash != $item['rendered-hash']) { $update = true; } // Only compare the HTML when we forcefully ignore the cache - if (DI::config()->get("system", "ignore_cache") && ($rendered_html != $item["rendered-html"])) { + if (DI::config()->get('system', 'ignore_cache') && ($rendered_html != $item['rendered-html'])) { $update = true; } - if ($update && !empty($item["id"])) { + if ($update && !empty($item['id'])) { self::update( [ - 'rendered-html' => $item["rendered-html"], - 'rendered-hash' => $item["rendered-hash"] + 'rendered-html' => $item['rendered-html'], + 'rendered-hash' => $item['rendered-hash'] ], - ['id' => $item["id"]] + ['id' => $item['id']] ); } } - $item["body"] = $body; + $item['body'] = $body; } /** From 1cc509c5ef5ff4dcb2564c3e964e688a52adf045 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 4 Dec 2020 07:55:48 -0500 Subject: [PATCH 5/5] Move BBCode version tagging inside of the hash to prevent item-content.rendered-hash overflow --- src/Model/Item.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Model/Item.php b/src/Model/Item.php index 779f5b83c..d41e84c5b 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -3537,18 +3537,18 @@ class Item if ($rendered_hash == '' || $rendered_html == '' - || $rendered_hash != BBCode::VERSION . '::' . hash('md5', $body) + || $rendered_hash != hash('md5', BBCode::VERSION . '::' . $body) || DI::config()->get('system', 'ignore_cache') ) { self::addRedirToImageTags($item); $item['rendered-html'] = BBCode::convert($item['body']); - $item['rendered-hash'] = hash('md5', $body); + $item['rendered-hash'] = hash('md5', BBCode::VERSION . '::' . $body); $hook_data = ['item' => $item, 'rendered-html' => $item['rendered-html'], 'rendered-hash' => $item['rendered-hash']]; Hook::callAll('put_item_in_cache', $hook_data); $item['rendered-html'] = $hook_data['rendered-html']; - $item['rendered-hash'] = BBCode::VERSION . '::' . $hook_data['rendered-hash']; + $item['rendered-hash'] = $hook_data['rendered-hash']; unset($hook_data); // Force an update if the generated values differ from the existing ones