Merge pull request #13384 from annando/smilies

Posts without text or containing only emojis are now always accepted by the language check
This commit is contained in:
Hypolite Petovan 2023-10-14 12:46:49 -04:00 committed by GitHub
commit a0da13cf6b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 108 additions and 6 deletions

View file

@ -285,4 +285,33 @@ class Smilies
return str_replace($matches[0], $t, $matches[0]);
}
/**
 * Checks whether the body consists solely of emoji-like characters.
 *
 * HTML entities are decoded and all whitespace is removed before the check.
 * A body that is empty after that step is not considered an emoji post.
 * With the intl extension available, the body is rejected as soon as it
 * contains an alphanumeric character, a punctuation character, or any
 * printable character that takes at most two bytes in UTF-8.
 *
 * @param string $body Possibly-HTML post body
 * @return boolean True when only emoji-like characters remain, false otherwise
 */
public static function isEmojiPost(string $body): bool
{
    // Decode entities first so that e.g. "&nbsp;" is removed together with the regular whitespace
    $conv = preg_replace('#\s#u', '', html_entity_decode($body));

    // preg_replace() returns null on failure (e.g. malformed UTF-8); treat that like an empty body
    if ($conv === null || $conv === '') {
        return false;
    }

    if (!class_exists('IntlChar')) {
        // Most Emojis are 4 byte Unicode characters, so this is a good workaround, when IntlChar does not exist on the system.
        // Integer comparison instead of a float division; the encoding is explicit because the regex above already assumes UTF-8.
        return strlen($conv) === 4 * mb_strlen($conv, 'UTF-8');
    }

    // Split into code points once instead of calling mb_strlen()/mb_substr() per iteration,
    // which made the scan quadratic in the body length.
    $characters = preg_split('//u', $conv, -1, PREG_SPLIT_NO_EMPTY);
    if ($characters === false) {
        return false;
    }

    foreach ($characters as $character) {
        // Emoji are printable as well, so isgraph() only counts against the body for
        // short (1-2 byte) characters; joiners such as U+200D take three bytes and pass.
        $isShortPrintable = \IntlChar::isgraph($character) && (strlen($character) <= 2);

        if (\IntlChar::isalnum($character) || \IntlChar::ispunct($character) || $isShortPrintable) {
            return false;
        }
    }

    return true;
}
}

View file

@ -1791,13 +1791,9 @@ class BBCode
$text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism", '', $text);
}
if (!$for_plaintext && DI::config()->get('system', 'big_emojis') && ($simple_html != self::DIASPORA)) {
$conv = html_entity_decode(str_replace([' ', "\n", "\r"], '', $text));
// Emojis are always 4 byte Unicode characters
if (!empty($conv) && (strlen($conv) / mb_strlen($conv) == 4)) {
if (!$for_plaintext && DI::config()->get('system', 'big_emojis') && ($simple_html != self::DIASPORA) && Smilies::isEmojiPost($text)) {
$text = '<span style="font-size: xx-large; line-height: normal;">' . $text . '</span>';
}
}
// Handle mentions and hashtag links
if ($simple_html == self::DIASPORA) {

View file

@ -21,6 +21,7 @@
namespace Friendica\Protocol;
use Friendica\Content\Smilies;
use Friendica\Content\Text\BBCode;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
@ -172,6 +173,11 @@ class Relay
*/
public static function isWantedLanguage(string $body, int $uri_id = 0, int $author_id = 0)
{
if (empty($body) || Smilies::isEmojiPost($body)) {
Logger::debug('Empty body or only emojis', ['body' => $body]);
return true;
}
$languages = [];
foreach (Item::getLanguageArray($body, 10, $uri_id, $author_id) as $language => $reliability) {
if ($reliability > 0) {

View file

@ -72,4 +72,75 @@ class SmiliesTest extends FixtureTest
$output = Smilies::replaceFromArray($text, $smilies);
self::assertEquals($expected, $output);
}
/**
 * Data provider for testIsEmojiPost().
 *
 * Every named dataset carries the expected result under 'expected' and the
 * post body to check under 'body'.
 *
 * @return array
 */
public function dataIsEmojiPost(): array
{
    // [dataset name, expected result, post body], kept in execution order
    $cases = [
        ['emoji', true, '👀'],
        ['emojis', true, '👀🤷'],
        ['emoji+whitespace', true, ' 👀 '],
        ['empty', false, ''],
        // The body below is a single line break
        ['whitespace', false, '
'],
        ['emoji+ASCII', false, '🤷a'],
        ['HTML entity whitespace', false, '&nbsp;'],
        ['HTML entity else', false, '&deg;'],
        ['emojis+HTML whitespace', true, '👀&nbsp;🤷'],
        ['emojis+HTML else', false, '👀&lt;🤷'],
        // The following bodies contain zero-width joiners between (and after) the emojis
        ['zwj', true, '👨‍👨‍👧‍'],
        ['zwj+whitespace', true, ' 👨‍👨‍👧‍ '],
        ['zwj+HTML whitespace', true, '&nbsp;👨‍👨‍👧‍&nbsp;'],
    ];

    $datasets = [];
    foreach ($cases as [$name, $expected, $body]) {
        $datasets[$name] = [
            'expected' => $expected,
            'body' => $body,
        ];
    }

    return $datasets;
}
/**
 * Verifies that Smilies::isEmojiPost() returns the expected result for a body.
 *
 * @dataProvider dataIsEmojiPost
 *
 * @param bool   $expected Result the check has to return
 * @param string $body     Post body handed to the check
 * @return void
 */
public function testIsEmojiPost(bool $expected, string $body)
{
    $actual = Smilies::isEmojiPost($body);

    self::assertEquals($expected, $actual);
}
}