Merge pull request #11884 from annando/language
Improved language detection
This commit is contained in:
commit
8f283985f0
2 changed files with 47 additions and 18 deletions
|
@ -919,8 +919,6 @@ class Item
|
||||||
|
|
||||||
$item['gravity'] = self::getGravity($item);
|
$item['gravity'] = self::getGravity($item);
|
||||||
|
|
||||||
$item['language'] = self::getLanguage($item);
|
|
||||||
|
|
||||||
$default = ['url' => $item['author-link'], 'name' => $item['author-name'],
|
$default = ['url' => $item['author-link'], 'name' => $item['author-name'],
|
||||||
'photo' => $item['author-avatar'], 'network' => $item['network']];
|
'photo' => $item['author-avatar'], 'network' => $item['network']];
|
||||||
$item['author-id'] = ($item['author-id'] ?? 0) ?: Contact::getIdForURL($item['author-link'], 0, null, $default);
|
$item['author-id'] = ($item['author-id'] ?? 0) ?: Contact::getIdForURL($item['author-link'], 0, null, $default);
|
||||||
|
@ -1108,6 +1106,8 @@ class Item
|
||||||
// Check for hashtags in the body and repair or add hashtag links
|
// Check for hashtags in the body and repair or add hashtag links
|
||||||
$item['body'] = self::setHashtags($item['body']);
|
$item['body'] = self::setHashtags($item['body']);
|
||||||
|
|
||||||
|
$item['language'] = self::getLanguage($item);
|
||||||
|
|
||||||
$notify_type = Delivery::POST;
|
$notify_type = Delivery::POST;
|
||||||
|
|
||||||
// Filling item related side tables
|
// Filling item related side tables
|
||||||
|
@ -1869,6 +1869,8 @@ class Item
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$naked_body = self::getDominantLanguage($naked_body);
|
||||||
|
|
||||||
$availableLanguages = DI::l10n()->getAvailableLanguages();
|
$availableLanguages = DI::l10n()->getAvailableLanguages();
|
||||||
// See https://github.com/friendica/friendica/issues/10511
|
// See https://github.com/friendica/friendica/issues/10511
|
||||||
// Persian is manually added to language detection until a persian translation is provided for the interface, at
|
// Persian is manually added to language detection until a persian translation is provided for the interface, at
|
||||||
|
@ -1884,6 +1886,33 @@ class Item
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reduce the body to the characters of its dominant script (Latin or non-Latin).
 *
 * Every character of the body is sorted into one of three classes by its code point:
 * - "common" characters (control characters, whitespace, digits, ASCII and Latin-1
 *   punctuation and symbols, the multiplication and division signs, the modifier
 *   letters 697-735 and everything above 65535) are kept in both candidate strings
 * - every other character below code point 768 is treated as Latin
 * - every remaining character is treated as non-Latin
 *
 * The longer of the two candidate strings is returned. When both have the same
 * length, the non-Latin candidate is returned.
 *
 * @param string $body Text to analyse
 * @return string The body without the characters of the less frequent script
 */
private static function getDominantLanguage(string $body): string
{
	// Split the body into code points once. Fetching every character with
	// mb_substr($body, $i, 1) rescans the string from its start on each call,
	// which makes the loop quadratic for long bodies.
	$characters = preg_split('//u', $body, -1, PREG_SPLIT_NO_EMPTY);
	if ($characters === false) {
		// preg_split() fails on invalid UTF-8. Fall back to offset based extraction
		// so that malformed input is still processed instead of being dropped.
		$characters = [];
		$length     = mb_strlen($body);
		for ($i = 0; $i < $length; $i++) {
			$characters[] = mb_substr($body, $i, 1);
		}
	}

	$latin     = '';
	$non_latin = '';
	foreach ($characters as $character) {
		$ord = mb_ord($character);

		// We add the most common characters to both strings.
		if (($ord <= 64) || ($ord >= 91 && $ord <= 96) || ($ord >= 123 && $ord <= 191) || in_array($ord, [215, 247], true) || ($ord >= 697 && $ord <= 735) || ($ord > 65535)) {
			$latin     .= $character;
			$non_latin .= $character;
		} elseif ($ord < 768) {
			$latin .= $character;
		} else {
			$non_latin .= $character;
		}
	}

	// A tie is resolved in favour of the non-Latin candidate.
	return (mb_strlen($latin) > mb_strlen($non_latin)) ? $latin : $non_latin;
}
|
||||||
|
|
||||||
public static function getLanguageMessage(array $item): string
|
public static function getLanguageMessage(array $item): string
|
||||||
{
|
{
|
||||||
$iso639 = new \Matriphe\ISO639\ISO639;
|
$iso639 = new \Matriphe\ISO639\ISO639;
|
||||||
|
|
|
@ -1411,7 +1411,7 @@ msgstr ""
|
||||||
msgid "Friend Suggestions"
|
msgid "Friend Suggestions"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: mod/tagger.php:78 src/Content/Item.php:297 src/Model/Item.php:2785
|
#: mod/tagger.php:78 src/Content/Item.php:297 src/Model/Item.php:2814
|
||||||
msgid "photo"
|
msgid "photo"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
|
@ -2292,7 +2292,7 @@ msgstr ""
|
||||||
msgid "show more"
|
msgid "show more"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: src/Content/Item.php:288 src/Model/Item.php:2783
|
#: src/Content/Item.php:288 src/Model/Item.php:2812
|
||||||
msgid "event"
|
msgid "event"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
|
@ -2639,8 +2639,8 @@ msgid ""
|
||||||
"<a href=\"%1$s\" target=\"_blank\" rel=\"noopener noreferrer\">%2$s</a> %3$s"
|
"<a href=\"%1$s\" target=\"_blank\" rel=\"noopener noreferrer\">%2$s</a> %3$s"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: src/Content/Text/BBCode.php:1213 src/Model/Item.php:3359
|
#: src/Content/Text/BBCode.php:1213 src/Model/Item.php:3388
|
||||||
#: src/Model/Item.php:3365 src/Model/Item.php:3366
|
#: src/Model/Item.php:3394 src/Model/Item.php:3395
|
||||||
msgid "Link to source"
|
msgid "Link to source"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
|
@ -3782,66 +3782,66 @@ msgstr ""
|
||||||
msgid "Edit groups"
|
msgid "Edit groups"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: src/Model/Item.php:1895
|
#: src/Model/Item.php:1924
|
||||||
#, php-format
|
#, php-format
|
||||||
msgid "Detected languages in this post:\\n%s"
|
msgid "Detected languages in this post:\\n%s"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: src/Model/Item.php:2787
|
#: src/Model/Item.php:2816
|
||||||
msgid "activity"
|
msgid "activity"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: src/Model/Item.php:2789
|
#: src/Model/Item.php:2818
|
||||||
msgid "comment"
|
msgid "comment"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: src/Model/Item.php:2792
|
#: src/Model/Item.php:2821
|
||||||
msgid "post"
|
msgid "post"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: src/Model/Item.php:2908
|
#: src/Model/Item.php:2937
|
||||||
#, php-format
|
#, php-format
|
||||||
msgid "Content warning: %s"
|
msgid "Content warning: %s"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: src/Model/Item.php:3271
|
#: src/Model/Item.php:3300
|
||||||
msgid "bytes"
|
msgid "bytes"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: src/Model/Item.php:3302
|
#: src/Model/Item.php:3331
|
||||||
#, php-format
|
#, php-format
|
||||||
msgid "%2$s (%3$d%%, %1$d vote)"
|
msgid "%2$s (%3$d%%, %1$d vote)"
|
||||||
msgid_plural "%2$s (%3$d%%, %1$d votes)"
|
msgid_plural "%2$s (%3$d%%, %1$d votes)"
|
||||||
msgstr[0] ""
|
msgstr[0] ""
|
||||||
msgstr[1] ""
|
msgstr[1] ""
|
||||||
|
|
||||||
#: src/Model/Item.php:3304
|
#: src/Model/Item.php:3333
|
||||||
#, php-format
|
#, php-format
|
||||||
msgid "%2$s (%1$d vote)"
|
msgid "%2$s (%1$d vote)"
|
||||||
msgid_plural "%2$s (%1$d votes)"
|
msgid_plural "%2$s (%1$d votes)"
|
||||||
msgstr[0] ""
|
msgstr[0] ""
|
||||||
msgstr[1] ""
|
msgstr[1] ""
|
||||||
|
|
||||||
#: src/Model/Item.php:3309
|
#: src/Model/Item.php:3338
|
||||||
#, php-format
|
#, php-format
|
||||||
msgid "%d voter. Poll end: %s"
|
msgid "%d voter. Poll end: %s"
|
||||||
msgid_plural "%d voters. Poll end: %s"
|
msgid_plural "%d voters. Poll end: %s"
|
||||||
msgstr[0] ""
|
msgstr[0] ""
|
||||||
msgstr[1] ""
|
msgstr[1] ""
|
||||||
|
|
||||||
#: src/Model/Item.php:3311
|
#: src/Model/Item.php:3340
|
||||||
#, php-format
|
#, php-format
|
||||||
msgid "%d voter."
|
msgid "%d voter."
|
||||||
msgid_plural "%d voters."
|
msgid_plural "%d voters."
|
||||||
msgstr[0] ""
|
msgstr[0] ""
|
||||||
msgstr[1] ""
|
msgstr[1] ""
|
||||||
|
|
||||||
#: src/Model/Item.php:3313
|
#: src/Model/Item.php:3342
|
||||||
#, php-format
|
#, php-format
|
||||||
msgid "Poll end: %s"
|
msgid "Poll end: %s"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: src/Model/Item.php:3347 src/Model/Item.php:3348
|
#: src/Model/Item.php:3376 src/Model/Item.php:3377
|
||||||
msgid "View on separate page"
|
msgid "View on separate page"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue