Merge pull request #13483 from annando/languages

Additional languages / new hook for additional language detection
This commit is contained in:
Hypolite Petovan 2023-10-01 02:50:55 -04:00 committed by GitHub
commit 6503016676
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 52 additions and 8 deletions

View file

@ -221,6 +221,13 @@ Please note: body contents are bbcode - not HTML
Called when receiving a post from another source. This may also be used to post local activity or system generated messages. Called when receiving a post from another source. This may also be used to post local activity or system generated messages.
`$b` is the item array of information to be stored in the database and the item body is bbcode. `$b` is the item array of information to be stored in the database and the item body is bbcode.
### detect_languages
Called after the language detection. This can be used for alternative language detection methods.
`$data` is an array:
- **text**: The text that is analyzed.
- **detected**: (input/output) Array of language codes detected in the related text.
### addon_settings ### addon_settings
Called when generating the HTML for the addon settings page. Called when generating the HTML for the addon settings page.
`$data` is an array containing: `$data` is an array containing:
@ -800,6 +807,7 @@ Here is a complete list of all hook callbacks with file locations (as of 24-Sep-
### src/Model/Item.php ### src/Model/Item.php
Hook::callAll('detect_languages', $data);
Hook::callAll('post_local', $item); Hook::callAll('post_local', $item);
Hook::callAll('post_remote', $item); Hook::callAll('post_remote', $item);
Hook::callAll('post_local_end', $posted_item); Hook::callAll('post_local_end', $posted_item);

View file

@ -103,6 +103,13 @@ Derzeitige Hooks
$b ist das Item-Array einer Information, die in der Datenbank und im Item gespeichert ist. $b ist das Item-Array einer Information, die in der Datenbank und im Item gespeichert ist.
(Bitte beachte: der Seiteninhalt ist bbcode - nicht HTML) (Bitte beachte: der Seiteninhalt ist bbcode - nicht HTML)
**'detect_languages'**
Wird nach der Sprachenerkennung aufgerufen.
Dieser Hook kann dafür verwendet werden, alternative Erkennungsfunktionen einzubinden.
`$data` ist ein Array:
'text' => Der analysierte Text.
'detected' => (Eingabe/Ausgabe) Das Array mit den erkannten Sprachen.
**'addon_settings'** - wird aufgerufen, wenn die HTML-Ausgabe der Addon-Einstellungsseite generiert wird. **'addon_settings'** - wird aufgerufen, wenn die HTML-Ausgabe der Addon-Einstellungsseite generiert wird.
$b ist die HTML-Ausgabe (String) der Addon-Einstellungsseite vor dem finalen "</form>"-Tag. $b ist die HTML-Ausgabe (String) der Addon-Einstellungsseite vor dem finalen "</form>"-Tag.
@ -316,6 +323,7 @@ Eine komplette Liste aller Hook-Callbacks mit den zugehörigen Dateien (am 01-Ap
### src/Model/Item.php ### src/Model/Item.php
Hook::callAll('detect_languages', $data);
Hook::callAll('post_local', $item); Hook::callAll('post_local', $item);
Hook::callAll('post_remote', $item); Hook::callAll('post_remote', $item);
Hook::callAll('post_local_end', $posted_item); Hook::callAll('post_local_end', $posted_item);

View file

@ -397,13 +397,31 @@ class L10n
// See https://github.com/friendica/friendica/issues/10511 // See https://github.com/friendica/friendica/issues/10511
// Persian is manually added to language detection until a persian translation is provided for the interface, at // Persian is manually added to language detection until a persian translation is provided for the interface, at
// which point it will be automatically available through `getAvailableLanguages()` and this should be removed. // which point it will be automatically available through `getAvailableLanguages()` and this should be removed.
// Additionally Portuguese, Ukrainian, traditional Chinese and Welsh are added to that list. // Additionally some more languages are added to that list that are used in the Fediverse.
$additional_langs = [ $additional_langs = [
'af' => 'Afrikaans',
'cy' => 'Cymraeg', 'cy' => 'Cymraeg',
'uk' => 'Українська', 'el-monoton' => 'Ελληνικά',
'eu' => 'euskara',
'fa' => 'فارسی',
'gl' => 'Galego',
'hi' => 'हिन्दी',
'hr' => 'Hrvatski',
'id' => 'bahasa Indonesia',
'ko' => '한국인',
'lt' => 'lietuvių',
'lv' => 'latviešu',
'sk' => 'slovenský',
'sl' => 'Slovenščina',
'sw' => 'Kiswahili',
'th' => 'แบบไทย',
'tl' => 'Wikang Tagalog',
'tr' => 'Türkçe',
'pt-PT' => 'Português', 'pt-PT' => 'Português',
'uk' => 'Українська',
'uz' => 'Ўзбек',
'vi' => 'Tiếng Việt',
'zh-hant' => '繁體', 'zh-hant' => '繁體',
'fa' => 'فارسی'
]; ];
$langs = array_merge($additional_langs, $langs); $langs = array_merge($additional_langs, $langs);
ksort($langs); ksort($langs);

View file

@ -2017,7 +2017,7 @@ class Item
$naked_body = BBCode::toPlaintext($naked_body); $naked_body = BBCode::toPlaintext($naked_body);
// Remove possibly remaining links // Remove possibly remaining links
$naked_body = preg_replace(Strings::autoLinkRegEx(), '', $naked_body); $naked_body = trim(preg_replace(Strings::autoLinkRegEx(), '', $naked_body));
if (empty($naked_body)) { if (empty($naked_body)) {
return []; return [];
@ -2029,7 +2029,17 @@ class Item
$availableLanguages = DI::l10n()->convertForLanguageDetection($availableLanguages); $availableLanguages = DI::l10n()->convertForLanguageDetection($availableLanguages);
$ld = new Language(array_keys($availableLanguages)); $ld = new Language(array_keys($availableLanguages));
return $ld->detect($naked_body)->limit(0, $count)->close() ?: []; $languages = $ld->detect($naked_body)->limit(0, $count)->close() ?: [];
$data = [
'text' => $naked_body,
'detected' => $languages,
];
Hook::callAll('detect_languages', $data);
$languages = $data['detected'];
return $languages;
} }
/** /**