Merge pull request #13611 from annando/languages
Use the post language for the language detection / config for quality
This commit is contained in:
commit
58e5f0d9c5
6 changed files with 101 additions and 25 deletions
|
@ -117,7 +117,7 @@ class Item
|
||||||
const DELIVER_FIELDLIST = [
|
const DELIVER_FIELDLIST = [
|
||||||
'uid', 'id', 'parent', 'uri-id', 'uri', 'thr-parent', 'parent-uri', 'guid',
|
'uid', 'id', 'parent', 'uri-id', 'uri', 'thr-parent', 'parent-uri', 'guid',
|
||||||
'parent-guid', 'conversation', 'received', 'created', 'edited', 'verb', 'object-type', 'object', 'target',
|
'parent-guid', 'conversation', 'received', 'created', 'edited', 'verb', 'object-type', 'object', 'target',
|
||||||
'private', 'title', 'body', 'raw-body', 'location', 'coord', 'app',
|
'private', 'title', 'body', 'raw-body', 'language', 'location', 'coord', 'app',
|
||||||
'inform', 'deleted', 'extid', 'post-type', 'post-reason', 'gravity',
|
'inform', 'deleted', 'extid', 'post-type', 'post-reason', 'gravity',
|
||||||
'allow_cid', 'allow_gid', 'deny_cid', 'deny_gid',
|
'allow_cid', 'allow_gid', 'deny_cid', 'deny_gid',
|
||||||
'author-id', 'author-addr', 'author-link', 'author-name', 'author-avatar', 'owner-id', 'owner-link', 'contact-uid',
|
'author-id', 'author-addr', 'author-link', 'author-name', 'author-avatar', 'owner-id', 'owner-link', 'contact-uid',
|
||||||
|
@ -1484,6 +1484,10 @@ class Item
|
||||||
*/
|
*/
|
||||||
private static function setOwnerforResharedItem(array $item)
|
private static function setOwnerforResharedItem(array $item)
|
||||||
{
|
{
|
||||||
|
if ($item['uid'] == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
$parent = Post::selectFirst(
|
$parent = Post::selectFirst(
|
||||||
['id', 'causer-id', 'owner-id', 'author-id', 'author-link', 'origin', 'post-reason'],
|
['id', 'causer-id', 'owner-id', 'author-id', 'author-link', 'origin', 'post-reason'],
|
||||||
['uri-id' => $item['thr-parent-id'], 'uid' => $item['uid']]
|
['uri-id' => $item['thr-parent-id'], 'uid' => $item['uid']]
|
||||||
|
|
|
@ -586,7 +586,14 @@ class User
|
||||||
$languages = [];
|
$languages = [];
|
||||||
$uids = [];
|
$uids = [];
|
||||||
|
|
||||||
$users = DBA::select('user', ['uid', 'language'], ["`verified` AND NOT `blocked` AND NOT `account_removed` AND NOT `account_expired` AND `uid` > ?", 0]);
|
$condition = ["`verified` AND NOT `blocked` AND NOT `account_removed` AND NOT `account_expired` AND `uid` > ?", 0];
|
||||||
|
|
||||||
|
$abandon_days = intval(DI::config()->get('system', 'account_abandon_days'));
|
||||||
|
if (!empty($abandon_days)) {
|
||||||
|
$condition = DBA::mergeConditions($condition, ["`last-activity` > ?", DateTimeFormat::utc('now - ' . $abandon_days . ' days')]);
|
||||||
|
}
|
||||||
|
|
||||||
|
$users = DBA::select('user', ['uid', 'language'], $condition);
|
||||||
while ($user = DBA::fetch($users)) {
|
while ($user = DBA::fetch($users)) {
|
||||||
$uids[] = $user['uid'];
|
$uids[] = $user['uid'];
|
||||||
$code = DI::l10n()->toISO6391($user['language']);
|
$code = DI::l10n()->toISO6391($user['language']);
|
||||||
|
@ -612,6 +619,7 @@ class User
|
||||||
}
|
}
|
||||||
DBA::close($channels);
|
DBA::close($channels);
|
||||||
|
|
||||||
|
ksort($languages);
|
||||||
return array_keys($languages);
|
return array_keys($languages);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1673,7 +1673,39 @@ class Processor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Relay::isSolicitedPost($messageTags, $content, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 0);
|
$languages = self::getPostLanguages($activity);
|
||||||
|
|
||||||
|
return Relay::isSolicitedPost($messageTags, $content, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 0, $languages);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch the languages declared via "@language" on the "as:content" element of the activity's object
|
||||||
|
*
|
||||||
|
* @param array $activity
|
||||||
|
* @return array
|
||||||
|
*/
|
||||||
|
private static function getPostLanguages(array $activity): array
|
||||||
|
{
|
||||||
|
$content = JsonLD::fetchElement($activity['as:object'], 'as:content') ?? '';
|
||||||
|
$languages = JsonLD::fetchElementArray($activity['as:object'], 'as:content', '@language') ?? [];
|
||||||
|
if (empty($languages)) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
$iso639 = new \Matriphe\ISO639\ISO639;
|
||||||
|
|
||||||
|
$result = [];
|
||||||
|
foreach ($languages as $language) {
|
||||||
|
if ($language == $content) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$language = DI::l10n()->toISO6391($language);
|
||||||
|
if (!in_array($language, array_column($iso639->allLanguages(), 0))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$result[] = $language;
|
||||||
|
}
|
||||||
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -895,6 +895,19 @@ class Transmitter
|
||||||
*/
|
*/
|
||||||
public static function getReceiversForUriId(int $uri_id, bool $blindcopy)
|
public static function getReceiversForUriId(int $uri_id, bool $blindcopy)
|
||||||
{
|
{
|
||||||
|
$tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]);
|
||||||
|
if (empty($tags)) {
|
||||||
|
Logger::debug('No receivers found', ['uri-id' => $uri_id]);
|
||||||
|
$post = Post::selectFirst([Item::DELIVER_FIELDLIST], ['uri-id' => $uri_id, 'origin' => true]);
|
||||||
|
if (!empty($post)) {
|
||||||
|
ActivityPub\Transmitter::storeReceiversForItem($post);
|
||||||
|
$tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]);
|
||||||
|
Logger::debug('Receivers are created', ['uri-id' => $uri_id, 'receivers' => count($tags)]);
|
||||||
|
} else {
|
||||||
|
Logger::debug('Origin item not found', ['uri-id' => $uri_id]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$receivers = [
|
$receivers = [
|
||||||
'to' => [],
|
'to' => [],
|
||||||
'cc' => [],
|
'cc' => [],
|
||||||
|
@ -902,7 +915,7 @@ class Transmitter
|
||||||
'audience' => [],
|
'audience' => [],
|
||||||
];
|
];
|
||||||
|
|
||||||
foreach (Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]) as $receiver) {
|
foreach ($tags as $receiver) {
|
||||||
switch ($receiver['type']) {
|
switch ($receiver['type']) {
|
||||||
case Tag::TO:
|
case Tag::TO:
|
||||||
$receivers['to'][] = $receiver['url'];
|
$receivers['to'][] = $receiver['url'];
|
||||||
|
@ -1884,7 +1897,7 @@ class Transmitter
|
||||||
if (!empty($item['language'])) {
|
if (!empty($item['language'])) {
|
||||||
$languages = array_keys(json_decode($item['language'], true));
|
$languages = array_keys(json_decode($item['language'], true));
|
||||||
if (!empty($languages[0])) {
|
if (!empty($languages[0])) {
|
||||||
return $languages[0];
|
return DI::l10n()->toISO6391($languages[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1892,12 +1905,12 @@ class Transmitter
|
||||||
if (!empty($item['uid'])) {
|
if (!empty($item['uid'])) {
|
||||||
$user = DBA::selectFirst('user', ['language'], ['uid' => $item['uid']]);
|
$user = DBA::selectFirst('user', ['language'], ['uid' => $item['uid']]);
|
||||||
if (!empty($user['language'])) {
|
if (!empty($user['language'])) {
|
||||||
return $user['language'];
|
return DI::l10n()->toISO6391($user['language']);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// And finally just use the system language
|
// And finally just use the system language
|
||||||
return DI::config()->get('system', 'language');
|
return DI::l10n()->toISO6391(DI::config()->get('system', 'language'));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -53,13 +53,16 @@ class Relay
|
||||||
/**
|
/**
|
||||||
* Check if a post is wanted
|
* Check if a post is wanted
|
||||||
*
|
*
|
||||||
* @param array $tags
|
* @param array $tags
|
||||||
* @param string $body
|
* @param string $body
|
||||||
* @param int $authorid
|
* @param int $authorid
|
||||||
* @param string $url
|
* @param string $url
|
||||||
|
* @param string $network
|
||||||
|
* @param int $causerid
|
||||||
|
* @param array $languages
|
||||||
* @return boolean "true" if the post is wanted by the system
|
* @return boolean "true" if the post is wanted by the system
|
||||||
*/
|
*/
|
||||||
public static function isSolicitedPost(array $tags, string $body, int $authorid, string $url, string $network = '', int $causerid = 0): bool
|
public static function isSolicitedPost(array $tags, string $body, int $authorid, string $url, string $network = '', int $causerid = 0, array $languages = []): bool
|
||||||
{
|
{
|
||||||
$config = DI::config();
|
$config = DI::config();
|
||||||
|
|
||||||
|
@ -128,7 +131,7 @@ class Relay
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!self::isWantedLanguage($body, 0, $authorid)) {
|
if (!self::isWantedLanguage($body, 0, $authorid, $languages)) {
|
||||||
Logger::info('Unwanted or Undetected language found - rejected', ['network' => $network, 'url' => $url, 'causer' => $causer, 'tags' => $tags]);
|
Logger::info('Unwanted or Undetected language found - rejected', ['network' => $network, 'url' => $url, 'causer' => $causer, 'tags' => $tags]);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -171,37 +174,45 @@ class Relay
|
||||||
* @param string $body
|
* @param string $body
|
||||||
* @param int $uri_id
|
* @param int $uri_id
|
||||||
* @param int $author_id
|
* @param int $author_id
|
||||||
|
* @param array $languages
|
||||||
* @return boolean
|
* @return boolean
|
||||||
*/
|
*/
|
||||||
public static function isWantedLanguage(string $body, int $uri_id = 0, int $author_id = 0)
|
public static function isWantedLanguage(string $body, int $uri_id = 0, int $author_id = 0, array $languages = [])
|
||||||
{
|
{
|
||||||
if (empty($body) || Smilies::isEmojiPost($body)) {
|
$detected = [];
|
||||||
|
$quality = DI::config()->get('system', 'relay_language_quality');
|
||||||
|
foreach (Item::getLanguageArray($body, DI::config()->get('system', 'relay_languages'), $uri_id, $author_id) as $language => $reliability) {
|
||||||
|
if (($reliability >= $quality) && ($quality > 0)) {
|
||||||
|
$detected[] = $language;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (empty($languages) && empty($detected) && (empty($body) || Smilies::isEmojiPost($body))) {
|
||||||
Logger::debug('Empty body or only emojis', ['body' => $body]);
|
Logger::debug('Empty body or only emojis', ['body' => $body]);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
$languages = [];
|
if (!empty($languages) || !empty($detected)) {
|
||||||
foreach (Item::getLanguageArray($body, 10, $uri_id, $author_id) as $language => $reliability) {
|
|
||||||
if ($reliability > 0) {
|
|
||||||
$languages[] = $language;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!empty($languages)) {
|
|
||||||
$cachekey = 'relay:isWantedLanguage';
|
$cachekey = 'relay:isWantedLanguage';
|
||||||
$user_languages = DI::cache()->get($cachekey);
|
$user_languages = DI::cache()->get($cachekey);
|
||||||
if (is_null($user_languages)) {
|
if (is_null($user_languages)) {
|
||||||
$user_languages = User::getLanguages();
|
$user_languages = User::getLanguages();
|
||||||
DI::cache()->set($cachekey, $user_languages, Duration::HALF_HOUR);
|
DI::cache()->set($cachekey, $user_languages);
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($languages as $language) {
|
foreach ($detected as $language) {
|
||||||
if (in_array($language, $user_languages)) {
|
if (in_array($language, $user_languages)) {
|
||||||
Logger::debug('Wanted language found', ['language' => $language, 'languages' => $languages, 'userlang' => $user_languages, 'body' => $body]);
|
Logger::debug('Wanted language found in detected languages', ['language' => $language, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Logger::debug('No wanted language found', ['languages' => $languages, 'userlang' => $user_languages, 'body' => $body]);
|
foreach ($languages as $language) {
|
||||||
|
if (in_array($language, $user_languages)) {
|
||||||
|
Logger::debug('Wanted language found in defined languages', ['language' => $language, 'languages' => $languages, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Logger::debug('No wanted language found', ['languages' => $languages, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]);
|
||||||
return false;
|
return false;
|
||||||
} elseif (DI::config()->get('system', 'relay_deny_undetected_language')) {
|
} elseif (DI::config()->get('system', 'relay_deny_undetected_language')) {
|
||||||
Logger::info('Undetected language found', ['body' => $body]);
|
Logger::info('Undetected language found', ['body' => $body]);
|
||||||
|
|
|
@ -562,6 +562,14 @@ return [
|
||||||
// Deny undetected languages
|
// Deny undetected languages
|
||||||
'relay_deny_undetected_language' => false,
|
'relay_deny_undetected_language' => false,
|
||||||
|
|
||||||
|
// relay_language_quality (Float)
|
||||||
|
// Minimum value for the language detection quality for relay posts. The value must be between 0 and 1. With a value of 0, no detected language passes the quality check.
|
||||||
|
'relay_language_quality' => 0,
|
||||||
|
|
||||||
|
// relay_languages (Integer)
|
||||||
|
// Number of languages that are used per post to check for acceptable posts.
|
||||||
|
'relay_languages' => 10,
|
||||||
|
|
||||||
// session_handler (database|cache|native)
|
// session_handler (database|cache|native)
|
||||||
// Whether to use Cache to store session data or to use PHP native session storage.
|
// Whether to use Cache to store session data or to use PHP native session storage.
|
||||||
'session_handler' => 'database',
|
'session_handler' => 'database',
|
||||||
|
|
Loading…
Reference in a new issue