Check more fields for the language

This commit is contained in:
Michael 2023-10-08 06:44:37 +00:00
parent 8968c63fcc
commit 0c82974986
4 changed files with 23 additions and 20 deletions

View File

@ -1212,8 +1212,6 @@ class Item
// Check for hashtags in the body and repair or add hashtag links
$item['body'] = self::setHashtags($item['body']);
$item['language'] = self::getLanguage($item);
$notify_type = Delivery::POST;
// Filling item related side tables
@ -1262,6 +1260,8 @@ class Item
}
}
$item['language'] = self::getLanguage($item);
$inserted = Post::insert($item['uri-id'], $item);
if ($item['gravity'] == self::GRAVITY_PARENT) {
@ -1991,7 +1991,7 @@ class Item
return '';
}
$languages = self::getLanguageArray(trim($item['title'] . "\n" . $item['body']), 3, $item['uri-id'], $item['author-id']);
$languages = self::getLanguageArray($item['title'] . ' ' . ($item['content-warning'] ?? '') . ' ' . $item['body'], 3, $item['uri-id'], $item['author-id']);
if (empty($languages)) {
return '';
}

View File

@ -158,18 +158,7 @@ class Engagement
$body .= ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . $item['body'];
$body = preg_replace("~\[url\=.*\]https?:.*\[\/url\]~", '', $body);
$body = Post\Media::addAttachmentsToBody($item['uri-id'], $body, [Post\Media::IMAGE]);
$text = BBCode::toPlaintext($body, false);
$text = preg_replace(Strings::autoLinkRegEx(), '', $text);
do {
$oldtext = $text;
$text = str_replace([' ', "\n", "\r"], ' ', $text);
} while ($oldtext != $text);
return $text;
return BBCode::toSearchText($body, $item['uri-id']);
}
private static function getMediaType(int $uri_id): int

View File

@ -1652,7 +1652,19 @@ class Processor
$attributed_to = JsonLD::fetchElement($activity['as:object'], 'as:attributedTo', '@id');
$authorid = Contact::getIdForURL($attributed_to);
$body = HTML::toBBCode(JsonLD::fetchElement($activity['as:object'], 'as:content', '@value') ?? '');
// Gather every human-readable text field of the object (name, summary and
// content) into a single string; $content is later handed to
// Relay::isSolicitedPost() instead of the bare body.
$content = JsonLD::fetchElement($activity['as:object'], 'as:name', '@value') ?? '';
// The parentheses are required: "." binds tighter than "??", so without them
// the null fallback would be applied to the already concatenated string and
// could never take effect.
$content .= ' ' . (JsonLD::fetchElement($activity['as:object'], 'as:summary', '@value') ?? '');
$content .= ' ' . HTML::toBBCode(JsonLD::fetchElement($activity['as:object'], 'as:content', '@value') ?? '');

// Append the summary and name of each attachment as well.
$attachments = JsonLD::fetchElementArray($activity['as:object'], 'as:attachment') ?? [];
foreach ($attachments as $media) {
	if (!empty($media['as:summary'])) {
		$content .= ' ' . (JsonLD::fetchElement($media, 'as:summary', '@value') ?? '');
	}
	if (!empty($media['as:name'])) {
		$content .= ' ' . (JsonLD::fetchElement($media, 'as:name', '@value') ?? '');
	}
}
$messageTags = [];
$tags = Receiver::processTags(JsonLD::fetchElementArray($activity['as:object'], 'as:tag') ?? []);
@ -1665,7 +1677,7 @@ class Processor
}
}
return Relay::isSolicitedPost($messageTags, $body, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 0);
return Relay::isSolicitedPost($messageTags, $content, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 0);
}
/**

View File

@ -125,7 +125,7 @@ class Relay
}
}
if (!self::isWantedLanguage($body)) {
if (!self::isWantedLanguage($body, 0, $authorid)) {
Logger::info('Unwanted or Undetected language found - rejected', ['network' => $network, 'url' => $url, 'causer' => $causer, 'tags' => $tags]);
return false;
}
@ -166,12 +166,14 @@ class Relay
* Detect the language of a post and decide if the post should be accepted
*
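* @param string $body      Text whose language is detected
* @param int    $uri_id    Optional URI id of the post, passed on to Item::getLanguageArray()
* @param int    $author_id Optional id of the post's author, passed on to Item::getLanguageArray()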
* @return boolean
*/
public static function isWantedLanguage(string $body)
public static function isWantedLanguage(string $body, int $uri_id = 0, int $author_id = 0)
{
$languages = [];
foreach (Item::getLanguageArray($body, 10) as $language => $reliability) {
foreach (Item::getLanguageArray($body, 10, $uri_id, $author_id) as $language => $reliability) {
if ($reliability > 0) {
$languages[] = $language;
}