We now store the tags in two separate tables
This commit is contained in:
parent
d3f4e4d629
commit
db657b0149
8 changed files with 121 additions and 98 deletions
|
@ -141,13 +141,14 @@ function query_page_info($url, $photo = "", $keywords = false, $keyword_blacklis
|
|||
return $data;
|
||||
}
|
||||
|
||||
function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "", $return_array = false)
|
||||
function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "")
|
||||
{
|
||||
$data = query_page_info($url, $photo, $keywords, $keyword_blacklist);
|
||||
if (empty($data['keywords']) || !is_array($data['keywords'])) {
|
||||
return '';
|
||||
}
|
||||
|
||||
$tags = "";
|
||||
$taglist = [];
|
||||
if (isset($data["keywords"]) && count($data["keywords"])) {
|
||||
foreach ($data["keywords"] as $keyword) {
|
||||
$hashtag = str_replace([" ", "+", "/", ".", "#", "'"],
|
||||
["", "", "", "", "", ""], $keyword);
|
||||
|
@ -157,15 +158,27 @@ function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blackl
|
|||
}
|
||||
|
||||
$tags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url]";
|
||||
$taglist[] = $hashtag;
|
||||
}
|
||||
}
|
||||
|
||||
if ($return_array) {
|
||||
return $taglist;
|
||||
} else {
|
||||
return $tags;
|
||||
}
|
||||
|
||||
/**
 * Fetch the keywords of a page and return them as a plain list of tag names.
 *
 * All arguments are handed unchanged to query_page_info(); only the
 * "keywords" entry of its result is used here.
 *
 * @param string $url               Address of the page, passed to query_page_info()
 * @param string $photo             Passed to query_page_info()
 * @param bool   $keywords          Passed to query_page_info()
 * @param string $keyword_blacklist Passed to query_page_info()
 *
 * @return array Sequentially indexed list of keywords with the characters
 *               " ", "+", "/", ".", "#" and "'" removed; an empty array
 *               when the page info carries no keyword array
 */
function get_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "")
{
	$page_info = query_page_info($url, $photo, $keywords, $keyword_blacklist);

	$has_keywords = !empty($page_info['keywords']) && is_array($page_info['keywords']);
	if (!$has_keywords) {
		return [];
	}

	// Characters that must not appear inside a tag name
	$stripped_chars = [" ", "+", "/", ".", "#", "'"];

	$to_hashtag = function ($keyword) use ($stripped_chars) {
		return str_replace($stripped_chars, "", $keyword);
	};

	// array_values() gives the same 0..n-1 keys that appending in a loop would
	return array_values(array_map($to_hashtag, $page_info['keywords']));
}
|
||||
|
||||
function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_blacklist = "")
|
||||
|
|
|
@ -54,6 +54,54 @@ class Tag
|
|||
self::EXCLUSIVE_MENTION => '!',
|
||||
];
|
||||
|
||||
/**
 * Store a tag or mention for a post.
 *
 * The tag itself is looked up in the "tag" table (and inserted when it is
 * missing); afterwards the post is linked to it through the "post-tag" table.
 *
 * @param int    $uriid URI id of the post the tag belongs to
 * @param int    $type  Tag type, stored in the "type" column
 * @param string $name  Tag name; leading/trailing control characters, blanks,
 *                      "\xFF", "#", "!" and "@" are removed, and the result is
 *                      cut to 64 bytes
 * @param string $url   Optional link of the tag; ignored when empty or equal
 *                      to the name, otherwise stored in lower case
 *
 * @return void
 */
public static function store(int $uriid, int $type, string $name, string $url = '')
{
	$name = trim($name, "\x00..\x20\xFF#!@");

	// Compare with '' explicitly: empty() would also discard the valid name "0"
	if ($name === '') {
		return;
	}

	$fields = ['name' => substr($name, 0, 64), 'type' => $type];

	if (!empty($url) && ($url != $name)) {
		$fields['url'] = strtolower($url);
	}

	// Reuse an already known tag, otherwise create it
	$tag = DBA::selectFirst('tag', ['id'], $fields);
	if (!DBA::isResult($tag)) {
		DBA::insert('tag', $fields, true);
		$tagid = DBA::lastInsertId();
	} else {
		$tagid = $tag['id'];
	}

	if (empty($tagid)) {
		Logger::error('No tag id created', $fields);
		return;
	}

	// Link the post to the tag
	DBA::insert('post-tag', ['uri-id' => $uriid, 'tid' => $tagid], true);

	Logger::info('Stored tag/mention', ['uri-id' => $uriid, 'tag-id' => $tagid, 'tag' => $fields]);
}
|
||||
|
||||
/**
 * Store a tag or mention whose type is given by its prefix character.
 *
 * The prefix is compared with the TAG_CHARACTER entries for mentions,
 * exclusive mentions, implicit mentions and hashtags (in that order). The
 * first match decides the type handed to store(); an unknown prefix stores
 * nothing.
 *
 * @param int    $uriid URI id of the post the tag belongs to
 * @param string $hash  Prefix character of the tag
 * @param string $name  Tag name, passed on to store()
 * @param string $url   Optional link of the tag, passed on to store()
 *
 * @return void
 */
public static function storeByHash(int $uriid, string $hash, string $name, string $url = '')
{
	$known_types = [
		self::MENTION,
		self::EXCLUSIVE_MENTION,
		self::IMPLICIT_MENTION,
		self::HASHTAG,
	];

	foreach ($known_types as $candidate) {
		if ($hash == self::TAG_CHARACTER[$candidate]) {
			self::store($uriid, $candidate, $name, $url);
			return;
		}
	}
}
|
||||
|
||||
/**
|
||||
* Store tags from the body
|
||||
*
|
||||
|
@ -73,9 +121,7 @@ class Tag
|
|||
continue;
|
||||
}
|
||||
|
||||
$fields = ['uri-id' => $uriid, 'name' => substr($tag, 1, 64), 'type' => self::HASHTAG];
|
||||
DBA::insert('tag', $fields, true);
|
||||
Logger::info('Stored tag', ['uriid' => $uriid, 'tag' => $tag, 'fields' => $fields]);
|
||||
self::storeByHash($uriid, '#', $tag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,6 +34,7 @@ use Friendica\Model\Event;
|
|||
use Friendica\Model\Item;
|
||||
use Friendica\Model\ItemURI;
|
||||
use Friendica\Model\Mail;
|
||||
use Friendica\Model\Tag;
|
||||
use Friendica\Model\Term;
|
||||
use Friendica\Model\User;
|
||||
use Friendica\Protocol\Activity;
|
||||
|
@ -585,53 +586,43 @@ class Processor
|
|||
private static function storeTags(int $uriid, array $tags = null)
|
||||
{
|
||||
// Make sure to delete all existing tags (can happen when called via the update functionality)
|
||||
DBA::delete('tag', ['uri-id' => $uriid]);
|
||||
DBA::delete('post-tag', ['uri-id' => $uriid]);
|
||||
|
||||
foreach ($tags as $tag) {
|
||||
if (empty($tag['name']) || empty($tag['type']) || !in_array($tag['type'], ['Mention', 'Hashtag'])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$fields = ['uri-id' => $uriid, 'name' => $tag['name']];
|
||||
$hash = substr($tag['name'], 0, 1);
|
||||
|
||||
if ($tag['type'] == 'Mention') {
|
||||
$fields['type'] = Term::MENTION;
|
||||
|
||||
if (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::MENTION]) {
|
||||
$fields['name'] = substr($fields['name'], 1);
|
||||
} elseif (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) {
|
||||
$fields['type'] = Term::EXCLUSIVE_MENTION;
|
||||
$fields['name'] = substr($fields['name'], 1);
|
||||
} elseif (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) {
|
||||
$fields['type'] = Term::IMPLICIT_MENTION;
|
||||
$fields['name'] = substr($fields['name'], 1);
|
||||
if (in_array($hash, [Tag::TAG_CHARACTER[Tag::MENTION],
|
||||
Tag::TAG_CHARACTER[Tag::EXCLUSIVE_MENTION],
|
||||
Tag::TAG_CHARACTER[Tag::IMPLICIT_MENTION]])) {
|
||||
$tag['name'] = substr($tag['name'], 1);
|
||||
} else {
|
||||
$hash = '#';
|
||||
}
|
||||
|
||||
if (!empty($tag['href'])) {
|
||||
$apcontact = APContact::getByURL($tag['href']);
|
||||
if (!empty($apcontact['name']) || !empty($apcontact['nick'])) {
|
||||
$fields['name'] = $apcontact['name'] ?: $apcontact['nick'];
|
||||
$tag['name'] = $apcontact['name'] ?: $apcontact['nick'];
|
||||
}
|
||||
}
|
||||
} elseif ($tag['type'] == 'Hashtag') {
|
||||
$fields['type'] = Term::HASHTAG;
|
||||
if (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::HASHTAG]) {
|
||||
$fields['name'] = substr($fields['name'], 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($fields['name'])) {
|
||||
continue;
|
||||
if (substr($tag['name'], 0, 1) == Term::TAG_CHARACTER[Term::HASHTAG]) {
|
||||
$tag['name'] = substr($tag['name'], 1);
|
||||
} else {
|
||||
$fields['name'] = substr($fields['name'], 0, 64);
|
||||
$hash = '@';
|
||||
}
|
||||
}
|
||||
|
||||
if (!empty($tag['href'] && ($tag['href'] != $tag['name']))) {
|
||||
$fields['url'] = $tag['href'];
|
||||
if (empty($tag['name'])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DBA::insert('tag', $fields, true);
|
||||
|
||||
Logger::info('Stored tag/mention', ['uriid' => $uriid, 'tag' => $tag, 'fields' => $fields]);
|
||||
Tag::storeByHash($uriid, $hash, $tag['name'], $tag['href']);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2464,26 +2464,7 @@ class DFRN
|
|||
|
||||
$item["tag"] .= $termhash . "[url=" . $termurl . "]" . $term . "[/url]";
|
||||
|
||||
// Store the hashtag/mention
|
||||
$fields = ['uri-id' => $item['uri-id'], 'name' => substr($term, 0, 64)];
|
||||
|
||||
if ($termhash == Term::TAG_CHARACTER[Term::MENTION]) {
|
||||
$fields['type'] = Term::MENTION;
|
||||
} elseif ($termhash == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) {
|
||||
$fields['type'] = Term::EXCLUSIVE_MENTION;
|
||||
} elseif ($termhash == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) {
|
||||
$fields['type'] = Term::IMPLICIT_MENTION;
|
||||
} elseif ($termhash == Term::TAG_CHARACTER[Term::HASHTAG]) {
|
||||
$fields['type'] = Term::HASHTAG;
|
||||
}
|
||||
|
||||
if (!empty($termurl)) {
|
||||
$fields['url'] = $termurl;
|
||||
}
|
||||
|
||||
DBA::insert('tag', $fields, true);
|
||||
|
||||
Logger::info('Stored tag/mention', ['uri-id' => $item['uri-id'], 'tag' => $term, 'url' => $termurl, 'hash' => $termhash, 'fields' => $fields]);
|
||||
Tag::storeByHash($item['uri-id'], $termhash, $term, $termurl);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1840,20 +1840,7 @@ class Diaspora
|
|||
continue;
|
||||
}
|
||||
|
||||
$fields = ['uri-id' => $uriid, 'name' => substr($person['name'] ?: $person['nick'], 0, 64), 'url' => $person['url']];
|
||||
|
||||
if ($match[1] == Term::TAG_CHARACTER[Term::MENTION]) {
|
||||
$fields['type'] = Term::MENTION;
|
||||
} elseif ($match[1] == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) {
|
||||
$fields['type'] = Term::EXCLUSIVE_MENTION;
|
||||
} elseif ($match[1] == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) {
|
||||
$fields['type'] = Term::IMPLICIT_MENTION;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
DBA::insert('tag', $fields, true);
|
||||
Logger::info('Stored mention', ['uriid' => $uriid, 'match' => $match, 'fields' => $fields]);
|
||||
Tag::storeByHash($uriid, $match[1], $person['name'] ?: $person['nick'], $person['url']);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ use Friendica\Core\Protocol;
|
|||
use Friendica\Database\DBA;
|
||||
use Friendica\DI;
|
||||
use Friendica\Model\Item;
|
||||
use Friendica\Model\Term;
|
||||
use Friendica\Model\Tag;
|
||||
use Friendica\Util\Network;
|
||||
use Friendica\Util\ParseUrl;
|
||||
use Friendica\Util\XML;
|
||||
|
@ -478,7 +478,7 @@ class Feed {
|
|||
$item["title"] = "";
|
||||
$item["body"] = $item["body"] . add_page_info($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
|
||||
$item["tag"] = add_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
|
||||
$taglist = add_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"], true);
|
||||
$taglist = get_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
|
||||
$item["object-type"] = Activity\ObjectType::BOOKMARK;
|
||||
unset($item["attach"]);
|
||||
} else {
|
||||
|
@ -492,7 +492,7 @@ class Feed {
|
|||
} else {
|
||||
// @todo $preview is never set in this case, is it intended? - @MrPetovan 2018-02-13
|
||||
$item["tag"] = add_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"]);
|
||||
$taglist = add_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"], true);
|
||||
$taglist = get_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"]);
|
||||
}
|
||||
$item["body"] .= "\n" . $item['tag'];
|
||||
} else {
|
||||
|
@ -531,10 +531,7 @@ class Feed {
|
|||
if (!empty($id) && !empty($taglist)) {
|
||||
$feeditem = Item::selectFirst(['uri-id'], ['id' => $id]);
|
||||
foreach ($taglist as $tag) {
|
||||
$fields = ['uri-id' => $feeditem['uri-id'], 'name' => substr($tag, 0, 64), 'type' => Term::HASHTAG];
|
||||
DBA::insert('tag', $fields, true);
|
||||
|
||||
Logger::info('Stored tag', ['uri-id' => $feeditem['uri-id'], 'tag' => $tag, 'fields' => $fields]);
|
||||
Tag::storeByHash($feeditem['uri-id'], '#', $tag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ use Friendica\Model\Conversation;
|
|||
use Friendica\Model\GContact;
|
||||
use Friendica\Model\Item;
|
||||
use Friendica\Model\ItemURI;
|
||||
use Friendica\Model\Term;
|
||||
use Friendica\Model\Tag;
|
||||
use Friendica\Model\User;
|
||||
use Friendica\Network\Probe;
|
||||
use Friendica\Util\DateTimeFormat;
|
||||
|
@ -665,10 +665,7 @@ class OStatus
|
|||
$item['tag'] .= '#[url=' . DI::baseUrl() . '/search?tag=' . $term . ']' . $term . '[/url]';
|
||||
|
||||
// Store the hashtag
|
||||
$fields = ['uri-id' => $item['uri-id'], 'name' => substr($term, 0, 64), 'type' => Term::HASHTAG];
|
||||
DBA::insert('tag', $fields, true);
|
||||
|
||||
Logger::info('Stored tag', ['uri-id' => $item['uri-id'], 'tag' => $term, 'fields' => $fields]);
|
||||
Tag::storeByHash($item['uri-id'], '#', $term);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1293,16 +1293,27 @@ return [
|
|||
]
|
||||
],
|
||||
"tag" => [
|
||||
"comment" => "item tags and mentions",
|
||||
"comment" => "tags and mentions",
|
||||
"fields" => [
|
||||
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "relation" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
|
||||
"type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "primary" => "1", "comment" => ""],
|
||||
"name" => ["type" => "varchar(64)", "not null" => "1", "default" => "", "primary" => "1", "comment" => ""],
|
||||
"url" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => ""]
|
||||
"id" => ["type" => "int unsigned", "not null" => "1", "extra" => "auto_increment", "primary" => "1", "comment" => ""],
|
||||
"type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "comment" => ""],
|
||||
"name" => ["type" => "varchar(64)", "not null" => "1", "default" => "", "comment" => ""],
|
||||
"url" => ["type" => "varbinary(255)", "not null" => "1", "default" => "", "comment" => ""]
|
||||
],
|
||||
"indexes" => [
|
||||
"PRIMARY" => ["uri-id", "type", "name"],
|
||||
"type_name" => ["type", "name"]
|
||||
"PRIMARY" => ["id"],
|
||||
"type_name_url" => ["UNIQUE", "type", "name", "url"]
|
||||
]
|
||||
],
|
||||
"post-tag" => [
|
||||
"comment" => "post relation to tags",
|
||||
"fields" => [
|
||||
"tid" => ["type" => "int unsigned", "not null" => "1", "relation" => ["tag" => "id"], "primary" => "1", "comment" => ""],
|
||||
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "relation" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
|
||||
],
|
||||
"indexes" => [
|
||||
"PRIMARY" => ["tid", "uri-id"],
|
||||
"uri-id" => ["uri-id"]
|
||||
]
|
||||
],
|
||||
"thread" => [
|
||||
|
|
Loading…
Reference in a new issue