From 7b13b7e9c80585904640577473ce13a891ef1436 Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 7 Aug 2020 13:49:59 +0000 Subject: [PATCH] Reworked "getIdForURL" --- mod/match.php | 2 +- src/Model/Contact.php | 252 ++++++----------------- src/Model/Item.php | 2 +- src/Network/HTTPRequest.php | 5 + src/Protocol/ActivityPub/Processor.php | 10 +- src/Protocol/ActivityPub/Transmitter.php | 2 +- src/Protocol/Feed.php | 2 +- src/Protocol/OStatus.php | 4 +- 8 files changed, 84 insertions(+), 195 deletions(-) diff --git a/mod/match.php b/mod/match.php index 8e0baacd1..cd1c66c89 100644 --- a/mod/match.php +++ b/mod/match.php @@ -88,7 +88,7 @@ function match_content(App $a) $profile = $msearch->results[$i]; // Already known contact - if (!$profile || Contact::getIdForURL($profile->url, local_user(), false)) { + if (!$profile || Contact::getIdForURL($profile->url, local_user())) { continue; } diff --git a/src/Model/Contact.php b/src/Model/Contact.php index c8d715214..cdc3187da 100644 --- a/src/Model/Contact.php +++ b/src/Model/Contact.php @@ -330,7 +330,7 @@ class Contact */ public static function isFollowerByURL($url, $uid) { - $cid = self::getIdForURL($url, $uid, false); + $cid = self::getIdForURL($url, $uid); if (empty($cid)) { return false; @@ -376,7 +376,7 @@ class Contact */ public static function isSharingByURL($url, $uid) { - $cid = self::getIdForURL($url, $uid, false); + $cid = self::getIdForURL($url, $uid); if (empty($cid)) { return false; @@ -471,7 +471,7 @@ class Contact if (!DBA::isResult($self)) { return false; } - return self::getIdForURL($self['url'], 0, false); + return self::getIdForURL($self['url']); } /** @@ -508,7 +508,7 @@ class Contact $ucid = $contact['id']; } else { $pcid = $contact['id']; - $ucid = Contact::getIdForURL($contact['url'], $uid, false); + $ucid = Contact::getIdForURL($contact['url'], $uid); } return ['public' => $pcid, 'user' => $ucid]; @@ -993,86 +993,6 @@ class Contact return $menucondensed; } - /** - * Have a look at all contact tables for a given profile url. - * This function works as a replacement for probing the contact. - * - * @param string $url Contact URL - * @param integer $cid Contact ID - * - * @return array Contact array in the "probe" structure - */ - private static function getProbeDataFromDatabase($url, $cid = null) - { - // The link could be provided as http although we stored it as https - $ssl_url = str_replace('http://', 'https://', $url); - - $fields = ['id', 'uid', 'url', 'addr', 'alias', 'notify', 'poll', 'name', 'nick', - 'photo', 'keywords', 'location', 'about', 'network', - 'priority', 'batch', 'request', 'confirm', 'poco']; - - if (!empty($cid)) { - $data = DBA::selectFirst('contact', $fields, ['id' => $cid]); - if (DBA::isResult($data)) { - return $data; - } - } - - $data = DBA::selectFirst('contact', $fields, ['nurl' => Strings::normaliseLink($url)]); - - if (!DBA::isResult($data)) { - $condition = ['alias' => [$url, Strings::normaliseLink($url), $ssl_url]]; - $data = DBA::selectFirst('contact', $fields, $condition); - } - - if (DBA::isResult($data)) { - // For security reasons we don't fetch key data from our users - $data["pubkey"] = ''; - return $data; - } - - $fields = ['url', 'addr', 'alias', 'notify', 'name', 'nick', - 'photo', 'keywords', 'location', 'about', 'network']; - $condition = ['alias' => [$url, Strings::normaliseLink($url), $ssl_url]]; - $data = DBA::selectFirst('contact', $fields, $condition); - - if (DBA::isResult($data)) { - $data["pubkey"] = ''; - $data["poll"] = ''; - $data["priority"] = 0; - $data["batch"] = ''; - $data["request"] = ''; - $data["confirm"] = ''; - $data["poco"] = ''; - return $data; - } - - $data = ActivityPub::probeProfile($url, false); - if (!empty($data)) { - return $data; - } - - $fields = ['url', 'addr', 'alias', 'notify', 'poll', 'name', 'nick', - 'photo', 'network', 'priority', 'batch', 'request', 'confirm']; - $data = DBA::selectFirst('fcontact', $fields, ['url' => $url]); - - if (!DBA::isResult($data)) { - $condition = ['alias' => [$url, Strings::normaliseLink($url), $ssl_url]]; - $data = DBA::selectFirst('contact', $fields, $condition); - } - - if (DBA::isResult($data)) { - $data["pubkey"] = ''; - $data["keywords"] = ''; - $data["location"] = ''; - $data["about"] = ''; - $data["poco"] = ''; - return $data; - } - - return []; - } - /** * Fetch the contact id for a given URL and user * @@ -1093,8 +1013,8 @@ class Contact * * @param string $url Contact URL * @param integer $uid The user id for the contact (0 = public contact) - * @param boolean $update true = always update, false = never update, null = update when not found or outdated - * @param array $default Default value for creating the contact when every else fails + * @param boolean $update true = always update, false = never update, null = update when not found + * @param array $default Default value for creating the contact when everything else fails * * @return integer Contact ID * @throws HTTPException\InternalServerErrorException @@ -1102,78 +1022,72 @@ class Contact */ public static function getIdForURL($url, $uid = 0, $update = null, $default = []) { - Logger::info('Get contact data', ['url' => $url, 'user' => $uid]); - $contact_id = 0; if ($url == '') { + Logger::notice('Empty url, quitting', ['url' => $url, 'user' => $uid, 'default' => $default]); return 0; } - $contact = self::getByURL($url, false, ['id', 'avatar', 'updated', 'network'], $uid); + $contact = self::getByURL($url, false, ['id', 'network'], $uid); if (!empty($contact)) { $contact_id = $contact["id"]; - if (empty($default) && in_array($contact['network'], [Protocol::MAIL, Protocol::PHANTOM]) && ($uid == 0)) { - // Update public mail accounts via their user's accounts - $fields = ['network', 'addr', 'name', 'nick', 'avatar', 'photo', 'thumb', 'micro']; - $mailcontact = DBA::selectFirst('contact', $fields, ["`addr` = ? AND `network` = ? AND `uid` != 0", $url, Protocol::MAIL]); - if (!DBA::isResult($mailcontact)) { - $mailcontact = DBA::selectFirst('contact', $fields, ["`nurl` = ? AND `network` = ? AND `uid` != 0", $url, Protocol::MAIL]); - } - - if (DBA::isResult($mailcontact)) { - DBA::update('contact', $mailcontact, ['id' => $contact_id]); - } - } - if (empty($update)) { + Logger::debug('Contact found', ['url' => $url, 'uid' => $uid, 'update' => $update, 'cid' => $contact_id]); return $contact_id; } } elseif ($uid != 0) { - // Non-existing user-specific contact, exiting + Logger::debug('Contact does not exist for the user', ['url' => $url, 'uid' => $uid, 'update' => $update]); + return 0; + } elseif (empty($default) && !is_null($update) && !$update) { + Logger::info('Contact not found, update not desired', ['url' => $url, 'uid' => $uid, 'update' => $update]); return 0; } - if (!$update && empty($default)) { - // When we don't want to update, we look if we know this contact in any way - $data = self::getProbeDataFromDatabase($url, $contact_id); - $background_update = true; - } elseif (!$update && !empty($default['network'])) { - // If there are default values, take these - $data = $default; - $background_update = false; - } else { - $data = []; - $background_update = false; - } + $data = []; - if ((empty($data) && is_null($update)) || $update) { + if (empty($default['network']) || $update) { $data = Probe::uri($url, "", $uid); - $probed = !empty($data['network']) && ($data['network'] != Protocol::PHANTOM); - } else { - $probed = false; + + // Take the default values when probing failed + if (!empty($default) && !in_array($data["network"], array_merge(Protocol::NATIVE_SUPPORT, [Protocol::PUMPIO]))) { + $data = array_merge($data, $default); + } + } elseif (!empty($default['network'])) { + $data = $default; } - // Take the default values when probing failed - if (!empty($default) && (empty($data['network']) || !in_array($data["network"], array_merge(Protocol::NATIVE_SUPPORT, [Protocol::PUMPIO])))) { - $data = array_merge($data, $default); + if (($uid == 0) && (empty($data['network']) || ($data['network'] == Protocol::PHANTOM))) { + // Fetch data for the public contact via the first found personal contact + /// @todo Check if this case can happen at all (possibly with mail accounts?) + $fields = ['name', 'nick', 'url', 'addr', 'alias', 'avatar', 'contact-type', + 'keywords', 'location', 'about', 'unsearchable', 'batch', 'notify', 'poll', + 'request', 'confirm', 'poco', 'subscribe', 'network', 'baseurl', 'gsid']; + + $personal_contact = DBA::selectFirst('contact', $fields, ["`addr` = ? AND `uid` != 0", $url]); + if (!DBA::isResult($personal_contact)) { + $personal_contact = DBA::selectFirst('contact', $fields, ["`nurl` = ? AND `uid` != 0", Strings::normaliseLink($url)]); + } + + if (DBA::isResult($personal_contact)) { + Logger::info('Take contact data from personal contact', ['url' => $url, 'update' => $update, 'contact' => $personal_contact, 'callstack' => System::callstack(20)]); + $data = $personal_contact; + $data['photo'] = $personal_contact['avatar']; + $data['account-type'] = $personal_contact['contact-type']; + $data['hide'] = $personal_contact['unsearchable']; + unset($data['avatar']); + unset($data['contact-type']); + unset($data['unsearchable']); + } } if (empty($data['network']) || ($data['network'] == Protocol::PHANTOM)) { - Logger::info('No valid network found', ['url' => $url, 'data' => $data, 'callstack' => System::callstack(20)]); + Logger::notice('No valid network found', ['url' => $url, 'uid' => $uid, 'default' => $default, 'update' => $update, 'callstack' => System::callstack(20)]); return 0; } - if (!empty($data['baseurl'])) { - $data['baseurl'] = GServer::cleanURL($data['baseurl']); - } - - if (!empty($data['baseurl']) && empty($data['gsid'])) { - $data['gsid'] = GServer::getID($data['baseurl']); - } - if (!$contact_id) { $urls = [Strings::normaliseLink($url), Strings::normaliseLink($data['url'])]; if (!empty($data['alias'])) { @@ -1187,75 +1101,45 @@ class Contact } if (!$contact_id) { + // We only insert the basic data. The rest will be done in "updateFromProbeArray" $fields = [ 'uid' => $uid, - 'created' => DateTimeFormat::utcNow(), 'url' => $data['url'], 'nurl' => Strings::normaliseLink($data['url']), - 'addr' => $data['addr'] ?? '', - 'alias' => $data['alias'] ?? '', - 'notify' => $data['notify'] ?? '', - 'poll' => $data['poll'] ?? '', - 'name' => $data['name'] ?? '', - 'nick' => $data['nick'] ?? '', - 'keywords' => $data['keywords'] ?? '', - 'location' => $data['location'] ?? '', - 'about' => $data['about'] ?? '', 'network' => $data['network'], - 'pubkey' => $data['pubkey'] ?? '', + 'created' => DateTimeFormat::utcNow(), 'rel' => self::SHARING, - 'priority' => $data['priority'] ?? 0, - 'batch' => $data['batch'] ?? '', - 'request' => $data['request'] ?? '', - 'confirm' => $data['confirm'] ?? '', - 'poco' => $data['poco'] ?? '', - 'baseurl' => $data['baseurl'] ?? '', - 'gsid' => $data['gsid'] ?? null, - 'name-date' => DateTimeFormat::utcNow(), - 'uri-date' => DateTimeFormat::utcNow(), - 'avatar-date' => DateTimeFormat::utcNow(), 'writable' => 1, 'blocked' => 0, 'readonly' => 0, 'pending' => 0]; - if (($uid == 0) && $probed) { - $fields['last-item'] = Probe::getLastUpdate($data); - Logger::info('Fetched last item', ['url' => $url, 'probed_url' => $data['url'], 'last-item' => $fields['last-item'], 'callstack' => System::callstack(20)]); - } - $condition = ['nurl' => Strings::normaliseLink($data["url"]), 'uid' => $uid, 'deleted' => false]; // Before inserting we do check if the entry does exist now. + DBA::lock('contact'); $contact = DBA::selectFirst('contact', ['id'], $condition, ['order' => ['id']]); - if (!DBA::isResult($contact)) { - Logger::info('Create new contact', $fields); - - self::insert($fields); - - // We intentionally aren't using lastInsertId here. There is a chance for duplicates. - $contact = DBA::selectFirst('contact', ['id'], $condition, ['order' => ['id']]); - if (!DBA::isResult($contact)) { - Logger::info('Contact creation failed', $fields); - // Shouldn't happen - return 0; - } + if (DBA::isResult($contact)) { + $contact_id = $contact['id']; + Logger::notice('Contact had been created (shortly) before', ['id' => $contact_id, 'url' => $url, 'uid' => $uid]); } else { - Logger::info('Contact had been created before', ['id' => $contact["id"], 'url' => $url, 'contact' => $fields]); + DBA::insert('contact', $fields); + $contact_id = DBA::lastInsertId(); + if ($contact_id) { + Logger::info('Contact inserted', ['id' => $contact_id, 'url' => $url, 'uid' => $uid]); + } + } + DBA::unlock(); + if (!$contact_id) { + Logger::info('Contact was not inserted', ['url' => $url, 'uid' => $uid]); + return 0; } - - $contact_id = $contact["id"]; - } - - if ($background_update && !$probed && in_array($data["network"], array_merge(Protocol::NATIVE_SUPPORT, [Protocol::PUMPIO]))) { - // Update in the background when we fetched the data solely from the database - Worker::add(PRIORITY_MEDIUM, "UpdateContact", $contact_id); - } elseif (!empty($data['network'])) { - self::updateFromProbeArray($contact_id, $data); } else { - Logger::info('Invalid data', ['url' => $url, 'data' => $data]); + Logger::info('Contact will be updated', ['url' => $url, 'uid' => $uid, 'update' => $update, 'cid' => $contact_id]); } + self::updateFromProbeArray($contact_id, $data); + return $contact_id; } @@ -2565,15 +2449,15 @@ class Contact return $url ?: $contact_url; // Equivalent to: ($url != '') ? $url : $contact_url; } - $data = self::getProbeDataFromDatabase($contact_url); - if (empty($data)) { + $contact = self::getByURL($contact_url, false); + if (empty($contact)) { return $url ?: $contact_url; // Equivalent to: ($url != '') ? $url : $contact_url; } // Prevents endless loop in case only a non-public contact exists for the contact URL - unset($data['uid']); + unset($contact['uid']); - return self::magicLinkByContact($data, $url ?: $contact_url); + return self::magicLinkByContact($contact, $url ?: $contact_url); } /** diff --git a/src/Model/Item.php b/src/Model/Item.php index 0a1038049..1ac8cf22d 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -3064,7 +3064,7 @@ class Item if (local_user() == $uid) { $item_contact_id = $owner_self_contact['id']; } else { - $item_contact_id = Contact::getIdForURL($author_contact['url'], $uid, false); + $item_contact_id = Contact::getIdForURL($author_contact['url'], $uid); $item_contact = DBA::selectFirst('contact', [], ['id' => $item_contact_id]); if (!DBA::isResult($item_contact)) { Logger::log('like: unknown item contact ' . $item_contact_id); diff --git a/src/Network/HTTPRequest.php b/src/Network/HTTPRequest.php index 87177b1a4..6fcd610aa 100644 --- a/src/Network/HTTPRequest.php +++ b/src/Network/HTTPRequest.php @@ -312,6 +312,11 @@ class HTTPRequest implements IHTTPRequest */ public function finalUrl(string $url, int $depth = 1, bool $fetchbody = false) { + if (Network::isUrlBlocked($url)) { + $this->logger->info('Domain is blocked.', ['url' => $url]); + return $url; + } + $url = Network::stripTrackingQueryParams($url); if ($depth > 10) { diff --git a/src/Protocol/ActivityPub/Processor.php b/src/Protocol/ActivityPub/Processor.php index 037c2e849..b4dba53cc 100644 --- a/src/Protocol/ActivityPub/Processor.php +++ b/src/Protocol/ActivityPub/Processor.php @@ -226,9 +226,9 @@ class Processor $item['network'] = Protocol::ACTIVITYPUB; $item['author-link'] = $activity['author']; - $item['author-id'] = Contact::getIdForURL($activity['author'], 0, false); + $item['author-id'] = Contact::getIdForURL($activity['author']); $item['owner-link'] = $activity['actor']; - $item['owner-id'] = Contact::getIdForURL($activity['actor'], 0, false); + $item['owner-id'] = Contact::getIdForURL($activity['actor']); if (in_array(0, $activity['receiver']) && !empty($activity['unlisted'])) { $item['private'] = Item::UNLISTED; @@ -528,13 +528,13 @@ class Processor $item['uid'] = $receiver; if ($item['isForum'] ?? false) { - $item['contact-id'] = Contact::getIdForURL($activity['actor'], $receiver, false); + $item['contact-id'] = Contact::getIdForURL($activity['actor'], $receiver); } else { - $item['contact-id'] = Contact::getIdForURL($activity['author'], $receiver, false); + $item['contact-id'] = Contact::getIdForURL($activity['author'], $receiver); } if (($receiver != 0) && empty($item['contact-id'])) { - $item['contact-id'] = Contact::getIdForURL($activity['author'], 0, false); + $item['contact-id'] = Contact::getIdForURL($activity['author']); } if (!empty($activity['directmessage'])) { diff --git a/src/Protocol/ActivityPub/Transmitter.php b/src/Protocol/ActivityPub/Transmitter.php index 6bf507ed8..6742ab7c2 100644 --- a/src/Protocol/ActivityPub/Transmitter.php +++ b/src/Protocol/ActivityPub/Transmitter.php @@ -150,7 +150,7 @@ class Transmitter */ public static function getOutbox($owner, $page = null) { - $public_contact = Contact::getIdForURL($owner['url'], 0, false); + $public_contact = Contact::getIdForURL($owner['url']); $condition = ['uid' => 0, 'contact-id' => $public_contact, 'author-id' => $public_contact, 'private' => [Item::PUBLIC, Item::UNLISTED], 'gravity' => [GRAVITY_PARENT, GRAVITY_COMMENT], diff --git a/src/Protocol/Feed.php b/src/Protocol/Feed.php index 9aab4f52c..1d68ff0e2 100644 --- a/src/Protocol/Feed.php +++ b/src/Protocol/Feed.php @@ -690,7 +690,7 @@ class Feed } $check_date = empty($last_update) ? '' : DateTimeFormat::utc($last_update); - $authorid = Contact::getIdForURL($owner["url"], 0, false); + $authorid = Contact::getIdForURL($owner["url"]); $condition = ["`uid` = ? AND `received` > ? AND NOT `deleted` AND `gravity` IN (?, ?) AND `private` != ? AND `visible` AND `wall` AND `parent-network` IN (?, ?, ?, ?)", diff --git a/src/Protocol/OStatus.php b/src/Protocol/OStatus.php index c00c029cd..0635be87d 100644 --- a/src/Protocol/OStatus.php +++ b/src/Protocol/OStatus.php @@ -219,7 +219,7 @@ class OStatus } // Ensure that we are having this contact (with uid=0) - $cid = Contact::getIdForURL($aliaslink, 0, false); + $cid = Contact::getIdForURL($aliaslink); if ($cid) { $fields = ['url', 'nurl', 'name', 'nick', 'alias', 'about', 'location']; @@ -2122,7 +2122,7 @@ class OStatus } $check_date = DateTimeFormat::utc($last_update); - $authorid = Contact::getIdForURL($owner["url"], 0, false); + $authorid = Contact::getIdForURL($owner["url"]); $condition = ["`uid` = ? AND `received` > ? AND NOT `deleted` AND `private` != ? AND `visible` AND `wall` AND `parent-network` IN (?, ?)",