Merge pull request #8976 from annando/probe-last
Probe for the date of the last item
This commit is contained in:
commit
f81164de7a
5 changed files with 197 additions and 200 deletions
|
@ -1158,13 +1158,12 @@ class Contact
|
|||
* @param integer $uid The user id for the contact (0 = public contact)
|
||||
* @param boolean $update true = always update, false = never update, null = update when not found or outdated
|
||||
* @param array $default Default value for creating the contact when everything else fails
|
||||
* @param boolean $in_loop Internally used variable to prevent an endless loop
|
||||
*
|
||||
* @return integer Contact ID
|
||||
* @throws HTTPException\InternalServerErrorException
|
||||
* @throws \ImagickException
|
||||
*/
|
||||
public static function getIdForURL($url, $uid = 0, $update = null, $default = [], $in_loop = false)
|
||||
public static function getIdForURL($url, $uid = 0, $update = null, $default = [])
|
||||
{
|
||||
Logger::info('Get contact data', ['url' => $url, 'user' => $uid]);
|
||||
|
||||
|
@ -1218,11 +1217,11 @@ class Contact
|
|||
}
|
||||
|
||||
// Take the default values when probing failed
|
||||
if (!empty($default) && !in_array($data["network"], array_merge(Protocol::NATIVE_SUPPORT, [Protocol::PUMPIO]))) {
|
||||
if (!empty($default) && (empty($data['network']) || !in_array($data["network"], array_merge(Protocol::NATIVE_SUPPORT, [Protocol::PUMPIO])))) {
|
||||
$data = array_merge($data, $default);
|
||||
}
|
||||
|
||||
if (empty($data) || ($data['network'] == Protocol::PHANTOM)) {
|
||||
if (empty($data['network']) || ($data['network'] == Protocol::PHANTOM)) {
|
||||
Logger::info('No valid network found', ['url' => $url, 'data' => $data, 'callstack' => System::callstack(20)]);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1235,8 +1234,26 @@ class Contact
|
|||
$data['gsid'] = GServer::getID($data['baseurl']);
|
||||
}
|
||||
|
||||
if (!$contact_id && !empty($data['alias']) && ($data['alias'] != $data['url']) && !$in_loop) {
|
||||
$contact_id = self::getIdForURL($data["alias"], $uid, false, $default, true);
|
||||
if (!$contact_id && !empty($data['alias']) && ($data['alias'] != $data['url'])) {
|
||||
$contact = self::getByURL($data['alias'], false, ['id']);
|
||||
if (!empty($contact['id'])) {
|
||||
$contact_id = $contact['id'];
|
||||
Logger::info('Fetched id by alias', ['cid' => $contact_id, 'url' => $url, 'probed_url' => $data['url'], 'alias' => $data['alias']]);
|
||||
}
|
||||
}
|
||||
|
||||
// Possibly there is a contact entry with the probed URL
|
||||
if (!$contact_id && ($url != $data['url']) && ($url != $data['alias'])) {
|
||||
$contact = self::getByURL($data['url'], false, ['id']);
|
||||
if (!empty($contact['id'])) {
|
||||
$contact_id = $contact['id'];
|
||||
Logger::info('Fetched id by url', ['cid' => $contact_id, 'url' => $url, 'probed_url' => $data['url'], 'alias' => $data['alias']]);
|
||||
}
|
||||
}
|
||||
|
||||
if ($uid == 0) {
|
||||
$data['last-item'] = Probe::getLastUpdate($data);
|
||||
Logger::info('Fetched last item', ['url' => $url, 'probed_url' => $data['url'], 'last-item' => $data['last-item'], 'callstack' => System::callstack(20)]);
|
||||
}
|
||||
|
||||
if (!$contact_id) {
|
||||
|
@ -1272,6 +1289,10 @@ class Contact
|
|||
'readonly' => 0,
|
||||
'pending' => 0];
|
||||
|
||||
if (!empty($data['last-item'])) {
|
||||
$fields['last-item'] = $data['last-item'];
|
||||
}
|
||||
|
||||
$condition = ['nurl' => Strings::normaliseLink($data["url"]), 'uid' => $uid, 'deleted' => false];
|
||||
|
||||
// Before inserting, check whether the entry has been created in the meantime.
|
||||
|
@ -1308,7 +1329,7 @@ class Contact
|
|||
self::updateFromProbe($contact_id, '', false);
|
||||
}
|
||||
} else {
|
||||
$fields = ['url', 'nurl', 'addr', 'alias', 'name', 'nick', 'keywords', 'location', 'about', 'avatar-date', 'baseurl', 'gsid'];
|
||||
$fields = ['url', 'nurl', 'addr', 'alias', 'name', 'nick', 'keywords', 'location', 'about', 'avatar-date', 'baseurl', 'gsid', 'last-item'];
|
||||
$contact = DBA::selectFirst('contact', $fields, ['id' => $contact_id]);
|
||||
|
||||
// This condition should always be true
|
||||
|
@ -1329,6 +1350,10 @@ class Contact
|
|||
$updated[$field] = ($data[$field] ?? '') ?: $contact[$field];
|
||||
}
|
||||
|
||||
if (!empty($data['last-item']) && ($contact['last-item'] < $data['last-item'])) {
|
||||
$updated['last-item'] = $data['last-item'];
|
||||
}
|
||||
|
||||
if (($updated['addr'] != $contact['addr']) || (!empty($data['alias']) && ($data['alias'] != $contact['alias']))) {
|
||||
$updated['uri-date'] = DateTimeFormat::utcNow();
|
||||
}
|
||||
|
@ -2806,183 +2831,6 @@ class Contact
|
|||
return ['count' => $count, 'added' => $added, 'updated' => $updated];
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the last date that the contact had posted something
|
||||
*
|
||||
* This functionality is currently unused
|
||||
*
|
||||
* @param string $data probing result
|
||||
* @param bool $force force updating
|
||||
*/
|
||||
private static function setLastUpdate(array $data, bool $force = false)
|
||||
{
|
||||
$contact = self::getByURL($data['url'], false, []);
|
||||
if (empty($contact)) {
|
||||
return;
|
||||
}
|
||||
if (!$force && !GServer::updateNeeded($contact['created'], $contact['updated'], $contact['last_failure'], $contact['last_contact'])) {
|
||||
Logger::info("Don't update profile", ['url' => $data['url'], 'updated' => $contact['updated']]);
|
||||
return;
|
||||
}
|
||||
|
||||
if (self::updateFromNoScrape($data)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!empty($data['outbox'])) {
|
||||
self::updateFromOutbox($data['outbox'], $data);
|
||||
} elseif (!empty($data['poll']) && ($data['network'] == Protocol::ACTIVITYPUB)) {
|
||||
self::updateFromOutbox($data['poll'], $data);
|
||||
} elseif (!empty($data['poll'])) {
|
||||
self::updateFromFeed($data);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update a global contact via the "noscrape" endpoint
|
||||
*
|
||||
* @param string $data Probing result
|
||||
*
|
||||
* @return bool 'true' if update was successful or the server was unreachable
|
||||
*/
|
||||
private static function updateFromNoScrape(array $data)
|
||||
{
|
||||
// Check the 'noscrape' endpoint when it is a Friendica server
|
||||
$gserver = DBA::selectFirst('gserver', ['noscrape'], ["`nurl` = ? AND `noscrape` != ''",
|
||||
Strings::normaliseLink($data['baseurl'])]);
|
||||
if (!DBA::isResult($gserver)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$curlResult = DI::httpRequest()->get($gserver['noscrape'] . '/' . $data['nick']);
|
||||
|
||||
if ($curlResult->isSuccess() && !empty($curlResult->getBody())) {
|
||||
$noscrape = json_decode($curlResult->getBody(), true);
|
||||
if (!empty($noscrape) && !empty($noscrape['updated'])) {
|
||||
$noscrape['updated'] = DateTimeFormat::utc($noscrape['updated'], DateTimeFormat::MYSQL);
|
||||
$fields = ['failed' => false, 'last_contact' => DateTimeFormat::utcNow(), 'updated' => $noscrape['updated']];
|
||||
DBA::update('contact', $fields, ['nurl' => Strings::normaliseLink($data['url'])]);
|
||||
return true;
|
||||
}
|
||||
} elseif ($curlResult->isTimeout()) {
|
||||
// On a timeout return the existing value, but mark the contact as failure
|
||||
$fields = ['failed' => true, 'last_failure' => DateTimeFormat::utcNow()];
|
||||
DBA::update('contact', $fields, ['nurl' => Strings::normaliseLink($data['url'])]);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update a global contact via an ActivityPub Outbox
|
||||
*
|
||||
* @param string $feed
|
||||
* @param array $data Probing result
|
||||
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
|
||||
*/
|
||||
private static function updateFromOutbox(string $feed, array $data)
|
||||
{
|
||||
$outbox = ActivityPub::fetchContent($feed);
|
||||
if (empty($outbox)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!empty($outbox['orderedItems'])) {
|
||||
$items = $outbox['orderedItems'];
|
||||
} elseif (!empty($outbox['first']['orderedItems'])) {
|
||||
$items = $outbox['first']['orderedItems'];
|
||||
} elseif (!empty($outbox['first']['href']) && ($outbox['first']['href'] != $feed)) {
|
||||
self::updateFromOutbox($outbox['first']['href'], $data);
|
||||
return;
|
||||
} elseif (!empty($outbox['first'])) {
|
||||
if (is_string($outbox['first']) && ($outbox['first'] != $feed)) {
|
||||
self::updateFromOutbox($outbox['first'], $data);
|
||||
} else {
|
||||
Logger::warning('Unexpected data', ['outbox' => $outbox]);
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
$items = [];
|
||||
}
|
||||
|
||||
$last_updated = '';
|
||||
foreach ($items as $activity) {
|
||||
if (!empty($activity['published'])) {
|
||||
$published = DateTimeFormat::utc($activity['published']);
|
||||
} elseif (!empty($activity['object']['published'])) {
|
||||
$published = DateTimeFormat::utc($activity['object']['published']);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($last_updated < $published) {
|
||||
$last_updated = $published;
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($last_updated)) {
|
||||
return;
|
||||
}
|
||||
|
||||
$fields = ['failed' => false, 'last_contact' => DateTimeFormat::utcNow(), 'updated' => $last_updated];
|
||||
DBA::update('contact', $fields, ['nurl' => Strings::normaliseLink($data['url'])]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Update a global contact via an XML feed
|
||||
*
|
||||
* @param string $data Probing result
|
||||
*/
|
||||
private static function updateFromFeed(array $data)
|
||||
{
|
||||
// Search for the newest entry in the feed
|
||||
$curlResult = DI::httpRequest()->get($data['poll']);
|
||||
if (!$curlResult->isSuccess()) {
|
||||
$fields = ['failed' => true, 'last_failure' => DateTimeFormat::utcNow()];
|
||||
DBA::update('contact', $fields, ['nurl' => Strings::normaliseLink($data['url'])]);
|
||||
|
||||
Logger::info("Profile wasn't reachable (no feed)", ['url' => $data['url']]);
|
||||
return;
|
||||
}
|
||||
|
||||
$doc = new DOMDocument();
|
||||
@$doc->loadXML($curlResult->getBody());
|
||||
|
||||
$xpath = new DOMXPath($doc);
|
||||
$xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom');
|
||||
|
||||
$entries = $xpath->query('/atom:feed/atom:entry');
|
||||
|
||||
$last_updated = '';
|
||||
|
||||
foreach ($entries as $entry) {
|
||||
$published_item = $xpath->query('atom:published/text()', $entry)->item(0);
|
||||
$updated_item = $xpath->query('atom:updated/text()' , $entry)->item(0);
|
||||
$published = !empty($published_item->nodeValue) ? DateTimeFormat::utc($published_item->nodeValue) : null;
|
||||
$updated = !empty($updated_item->nodeValue) ? DateTimeFormat::utc($updated_item->nodeValue) : null;
|
||||
|
||||
if (empty($published) || empty($updated)) {
|
||||
Logger::notice('Invalid entry for XPath.', ['entry' => $entry, 'url' => $data['url']]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($last_updated < $published) {
|
||||
$last_updated = $published;
|
||||
}
|
||||
|
||||
if ($last_updated < $updated) {
|
||||
$last_updated = $updated;
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($last_updated)) {
|
||||
return;
|
||||
}
|
||||
|
||||
$fields = ['failed' => false, 'last_contact' => DateTimeFormat::utcNow(), 'updated' => $last_updated];
|
||||
DBA::update('contact', $fields, ['nurl' => Strings::normaliseLink($data['url'])]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a random, global contact of the current node
|
||||
*
|
||||
|
|
|
@ -38,6 +38,7 @@ use Friendica\Protocol\ActivityPub;
|
|||
use Friendica\Protocol\Email;
|
||||
use Friendica\Protocol\Feed;
|
||||
use Friendica\Util\Crypto;
|
||||
use Friendica\Util\DateTimeFormat;
|
||||
use Friendica\Util\Network;
|
||||
use Friendica\Util\Strings;
|
||||
use Friendica\Util\XML;
|
||||
|
@ -2009,4 +2010,165 @@ class Probe
|
|||
|
||||
return $fixed;
|
||||
}
|
||||
|
||||
/**
 * Fetch the last date that the contact had posted something (publicly)
 *
 * The sources are tried in this order:
 *  1. the "noscrape" endpoint of the contact's server,
 *  2. the ActivityPub outbox,
 *  3. the poll URL, read as an outbox for ActivityPub contacts,
 *  4. the poll URL, read as an XML feed for every other network.
 *
 * @param array $data probing result
 * @return string date of the last activity, empty string when none was found
 */
public static function getLastUpdate(array $data)
{
	$lastUpdate = self::updateFromNoScrape($data);
	if (!empty($lastUpdate)) {
		return $lastUpdate;
	}

	if (!empty($data['outbox'])) {
		return self::updateFromOutbox($data['outbox'], $data);
	}

	if (empty($data['poll'])) {
		return '';
	}

	// The probe result is not guaranteed to carry a network, so don't read it blindly
	if (($data['network'] ?? '') == Protocol::ACTIVITYPUB) {
		return self::updateFromOutbox($data['poll'], $data);
	}

	return self::updateFromFeed($data);
}
|
||||
|
||||
/**
 * Fetch the last activity date from the "noscrape" endpoint
 *
 * The endpoint is looked up in the "gserver" table via the normalised
 * base URL of the probe result; the contact's nick is appended to it.
 *
 * @param array $data Probing result ('baseurl' and 'nick' are read)
 * @return string last activity, empty string when it could not be fetched
 */
private static function updateFromNoScrape(array $data)
{
	// Without a base URL or a nick there is no noscrape address to build
	if (empty($data['baseurl']) || (($data['nick'] ?? '') === '')) {
		return '';
	}

	// Check the 'noscrape' endpoint when it is a Friendica server
	$condition = ["`nurl` = ? AND `noscrape` != ''", Strings::normaliseLink($data['baseurl'])];
	$gserver = DBA::selectFirst('gserver', ['noscrape'], $condition);
	if (!DBA::isResult($gserver)) {
		return '';
	}

	$curlResult = DI::httpRequest()->get($gserver['noscrape'] . '/' . $data['nick']);
	if (!$curlResult->isSuccess()) {
		return '';
	}

	$body = $curlResult->getBody();
	if (empty($body)) {
		return '';
	}

	// The body is remote data: only accept a decoded array that carries an "updated" value
	$noscrape = json_decode($body, true);
	if (!is_array($noscrape) || empty($noscrape['updated'])) {
		return '';
	}

	return DateTimeFormat::utc($noscrape['updated'], DateTimeFormat::MYSQL);
}
|
||||
|
||||
/**
 * Fetch the last activity date from an ActivityPub Outbox
 *
 * When the outbox has no items itself, the link in "first" is followed.
 * This is done iteratively and guarded against link loops and endless
 * page chains, since the links are supplied by the remote server.
 *
 * @param string $feed URL of the outbox
 * @param array  $data Probing result
 * @return string last activity, empty string when none was found
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 */
private static function updateFromOutbox(string $feed, array $data)
{
	// Upper bound for the number of documents fetched while following "first" links
	$max_fetches = 10;
	$visited     = [];
	$items       = [];

	while (true) {
		if (isset($visited[$feed]) || (count($visited) >= $max_fetches)) {
			Logger::warning('Outbox links are looping or too deep', ['feed' => $feed, 'url' => $data['url'] ?? '']);
			return '';
		}
		$visited[$feed] = true;

		$outbox = ActivityPub::fetchContent($feed);
		if (empty($outbox)) {
			return '';
		}

		if (!empty($outbox['orderedItems'])) {
			$items = $outbox['orderedItems'];
			break;
		}

		if (!empty($outbox['first']['orderedItems'])) {
			$items = $outbox['first']['orderedItems'];
			break;
		}

		if (empty($outbox['first'])) {
			// Neither items nor a link to a first page
			return '';
		}

		// "first" is either a link object with "href" or a plain URL
		if (!empty($outbox['first']['href']) && is_string($outbox['first']['href']) && ($outbox['first']['href'] != $feed)) {
			$feed = $outbox['first']['href'];
		} elseif (is_string($outbox['first']) && ($outbox['first'] != $feed)) {
			$feed = $outbox['first'];
		} else {
			Logger::warning('Unexpected data', ['outbox' => $outbox]);
			return '';
		}
	}

	if (!is_array($items)) {
		return '';
	}

	$last_updated = '';
	foreach ($items as $activity) {
		if (!empty($activity['published'])) {
			$published = DateTimeFormat::utc($activity['published']);
		} elseif (!empty($activity['object']['published'])) {
			$published = DateTimeFormat::utc($activity['object']['published']);
		} else {
			// Items without a publishing date (e.g. plain links) can't be used
			continue;
		}

		if ($last_updated < $published) {
			$last_updated = $published;
		}
	}

	return $last_updated;
}
|
||||
|
||||
/**
 * Fetch the last activity date from an XML feed
 *
 * The newest "published" or "updated" date of all Atom entries is used.
 * An entry is taken into account as soon as it has one of the two dates,
 * since "published" is optional in Atom.
 *
 * @param array $data Probing result ('poll' is the feed URL)
 * @return string last activity, empty string when none was found
 */
private static function updateFromFeed(array $data)
{
	if (empty($data['poll'])) {
		return '';
	}

	// Search for the newest entry in the feed
	$curlResult = DI::httpRequest()->get($data['poll']);
	if (!$curlResult->isSuccess()) {
		return '';
	}

	// loadXML() rejects an empty source, and "@" only silences warnings, not exceptions
	$body = $curlResult->getBody();
	if (empty($body)) {
		return '';
	}

	$doc = new DOMDocument();
	if (!@$doc->loadXML($body)) {
		return '';
	}

	$xpath = new DOMXPath($doc);
	$xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom');

	$entries = $xpath->query('/atom:feed/atom:entry');
	if (empty($entries)) {
		return '';
	}

	$last_updated = '';

	foreach ($entries as $entry) {
		$published_item = $xpath->query('atom:published/text()', $entry)->item(0);
		$updated_item   = $xpath->query('atom:updated/text()', $entry)->item(0);

		$dates = [];
		if (!empty($published_item->nodeValue)) {
			$dates[] = DateTimeFormat::utc($published_item->nodeValue);
		}
		if (!empty($updated_item->nodeValue)) {
			$dates[] = DateTimeFormat::utc($updated_item->nodeValue);
		}

		$dates = array_filter($dates);
		if (empty($dates)) {
			Logger::notice('Invalid entry for XPath.', ['entry' => $entry, 'url' => $data['url'] ?? '']);
			continue;
		}

		foreach ($dates as $date) {
			if ($last_updated < $date) {
				$last_updated = $date;
			}
		}
	}

	return $last_updated;
}
|
||||
}
|
||||
|
|
|
@ -1106,20 +1106,7 @@ class Diaspora
|
|||
*/
|
||||
private static function contactByHandle($uid, $handle)
|
||||
{
|
||||
$cid = Contact::getIdForURL($handle, $uid);
|
||||
if (!$cid) {
|
||||
Logger::log("Haven't found a contact for user " . $uid . " and handle " . $handle, Logger::DEBUG);
|
||||
return false;
|
||||
}
|
||||
|
||||
$contact = DBA::selectFirst('contact', [], ['id' => $cid]);
|
||||
if (!DBA::isResult($contact)) {
|
||||
// This here shouldn't happen at all
|
||||
Logger::log("Haven't found a contact for user " . $uid . " and handle " . $handle, Logger::DEBUG);
|
||||
return false;
|
||||
}
|
||||
|
||||
return $contact;
|
||||
return Contact::getByURL($handle, null, [], $uid);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -2017,7 +2017,7 @@ class OStatus
|
|||
$mentioned = $newmentions;
|
||||
|
||||
foreach ($mentioned as $mention) {
|
||||
$contact = Contact::getByURL($mention, ['contact-type']);
|
||||
$contact = Contact::getByURL($mention, false, ['contact-type']);
|
||||
if (!empty($contact) && ($contact['contact-type'] == Contact::TYPE_COMMUNITY)) {
|
||||
XML::addElement($doc, $entry, "link", "",
|
||||
[
|
||||
|
|
|
@ -201,7 +201,7 @@ function frio_remote_nav($a, &$nav)
|
|||
$remoteUser = Contact::getById(remote_user(), $fields);
|
||||
$nav['remote'] = DI::l10n()->t('Guest');
|
||||
} elseif (Model\Profile::getMyURL()) {
|
||||
$remoteUser = Contact::getByURL($homelink, $fields);
|
||||
$remoteUser = Contact::getByURL($homelink, null, $fields);
|
||||
$nav['remote'] = DI::l10n()->t('Visitor');
|
||||
} else {
|
||||
$remoteUser = null;
|
||||
|
|
Loading…
Reference in a new issue