From 85c7bacb0091639a947fd4dee438add66cee5d7f Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 10 Jul 2022 13:01:47 +0000 Subject: [PATCH] Improved server detection --- src/Model/GServer.php | 103 +++++++++++++++++++++----------- src/Module/Admin/Federation.php | 10 ++-- static/generator.config.php | 22 +++---- 3 files changed, 87 insertions(+), 48 deletions(-) diff --git a/src/Model/GServer.php b/src/Model/GServer.php index d04186240..fd87ec283 100644 --- a/src/Model/GServer.php +++ b/src/Model/GServer.php @@ -186,11 +186,12 @@ class GServer return self::check($server, $network, $force); } - public static function getNextUpdateDate(bool $success, string $created = '', string $last_contact = '') + public static function getNextUpdateDate(bool $success, string $created = '', string $last_contact = '', bool $undetected = false) { - // On successful contact process check again next week + // On successful contact process check again next week when it is a detected system. + // When we haven't detected the system, it could be a static website or a really old system. if ($success) { - return DateTimeFormat::utc('now +7 day'); + return DateTimeFormat::utc($undetected ? 'now +1 month' : 'now +7 day'); } $now = strtotime(DateTimeFormat::utcNow()); @@ -331,6 +332,11 @@ class GServer // Remove URL content that is not supposed to exist for a server url $url = rtrim(self::cleanURL($url), '/'); + if (empty($url)) { + Logger::notice('Empty URL.'); + return false; + } + if (!Network::isUrlValid($url)) { self::setFailure($url); return false; @@ -352,6 +358,11 @@ class GServer return false; } + if (empty($finalurl)) { + Logger::notice('Empty redirected URL.', ['url' => $url]); + return false; + } + // We only follow redirects when the path stays the same or the target url has no path. // Some systems have got redirects on their landing page to a single account page. This check handles it. if (((parse_url($url, PHP_URL_HOST) != parse_url($finalurl, PHP_URL_HOST)) && (parse_url($url, PHP_URL_PATH) == parse_url($finalurl, PHP_URL_PATH))) || @@ -367,9 +378,9 @@ class GServer (parse_url($url, PHP_URL_SCHEME) != parse_url($finalurl, PHP_URL_SCHEME))) { if (!Network::isUrlValid($finalurl)) { self::setFailure($finalurl); - return false; + } else { + $url = $finalurl; } - $url = $finalurl; } $in_webroot = empty(parse_url($url, PHP_URL_PATH)); @@ -410,11 +421,11 @@ class GServer if ($curlResult->isSuccess()) { $json = json_decode($curlResult->getBody(), true); - if (!empty($json)) { + if (!empty($json) && is_array($json)) { $data = self::fetchDataFromSystemActor($json, $serverdata); $serverdata = $data['server']; $systemactor = $data['actor']; - if (!$html_fetched && ($serverdata['detection-method'] == self::DETECT_AP_ACTOR)) { + if (!$html_fetched && !in_array($serverdata['detection-method'], [self::DETECT_SYSTEM_ACTOR, self::DETECT_AP_COLLECTION])) { $curlResult = DI::httpClient()->get($url, HttpClientAccept::HTML); } } elseif (!$html_fetched && (strlen($curlResult->getBody()) < 1000)) { @@ -447,9 +458,8 @@ class GServer } if ($validHostMeta) { - if ($serverdata['detection-method'] == self::DETECT_MANUAL) { + if (in_array($serverdata['detection-method'], [self::DETECT_MANUAL, self::DETECT_HEADER, self::DETECT_BODY])) { $serverdata['detection-method'] = self::DETECT_HOST_META; - $serverdata['platform'] = ''; } if (($serverdata['network'] == Protocol::PHANTOM) || in_array($serverdata['detection-method'], self::DETECT_UNSPECIFIC)) { @@ -476,6 +486,8 @@ class GServer $serverdata = self::detectGNUSocial($url, $serverdata); } } + } elseif (in_array($serverdata['platform'], ['friendica', 'friendika']) && in_array($serverdata['detection-method'], self::DETECT_UNSPECIFIC)) { + $serverdata = self::detectFriendica($url, $serverdata); } if (($serverdata['network'] == Protocol::PHANTOM) || in_array($serverdata['detection-method'], self::DETECT_UNSPECIFIC)) { @@ -507,7 +519,8 @@ class GServer // When a server is new, then there is no gserver entry yet. // But in "detectNetworkViaContacts" it could happen that a contact is updated, // and this can call this function here as well. - if (in_array($serverdata['network'], [Protocol::PHANTOM, Protocol::FEED]) && self::getID($url, true)) { + if (self::getID($url, true) && (in_array($serverdata['network'], [Protocol::PHANTOM, Protocol::FEED]) || + in_array($serverdata['detection-method'], [self::DETECT_MANUAL, self::DETECT_HEADER, self::DETECT_BODY, self::DETECT_HOST_META]))) { $serverdata = self::detectNetworkViaContacts($url, $serverdata); } @@ -535,7 +548,7 @@ class GServer $serverdata['registered-users'] = 0; } - $serverdata['next_contact'] = self::getNextUpdateDate(true); + $serverdata['next_contact'] = self::getNextUpdateDate(true, '', '', in_array($serverdata['network'], [Protocol::PHANTOM, Protocol::FEED])); $serverdata['last_contact'] = DateTimeFormat::utcNow(); $serverdata['failed'] = false; @@ -1222,7 +1235,7 @@ class GServer return ['server' => $serverdata, 'actor' => '']; } - $actor = JsonLD::compact($data); + $actor = JsonLD::compact($data, false); if (in_array(JsonLD::fetchElement($actor, '@type'), ActivityPub\Receiver::ACCOUNT_TYPES)) { $serverdata['network'] = Protocol::ACTIVITYPUB; $serverdata['site_name'] = JsonLD::fetchElement($actor, 'as:name', '@value'); @@ -1842,16 +1855,17 @@ class GServer } $platforms = array_merge($ap_platforms, $dfrn_platforms, $zap_platforms, $platforms); - $valid_platforms = array_values($platforms); $doc = new DOMDocument(); @$doc->loadHTML($curlResult->getBody()); $xpath = new DOMXPath($doc); + $assigned = false; // We can only detect honk via some HTML element on their page if ($xpath->query('//div[@id="honksonpage"]')->count() == 1) { $serverdata['platform'] = 'honk'; $serverdata['network'] = Protocol::ACTIVITYPUB; + $assigned = true; } $title = trim(XML::getFirstNodeValue($xpath, '//head/title/text()')); @@ -1884,27 +1898,23 @@ class GServer if (in_array($attr['name'], ['application-name', 'al:android:app_name', 'al:ios:app_name', 'twitter:app:name:googleplay', 'twitter:app:name:iphone', 'twitter:app:name:ipad', 'generator'])) { - $platform = str_replace(array_keys($platforms), array_values($platforms), $attr['content']); - $platform = strtolower(str_replace('/', ' ', $platform)); - $version_part = explode(' ', $platform); - - if (count($version_part) >= 2) { - if (in_array($version_part[0], array_values($dfrn_platforms))) { - $serverdata['network'] = Protocol::DFRN; - } elseif (in_array($version_part[0], array_values($ap_platforms))) { - $serverdata['network'] = Protocol::ACTIVITYPUB; - } elseif (in_array($version_part[0], array_values($zap_platforms))) { - $serverdata['network'] = Protocol::ZOT; - } - if (in_array(strtolower($version_part[0]), $valid_platforms)) { - $platform = strtolower($version_part[0]); - $serverdata['version'] = $version_part[1]; - } + $platform = str_ireplace(array_keys($platforms), array_values($platforms), $attr['content']); + $platform = str_replace('/', ' ', $platform); + $platform_parts = explode(' ', $platform); + if ((count($platform_parts) >= 2) && in_array(strtolower($platform_parts[0]), array_values($platforms))) { + $platform = $platform_parts[0]; + $serverdata['version'] = $platform_parts[1]; + } + if (in_array($platform, array_values($dfrn_platforms))) { + $serverdata['network'] = Protocol::DFRN; + } elseif (in_array($platform, array_values($ap_platforms))) { + $serverdata['network'] = Protocol::ACTIVITYPUB; + } elseif (in_array($platform, array_values($zap_platforms))) { + $serverdata['network'] = Protocol::ZOT; } if (in_array($platform, array_values($platforms))) { $serverdata['platform'] = $platform; - } elseif (empty($serverdata['platform'])) { - print_r($attr); + $assigned = true; } } } @@ -1939,8 +1949,7 @@ class GServer if (in_array($attr['property'], ['og:platform', 'generator'])) { if (in_array($attr['content'], array_keys($platforms))) { $serverdata['platform'] = $platforms[$attr['content']]; - } else { - print_r($attr); + $assigned = true; } if (in_array($attr['content'], array_keys($ap_platforms))) { @@ -1951,7 +1960,33 @@ class GServer } } - if (in_array($serverdata['platform'], $valid_platforms) && ($serverdata['detection-method'] == self::DETECT_MANUAL)) { + $list = $xpath->query('//link[@rel="me"]'); + foreach ($list as $node) { + foreach ($node->attributes as $attribute) { + if (parse_url(trim($attribute->value), PHP_URL_HOST) == 'micro.blog') { + $serverdata['version'] = trim($serverdata['platform'] . ' ' . $serverdata['version']); + $serverdata['platform'] = 'microblog'; + $serverdata['network'] = Protocol::ACTIVITYPUB; + $assigned = true; + } + } + } + + if ($serverdata['platform'] != 'microblog') { + $list = $xpath->query('//link[@rel="micropub"]'); + foreach ($list as $node) { + foreach ($node->attributes as $attribute) { + if (trim($attribute->value) == 'https://micro.blog/micropub') { + $serverdata['version'] = trim($serverdata['platform'] . ' ' . $serverdata['version']); + $serverdata['platform'] = 'microblog'; + $serverdata['network'] = Protocol::ACTIVITYPUB; + $assigned = true; + } + } + } + } + + if ($assigned && in_array($serverdata['detection-method'], [self::DETECT_MANUAL, self::DETECT_HEADER])) { $serverdata['detection-method'] = self::DETECT_BODY; } diff --git a/src/Module/Admin/Federation.php b/src/Module/Admin/Federation.php index ccde3b4de..c5badefe9 100644 --- a/src/Module/Admin/Federation.php +++ b/src/Module/Admin/Federation.php @@ -46,10 +46,11 @@ class Federation extends BaseAdmin 'gnusocial' => ['name' => 'GNU Social/Statusnet', 'color' => '#a22430'], // dark red from the logo 'gotosocial' => ['name' => 'GoToSocial', 'color' => '#df8958'], // Some color from their mascot 'hometown' => ['name' => 'Hometown', 'color' => '#1f70c1'], // Color from the Patreon page + 'honk' => ['name' => 'Honk', 'color' => '##0d0d0d'], // Background color from the page 'hubzilla' => ['name' => 'Hubzilla/Red Matrix', 'color' => '#43488a'], // blue from the logo - 'hugo' => ['name' => 'Hugo', 'color' => '#0a1922'], // Color from the homepage background 'lemmy' => ['name' => 'Lemmy', 'color' => '#00c853'], // Green from the page 'mastodon' => ['name' => 'Mastodon', 'color' => '#1a9df9'], // blue from the Mastodon logo + 'microblog' => ['name' => 'Microblog', 'color' => '#fdb52b'], // Color from the page 'misskey' => ['name' => 'Misskey', 'color' => '#ccfefd'], // Font color of the homepage 'mobilizon' => ['name' => 'Mobilizon', 'color' => '#ffd599'], // Background color of parts of the homepage 'nextcloud' => ['name' => 'Nextcloud', 'color' => '#1cafff'], // Logo color @@ -85,7 +86,8 @@ class Federation extends BaseAdmin SUM(IFNULL(`active-month-users`, `active-week-users`)) AS `month`, SUM(IFNULL(`active-halfyear-users`, `active-week-users`)) AS `halfyear`, `platform`, ANY_VALUE(`network`) AS `network`, MAX(`version`) AS `version` - FROM `gserver` WHERE NOT `failed` AND `detection-method` != ? AND NOT `network` IN (?, ?) GROUP BY `platform`", GServer::DETECT_MANUAL, Protocol::PHANTOM, Protocol::FEED); + FROM `gserver` WHERE NOT `failed` AND `platform` != ? AND `detection-method` != ? AND NOT `network` IN (?, ?) GROUP BY `platform`", + '', GServer::DETECT_MANUAL, Protocol::PHANTOM, Protocol::FEED); while ($gserver = DBA::fetch($gservers)) { $total += $gserver['total']; $users += $gserver['users']; @@ -102,7 +104,7 @@ class Federation extends BaseAdmin if (in_array($gserver['platform'], ['Red Matrix', 'redmatrix', 'red'])) { $version['version'] = 'Red ' . $version['version']; - } elseif (in_array($gserver['platform'], ['osada', 'mistpark', 'roadhouse', 'zap'])) { + } elseif (in_array($gserver['platform'], ['osada', 'mistpark', 'roadhouse', 'zap', 'macgirvin', 'mkultra'])) { $version['version'] = $gserver['platform'] . ' ' . $version['version']; } elseif (in_array($gserver['platform'], ['activityrelay', 'pub-relay', 'selective-relay', 'aoderelay'])) { $version['version'] = $gserver['platform'] . '-' . $version['version']; @@ -118,7 +120,7 @@ class Federation extends BaseAdmin $platform = 'friendica'; } elseif (in_array($platform, ['red matrix', 'redmatrix', 'red'])) { $platform = 'hubzilla'; - } elseif (in_array($platform, ['mistpark', 'osada', 'roadhouse', 'zap'])) { + } elseif (in_array($platform, ['osada', 'mistpark', 'roadhouse', 'zap', 'macgirvin', 'mkultra'])) { $platform = 'mistpark'; } elseif(stristr($platform, 'pleroma')) { $platform = 'pleroma'; diff --git a/static/generator.config.php b/static/generator.config.php index 6a5c7c5ef..d1397f3cb 100644 --- a/static/generator.config.php +++ b/static/generator.config.php @@ -25,7 +25,7 @@ $platforms = [ 'BaseKit' => 'basekit', 'BBEdit' => 'bbedit', 'Big Cartel' => 'big-cartel', - 'blogger' => 'blogger', + 'Blogger' => 'blogger', 'Bloom' => 'bloom', 'Bludit' => 'bludit', 'BunnyPress' => 'bunnypress', @@ -48,25 +48,31 @@ $platforms = [ 'filerun' => 'filerun', 'FlatPress' => 'flatpress', 'Gatsby' => 'gatsby', + 'Ghost' => 'ghost', 'gitweb' => 'gitweb', 'gnusocial' => 'gnusocial', + 'Government Site Builder' => 'government-site-builder', 'GravCMS' => 'gravcms', 'grocy' => 'grocy', 'Gruta' => 'gruta', 'hakyll' => 'hakyll', 'HedgeDoc - Collaborative markdown notes' => 'hedgedoc', - 'helloworld' => 'helloworld', + 'Hello, world. https://github.com/mimecuvalo/helloworld' => 'helloworld', 'Hexo' => 'hexo', - 'honk' => 'honk', + 'Hugo' => 'hugo', 'ian' => 'ian', 'InterRed' => 'interred', 'Ikiwiki' => 'ikiwiki', + 'Jekyll' => 'jekyll', 'Joomla!' => 'joomla', 'KeyHelp' => 'keyhelp', + 'Known https://withknown.com' => 'known', 'KONTEXT-CMS (c) WARENFORM [www.warenform.net]' => 'kontext-cms', 'ktistec' => 'ktistec', + 'lemoncurry' => 'lemoncurry', 'LibreOffice' => 'libreoffice', 'Magazine News Byte' => 'magazine-news-byte', + 'Magnet' => 'magnet', 'mastodon' => 'mastodon', 'Mattermost' => 'mattermost', 'MediaWiki' => 'mediawiki', @@ -84,7 +90,6 @@ $platforms = [ 'Org mode' => 'org-mode', 'Org-mode' => 'org-mode', 'Org Mode' => 'org-mode', - 'orig4' => 'orig4', 'Osclass' => 'osclass', 'pamphlets/vinyl-press' => 'pamphlets', 'peertube' => 'peertube', @@ -100,6 +105,7 @@ $platforms = [ 'Sedo' => 'sedo', 'sitebaker' => 'sitebaker', 'SitePad' => 'sitepad', + 'SMAR' => 'smar', 'SPIP' => 'spip', 'STUDIO' => 'studio', 'Synology - Synology DiskStation' => 'synology', @@ -116,20 +122,16 @@ $platforms = [ 'Webflow' => 'webflow', 'WikkaWiki' => 'wikkawiki', 'Wix.com' => 'wix.com', - 'WordPress' => 'wordpress', 'WordPress.com' => 'wordpress', + 'WordPress' => 'wordpress', 'Write.as' => 'write.as', 'XAG/CMS' => 'xagcms', 'Zim' => 'zim', ]; $ap_platforms = [ + 'honk' => 'honk', 'PeerTube' => 'peertube', - 'Hugo' => 'hugo', - 'lemoncurry' => 'lemoncurry', - 'Ghost' => 'ghost', - 'Jekyll' => 'jekyll', - 'Known https://withknown.com' => 'known', ]; $dfrn_platforms = [