Reviewed revision of the patch adding Atom 0.3 and OPML support to the feed importer.

Changes relative to the submitted patch (all in src/Protocol/Feed.php):
  * $atomns now defaults to 'atom' instead of ''. With an empty prefix every
    per-entry query for RSS/RDF/OPML feeds became ":link[...]", ":id/text()", ...
    which is not valid XPath; the pre-patch code always queried "atom:...".
    The feed-level Atom block is gated on $protocol instead of !empty($atomns).
  * Fixed the quoting of the avatar and enclosure XPath expressions. The
    submitted lines mixed ' and " so that "' . $atomns . '" ended up as literal
    text inside the expression.
  * $body is reset at the top of the entry loop. It is now only assigned behind
    "if (empty($body))", so without the reset each entry inherited the body of
    the previous one.
  * Dropped a redundant "'' ." prefix on the poco:displayName query.

The reset adds one line, so the new-file offsets of the hunks after it are
shifted by one. The post-image hash on the Feed.php "index" line still refers
to the originally submitted file content.

diff --git a/src/Model/Conversation.php b/src/Model/Conversation.php
index 3fa17228d..1c2a75b58 100644
--- a/src/Model/Conversation.php
+++ b/src/Model/Conversation.php
@@ -41,6 +41,8 @@ class Conversation
 	const PARCEL_RDF = 12;
 	const PARCEL_RSS = 13;
 	const PARCEL_ATOM = 14;
+	const PARCEL_ATOM03 = 15;
+	const PARCEL_OPML = 16;
 	const PARCEL_TWITTER = 67;
 	const PARCEL_UNKNOWN = 255;
 
diff --git a/src/Protocol/Feed.php b/src/Protocol/Feed.php
index f31513b23..8c6b4c28d 100644
--- a/src/Protocol/Feed.php
+++ b/src/Protocol/Feed.php
@@ -93,11 +93,8 @@ class Feed
 		@$doc->loadXML($xml);
 		$xpath = new DOMXPath($doc);
 
-		if (strpos($xml, ActivityNamespace::ATOM03) && !strpos($xml, ActivityNamespace::ATOM1)) {
-			$xpath->registerNamespace('atom', ActivityNamespace::ATOM03);
-		} else {
-			$xpath->registerNamespace('atom', ActivityNamespace::ATOM1);
-		}
+		$xpath->registerNamespace('atom', ActivityNamespace::ATOM1);
+		$xpath->registerNamespace('atom03', ActivityNamespace::ATOM03);
 		$xpath->registerNamespace('dc', 'http://purl.org/dc/elements/1.1/');
 		$xpath->registerNamespace('content', 'http://purl.org/rss/1.0/modules/content/');
 		$xpath->registerNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
@@ -106,6 +103,7 @@ class Feed
 		$xpath->registerNamespace('poco', ActivityNamespace::POCO);
 
 		$author = [];
+		$atomns = 'atom';
 		$entries = null;
 		$protocol = Conversation::PARCEL_UNKNOWN;
 
@@ -121,10 +119,23 @@ class Feed
 			$entries = $xpath->query('/rdf:RDF/rss:item');
 		}
 
+		if ($xpath->query('/opml')->length > 0) {
+			$protocol = Conversation::PARCEL_OPML;
+			$author['author-name'] = XML::getFirstNodeValue($xpath, '/opml/head/title/text()');
+			$entries = $xpath->query('/opml/body/outline');
+		}
+
 		// Is it Atom?
 		if ($xpath->query('/atom:feed')->length > 0) {
 			$protocol = Conversation::PARCEL_ATOM;
-			$alternate = XML::getFirstAttributes($xpath, "atom:link[@rel='alternate']");
+			$atomns = 'atom';
+		} elseif ($xpath->query('/atom03:feed')->length > 0) {
+			$protocol = Conversation::PARCEL_ATOM03;
+			$atomns = 'atom03';
+		}
+
+		if (in_array($protocol, [Conversation::PARCEL_ATOM, Conversation::PARCEL_ATOM03], true)) {
+			$alternate = XML::getFirstAttributes($xpath, $atomns . ":link[@rel='alternate']");
 			if (is_object($alternate)) {
 				foreach ($alternate as $attribute) {
 					if ($attribute->name == 'href') {
@@ -134,7 +145,7 @@ class Feed
 			}
 
 			if (empty($author['author-link'])) {
-				$self = XML::getFirstAttributes($xpath, "atom:link[@rel='self']");
+				$self = XML::getFirstAttributes($xpath, $atomns . ":link[@rel='self']");
 				if (is_object($self)) {
 					foreach ($self as $attribute) {
 						if ($attribute->name == 'href') {
@@ -145,50 +156,50 @@ class Feed
 			}
 
 			if (empty($author['author-link'])) {
-				$author['author-link'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:id/text()');
+				$author['author-link'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':id/text()');
 			}
-			$author['author-avatar'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:logo/text()');
+			$author['author-avatar'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':logo/text()');
 
-			$author['author-name'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:title/text()');
+			$author['author-name'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':title/text()');
 
 			if (empty($author['author-name'])) {
-				$author['author-name'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:subtitle/text()');
+				$author['author-name'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':subtitle/text()');
 			}
 
 			if (empty($author['author-name'])) {
-				$author['author-name'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:author/atom:name/text()');
+				$author['author-name'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':author/' . $atomns . ':name/text()');
 			}
 
-			$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:displayName/text()');
+			$value = XML::getFirstNodeValue($xpath, $atomns . ':author/poco:displayName/text()');
 			if ($value != '') {
 				$author['author-name'] = $value;
 			}
 
 			if ($dryRun) {
-				$author['author-id'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:author/atom:id/text()');
+				$author['author-id'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':author/' . $atomns . ':id/text()');
 
 				// See https://tools.ietf.org/html/rfc4287#section-3.2.2
-				$value = XML::getFirstNodeValue($xpath, 'atom:author/atom:uri/text()');
+				$value = XML::getFirstNodeValue($xpath, $atomns . ':author/' . $atomns . ':uri/text()');
 				if ($value != '') {
 					$author['author-link'] = $value;
 				}
 
-				$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:preferredUsername/text()');
+				$value = XML::getFirstNodeValue($xpath, $atomns . ':author/poco:preferredUsername/text()');
 				if ($value != '') {
 					$author['author-nick'] = $value;
 				}
 
-				$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:address/poco:formatted/text()');
+				$value = XML::getFirstNodeValue($xpath, $atomns . ':author/poco:address/poco:formatted/text()');
 				if ($value != '') {
 					$author['author-location'] = $value;
 				}
 
-				$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:note/text()');
+				$value = XML::getFirstNodeValue($xpath, $atomns . ':author/poco:note/text()');
 				if ($value != '') {
 					$author['author-about'] = $value;
 				}
 
-				$avatar = XML::getFirstAttributes($xpath, "atom:author/atom:link[@rel='avatar']");
+				$avatar = XML::getFirstAttributes($xpath, $atomns . ':author/' . $atomns . ":link[@rel='avatar']");
 				if (is_object($avatar)) {
 					foreach ($avatar as $attribute) {
 						if ($attribute->name == 'href') {
@@ -198,11 +209,11 @@ class Feed
 				}
 			}
 
-			$author['edited'] = $author['created'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:updated/text()');
+			$author['edited'] = $author['created'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':updated/text()');
 
-			$author['app'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:generator/text()');
+			$author['app'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':generator/text()');
 
-			$entries = $xpath->query('/atom:feed/atom:entry');
+			$entries = $xpath->query('/' . $atomns . ':feed/' . $atomns . ':entry');
 		}
 
 		// Is it RSS?
@@ -299,9 +310,10 @@ class Feed
 
 			$item = array_merge($header, $author);
+			$body = '';
 
-			$alternate = XML::getFirstAttributes($xpath, "atom:link[@rel='alternate']", $entry);
+			$alternate = XML::getFirstAttributes($xpath, $atomns . ":link[@rel='alternate']", $entry);
 			if (!is_object($alternate)) {
-				$alternate = XML::getFirstAttributes($xpath, 'atom:link', $entry);
+				$alternate = XML::getFirstAttributes($xpath, $atomns . ':link', $entry);
 			}
 			if (is_object($alternate)) {
 				foreach ($alternate as $attribute) {
@@ -311,6 +323,40 @@ class Feed
 				}
 			}
 
+			if ($entry->nodeName == 'outline') {
+				$isrss = false;
+				$plink = '';
+				$uri = '';
+				foreach ($entry->attributes as $attribute) {
+					switch ($attribute->nodeName) {
+						case 'title':
+							$item['title'] = $attribute->nodeValue;
+							break;
+
+						case 'text':
+							$body = $attribute->nodeValue;
+							break;
+
+						case 'htmlUrl':
+							$plink = $attribute->nodeValue;
+							break;
+
+						case 'xmlUrl':
+							$uri = $attribute->nodeValue;
+							break;
+
+						case 'type':
+							$isrss = $attribute->nodeValue == 'rss';
+							break;
+					}
+				}
+				$item['plink'] = $plink ?: $uri;
+				$item['uri'] = $uri ?: $plink;
+				if (!$isrss || empty($item['uri'])) {
+					continue;
+				}
+			}
+
 			if (empty($item['plink'])) {
 				$item['plink'] = XML::getFirstNodeValue($xpath, 'link/text()', $entry);
 			}
@@ -322,7 +368,9 @@ class Feed
 			// Add the base path if missing
 			$item['plink'] = Network::addBasePath($item['plink'], $basepath);
 
-			$item['uri'] = XML::getFirstNodeValue($xpath, 'atom:id/text()', $entry);
+			if (empty($item['uri'])) {
+				$item['uri'] = XML::getFirstNodeValue($xpath, $atomns . ':id/text()', $entry);
+			}
 
 			$guid = XML::getFirstNodeValue($xpath, 'guid/text()', $entry);
 			if (!empty($guid)) {
@@ -344,7 +392,9 @@ class Feed
 				Logger::notice('Item URL couldn\'t get expanded', ['url' => $item['plink'], 'exception' => $exception]);
 			}
 
-			$item['title'] = XML::getFirstNodeValue($xpath, 'atom:title/text()', $entry);
+			if (empty($item['title'])) {
+				$item['title'] = XML::getFirstNodeValue($xpath, $atomns . ':title/text()', $entry);
+			}
 
 			if (empty($item['title'])) {
 				$item['title'] = XML::getFirstNodeValue($xpath, 'title/text()', $entry);
@@ -360,7 +410,7 @@ class Feed
 
 			$item['title'] = html_entity_decode($item['title'], ENT_QUOTES, 'UTF-8');
 
-			$published = XML::getFirstNodeValue($xpath, 'atom:published/text()', $entry);
+			$published = XML::getFirstNodeValue($xpath, $atomns . ':published/text()', $entry);
 
 			if (empty($published)) {
 				$published = XML::getFirstNodeValue($xpath, 'pubDate/text()', $entry);
@@ -370,7 +420,7 @@ class Feed
 				$published = XML::getFirstNodeValue($xpath, 'dc:date/text()', $entry);
 			}
 
-			$updated = XML::getFirstNodeValue($xpath, 'atom:updated/text()', $entry);
+			$updated = XML::getFirstNodeValue($xpath, $atomns . ':updated/text()', $entry);
 
 			if (empty($updated) && !empty($published)) {
 				$updated = $published;
@@ -406,7 +456,7 @@ class Feed
 			$creator = XML::getFirstNodeValue($xpath, 'author/text()', $entry);
 
 			if (empty($creator)) {
-				$creator = XML::getFirstNodeValue($xpath, 'atom:author/atom:name/text()', $entry);
+				$creator = XML::getFirstNodeValue($xpath, $atomns . ':author/' . $atomns . ':name/text()', $entry);
 			}
 
 			if (empty($creator)) {
@@ -429,33 +479,35 @@ class Feed
 
 			$attachments = [];
 
-			$enclosures = $xpath->query("enclosure|atom:link[@rel='enclosure']", $entry);
-			foreach ($enclosures as $enclosure) {
-				$href = '';
-				$length = null;
-				$type = null;
+			$enclosures = $xpath->query('enclosure|' . $atomns . ":link[@rel='enclosure']", $entry);
+			if (!empty($enclosures)) {
+				foreach ($enclosures as $enclosure) {
+					$href = '';
+					$length = null;
+					$type = null;
 
-				foreach ($enclosure->attributes as $attribute) {
-					if (in_array($attribute->name, ['url', 'href'])) {
-						$href = $attribute->textContent;
-					} elseif ($attribute->name == 'length') {
-						$length = (int)$attribute->textContent;
-					} elseif ($attribute->name == 'type') {
-						$type = $attribute->textContent;
+					foreach ($enclosure->attributes as $attribute) {
+						if (in_array($attribute->name, ['url', 'href'])) {
+							$href = $attribute->textContent;
+						} elseif ($attribute->name == 'length') {
+							$length = (int)$attribute->textContent;
+						} elseif ($attribute->name == 'type') {
+							$type = $attribute->textContent;
+						}
 					}
-				}
 
-				if (!empty($href)) {
-					$attachment = ['uri-id' => -1, 'type' => Post\Media::UNKNOWN, 'url' => $href, 'mimetype' => $type, 'size' => $length];
+					if (!empty($href)) {
+						$attachment = ['uri-id' => -1, 'type' => Post\Media::UNKNOWN, 'url' => $href, 'mimetype' => $type, 'size' => $length];
 
-					$attachment = Post\Media::fetchAdditionalData($attachment);
+						$attachment = Post\Media::fetchAdditionalData($attachment);
 
-					// By now we separate the visible media types (audio, video, image) from the rest
-					// In the future we should try to avoid the DOCUMENT type and only use the real one - but not in the RC phase.
-					if (!in_array($attachment['type'], [Post\Media::AUDIO, Post\Media::IMAGE, Post\Media::VIDEO])) {
-						$attachment['type'] = Post\Media::DOCUMENT;
-					}
-					$attachments[] = $attachment;
+						// By now we separate the visible media types (audio, video, image) from the rest
+						// In the future we should try to avoid the DOCUMENT type and only use the real one - but not in the RC phase.
+						if (!in_array($attachment['type'], [Post\Media::AUDIO, Post\Media::IMAGE, Post\Media::VIDEO])) {
+							$attachment['type'] = Post\Media::DOCUMENT;
+						}
+						$attachments[] = $attachment;
+					}
 				}
 			}
 
@@ -465,13 +517,15 @@ class Feed
 				$taglist[] = $category->nodeValue;
 			}
 
-			$body = trim(XML::getFirstNodeValue($xpath, 'atom:content/text()', $entry));
+			if (empty($body)) {
+				$body = trim(XML::getFirstNodeValue($xpath, $atomns . ':content/text()', $entry));
+			}
 
 			if (empty($body)) {
 				$body = trim(XML::getFirstNodeValue($xpath, 'content:encoded/text()', $entry));
 			}
 
-			$summary = trim(XML::getFirstNodeValue($xpath, 'atom:summary/text()', $entry));
+			$summary = trim(XML::getFirstNodeValue($xpath, $atomns . ':summary/text()', $entry));
 
 			if (empty($summary)) {
 				$summary = trim(XML::getFirstNodeValue($xpath, 'description/text()', $entry));