From aca4b2eaaeb0e1485b7145f73ca46cf8e667cc77 Mon Sep 17 00:00:00 2001
From: Michael
Date: Mon, 27 Mar 2023 06:40:22 +0000
Subject: [PATCH] Added support for "Tumblr Neue Post Format"

---
 src/Content/Text/NPF.php | 546 +++++++++++++++++++++++++++++++++++++++
 src/Model/Post/Media.php |  21 +
 2 files changed, 567 insertions(+)
 create mode 100644 src/Content/Text/NPF.php

diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php
new file mode 100644
index 000000000..608cfc2e3
--- /dev/null
+++ b/src/Content/Text/NPF.php
@@ -0,0 +1,546 @@
+<?php
+/**
+ * @copyright Copyright (C) 2010-2023, the Friendica project
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+namespace Friendica\Content\Text;
+
+use DOMDocument;
+use DOMElement;
+use DOMNode;
+use Friendica\Model\Photo;
+use Friendica\Model\Post;
+
+/**
+ * Tumblr Neue Post Format
+ * @see https://www.tumblr.com/docs/npf
+ */
+class NPF
+{
+	/**
+	 * Converts a BBCode body into a list of NPF content blocks.
+	 *
+	 * The body is normalized with prepareBody(), rendered to HTML and every top
+	 * level node of the resulting document is routed to a block builder. The
+	 * blocks returned by addLinkBlock() for the post are appended at the end.
+	 *
+	 * @param string $bbcode Post body in BBCode
+	 * @param int    $uri_id URI id of the post, used to look up its media entries
+	 * @return array List of NPF content blocks, empty when the body could not be converted
+	 */
+	public static function fromBBCode(string $bbcode, int $uri_id): array
+	{
+		$bbcode = self::prepareBody($bbcode);
+
+		$html = BBCode::convert($bbcode, false, BBCode::CONNECTORS);
+		if (empty($html)) {
+			return [];
+		}
+
+		$doc = new DOMDocument();
+		if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
+			return [];
+		}
+
+		// A document without a body element yields no blocks instead of a null dereference
+		$body = $doc->getElementsByTagName('body')->item(0);
+		if (is_null($body)) {
+			return [];
+		}
+
+		$npf = [];
+		foreach ($body->childNodes as $child) {
+			$npf = self::routeElements($child, $uri_id, $npf);
+		}
+
+		return self::addLinkBlock($uri_id, $npf);
+	}
+
+	/**
+	 * Prepares a BBCode body for the conversion.
+	 *
+	 * When share attributes are found, the body is replaced by their "shared"
+	 * value. The body is then passed through BBCode::removeAttachment(), linked
+	 * preview pictures are replaced by the unlinked scale 0 picture and every
+	 * picture, audio and video element is surrounded by blank lines.
+	 *
+	 * @param string $body Post body in BBCode
+	 * @return string Prepared body
+	 */
+	public static function prepareBody(string $body): string
+	{
+		$shared = BBCode::fetchShareAttributes($body);
+		if (!empty($shared)) {
+			$body = $shared['shared'];
+		}
+
+		$body = BBCode::removeAttachment($body);
+
+		// Drop size information: [img=WxH]url[/img] => [img]url[/img]
+		$body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
+
+		// Linked preview pictures with a description
+		if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
+			foreach ($pictures as $picture) {
+				if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
+					$body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body);
+				}
+			}
+		}
+
+		$body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body);
+
+		// Linked preview pictures without a description
+		if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
+			foreach ($pictures as $picture) {
+				if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
+					$body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body);
+				}
+			}
+		}
+
+		$body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body);
+		$body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body);
+		$body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body);
+
+		// Collapse runs of blank lines that the replacements above created
+		do {
+			$oldbody = $body;
+			$body = str_replace(["\n\n\n"], ["\n\n"], $body);
+		} while ($oldbody != $body);
+
+		return trim($body);
+	}
+
+	/**
+	 * Routes a DOM node to the block builder that matches its tag name.
+	 *
+	 * @param DOMElement|DOMNode $child  Node to convert
+	 * @param int                $uri_id URI id of the post
+	 * @param array              $npf    Blocks collected so far
+	 * @return array The collected blocks, extended by the blocks created for the node
+	 */
+	private static function routeElements(DOMElement|DOMNode $child, int $uri_id, array $npf): array
+	{
+		switch ($child->tagName ?? '') {
+			case 'blockquote':
+			case 'pre':
+			case 'code':
+				return self::addTextBlock($child, $uri_id, $npf, 'indented');
+
+			case 'h1':
+			case 'h2':
+			case 'h3':
+				return self::addTextBlock($child, $uri_id, $npf, 'heading1');
+
+			case 'h4':
+			case 'h5':
+			case 'h6':
+				return self::addTextBlock($child, $uri_id, $npf, 'heading2');
+
+			case 'ul':
+				return self::addListBlock($child, $uri_id, $npf, false, 0);
+
+			case 'ol':
+				return self::addListBlock($child, $uri_id, $npf, true, 0);
+
+			case 'a':
+				return self::addMediaBlock($child, $uri_id, $npf);
+
+			case 'img':
+				return self::addImageBlock($child, $uri_id, $npf);
+
+			case 'hr':
+			case 'br':
+			case 'table':
+				// No block is created for these elements
+				return $npf;
+
+			default:
+				return self::addTextBlock($child, $uri_id, $npf);
+		}
+	}
+
+	/**
+	 * Collects the attributes of a DOM node as a name => value map.
+	 *
+	 * @param DOMElement|DOMNode $node Node to read
+	 * @return array Attribute values by name, empty for nodes without attributes
+	 */
+	private static function getAttributes(DOMElement|DOMNode $node): array
+	{
+		$attributes = [];
+		foreach ($node->attributes ?? [] as $key => $attribute) {
+			$attributes[$key] = $attribute->value;
+		}
+		return $attributes;
+	}
+
+	/**
+	 * Adds an image block for an "img" element.
+	 *
+	 * The media variants are taken from the photos found for the resource id of
+	 * the source URL or, for URLs without resource id, from the image media entry
+	 * of the post. The plain source URL is used when neither provides a variant.
+	 *
+	 * @param DOMElement|DOMNode $child  The "img" element
+	 * @param int                $uri_id URI id of the post
+	 * @param array              $npf    Blocks collected so far
+	 * @return array The collected blocks, unchanged when the element has no "src"
+	 */
+	private static function addImageBlock(DOMElement|DOMNode $child, int $uri_id, array $npf): array
+	{
+		$attributes = self::getAttributes($child);
+		if (empty($attributes['src'])) {
+			return $npf;
+		}
+
+		$entry = [
+			'type'  => 'image',
+			'media' => [],
+		];
+
+		if (!empty($attributes['alt'])) {
+			$entry['alt_text'] = $attributes['alt'];
+		}
+
+		// "??" binds weaker than "!=", so the fallback has to be parenthesized
+		if (!empty($attributes['title']) && (($attributes['alt'] ?? '') != $attributes['title'])) {
+			$entry['caption'] = $attributes['title'];
+		}
+
+		$rid = Photo::ridFromURI($attributes['src']);
+		if (!empty($rid)) {
+			$photos = Photo::selectToArray([], ['resource-id' => $rid]);
+			foreach ($photos as $photo) {
+				$entry['media'][] = [
+					'type'   => $photo['type'],
+					'url'    => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
+					'width'  => $photo['width'],
+					'height' => $photo['height'],
+				];
+			}
+			if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
+				$entry['alt_text'] = $photos[0]['desc'];
+			}
+		} elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) {
+			$entry['media'][] = [
+				'type'   => $media['mimetype'],
+				'url'    => $media['url'],
+				'width'  => $media['width'],
+				'height' => $media['height'],
+			];
+			if (empty($attributes['alt']) && !empty($media['description'])) {
+				$entry['alt_text'] = $media['description'];
+			}
+		}
+
+		// Never emit an image block without media, e.g. when no photo matches the resource id
+		if (empty($entry['media'])) {
+			$entry['media'][] = ['url' => $attributes['src']];
+		}
+
+		$npf[] = $entry;
+
+		return $npf;
+	}
+
+	/**
+	 * Adds a block for an "a" element.
+	 *
+	 * Links to an audio or video media entry of the post become audio or video
+	 * blocks, every other link becomes a text block that is formatted as link.
+	 *
+	 * @param DOMElement|DOMNode $child  The "a" element
+	 * @param int                $uri_id URI id of the post
+	 * @param array              $npf    Blocks collected so far
+	 * @return array The collected blocks, unchanged when the element has no "href"
+	 */
+	private static function addMediaBlock(DOMElement|DOMNode $child, int $uri_id, array $npf): array
+	{
+		$attributes = self::getAttributes($child);
+		if (empty($attributes['href'])) {
+			return $npf;
+		}
+
+		$media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]);
+		if (!empty($media)) {
+			switch ($media['type']) {
+				case Post\Media::AUDIO:
+					$entry = [
+						'type'  => 'audio',
+						'media' => [
+							'type' => $media['mimetype'],
+							'url'  => $media['url'],
+						],
+					];
+
+					if (!empty($media['name'])) {
+						$entry['title'] = $media['name'];
+					} elseif (!empty($media['description'])) {
+						$entry['title'] = $media['description'];
+					}
+
+					$npf[] = self::addPoster($media, $entry);
+					break;
+
+				case Post\Media::VIDEO:
+					$entry = [
+						'type'  => 'video',
+						'media' => [
+							'type' => $media['mimetype'],
+							'url'  => $media['url'],
+						],
+					];
+
+					$npf[] = self::addPoster($media, $entry);
+					break;
+			}
+		} else {
+			$npf[] = [
+				'type'       => 'text',
+				'text'       => $child->textContent,
+				// "formatting" is a list of ranges, as built by fetchText()
+				'formatting' => [
+					[
+						'start' => 0,
+						'end'   => strlen($child->textContent),
+						'type'  => 'link',
+						'url'   => $attributes['href'],
+					],
+				],
+			];
+		}
+		return $npf;
+	}
+
+	/**
+	 * Adds the preview of a media entry as "poster" to a block.
+	 *
+	 * @param array $media Media entry, read for "preview", "preview-width" and "preview-height"
+	 * @param array $entry Block to extend
+	 * @return array The block, with a "poster" element when preview data is present
+	 */
+	private static function addPoster(array $media, array $entry): array
+	{
+		$poster = [];
+		if (!empty($media['preview'])) {
+			$poster['url'] = $media['preview'];
+		}
+		if (!empty($media['preview-width'])) {
+			$poster['width'] = $media['preview-width'];
+		}
+		if (!empty($media['preview-height'])) {
+			$poster['height'] = $media['preview-height'];
+		}
+		if (!empty($poster)) {
+			$entry['poster'] = $poster;
+		}
+		return $entry;
+	}
+
+	/**
+	 * Maps an HTML tag name to the NPF formatting type it stands for.
+	 *
+	 * @param string $tag Tag or node name
+	 * @return string "bold", "italic", "strikethrough" or an empty string for every other name
+	 */
+	private static function getFormattingType(string $tag): string
+	{
+		return match ($tag) {
+			'b', 'strong' => 'bold',
+			'i', 'em'     => 'italic',
+			's'           => 'strikethrough',
+			default       => '',
+		};
+	}
+
+	/**
+	 * Collects the text of the children of a node together with their formatting.
+	 *
+	 * Only the direct children are inspected: "br" elements become line breaks,
+	 * every other child contributes its text content.
+	 *
+	 * NOTE(review): the offsets are byte offsets (strlen) - confirm that this is
+	 * what the receiving side expects for multibyte text.
+	 *
+	 * @param DOMElement|DOMNode $child Node whose children are read
+	 * @param array              $text  Text and formatting collected so far
+	 * @return array Array with the elements "text" and "formatting"
+	 */
+	private static function fetchText(DOMElement|DOMNode $child, array $text = ['text' => '', 'formatting' => []]): array
+	{
+		foreach ($child->childNodes as $node) {
+			$start = strlen($text['text']);
+
+			$text['text'] .= ($node->nodeName == 'br') ? "\n" : $node->textContent;
+
+			$type = self::getFormattingType($node->nodeName);
+			if (!empty($type)) {
+				$text['formatting'][] = ['start' => $start, 'end' => strlen($text['text']), 'type' => $type];
+			}
+		}
+		return $text;
+	}
+
+	/**
+	 * Adds a text block for a node.
+	 *
+	 * A node without subtype that merely wraps a single element is unwrapped and
+	 * that element is routed on its own.
+	 *
+	 * @param DOMElement|DOMNode $child   Node to convert
+	 * @param int                $uri_id  URI id of the post
+	 * @param array              $npf     Blocks collected so far
+	 * @param string             $subtype NPF subtype of the text block, empty for plain text
+	 * @return array The collected blocks, extended by the blocks created for the node
+	 */
+	private static function addTextBlock(DOMElement|DOMNode $child, int $uri_id, array $npf, string $subtype = ''): array
+	{
+		// "??" binds weaker than "==", so the fallback has to be parenthesized
+		if (empty($subtype) && (($child->childElementCount ?? 0) == 1) && ($child->textContent == $child->firstChild->textContent)) {
+			return self::routeElements($child->firstChild, $uri_id, $npf);
+		}
+
+		$text = self::fetchText($child);
+
+		$element = ['type' => 'text'];
+		if (!empty($subtype)) {
+			$element['subtype'] = $subtype;
+		}
+		$element['text']       = $text['text'];
+		$element['formatting'] = $text['formatting'];
+
+		// A formatting element on block level applies to the whole text; all
+		// other tags are handled as plain text instead of aborting the process
+		$type = empty($subtype) ? self::getFormattingType($child->tagName ?? '') : '';
+		if (!empty($type)) {
+			$element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => $type];
+		}
+
+		if (empty($element['formatting'])) {
+			unset($element['formatting']);
+		}
+
+		$npf[] = $element;
+
+		return $npf;
+	}
+
+	/**
+	 * Adds the items of a list as list item text blocks.
+	 *
+	 * @param DOMElement|DOMNode $child   The "ul" or "ol" element
+	 * @param int                $uri_id  URI id of the post
+	 * @param array              $npf     Blocks collected so far
+	 * @param bool               $ordered true for an ordered list
+	 * @param int                $level   Indent level, 0 for a top level list
+	 * @return array The collected blocks, extended by one block per list item
+	 */
+	private static function addListBlock(DOMElement|DOMNode $child, int $uri_id, array $npf, bool $ordered, int $level): array
+	{
+		foreach ($child->childNodes as $node) {
+			switch ($node->nodeName) {
+				case 'ul':
+					// Nested lists are one level deeper; the level of this list stays untouched
+					$npf = self::addListBlock($node, $uri_id, $npf, false, $level + 1);
+					break;
+
+				case 'ol':
+					$npf = self::addListBlock($node, $uri_id, $npf, true, $level + 1);
+					break;
+
+				case 'li':
+					$text = self::fetchText($node);
+
+					$entry = [
+						'type'    => 'text',
+						'subtype' => $ordered ? 'ordered-list-item' : 'unordered-list-item',
+						'text'    => $text['text'],
+					];
+					if ($level > 0) {
+						$entry['indent_level'] = $level;
+					}
+					if (!empty($text['formatting'])) {
+						$entry['formatting'] = $text['formatting'];
+					}
+					$npf[] = $entry;
+					break;
+			}
+		}
+
+		return $npf;
+	}
+
+	/**
+	 * Adds a block for every HTML media entry of the post.
+	 *
+	 * Links to YouTube, Vimeo and Spotify become video or audio blocks with the
+	 * matching provider, every other link becomes a link block.
+	 *
+	 * @param int   $uri_id URI id of the post
+	 * @param array $npf    Blocks collected so far
+	 * @return array The collected blocks, extended by one block per entry
+	 */
+	private static function addLinkBlock(int $uri_id, array $npf): array
+	{
+		foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
+			$host = parse_url($link['url'], PHP_URL_HOST);
+			if (in_array($host, ['www.youtube.com', 'youtu.be'], true)) {
+				$entry = [
+					'type'     => 'video',
+					'provider' => 'youtube',
+					'url'      => $link['url'],
+				];
+			} elseif ($host === 'vimeo.com') {
+				$entry = [
+					'type'     => 'video',
+					'provider' => 'vimeo',
+					'url'      => $link['url'],
+				];
+			} elseif ($host === 'open.spotify.com') {
+				$entry = [
+					'type'     => 'audio',
+					'provider' => 'spotify',
+					'url'      => $link['url'],
+				];
+			} else {
+				$entry = [
+					'type' => 'link',
+					'url'  => $link['url'],
+				];
+				if (!empty($link['name'])) {
+					$entry['title'] = $link['name'];
+				}
+				if (!empty($link['description'])) {
+					$entry['description'] = $link['description'];
+				}
+				if (!empty($link['author-name'])) {
+					$entry['author'] = $link['author-name'];
+				}
+				if (!empty($link['publisher-name'])) {
+					$entry['site_name'] = $link['publisher-name'];
+				}
+			}
+
+			$npf[] = self::addPoster($link, $entry);
+		}
+		return $npf;
+	}
+}
diff --git a/src/Model/Post/Media.php b/src/Model/Post/Media.php
index 0a9557bc6..e84513836 100644
--- a/src/Model/Post/Media.php
+++ b/src/Model/Post/Media.php
@@ -757,6 +757,27 @@ class Media
 		return DBA::selectToArray('post-media', [], $condition, ['order' => ['id']]);
 	}
 
+	/**
+	 * Fetches the first media entry of a post with the given URL.
+	 *
+	 * Entries of the type UNKNOWN are ignored.
+	 *
+	 * @param int    $uri_id URI id of the post
+	 * @param string $url    URL of the media entry
+	 * @param array  $types  Optional list of media types the entry has to match
+	 * @return array|bool Result of DBA::selectFirst() - presumably the entry or false, to be confirmed
+	 */
+	public static function getByURL(int $uri_id, string $url, array $types = [])
+	{
+		$condition = ["`uri-id` = ? AND `url` = ? AND `type` != ?", $uri_id, $url, self::UNKNOWN];
+
+		if (!empty($types)) {
+			$condition = DBA::mergeConditions($condition, ['type' => $types]);
+		}
+
+		return DBA::selectFirst('post-media', [], $condition);
+	}
+
 	/**
 	 * Retrieves the media attachment with the provided media id.
 	 *