Deprecate page_info functions to new PageInfo class
- Add tests for parts not using remote requests - Add scheme requirement for page info URLs - Add policy to keep label from stripped Page Info links
This commit is contained in:
parent
eba964ec12
commit
f3323aff5e
4 changed files with 457 additions and 185 deletions
|
@ -19,209 +19,49 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use Friendica\Core\Hook;
|
/**
|
||||||
use Friendica\Core\Logger;
|
* @deprecated since 2020.06
|
||||||
use Friendica\Core\Protocol;
|
* @see \Friendica\Content\PageInfo::getFooterFromData
|
||||||
use Friendica\Core\Renderer;
|
*/
|
||||||
use Friendica\Core\Session;
|
|
||||||
use Friendica\Database\DBA;
|
|
||||||
use Friendica\DI;
|
|
||||||
use Friendica\Model\Item;
|
|
||||||
use Friendica\Protocol\DFRN;
|
|
||||||
use Friendica\Protocol\Feed;
|
|
||||||
use Friendica\Protocol\OStatus;
|
|
||||||
use Friendica\Util\Network;
|
|
||||||
use Friendica\Util\ParseUrl;
|
|
||||||
use Friendica\Util\Strings;
|
|
||||||
|
|
||||||
require_once __DIR__ . '/../mod/share.php';
|
|
||||||
|
|
||||||
function add_page_info_data(array $data, $no_photos = false)
{
	// Deprecated since 2020.06: the rendering logic lives in
	// \Friendica\Content\PageInfo::getFooterFromData(), this function only
	// keeps the historical return format for existing callers.
	$footer = \Friendica\Content\PageInfo::getFooterFromData($data, $no_photos);

	// The historical implementation returned an empty string when the data
	// could not be rendered, and the attachment prefixed by a line break
	// otherwise. Prepending the line break unconditionally would hand "\n"
	// to callers that append the result to a body without trimming it.
	if ((string)$footer === '') {
		return '';
	}

	return "\n" . $footer;
}
|
||||||
|
|
||||||
|
/**
 * Legacy procedural entry point for fetching the site info of a URL.
 *
 * Forwards all of its arguments unchanged to the PageInfo class and returns
 * whatever that call returns.
 *
 * @deprecated since 2020.06
 * @see \Friendica\Content\PageInfo::queryUrl
 */
function query_page_info($url, $photo = "", $keywords = false, $keyword_denylist = "")
{
	$siteInfo = \Friendica\Content\PageInfo::queryUrl($url, $photo, $keywords, $keyword_denylist);

	return $siteInfo;
}
|
||||||
|
|
||||||
|
/**
 * Legacy procedural entry point for fetching the tag list of a URL.
 *
 * The $keywords flag is kept for backward compatibility: when it is falsy no
 * lookup is performed and an empty list is returned.
 *
 * @deprecated since 2020.06
 * @see \Friendica\Content\PageInfo::getTagsFromUrl()
 */
function get_page_keywords($url, $photo = "", $keywords = false, $keyword_denylist = "")
{
	// Keywords were not requested: nothing to look up
	if (!$keywords) {
		return [];
	}

	return \Friendica\Content\PageInfo::getTagsFromUrl($url, $photo, $keyword_denylist);
}
|
||||||
|
|
||||||
|
/**
 * Legacy procedural entry point building the attachment footer for a URL.
 *
 * Returns the footer produced by the PageInfo class prefixed by a line break,
 * or an empty string when no footer could be built. The empty-string case
 * matches the historical implementation, which callers may rely on when they
 * append the result to a body without trimming it.
 *
 * @deprecated since 2020.06
 * @see \Friendica\Content\PageInfo::getFooterFromUrl
 */
function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_denylist = "")
{
	$footer = \Friendica\Content\PageInfo::getFooterFromUrl($url, $no_photos, $photo, $keywords, $keyword_denylist);

	// Do not emit a lone line break when there is nothing to attach
	if ((string)$footer === '') {
		return '';
	}

	return "\n" . $footer;
}
|
||||||
|
|
||||||
|
/**
 * Legacy procedural entry point appending page info to a post body.
 *
 * Forwards all of its arguments unchanged to the PageInfo class and returns
 * the resulting body.
 *
 * @deprecated since 2020.06
 * @see \Friendica\Content\PageInfo::appendToBody
 */
function add_page_info_to_body($body, $texturl = false, $no_photos = false)
{
	$enrichedBody = \Friendica\Content\PageInfo::appendToBody($body, $texturl, $no_photos);

	return $enrichedBody;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
269
src/Content/PageInfo.php
Normal file
269
src/Content/PageInfo.php
Normal file
|
@ -0,0 +1,269 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* @copyright Copyright (C) 2020, Friendica
|
||||||
|
*
|
||||||
|
* @license GNU AGPL version 3 or any later version
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Friendica\Content;
|
||||||
|
|
||||||
|
use Friendica\Core\Hook;
|
||||||
|
use Friendica\Core\Logger;
|
||||||
|
use Friendica\DI;
|
||||||
|
use Friendica\Network\HTTPException;
|
||||||
|
use Friendica\Util\ParseUrl;
|
||||||
|
use Friendica\Util\Strings;
|
||||||
|
|
||||||
|
/**
 * Extracts trailing URLs from post bodies to transform them in enriched attachment tags through Site Info query
 */
class PageInfo
{
	/**
	 * Replaces the link found at the very end of a body with an [attachment] footer.
	 *
	 * The body is returned untouched when no suitable trailing URL is found or
	 * when no footer can be built for it.
	 *
	 * @param string $body
	 * @param bool   $searchNakedUrls Whether a URL outside of BBCode tags may be picked as a last resort
	 * @param bool   $no_photos       Passed through to the footer generation
	 * @return string
	 * @throws HTTPException\InternalServerErrorException
	 */
	public static function appendToBody(string $body, bool $searchNakedUrls = false, bool $no_photos = false): string
	{
		Logger::info('add_page_info_to_body: fetch page info for body', ['body' => $body]);

		$url = self::getRelevantUrlFromBody($body, $searchNakedUrls);
		if (!$url) {
			return $body;
		}

		$footer = self::getFooterFromUrl($url, $no_photos);
		if (!$footer) {
			return $body;
		}

		return self::stripTrailingUrlFromBody($body, $url) . "\n" . $footer;
	}

	/**
	 * Queries the site info of a URL and renders it as an [attachment] footer.
	 *
	 * @param string $url
	 * @param bool   $no_photos
	 * @param string $photo
	 * @param bool   $keywords
	 * @param string $keyword_denylist
	 * @return string The footer, or an empty string if the URL yielded no usable data
	 * @throws HTTPException\InternalServerErrorException
	 */
	public static function getFooterFromUrl(string $url, bool $no_photos = false, string $photo = '', bool $keywords = false, string $keyword_denylist = ''): string
	{
		$data = self::queryUrl($url, $photo, $keywords, $keyword_denylist);

		// queryUrl() is documented as array|bool, but only an array can be rendered
		if (!is_array($data)) {
			return '';
		}

		return self::getFooterFromData($data, $no_photos);
	}

	/**
	 * Renders a site info array as an [attachment] BBCode tag, optionally followed by hashtag links.
	 *
	 * Keys read from $data: type, title, text, url, images (first entry: src, width, height), keywords.
	 * Only the types "link", "video" and "photo" are rendered; "rich" data is treated as a
	 * link when it has a string title, a string text and at least one image.
	 *
	 * @param array $data
	 * @param bool  $no_photos If true, data of type "photo" yields no footer
	 * @return string The footer, or an empty string if the data isn't suitable
	 * @throws HTTPException\InternalServerErrorException
	 */
	public static function getFooterFromData(array $data, bool $no_photos = false): string
	{
		// NOTE(review): hook consumers presumably may alter $data in place - confirm against Hook::callAll
		Hook::callAll('page_info_data', $data);

		if (empty($data['type'])) {
			return '';
		}

		$type = $data['type'];

		// It maybe is a rich content, but if it does have everything that a link has,
		// then treat it that way
		if (($type == 'rich') && is_string($data['title'] ?? null)
			&& is_string($data['text'] ?? null) && !empty($data['images'])) {
			$type = 'link';
		}

		$title = $data['title'] ?? '';
		$url   = $data['url'] ?? '';

		// A title that merely repeats the URL carries no additional information
		if (!in_array($type, ['link', 'video', 'photo']) || ($title == $url)) {
			return '';
		}

		if ($no_photos && ($type == 'photo')) {
			return '';
		}

		$url   = self::escapeAttributeValue($url);
		$title = self::escapeAttributeValue($title);

		// The content falls back to the (escaped) title, then to the (escaped) URL
		$content = $data['text'] ?? '';
		if (empty($content)) {
			$content = $title;
		}

		if (empty($content)) {
			$content = $url;
		}

		$text = "[attachment type='" . $type . "'";

		if (!empty($url)) {
			$text .= " url='" . $url . "'";
		}

		if (!empty($title)) {
			$text .= " title='" . $title . "'";
		}

		// Only embed a picture link when it seems to be a valid picture ("width" is set)
		$image = (!empty($data['images']) && is_array($data['images'])) ? ($data['images'][0] ?? []) : [];
		if (!empty($image['width'])) {
			$preview = self::escapeAttributeValue($image['src'] ?? '');

			// If the preview picture is at least 500 pixels wide then show it in a larger mode,
			// but only if the picture isn't higher than wide (to prevent huge posts)
			$isLargeLandscape = ($image['width'] >= 500) && ($image['width'] >= ($image['height'] ?? 0));

			if (!DI::config()->get('system', 'always_show_preview') && $isLargeLandscape) {
				$text .= " image='" . $preview . "'";
			} else {
				$text .= " preview='" . $preview . "'";
			}
		}

		$text .= ']' . $content . '[/attachment]';

		$tagLinks = '';
		if (!empty($data['keywords']) && is_array($data['keywords'])) {
			foreach ($data['keywords'] as $keyword) {
				/// @TODO make a positive list of allowed characters
				$hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '’', '`', '(', ')', '„', '“'], '', $keyword);

				// A keyword made only of stripped characters would yield an empty tag link
				if ($hashtag === '') {
					continue;
				}

				$tagLinks .= '#[url=' . DI::baseUrl() . '/search?tag=' . $hashtag . ']' . $hashtag . '[/url] ';
			}
		}

		return $text . ($tagLinks !== '' ? "\n" . $tagLinks : '');
	}

	/**
	 * Fetches the site info of a URL and applies the photo override and keyword policy.
	 *
	 * @param string $url
	 * @param string $photo            If not empty, replaces the source of the first image
	 * @param bool   $keywords         If false, the keywords are dropped from the result
	 * @param string $keyword_denylist Comma-separated list of keywords to remove from the result
	 * @return array|bool
	 * @throws HTTPException\InternalServerErrorException
	 */
	public static function queryUrl(string $url, string $photo = '', bool $keywords = false, string $keyword_denylist = '')
	{
		// NOTE(review): the return type of getSiteinfoCached() isn't visible from here; a
		// non-array result is passed through, callers in this class guard against it.
		$data = ParseUrl::getSiteinfoCached($url, true);

		if ($photo != '') {
			$data['images'][0]['src'] = $photo;
		}

		if (!$keywords) {
			unset($data['keywords']);
		} elseif ($keyword_denylist && !empty($data['keywords']) && is_array($data['keywords'])) {
			// The site may not have provided any keyword, hence the guard above.
			// Keys are preserved, every occurrence of a denied keyword is removed.
			$denied = array_map('trim', explode(',', $keyword_denylist));

			$data['keywords'] = array_diff($data['keywords'], $denied);
		}

		Logger::info('fetch page info for URL', ['url' => $url, 'data' => $data]);

		return $data;
	}

	/**
	 * Returns the keywords of a URL with the characters unsuitable for a tag removed.
	 *
	 * @param string $url
	 * @param string $photo
	 * @param string $keyword_denylist
	 * @return array Empty if the URL yielded no keywords
	 * @throws HTTPException\InternalServerErrorException
	 */
	public static function getTagsFromUrl(string $url, string $photo = '', string $keyword_denylist = ''): array
	{
		$data = self::queryUrl($url, $photo, true, $keyword_denylist);

		if (empty($data['keywords']) || !is_array($data['keywords'])) {
			return [];
		}

		$taglist = [];
		foreach ($data['keywords'] as $keyword) {
			$taglist[] = str_replace([' ', '+', '/', '.', '#', "'"], '', $keyword);
		}

		return $taglist;
	}

	/**
	 * Picks a non-hashtag, non-mention, schemeful URL at the end of the provided body string to be converted into Page Info.
	 *
	 * @param string $body
	 * @param bool   $searchNakedUrls Whether we should pick a naked URL (outside of BBCode tags) as a last resort
	 * @return string|null
	 */
	protected static function getRelevantUrlFromBody(string $body, bool $searchNakedUrls = false)
	{
		$urlPattern = 'https?://[^\[\]]*';

		// Fix for Mastodon where the mentions are in a different format
		$body = preg_replace('~\[url=(' . $urlPattern . ')]([#!@])(.*?)\[/url]~is', '$2[url=$1]$3[/url]', $body) ?? $body;

		$matches = [];

		preg_match('~(?<![!#@])\[url](' . $urlPattern . ')\[/url]$~is', $body, $matches);

		if (!$matches) {
			// The label must not run over a closing [/url], otherwise the first link of
			// the body would be picked instead of the trailing one.
			preg_match('~(?<![!#@])\[url=(' . $urlPattern . ')](?:(?!\[/url]).)*\[/url]$~is', $body, $matches);
		}

		if (!$matches && $searchNakedUrls) {
			// \S+ keeps whitespace and any following text out of the captured URL
			preg_match('~(?<=\W|^)(?<![=\]])(https?://\S+)$~i', $body, $matches);

			// "$" also matches before a final line break, such a URL isn't strictly trailing
			if ($matches && !Strings::endsWith($body, $matches[1])) {
				$matches = [];
			}
		}

		return $matches[1] ?? null;
	}

	/**
	 * Remove the provided URL from the body if it is at the end of it.
	 * Keep the link label if it isn't the full URL.
	 *
	 * @param string $body
	 * @param string $url
	 * @return string The original body is returned if the replacement fails
	 */
	protected static function stripTrailingUrlFromBody(string $body, string $url): string
	{
		$quotedUrl = preg_quote($url, '#');

		// No extended (x) mode here: it would make literal whitespace in the URL insignificant.
		// Alternatives, in order: bare [url] tag, tag labeled with its own URL,
		// tag with a distinct label (kept through $1), naked URL.
		$pattern = '#(?:'
			. '\[url]' . $quotedUrl . '\[/url]'
			. '|\[url=' . $quotedUrl . ']' . $quotedUrl . '\[/url]'
			. '|\[url=' . $quotedUrl . ']([^\[]*?)\[/url]'
			. '|' . $quotedUrl
			. ')$#is';

		// preg_replace() returns null on error, don't lose the body in this case
		return preg_replace($pattern, '$1', $body) ?? $body;
	}

	/**
	 * Encodes a value for use inside a single-quoted [attachment] attribute.
	 *
	 * Square brackets are turned into numeric entities on top of the regular HTML
	 * entities since they would otherwise be read as BBCode tag delimiters.
	 *
	 * @param string $value
	 * @return string
	 */
	private static function escapeAttributeValue(string $value): string
	{
		return str_replace(['[', ']'], ['&#91;', '&#93;'], htmlentities($value, ENT_QUOTES, 'UTF-8', false));
	}
}
|
38
tests/src/Content/PageInfoMock.php
Normal file
38
tests/src/Content/PageInfoMock.php
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* @copyright Copyright (C) 2020, Friendica
|
||||||
|
*
|
||||||
|
* @license GNU AGPL version 3 or any later version
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Friendica\Test\src\Content;
|
||||||
|
|
||||||
|
/**
 * Class PageInfoMock
 *
 * Exposes protected methods for test in the inherited class
 *
 * @method static string|null getRelevantUrlFromBody(string $body, $searchNakedUrls = false)
 * @method static string stripTrailingUrlFromBody(string $body, string $url)
 */
class PageInfoMock extends \Friendica\Content\PageInfo
{
	/**
	 * Forwards static calls to inaccessible (protected) methods of the parent class.
	 *
	 * @param string $name      Name of the called method
	 * @param array  $arguments Positional arguments of the call
	 * @return mixed The return value of the forwarded call
	 * @throws \BadMethodCallException If the method doesn't exist
	 */
	public static function __callStatic($name, $arguments)
	{
		// Forwarding an unknown name would trigger __callStatic again, endlessly
		if (!method_exists(self::class, $name)) {
			throw new \BadMethodCallException('Call to undefined method ' . self::class . '::' . $name . '()');
		}

		return self::$name(...$arguments);
	}
}
|
125
tests/src/Content/PageInfoTest.php
Normal file
125
tests/src/Content/PageInfoTest.php
Normal file
|
@ -0,0 +1,125 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* @copyright Copyright (C) 2020, Friendica
|
||||||
|
*
|
||||||
|
* @license GNU AGPL version 3 or any later version
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Friendica\Test\src\Content;
|
||||||
|
|
||||||
|
use Friendica\Test\MockedTest;
|
||||||
|
|
||||||
|
class PageInfoTest extends MockedTest
{
	/**
	 * Datasets for testGetRelevantUrlFromBody: expected URL (or null), body and,
	 * optionally, whether naked URLs are searched.
	 *
	 * @return array
	 */
	public function dataGetRelevantUrlFromBody()
	{
		$datasets = [];

		$datasets['end-of-content'] = [
			'expected' => 'http://example.com/end-of-content',
			'body'     => 'Content[url]http://example.com/end-of-content[/url]',
		];
		$datasets['tag-no-attr'] = [
			'expected' => 'http://example.com/tag-no-attr',
			'body'     => '[url]http://example.com/tag-no-attr[/url]',
		];
		$datasets['tag-attr'] = [
			'expected' => 'http://example.com/tag-attr',
			'body'     => '[url=http://example.com/tag-attr]Example.com[/url]',
		];
		$datasets['mention'] = [
			'expected' => null,
			'body'     => '@[url=http://example.com/mention]Mention[/url]',
		];
		$datasets['mention-exclusive'] = [
			'expected' => null,
			'body'     => '@[url=http://example.com/mention-exclusive]Mention Exclusive[/url]',
		];
		$datasets['hashtag'] = [
			'expected' => null,
			'body'     => '#[url=http://example.com/hashtag]hashtag[/url]',
		];
		$datasets['naked-url-unexpected'] = [
			'expected' => null,
			'body'     => 'http://example.com/naked-url-unexpected',
		];
		$datasets['naked-url-expected'] = [
			'expected'        => 'http://example.com/naked-url-expected',
			'body'            => 'http://example.com/naked-url-expected',
			'searchNakedUrls' => true,
		];
		$datasets['naked-url-end-of-content-unexpected'] = [
			'expected'        => null,
			'body'            => 'Contenthttp://example.com/naked-url-end-of-content-unexpected',
			'searchNakedUrls' => true,
		];
		$datasets['naked-url-end-of-content-expected'] = [
			'expected'        => 'http://example.com/naked-url-end-of-content-expected',
			'body'            => 'Content http://example.com/naked-url-end-of-content-expected',
			'searchNakedUrls' => true,
		];
		$datasets['bug-8781-schemeless-link'] = [
			'expected' => null,
			'body'     => '[url]/posts/2576978090fd0138ee4c005056264835[/url]',
		];

		return $datasets;
	}

	/**
	 * Checks which URL, if any, is picked from a body.
	 *
	 * @dataProvider dataGetRelevantUrlFromBody
	 *
	 * @param string|null $expected
	 * @param string      $body
	 * @param bool        $searchNakedUrls
	 */
	public function testGetRelevantUrlFromBody($expected, string $body, bool $searchNakedUrls = false)
	{
		$actual = PageInfoMock::getRelevantUrlFromBody($body, $searchNakedUrls);

		$this->assertSame($expected, $actual);
	}

	/**
	 * Datasets for testStripTrailingUrlFromBody: expected body, original body
	 * and the URL to strip from its end.
	 *
	 * @return array
	 */
	public function dataStripTrailingUrlFromBody()
	{
		$url = 'https://example.com';

		$datasets = [];

		$datasets['naked-url-append'] = [
			'expected' => 'content',
			'body'     => 'contenthttps://example.com',
			'url'      => $url,
		];
		$datasets['naked-url-not-at-the-end'] = [
			'expected' => 'https://example.comcontent',
			'body'     => 'https://example.comcontent',
			'url'      => $url,
		];
		$datasets['bug-8781-labeled-link'] = [
			'expected' => 'link label',
			'body'     => '[url=https://example.com]link label[/url]',
			'url'      => $url,
		];

		return $datasets;
	}

	/**
	 * Checks that only a trailing URL is stripped, and that a distinct link label is kept.
	 *
	 * @dataProvider dataStripTrailingUrlFromBody
	 *
	 * @param string $expected
	 * @param string $body
	 * @param string $url
	 */
	public function testStripTrailingUrlFromBody(string $expected, string $body, string $url)
	{
		$actual = PageInfoMock::stripTrailingUrlFromBody($body, $url);

		$this->assertSame($expected, $actual);
	}
}
|
Loading…
Reference in a new issue