2018-11-08 13:33:28 +00:00
< ? php
/**
2023-01-01 14:36:24 +00:00
* @ copyright Copyright ( C ) 2010 - 2023 , the Friendica project
2020-02-09 15:18:46 +00:00
*
* @ license GNU AGPL version 3 or any later version
*
* This program is free software : you can redistribute it and / or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation , either version 3 of the
* License , or ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU Affero General Public License for more details .
*
* You should have received a copy of the GNU Affero General Public License
* along with this program . If not , see < https :// www . gnu . org / licenses />.
*
2018-11-08 13:33:28 +00:00
*/
namespace Friendica\Util ;
2018-11-08 16:58:42 +00:00
use Friendica\Content\ContactSelector ;
use Friendica\Core\Logger ;
2022-12-02 13:10:16 +00:00
use Friendica\Core\System ;
2022-11-23 18:45:58 +00:00
use ParagonIE\ConstantTime\Base64 ;
2018-11-08 16:58:42 +00:00
2018-11-08 13:33:28 +00:00
/**
2020-01-19 06:05:23 +00:00
* This class handles string functions
2018-11-08 13:33:28 +00:00
*/
class Strings
{
2019-10-12 10:28:18 +00:00
/**
 * Generates a pseudo-random string of hexadecimal characters
 *
 * @param int $size Size of string (default: 64)
 *
 * @return string Pseudo-random string of exactly $size characters, or an empty string if $size is not positive
 * @throws \Exception if no suitable source of randomness is available
 */
public static function getRandomHex(int $size = 64): string
{
	// random_bytes() refuses a length below 1, and there is nothing to generate anyway.
	if ($size <= 0) {
		return '';
	}

	// One byte produces two hex characters: round up for odd sizes (integer maths, no float),
	// the surplus character is trimmed below.
	$byteCount = intdiv($size + 1, 2);

	return substr(bin2hex(random_bytes($byteCount)), 0, $size);
}
/**
 * Checks, if the given string is a valid hexadecimal code
 *
 * A valid code consists only of the characters 0-9 and a-f (case-insensitive)
 * and has an even, non-zero length (i.e. it describes whole bytes).
 *
 * @param string $hexCode
 * @return bool
 */
public static function isHex(string $hexCode): bool
{
	// An odd number of hex digits cannot describe whole bytes.
	if (strlen($hexCode) % 2 !== 0) {
		return false;
	}

	// \A and \z anchor to the real string boundaries; a bare "$" would also
	// match right before a trailing newline and let "abc\n" through.
	return preg_match('/\A[a-f0-9]{2,}\z/i', $hexCode) === 1;
}
/**
 * Escapes HTML special characters so that "body" or "content" input, where
 * angle chars shouldn't be removed, can be displayed safely.
 *
 * Double quotes are converted, single quotes are left alone (ENT_COMPAT) and
 * already existing entities are not encoded a second time.
 *
 * @param string $string
 *
 * @return string
 */
public static function escapeHtml($string)
{
	$escaped = htmlspecialchars($string, ENT_COMPAT, 'UTF-8', false);

	return $escaped;
}
/**
 * Generate a string that's random, but usually pronounceable. Used to generate initial passwords
 *
 * The word is built by alternating between a vowel table and a consonant table,
 * cut to the requested length and regenerated if it ends in a cluster that
 * cannot sensibly end a word.
 *
 * @param int $len length
 * @return string Generated word, empty if $len is not positive
 * @throws \Exception if no suitable source of randomness is available
 */
public static function getRandomName(int $len): string
{
	if ($len <= 0) {
		return '';
	}

	$vowels = ['a', 'a', 'ai', 'au', 'e', 'e', 'e', 'ee', 'ea', 'i', 'ie', 'o', 'ou', 'u'];

	// As the result may be handed out as a password, every draw uses the
	// cryptographically secure random_int() instead of mt_rand().
	// 'y' only joins the vowel pool in one out of six calls.
	if (random_int(0, 5) === 4) {
		$vowels[] = 'y';
	}

	$cons = [
		'b', 'bl', 'br',
		'c', 'ch', 'cl', 'cr',
		'd', 'dr',
		'f', 'fl', 'fr',
		'g', 'gh', 'gl', 'gr',
		'h',
		'j',
		'k', 'kh', 'kl', 'kr',
		'l',
		'm',
		'n',
		'p', 'ph', 'pl', 'pr',
		'qu',
		'r', 'rh',
		's', 'sc', 'sh', 'sm', 'sp', 'st',
		't', 'th', 'tr',
		'v',
		'w', 'wh',
		'x',
		'z', 'zh'
	];

	$midcons = [
		'ck', 'ct', 'gn', 'ld', 'lf', 'lm', 'lt', 'mb', 'mm', 'mn', 'mp',
		'nd', 'ng', 'nk', 'nt', 'rn', 'rp', 'rt'
	];

	$noend = [
		'bl', 'br', 'cl', 'cr', 'dr', 'fl', 'fr', 'gl', 'gr',
		'kh', 'kl', 'kr', 'mn', 'pl', 'pr', 'rh', 'tr', 'qu', 'wh', 'q'
	];

	// Consonants that follow a vowel may additionally come from the mid-word clusters.
	$consAfterVowel = array_merge($cons, $midcons);

	// One in three words starts with a vowel; a leading consonant never uses a mid-word cluster.
	$pickVowel = (random_int(0, 2) === 0);
	$table     = $pickVowel ? $vowels : $cons;

	$word = '';
	for ($x = 0; $x < $len; $x++) {
		$word .= $table[random_int(0, count($table) - 1)];

		// Alternate between vowels and consonants
		$pickVowel = !$pickVowel;
		$table     = $pickVowel ? $vowels : $consAfterVowel;
	}

	// Multi-character syllables overshoot the requested length
	$word = substr($word, 0, $len);

	foreach ($noend as $noe) {
		$noelen = strlen($noe);
		if ((strlen($word) > $noelen) && (substr($word, -$noelen) === $noe)) {
			// Unpronounceable ending: start over
			return self::getRandomName($len);
		}
	}

	return $word;
}
/**
 * Translate and format the network name of a contact
 *
 * When a contact URL is given, the translated network name is wrapped in a link to that URL.
 *
 * @param string $network Network name of the contact (e.g. dfrn, rss and so on)
 * @param string $url     The contact url
 *
 * @return string Formatted network name, empty string if no network was given
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 */
public static function formatNetworkName(string $network, string $url = ''): string
{
	if ($network == '') {
		return '';
	}

	if ($url == '') {
		return ContactSelector::networkToName($network);
	}

	// The URL ends up inside a double-quoted HTML attribute, so quotes and angle
	// brackets must not be able to break out of it. Existing entities are kept
	// as they are (no double encoding) for callers passing pre-escaped URLs.
	$href = htmlspecialchars($url, ENT_COMPAT, 'UTF-8', false);

	return '<a href="' . $href . '">' . ContactSelector::networkToName($network, $url) . '</a>';
}
/**
 * Remove indentation from a text
 *
 * If no count is given, the indentation width of the first non-empty line is
 * used. Lines with less indentation than that are left untouched.
 *
 * @param string   $text  String to be transformed.
 * @param string   $chr   Optional. Regular expression fragment matching one indentation character. Default tab or space.
 * @param int|null $count Optional. Number of indentation characters to strip. Default null (auto-detect).
 *
 * @return string Transformed string.
 */
public static function deindent(string $text, string $chr = "[\t ]", ?int $count = null): string
{
	$lines = explode("\n", $text);

	if (is_null($count)) {
		// Nothing to strip unless a non-empty line says otherwise. This also covers
		// texts consisting of empty lines only, where no line can be measured.
		$count = 0;

		foreach ($lines as $line) {
			if (strlen($line) == 0) {
				continue;
			}

			$m = [];
			preg_match("|^" . $chr . "*|", $line, $m);
			$count = strlen($m[0] ?? '');
			break;
		}
	}

	foreach ($lines as $k => $line) {
		$lines[$k] = preg_replace("|^" . $chr . "{" . $count . "}|", "", $line);
	}

	return implode("\n", $lines);
}
/**
 * Get byte size returned in a Data Measurement (KiB, MiB, GiB, TiB)
 *
 * Negative values are treated as 0. Values beyond the largest unit are
 * expressed in that largest unit.
 *
 * @param int $bytes     The number of bytes to be measured
 * @param int $precision Optional. Default 2.
 *
 * @return string Size with measured units.
 */
public static function formatBytes(int $bytes, int $precision = 2): string
{
	// Note: an integer can never equal INF, so no special case for an
	// "infinite" amount of bytes is needed (or reachable) here.
	$units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
	$maxPow = count($units) - 1;

	$value = max($bytes, 0);

	// Pick the unit by repeated division instead of a float logarithm, which
	// keeps the unit index an integer and is exact at the 1024^n boundaries.
	$pow = 0;
	while ($value >= 1024 && $pow < $maxPow) {
		$value /= 1024;
		$pow++;
	}

	return round($value, $precision) . ' ' . $units[$pow];
}
/**
 * Doubles every percent sign so the string can be used as (part of) a
 * sprintf format without being interpreted as a conversion specification
 *
 * @param string $s String to transform.
 * @return string Transformed string.
 */
public static function protectSprintf(string $s): string
{
	$escaped = strtr($s, ['%' => '%%']);

	return $escaped;
}
/**
 * Base64 Encode URL and translate +/ to -_ Optionally strip padding.
 *
 * @param string  $s             URL to encode
 * @param boolean $strip_padding Optional. Default false
 * @return string Encoded URL
 * @see https://web.archive.org/web/20160506073138/http://salmon-protocol.googlecode.com:80/svn/trunk/draft-panzer-magicsig-01.html#params
 */
public static function base64UrlEncode(string $s, bool $strip_padding = false): string
{
	$encoded = $strip_padding ? Base64::encodeUnpadded($s) : Base64::encode($s);

	// Swap the two characters of the standard alphabet that aren't URL safe
	return strtr($encoded, '+/', '-_');
}
/**
 * Decode Base64 Encoded URL and translate -_ to +/
 *
 * @param string $s URL to decode
 * @return string Decoded URL
 * @throws \Exception
 * @see https://web.archive.org/web/20160506073138/http://salmon-protocol.googlecode.com:80/svn/trunk/draft-panzer-magicsig-01.html#params
 */
public static function base64UrlDecode(string $s): string
{
	// Map the URL safe characters back to the standard alphabet first
	$standardAlphabet = strtr($s, '-_', '+/');

	return Base64::decode($standardAlphabet);
}
/**
 * Normalize url
 *
 * Replaces "https:" with "http:", drops a "www." that directly follows "//"
 * and strips trailing slashes.
 *
 * @param string $url URL to be normalized.
 * @return string Normalized URL.
 */
public static function normaliseLink(string $url): string
{
	$withoutTls = str_replace('https:', 'http:', $url);
	$withoutWww = str_replace('//www.', '//', $withoutTls);

	return rtrim($withoutWww, '/');
}
/**
 * Normalize OpenID identity
 *
 * Removes the "http://" and "https://" scheme prefixes and strips slashes
 * from both ends.
 *
 * @param string $s OpenID Identity
 * @return string normalized OpenId Identity
 */
public static function normaliseOpenID(string $s): string
{
	$withoutHttp  = str_replace('http://', '', $s);
	$withoutHttps = str_replace('https://', '', $withoutHttp);

	return trim($withoutHttps, '/');
}
/**
 * Compare two URLs to see if they are the same, but ignore
 * slight but hopefully insignificant differences such as if one
 * is https and the other isn't, or if one is www.something and
 * the other isn't - and also ignore case differences.
 *
 * @param string $a first url
 * @param string $b second url
 * @return boolean True if the URLs match, otherwise False
 */
public static function compareLink(string $a, string $b): bool
{
	$first  = self::normaliseLink($a);
	$second = self::normaliseLink($b);

	return strcasecmp($first, $second) === 0;
}
/**
 * Ensures the provided URI has its query string punctuation in order.
 *
 * A URI without any "?" gets its first "&" turned into a "?".
 *
 * @param string $uri
 * @return string
 */
public static function ensureQueryParameter(string $uri): string
{
	// A query string is already introduced properly
	if (strpos($uri, '?') !== false) {
		return $uri;
	}

	$firstAmpersand = strpos($uri, '&');
	if ($firstAmpersand === false) {
		return $uri;
	}

	return substr_replace($uri, '?', $firstAmpersand, 1);
}
/**
 * Check if the trimmed provided string is starting with one of the provided characters
 *
 * The first character is looked up in the list with a loose comparison.
 *
 * @param string $string
 * @param array  $chars
 *
 * @return bool
 */
public static function startsWithChars(string $string, array $chars): bool
{
	$trimmed   = trim($string);
	$firstChar = substr($trimmed, 0, 1);

	return in_array($firstChar, $chars);
}
2020-05-18 05:05:38 +00:00
/**
 * Check if the first string starts with the second
 *
 * The comparison is binary safe and case-sensitive.
 *
 * @see http://maettig.com/code/php/php-performance-benchmarks.php#startswith
 * @param string $string
 * @param string $start
 * @return bool
 */
public static function startsWith(string $string, string $start): bool
{
	$prefixLength = strlen($start);

	return strncmp($string, $start, $prefixLength) === 0;
}
2020-06-17 06:02:13 +00:00
/**
 * Checks if the first string ends with the second
 *
 * The comparison is binary safe and case-sensitive. Every string ends with
 * the empty string.
 *
 * @see http://maettig.com/code/php/php-performance-benchmarks.php#endswith
 * @param string $string
 * @param string $end
 *
 * @return bool
 */
public static function endsWith(string $string, string $end): bool
{
	$endLength = strlen($end);

	// An offset of -0 would compare the whole string against the empty one
	// and report a mismatch, so the empty suffix is answered right away.
	if ($endLength === 0) {
		return true;
	}

	// A suffix longer than the string itself can never match
	if ($endLength > strlen($string)) {
		return false;
	}

	return substr_compare($string, $end, -$endLength) === 0;
}
2019-10-12 10:28:18 +00:00
/**
 * Returns the regular expression string to match URLs in a given text
 *
 * The pattern is delimited by "@" and carries the x, i and u modifiers. It
 * consists of a negative look-behind (see the inline pattern comment for the
 * excluded preceding characters), a word boundary and the shared URL pattern
 * from linkRegEx(), which is wrapped in capture group 1.
 *
 * NOTE(review): the whitespace inside the pattern literal looks altered in
 * this copy of the file - verify against the canonical source before
 * changing it.
 *
 * @return string
 */
2022-06-19 22:51:59 +00:00
public static function autoLinkRegEx () : string
2019-10-12 10:28:18 +00:00
{
return ' @
2022-12-19 04:24:59 +00:00
( ? <! [ = \ ' \ ] " /]) # Not preceded by [, =, \ ', ], " , /
2019-03-10 04:25:53 +00:00
\b
2022-12-19 04:24:59 +00:00
( # Capture 1: entire matched URL
' . self::linkRegEx() . '
) @ xiu ' ;
}
/**
 * Returns the regular expression string to match only an HTTP URL
 *
 * The shared URL pattern is anchored at both ends, so the whole subject has
 * to be a URL.
 *
 * @return string
 */
public static function onlyLinkRegEx(): string
{
	$urlPattern = self::linkRegEx();

	return '@^' . $urlPattern . '$@xiu';
}
/**
 * Returns the bare URL pattern shared by autoLinkRegEx() and onlyLinkRegEx()
 *
 * The returned fragment has neither delimiters nor modifiers; both callers
 * embed it between "@" delimiters with the x, i and u modifiers. The inline
 * "#" comments of the pattern rely on that x modifier.
 *
 * NOTE(review): the whitespace inside the pattern literal looks altered in
 * this copy of the file - verify against the canonical source before
 * changing it.
 *
 * @return string
 * @see https://daringfireball.net/2010/07/improved_regex_for_matching_urls
 */
private static function linkRegEx () : string
{
return ' https ? :// # http or https protocol
2019-03-10 04:25:53 +00:00
( ? :
2022-12-19 04:24:59 +00:00
[ ^/ \s\xA0 ` ! () \ [ \ ]{}; : \ ' " ,<>?«»“”‘’.] # Domain can \ 't start with a .
[ ^/ \s\xA0 ` ! () \ [ \ ]{}; : \ ' " ,<>?«»“”‘’]+ # Domain can \ 't end with a .
2019-10-12 10:28:18 +00:00
\ .
[ ^/ \s\xA0 ` ! () \ [ \ ]{}; : \ ' " .,<>?«»“”‘’]+/? # Followed by a slash
2019-03-10 04:25:53 +00:00
)
2022-12-19 04:24:59 +00:00
( ? : # One or more:
[ ^ \s\xA0 () <> ] + # Run of non-space, non-()<>
| # or
\ (([ ^ \s\xA0 () <> ] +| ( \ ([ ^ \s () <> ] + \ ))) * \ ) # balanced parens, up to 2 levels
| # or
[ ^ \s\xA0 ` ! () \ [ \ ]{}; : \ ' " .,<>?«»“”‘’] # not a space or one of these punct chars
) * ' ;
2019-10-12 10:28:18 +00:00
}
/**
 * Ensures a single path item doesn't contain any path-traversing characters
 *
 * Slashes, backslashes and the platform's directory separator are replaced
 * with underscores.
 *
 * @param string $pathItem
 *
 * @see https://stackoverflow.com/a/46097713
 * @return string
 */
public static function sanitizeFilePathItem(string $pathItem): string
{
	// DIRECTORY_SEPARATOR is listed as well in case it does not equal the standard values
	$separators = ['/', '\\', DIRECTORY_SEPARATOR];

	return str_replace($separators, '_', $pathItem);
}
2020-03-30 05:57:10 +00:00
/**
 * Multi-byte safe implementation of substr_replace where $start and $length are character offset and count rather
 * than byte offset and counts.
 *
 * Depends on mbstring, use default encoding.
 *
 * @param string   $string
 * @param string   $replacement
 * @param int      $start  Character offset, counted from the end of the string if negative
 * @param int|null $length Number of characters to replace. If negative, number of characters from the end
 *                         of the string at which to stop replacing. Null replaces up to the end of the string.
 *
 * @return string
 * @see substr_replace()
 */
public static function substringReplace(string $string, string $replacement, int $start, ?int $length = null): string
{
	$string_length = mb_strlen($string);

	// Bring the start offset into the range [0, string length]
	if ($start < 0) {
		$start = max(0, $string_length + $start);
	} else {
		$start = min($start, $string_length);
	}

	// Number of characters available from the start offset on
	$remaining = $string_length - $start;

	if ($length === null) {
		$length = $remaining;
	} elseif ($length < 0) {
		$length = max(0, $remaining + $length);
	} else {
		$length = min($length, $remaining);
	}

	return mb_substr($string, 0, $start) . $replacement . mb_substr($string, $start + $length);
}
2020-06-04 23:25:48 +00:00
/**
 * Perform a custom function on a text after having escaped blocks matched by the provided regular expressions.
 * Only full matches are used, capturing group are ignored.
 *
 * To change the provided text, the callback function needs to return it and this function will return the modified
 * version as well after having restored the escaped blocks.
 *
 * @param string   $text
 * @param string   $regex
 * @param callable $callback Receives the text with placeholders in place of the matched blocks
 *
 * @return string
 * @throws \Exception if no suitable source of randomness is available
 */
public static function performWithEscapedBlocks(string $text, string $regex, callable $callback): string
{
	// Enables nested use: every invocation gets placeholders of its own.
	// intdiv() keeps the lower bound an integer, a plain division yields a fractional float.
	$executionId = random_int(intdiv(PHP_INT_MAX, 10), PHP_INT_MAX);

	$blocks = [];

	$escaped = preg_replace_callback($regex,
		function ($matches) use ($executionId, &$blocks) {
			$placeholder = '«block-' . $executionId . '-' . count($blocks) . '»';

			$blocks[] = $matches[0];

			return $placeholder;
		},
		$text
	);

	if (is_null($escaped)) {
		Logger::notice('Received null value from preg_replace_callback', ['text' => $text, 'regex' => $regex, 'blocks' => $blocks, 'executionId' => $executionId, 'callstack' => System::callstack(10)]);
	}

	// If escaping failed, the callback works on the unescaped text
	$text = $callback($escaped ?? $text) ?? '';

	// Restore code blocks
	$text = preg_replace_callback('/«block-' . $executionId . '-([0-9]+)»/iU',
		function ($matches) use ($blocks) {
			// Check and read with the very same integer index, unknown placeholders are kept as they are
			$index = intval($matches[1]);

			return $blocks[$index] ?? $matches[0];
		},
		$text
	);

	return $text;
}
2022-11-26 20:43:31 +00:00
/**
 * This function converts a PHP's shorthand notation string for file sizes in to an integer number of total bytes.
 * For example: The string for shorthand notation of '2M' (which is 2,097,152 Bytes) is converted to 2097152
 *
 * The suffixes K, M and G are recognised case-insensitively. An empty string
 * or a value without a numeric part results in 0.
 *
 * @see https://www.php.net/manual/en/faq.using.php#faq.using.shorthandbytes
 * @param string $shorthand
 * @return int
 */
public static function getBytesFromShorthand(string $shorthand): int
{
	$shorthand = trim($shorthand);

	// Nothing to read a suffix from
	if ($shorthand === '') {
		return 0;
	}

	if (is_numeric($shorthand)) {
		return (int)$shorthand;
	}

	$last  = strtolower(substr($shorthand, -1));
	// Explicit numeric conversion: arithmetic on a non-numeric string would throw
	$bytes = (float)substr($shorthand, 0, -1);

	// Each case deliberately falls through to the smaller units
	switch ($last) {
		case 'g':
			$bytes *= 1024;
			// no break
		case 'm':
			$bytes *= 1024;
			// no break
		case 'k':
			$bytes *= 1024;
	}

	return (int)$bytes;
}
2022-11-26 20:53:12 +00:00
2019-10-12 14:07:22 +00:00
}