600 lines
20 KiB
PHP
600 lines
20 KiB
PHP
<?php
|
|
|
|
/**
 * @file classes/core/PKPString.php
 *
 * Copyright (c) 2014-2021 Simon Fraser University
 * Copyright (c) 2000-2021 John Willinsky
 * Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
 *
 * @class PKPString
 *
 * @ingroup core
 *
 * @brief String manipulation wrapper class.
 *
 */
|
|
|
|
namespace PKP\core;
|
|
|
|
use HTMLPurifier;
|
|
use HTMLPurifier_Config;
|
|
use PKP\config\Config;
|
|
use Stringy\Stringy;
|
|
|
|
class PKPString
{
    /** @var int Camel case for class names */
    public const CAMEL_CASE_HEAD_UP = 1;

    /** @var int Camel case for method names */
    public const CAMEL_CASE_HEAD_DOWN = 2;

    /**
     * Perform initialization required for the string wrapper library.
     *
     * When mbstring is usable (see hasMBString()), the mbstring internal
     * encoding and PHP's default_charset are both set to UTF-8. The work is
     * performed on the first call only; later calls return immediately.
     */
    public static function initialize()
    {
        static $isInitialized;
        if ($isInitialized) {
            return;
        }

        if (self::hasMBString()) {
            // Set up default encoding
            mb_internal_encoding('utf-8');
            ini_set('default_charset', 'utf-8');
        }
        $isInitialized = true;
    }

    /**
     * Check if server has the mbstring library.
     *
     * The answer is computed once and memoised for subsequent calls.
     *
     * @return bool Returns true iff the server supports mbstring functions.
     */
    public static function hasMBString()
    {
        static $hasMBString;
        if (isset($hasMBString)) {
            return $hasMBString;
        }

        // If string overloading is active, it will break many of the
        // native implementations. This check is deliberately conservative:
        // ANY non-zero mbstring.func_overload value (while the overload
        // constants exist) makes mbstring count as unusable.
        // Note: Overloading has been deprecated on PHP 7.2
        if (ini_get('mbstring.func_overload') && defined('MB_OVERLOAD_STRING')) {
            $hasMBString = false;
        } else {
            $hasMBString = extension_loaded('mbstring') &&
                function_exists('mb_strlen') &&
                function_exists('mb_strpos') &&
                function_exists('mb_strrpos') &&
                function_exists('mb_substr') &&
                function_exists('mb_strtolower') &&
                function_exists('mb_strtoupper') &&
                function_exists('mb_substr_count') &&
                function_exists('mb_send_mail');
        }
        return $hasMBString;
    }

    //
    // Wrappers for basic string manipulation routines.
    //

    /**
     * Multibyte-aware string length (delegates to Stringy).
     *
     * @see https://www.php.net/strlen
     *
     * @param string $string Input string
     *
     * @return int String length
     */
    public static function strlen($string)
    {
        return Stringy::create($string)->length();
    }

    /**
     * Find the first occurrence of a substring (delegates to Stringy::indexOf).
     *
     * @see https://www.php.net/strpos
     *
     * @param string $haystack Input haystack to search
     * @param string $needle Input needle to search for
     * @param int $offset Offset at which to begin searching
     *
     * @return int|false Position of needle within haystack; presumably false
     *  when the needle is absent, mirroring strpos (confirm against Stringy)
     */
    public static function strpos($haystack, $needle, $offset = 0)
    {
        return Stringy::create($haystack)->indexOf($needle, $offset);
    }

    /**
     * Find the last occurrence of a substring (delegates to Stringy::indexOfLast).
     *
     * @see https://www.php.net/strrpos
     *
     * @param string $haystack Haystack to search
     * @param string $needle Needle to search haystack for
     *
     * @return int|false Last index of needle in haystack; presumably false
     *  when the needle is absent, mirroring strrpos (confirm against Stringy)
     */
    public static function strrpos($haystack, $needle)
    {
        return Stringy::create($haystack)->indexOfLast($needle);
    }

    /**
     * Extract part of a string (delegates to Stringy::substr).
     *
     * @see https://www.php.net/substr
     *
     * @param string $string Subject to extract substring from
     * @param int $start Position to start from
     * @param int|null $length Length to extract, or null for entire string from start position
     *
     * @return string Substring of $string
     */
    public static function substr($string, $start, $length = null)
    {
        return (string) Stringy::create($string)->substr($start, $length);
    }

    /**
     * @see https://www.php.net/strtolower
     *
     * @param string $string Input string
     *
     * @return string Lower case version of input string
     */
    public static function strtolower($string)
    {
        return (string) Stringy::create($string)->toLowerCase();
    }

    /**
     * @see https://www.php.net/strtoupper
     *
     * @param string $string Input string
     *
     * @return string Upper case version of input string
     */
    public static function strtoupper($string)
    {
        return (string) Stringy::create($string)->toUpperCase();
    }

    /**
     * @see https://www.php.net/ucfirst
     *
     * @param string $string Input string
     *
     * @return string ucfirst version of input string
     */
    public static function ucfirst($string)
    {
        return (string) Stringy::create($string)->upperCaseFirst();
    }

    /**
     * @see https://www.php.net/substr_count
     *
     * @param string $haystack Input string to search
     * @param string $needle String to search within $haystack for
     *
     * @return int Count of number of times $needle appeared in $haystack
     */
    public static function substr_count($haystack, $needle)
    {
        return Stringy::create($haystack)->countSubstr($needle);
    }

    /**
     * Encode a MIME header using base64 ("B") encoding when mbstring is
     * available; otherwise the input is returned untouched.
     *
     * @see https://www.php.net/mb_encode_mimeheader
     *
     * @param string $string Input MIME header to encode.
     *
     * @return string Encoded MIME header.
     */
    public static function encode_mime_header($string)
    {
        static::initialize();
        if (!static::hasMBString()) {
            return $string;
        }

        // Line break sequence depends on the host platform.
        $lineEnding = Core::isWindows() ? "\r\n" : "\n";
        return mb_encode_mimeheader($string, mb_internal_encoding(), 'B', $lineEnding);
    }

    //
    // Wrappers for PCRE-compatible regular expression routines.
    // See the php.net documentation for usage.
    // Every wrapper below (except regexp_quote) appends the "u" modifier
    // to the given pattern, so patterns must end with their closing
    // delimiter (plus any other modifiers).
    //

    /**
     * @see https://www.php.net/preg_quote
     *
     * @param string $string String to quote
     * @param string $delimiter Delimiter for regular expression
     *
     * @return string Quoted equivalent of $string
     */
    public static function regexp_quote($string, $delimiter = '/')
    {
        return preg_quote($string, $delimiter);
    }

    /**
     * @see https://www.php.net/preg_grep
     *
     * @param string $pattern Regular expression
     * @param array $input Input
     *
     * @return array
     */
    public static function regexp_grep($pattern, $input)
    {
        return preg_grep($pattern . 'u', $input);
    }

    /**
     * @see https://www.php.net/preg_match
     *
     * @param string $pattern Regular expression
     * @param string $subject String to apply regular expression to
     *
     * @return int|false 1 on match, 0 on no match, false on error
     */
    public static function regexp_match($pattern, $subject)
    {
        return preg_match($pattern . 'u', $subject);
    }

    /**
     * Like regexp_match(), but also captures the matched groups.
     *
     * @see https://www.php.net/preg_match
     *
     * @param string $pattern Regular expression
     * @param string $subject String to apply regular expression to
     * @param array $matches Reference to receive matches
     *
     * @return int|boolean Returns 1 if the pattern matches given subject, 0 if it does not, or FALSE if an error occurred.
     */
    public static function regexp_match_get($pattern, $subject, &$matches)
    {
        return preg_match($pattern . 'u', $subject, $matches);
    }

    /**
     * @see https://www.php.net/preg_match_all
     *
     * @param string $pattern Regular expression
     * @param string $subject String to apply regular expression to
     * @param array $matches Reference to receive matches
     *
     * @return int|boolean Returns number of full matches of given subject, or FALSE if an error occurred.
     */
    public static function regexp_match_all($pattern, $subject, &$matches)
    {
        return preg_match_all($pattern . 'u', $subject, $matches);
    }

    /**
     * @see https://www.php.net/preg_replace
     *
     * @param string $pattern Regular expression
     * @param string $replacement String to replace matches in $subject with
     * @param string $subject String to apply regular expression to
     * @param int $limit Number of replacements to perform, maximum, or -1 for no limit.
     *
     * @return string|null Result of preg_replace (null on error)
     */
    public static function regexp_replace($pattern, $replacement, $subject, $limit = -1)
    {
        // Replacement and subject are cast so that null/scalar inputs are accepted.
        return preg_replace($pattern . 'u', (string) $replacement, (string) $subject, $limit);
    }

    /**
     * @see https://www.php.net/preg_replace_callback
     *
     * @param string $pattern Regular expression
     * @param callable $callback PHP callback to generate content to replace matches with
     * @param string $subject String to apply regular expression to
     * @param int $limit Number of replacements to perform, maximum, or -1 for no limit.
     *
     * @return string|null Result of preg_replace_callback (null on error)
     */
    public static function regexp_replace_callback($pattern, $callback, $subject, $limit = -1)
    {
        return preg_replace_callback($pattern . 'u', $callback, $subject, $limit);
    }

    /**
     * @see https://www.php.net/preg_split
     *
     * @param string $pattern Regular expression
     * @param string $subject String to apply regular expression to
     * @param int $limit Number of times to match; -1 for unlimited
     *
     * @return array Resulting string segments
     */
    public static function regexp_split($pattern, $subject, $limit = -1)
    {
        return preg_split($pattern . 'u', $subject, $limit);
    }

    /**
     * Detect the MIME type of a file.
     *
     * Detection is attempted with, in order: the fileinfo extension, PHP's
     * mime_content_type(), and finally the external "file" command. The
     * detected type is then checked against getAmbiguousExtensionsMap()
     * using the file's extension (or $suggestedExtension when given).
     *
     * @see https://www.php.net/mime_content_type
     *
     * @param string $filename Filename to test.
     * @param string $suggestedExtension Suggested file extension (used for common misconfigurations)
     *
     * @return string Detected MIME type
     */
    public static function mime_content_type($filename, $suggestedExtension = '')
    {
        $result = null;

        if (function_exists('finfo_open')) {
            // The finfo handle is fetched from the registry by reference;
            // presumably so that it is opened once and reused (confirm
            // against Registry::get).
            $fi = & Registry::get('fileInfo', true, null);
            if ($fi === null) {
                $fi = finfo_open(FILEINFO_MIME, Config::getVar('finfo', 'mime_database_path'));
            }
            if ($fi !== false) {
                // Keep only the type, dropping any "; charset=..." suffix.
                $result = strtok(finfo_file($fi, $filename), ' ;');
            }
        }

        if (!$result && function_exists('mime_content_type')) {
            $result = mime_content_type($filename);
            // mime_content_type appears to return a charset
            // (erroneously?) in recent versions of PHP5
            if (($i = strpos($result, ';')) !== false) {
                $result = trim(substr($result, 0, $i));
            }
        }

        if (!$result) {
            // Fall back on an external "file" tool
            $f = escapeshellarg($filename);
            // The backtick operator yields null when the command fails or
            // prints nothing; cast so trim() always receives a string.
            $result = trim((string) `file --brief --mime $f`);
            // Make sure we just return the mime type.
            if (($i = strpos($result, ';')) !== false) {
                $result = trim(substr($result, 0, $i));
            }
        }

        // Check ambiguous mimetypes against extension
        $exploded = explode('.', $filename);
        $ext = array_pop($exploded);
        if ($suggestedExtension) {
            $ext = $suggestedExtension;
        }

        $ambiguities = self::getAmbiguousExtensionsMap();
        $ambiguityKey = strtolower($ext . ':' . $result);
        if (isset($ambiguities[$ambiguityKey])) {
            $result = $ambiguities[$ambiguityKey];
        }

        return $result;
    }

    /**
     * @return string[]
     *
     * @brief overrides for ambiguous mime types returned by finfo
     * SUGGESTED_EXTENSION:DETECTED_MIME_TYPE => OVERRIDE_MIME_TYPE
     */
    public static function getAmbiguousExtensionsMap()
    {
        return [
            'html:text/xml' => 'text/html',
            'css:text/x-c' => 'text/css',
            'css:text/plain' => 'text/css',
            'csv:text/plain' => 'text/csv',
            'js:text/plain' => 'text/javascript',
            'xlsx:application/zip' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            'xltx:application/zip' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
            'potx:application/zip' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
            'ppsx:application/zip' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
            'pptx:application/zip' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
            'sldx:application/zip' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
            'docm:application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'application/vnd.ms-word.document.macroEnabled.12',
            'docx:application/zip' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            'dotx:application/zip' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
            'wma:video/x-ms-asf' => 'audio/x-ms-wma',
            'wmv:video/x-ms-asf' => 'video/x-ms-wmv',
        ];
    }

    /**
     * Strip unsafe HTML from the input text. Covers XSS attacks like scripts,
     * onclick(...) attributes, javascript: urls, and special characters.
     *
     * One purifier is built and cached per $configKey, so each key is
     * filtered with its own whitelist regardless of call order.
     *
     * @param string $input input string
     * @param string $configKey The config section key['allowed_html', 'allowed_title_html']
     *
     * @return string
     */
    public static function stripUnsafeHtml($input, $configKey = 'allowed_html')
    {
        static $purifiers = [];
        if (!isset($purifiers[$configKey])) {
            $config = HTMLPurifier_Config::createDefault();
            $config->set('Core.Encoding', 'utf-8');
            $config->set('HTML.Doctype', 'HTML 4.01 Transitional');
            $config->set('HTML.Allowed', Config::getVar('security', $configKey));
            $config->set('Cache.SerializerPath', 'cache');
            $purifiers[$configKey] = new HTMLPurifier($config);
        }
        return $purifiers[$configKey]->purify((string) $input);
    }

    /**
     * Convert limited HTML into a string.
     *
     * Paragraph tags, closing list items and line breaks become newlines,
     * opening list items become bullets, remaining tags are stripped and
     * HTML entities are decoded.
     *
     * @param string $html
     *
     * @return string
     */
    public static function html2text($html)
    {
        $html = self::regexp_replace('/<[\/]?p>/', "\n", $html);
        $html = self::regexp_replace('/<li>/', '• ', $html);
        $html = self::regexp_replace('/<\/li>/', "\n", $html);
        $html = self::regexp_replace('/<br[ ]?[\/]?>/', "\n", $html);
        return html_entity_decode(strip_tags($html), ENT_COMPAT, 'UTF-8');
    }

    /**
     * Joins two title string fragments (in $fields) either with a
     * space or a colon.
     *
     * @param array $fields
     *
     * @return string the joined string
     */
    public static function concatTitleFields($fields)
    {
        // Set the characters that will avoid the use of
        // a colon between title and subtitle.
        $avoidColonChars = ['?', '!', '/', '&'];

        // If the first field ends in a character in $avoidColonChars,
        // concat with a space, otherwise use a colon.
        // A missing first field is treated as an empty string.
        $lastChar = substr((string) ($fields[0] ?? ''), -1, 1);
        $glue = in_array($lastChar, $avoidColonChars, true) ? ' ' : ': ';

        return join($glue, $fields);
    }

    /**
     * Transform "handler-class" to "HandlerClass"
     * and "my-op" to "myOp".
     *
     * @param string $string input string
     * @param int $type which kind of camel case?
     *
     * @return string the string in camel case
     */
    public static function camelize($string, $type = self::CAMEL_CASE_HEAD_UP)
    {
        assert($type == static::CAMEL_CASE_HEAD_UP || $type == static::CAMEL_CASE_HEAD_DOWN);

        // Transform "handler-class" to "HandlerClass" and "my-op" to "MyOp"
        $string = implode(array_map('ucfirst_codesafe', explode('-', $string)));

        // Transform "MyOp" to "myOp"
        if ($type == static::CAMEL_CASE_HEAD_DOWN) {
            $string = strtolower_codesafe(substr($string, 0, 1)) . substr($string, 1);
        }

        return $string;
    }

    /**
     * Transform "HandlerClass" to "handler-class"
     * and "myOp" to "my-op".
     *
     * @param string $string
     *
     * @return string
     */
    public static function uncamelize($string)
    {
        assert(!empty($string));

        // Transform "myOp" to "MyOp"
        $string = ucfirst_codesafe($string);

        // Insert hyphens between words and return the string in lowercase
        $words = [];
        self::regexp_match_all('/[A-Z][a-z0-9]*/', $string, $words);
        // Every character of the input must belong to one of the matched words.
        assert(isset($words[0]) && !empty($words[0]) && strlen(implode('', $words[0])) == strlen($string));
        return strtolower_codesafe(implode('-', $words[0]));
    }

    /**
     * Create a new UUID (version 4)
     *
     * The identifier is built from 16 bytes of random_bytes() output, with
     * the version nibble forced to 4 and the variant nibble forced into the
     * range 8-B. It is returned in upper case, 8-4-4-4-12 format.
     *
     * @return string
     */
    public static function generateUUID()
    {
        $charid = strtoupper(bin2hex(random_bytes(16)));
        $hyphen = '-';

        // Keep the two low bits of the nibble and set the top bits to "10".
        $variant = strtoupper(dechex((hexdec($charid[16]) & 0x3) | 0x8));

        return substr($charid, 0, 8) . $hyphen
            . substr($charid, 8, 4) . $hyphen
            . '4' . substr($charid, 13, 3) . $hyphen
            . $variant . substr($charid, 17, 3) . $hyphen
            . substr($charid, 20, 12);
    }

    /**
     * Get a mapping from strftime to DateTime::format formatting equivalents.
     * Old format: https://www.php.net/manual/en/function.strftime.php
     * New format: https://www.php.net/manual/en/datetime.format.php
     *
     * Introduced in 3.4.0; remove this function (and calls to it) after this is distributed
     * in an LTS release.
     */
    public static function getStrftimeConversion(): array
    {
        return [
            '%%' => '%', '%h' => 'M', '%d' => 'd', '%a' => 'D',
            '%e' => 'j', '%A' => 'l', '%u' => 'N', '%w' => 'w',
            '%U' => 'W', '%B' => 'F', '%m' => 'm', '%b' => 'M',
            '%Y' => 'Y', '%y' => 'y', '%P' => 'a', '%p' => 'A',
            '%l' => 'g', '%k' => 'G', '%I' => 'h', '%H' => 'H',
            '%M' => 'i', '%S' => 's', '%Z' => 'T',
        ];
    }

    /**
     * Convert any strftime-based datetime formatting into DateTime::format equivalent.
     * Passes through any strings that are already in the new format without modification.
     * Old format: https://www.php.net/manual/en/function.strftime.php
     * New format: https://www.php.net/manual/en/datetime.format.php
     *
     * Introduced in 3.4.0; remove this function (and calls to it) after this is distributed
     * in an LTS release.
     */
    public static function convertStrftimeFormat(string $format): string
    {
        // Following the lead of Smarty's date_format modifier, check the
        // format string for "%" characters. If found, attempt to convert.
        // We don't expect date/time formats to contain other uses of %.
        if (strpos($format, '%') === false) {
            return $format;
        }

        if (Config::getVar('debug', 'deprecation_warnings')) {
            trigger_error('Deprecated use of strftime-based date format.');
        }
        return strtr($format, self::getStrftimeConversion());
    }

    /**
     * Matches each symbol of PHP date format string
     * to jQuery Datepicker widget date format.
     *
     * @param string $phpFormat
     *
     * @return string
     */
    public static function dateformatPHP2JQueryDatepicker($phpFormat)
    {
        return str_replace(
            ['d', 'j', 'l', 'm', 'n', 'F', 'Y'],
            ['dd', 'd', 'DD', 'mm', 'm', 'MM', 'yy'],
            $phpFormat
        );
    }

    /**
     * Get the word count of a string
     *
     * HTML tags are stripped first and non-breaking spaces (both the
     * "&nbsp;" entity and the U+00A0 character) count as word separators.
     * An empty or whitespace-only string has a word count of 0.
     */
    public static function getWordCount(string $str): int
    {
        $text = str_replace(['&nbsp;', "\u{00A0}"], ' ', strip_tags($str));
        // PREG_SPLIT_NO_EMPTY drops the empty pieces produced by leading or
        // trailing whitespace and by an entirely empty input.
        $words = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
        return $words === false ? 0 : count($words);
    }
}
|
|
|
|
// Outside strict mode, also register the class under its un-namespaced
// global name so that code referring to \PKPString keeps resolving.
if (!PKP_STRICT_MODE) {
    class_alias('\PKP\core\PKPString', '\PKPString');
}
|