,
 * Pierre-Alain Joye , Amir Mohammad Saied )
 *
 * Originally published under the "New BSD License"
 * http://www.opensource.org/licenses/bsd-license.php
 */

// URI pattern (no delimiters). Capture groups, in order:
// 1 scheme, 2 user, 3 hostname, 4 IP address, 5 port, 6 path, 7 query string, 8 fragment.
define('PCRE_URI', '(?:([a-z][-+.a-z0-9]*):)?' .					// Scheme
	'(?://' .
	'(?:((?:%[0-9a-f]{2}|[-a-z0-9_.!~*\'();:\&=+$,])*)@)?' .			// User
	'(?:((?:[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\.)*[a-z](?:[a-z0-9]+)?\.?)' .		// Hostname
	'|([0-9]{1,3}(?:\.[0-9]{1,3}){3}))' .						// IP Address
	'(?::([0-9]*))?)' .								// Port
	'((?:/(?:%[0-9a-f]{2}|[-a-z0-9_.!~*\'():@\&=+$,;])*)*/?)?' .			// Path
	'(?:\?([^#]*))?' .								// Query String
	'(?:\#((?:%[0-9a-f]{2}|[-a-z0-9_.!~*\'();/?:@\&=+$,])*))?');			// Fragment

// RFC-2822 email addresses (pattern without delimiters)
define('PCRE_EMAIL_ADDRESS',
	'[-a-z0-9!#\$%&\'\*\+\/=\?\^_\`\{\|\}~]' . '+' . // One or more atom characters.
	'(\.' . '[-a-z0-9!#\$%&\'\*\+\/=\?\^_\`\{\|\}~]' . '+)*'. // Followed by zero or more dot separated sets of one or more atom characters.
	'@'. // Followed by an "at" character.
	'(' . '([a-z0-9]([-a-z0-9]*[a-z0-9]+)?)' . '{1,63}\.)+'. // Followed by one or max 63 domain characters (dot separated).
	'([a-z0-9]([-a-z0-9]*[a-z0-9]+)?)' . '{2,63}' // Must be followed by one set consisting a period of two or max 63 domain characters.
);

// Two different types of camel case: one for class names and one for method names
define ('CAMEL_CASE_HEAD_UP', 0x01);
define ('CAMEL_CASE_HEAD_DOWN', 0x02);

class PKPString {
	/**
	 * Perform initialization required for the string wrapper library.
	 *
	 * Defines ENABLE_MBSTRING (only when mbstring is usable) and PCRE_UTF8
	 * (either 'u' or an empty string), and configures mbstring for the
	 * configured client character set. Safe to call more than once: the
	 * constants are only defined if they do not exist yet.
	 * @return null
	 */
	static function init() {
		$clientCharset = strtolower_codesafe(Config::getVar('i18n', 'client_charset'));

		// Check if mbstring is installed (requires PHP >= 4.3.0)
		if (self::hasMBString()) {
			// mbstring routines are available
			if (!defined('ENABLE_MBSTRING')) {
				define('ENABLE_MBSTRING', true);
			}

			// Set up required ini settings for mbstring
			// FIXME Do any other mbstring settings need to be set?
			mb_internal_encoding($clientCharset);
			// The substitute character is an integer code point; a numeric
			// string such as '63' is not accepted by PHP 8.
			mb_substitute_character(63); // question mark
		}

		// Define modifier to be used in regexp_* routines
		// FIXME Should non-UTF-8 encodings be supported with mbstring?
		if (!defined('PCRE_UTF8')) {
			$useUtf8Modifier = ($clientCharset == 'utf-8' && self::hasPCREUTF8());
			define('PCRE_UTF8', $useUtf8Modifier ? 'u' : '');
		}
	}

	/**
	 * Check if server has the mbstring library.
	 * Currently requires PHP >= 4.3.0 (for mb_strtolower, mb_strtoupper,
	 * and mb_substr_count)
	 *
	 * The result is computed once per request and cached in a static local.
	 * @return boolean Returns true iff the server supports mbstring functions.
	 */
	static function hasMBString() {
		static $hasMBString;
		if (isset($hasMBString)) return $hasMBString;

		// If string overloading is active, it will break many of the
		// native implementations. mbstring.func_overload must be set
		// to 0, 1 or 4 in php.ini (string overloading disabled), i.e. the
		// MB_OVERLOAD_STRING bit must not be set. Mail (1) and regex (4)
		// overloading do not affect the native string functions.
		$overloadFlags = (int) ini_get('mbstring.func_overload');
		if (defined('MB_OVERLOAD_STRING') && ($overloadFlags & MB_OVERLOAD_STRING)) {
			$hasMBString = false;
			return $hasMBString;
		}

		$hasMBString = (
			extension_loaded('mbstring') &&
			function_exists('mb_strlen') &&
			function_exists('mb_strpos') &&
			function_exists('mb_strrpos') &&
			function_exists('mb_substr') &&
			function_exists('mb_strtolower') &&
			function_exists('mb_strtoupper') &&
			function_exists('mb_substr_count') &&
			function_exists('mb_send_mail')
		);
		return $hasMBString;
	}

	/**
	 * Check if server supports the PCRE_UTF8 modifier.
	 * @return boolean True iff the server supports the PCRE_UTF8 modifier.
	 */
	static function hasPCREUTF8() {
		// The PCRE_UTF8 modifier is only supported on PHP >= 4.1.0 (*nix) or PHP >= 4.2.3 (win32)
		// Evil check to see if PCRE_UTF8 is supported: an unsupported
		// modifier makes preg_match() fail (the warning is suppressed).
		return (bool) @preg_match('//u', '');
	}

	//
	// Wrappers for basic string manipulation routines.
	// See the phputf8 documentation for usage.
//

	/**
	 * Load the phputf8 core implementation matching the active backend:
	 * the mbstring-based core when ENABLE_MBSTRING is defined, otherwise
	 * the native (pure PHP) core together with its unicode utilities.
	 * @return boolean True iff the mbstring-based core was selected
	 */
	private static function _requireUtf8Core() {
		$useMbstring = defined('ENABLE_MBSTRING');
		if ($useMbstring) {
			require_once './lib/pkp/lib/phputf8/mbstring/core.php';
		} else {
			require_once './lib/pkp/lib/phputf8/utils/unicode.php';
			require_once './lib/pkp/lib/phputf8/native/core.php';
		}
		return $useMbstring;
	}

	/**
	 * @see http://ca.php.net/manual/en/function.strlen.php
	 * @param $string string Input string
	 * @return int String length
	 */
	static function strlen($string) {
		self::_requireUtf8Core();
		return utf8_strlen($string);
	}

	/**
	 * @see http://ca.php.net/manual/en/function.strpos.php
	 * @param $haystack string Input haystack to search
	 * @param $needle string Input needle to search for
	 * @param $offset int Offset at which to begin searching
	 * @return int Position of needle within haystack
	 */
	static function strpos($haystack, $needle, $offset = 0) {
		self::_requireUtf8Core();
		return utf8_strpos($haystack, $needle, $offset);
	}

	/**
	 * @see http://ca.php.net/manual/en/function.strrpos.php
	 * @param $haystack string Haystack to search
	 * @param $needle string Needle to search haystack for
	 * @return int String position of needle in haystack (starting from end of haystack)
	 */
	static function strrpos($haystack, $needle) {
		self::_requireUtf8Core();
		return utf8_strrpos($haystack, $needle);
	}

	/**
	 * @see http://ca.php.net/manual/en/function.substr.php
	 * @param $string string Subject to extract substring from
	 * @param $start int Position to start from
	 * @param $length int Length to extract, or false for entire string from start position
	 * @return string Substring of $string
	 */
	static function substr($string, $start, $length = false) {
		$usingMbstring = self::_requireUtf8Core();
		if (!$usingMbstring && $length === false) {
			// The default length value for the native implementation
			// differs
			$length = null;
		}
		return utf8_substr($string, $start, $length);
	}

	/**
	 * @see http://ca.php.net/manual/en/function.substr_replace.php
	 * @param $string string Source string to perform replacement upon
	 * @param $replacement string Replacement to move into $string
	 * @param $start int Start location for replacement
	 * @param $length int Number of characters to replace in source string with $replacement
	 * @return string String resulting from replacement
	 */
	static function substr_replace($string, $replacement, $start, $length = null) {
		// Without the mbstring extension, defer to PHP's own (byte-based)
		// substr_replace().
		if (extension_loaded('mbstring') !== true) {
			if (is_null($length) === true) {
				return substr_replace($string, $replacement, $start);
			}
			return substr_replace($string, $replacement, $start, $length);
		}

		$total = self::strlen($string);

		// Clamp the start position into [0, $total]; negative values
		// count from the end of the string.
		if ($start < 0) {
			$start = max(0, $total + $start);
		} else if ($start > $total) {
			$start = $total;
		}

		// Normalize the length: negative values stop that many characters
		// before the end; null or oversized values mean "to the end".
		if ($length < 0) {
			$length = max(0, $total - $start + $length);
		} else if ((is_null($length) === true) || ($length > $total)) {
			$length = $total;
		}
		if (($start + $length) > $total) {
			$length = $total - $start;
		}

		$head = self::substr($string, 0, $start);
		$tail = self::substr($string, $start + $length, $total - $start - $length);
		return $head . $replacement . $tail;
	}

	/**
	 * @see http://ca.php.net/manual/en/function.strtolower.php
	 * @param $string string Input string
	 * @return string Lower case version of input string
	 */
	static function strtolower($string) {
		self::_requireUtf8Core();
		return utf8_strtolower($string);
	}

	/**
	 * @see http://ca.php.net/manual/en/function.strtoupper.php
	 * @param $string string Input string
	 * @return string Upper case version of input string
	 */
	static function strtoupper($string) {
		self::_requireUtf8Core();
		return utf8_strtoupper($string);
	}

	/**
	 * @see http://ca.php.net/manual/en/function.ucfirst.php
	 * @param $string string Input string
	 * @return string ucfirst version of input string
	 */
	static function ucfirst($string) {
		self::_requireUtf8Core();
		require_once './lib/pkp/lib/phputf8/ucfirst.php';
		return utf8_ucfirst($string);
	}

	/**
	 * @see http://ca.php.net/manual/en/function.substr_count.php
	 * @param $haystack string Input string to search
	 * @param $needle string String to search within $haystack for
	 * @return int Count of number of times $needle appeared in $haystack
	 */
	static function substr_count($haystack, $needle) {
		return defined('ENABLE_MBSTRING')
			? mb_substr_count($haystack, $needle) // Requires PHP >= 4.3.0
			: substr_count($haystack, $needle);
	}

	/**
	 * @see
https://www.php.net/manual/en/function.mb-encode-mimeheader.php
	 * @param $string string Input MIME header to encode.
	 * @return string Encoded MIME header (returned unchanged when mbstring is not enabled).
	 */
	static function encode_mime_header($string) {
		if (!defined('ENABLE_MBSTRING')) {
			return $string;
		}
		return mb_encode_mimeheader($string, mb_internal_encoding(), 'B', MAIL_EOL);
	}

	//
	// Wrappers for PCRE-compatible regular expression routines.
	// See the php.net documentation for usage.
	//

	/**
	 * @see https://www.php.net/manual/en/function.preg-quote.php
	 * @param $string string String to quote
	 * @param $delimiter string Delimiter for regular expression
	 * @return string Quoted equivalent of $string
	 */
	static function regexp_quote($string, $delimiter = '/') {
		return preg_quote($string, $delimiter);
	}

	/**
	 * @see https://www.php.net/manual/en/function.preg-grep.php
	 * @param $pattern string Regular expression
	 * @param $input array Input strings to filter (preg_grep operates on arrays)
	 * @return array Entries of $input matching $pattern, keys preserved
	 */
	static function regexp_grep($pattern, $input) {
		if (PCRE_UTF8) {
			if (is_array($input)) {
				// The UTF-8 helpers work on single strings, so sanitize
				// each element rather than the array as a whole.
				foreach ($input as $key => $value) {
					if (is_string($value) && !self::utf8_compliant($value)) {
						$input[$key] = self::utf8_bad_strip($value);
					}
				}
			} elseif (!self::utf8_compliant($input)) {
				// Legacy path for non-array input, kept as before.
				$input = self::utf8_bad_strip($input);
			}
		}
		return preg_grep($pattern . PCRE_UTF8, $input);
	}

	/**
	 * @see https://www.php.net/manual/en/function.preg-match.php
	 * @param $pattern string Regular expression
	 * @param $subject string String to apply regular expression to
	 * @return int
	 */
	static function regexp_match($pattern, $subject) {
		// Invalid UTF-8 makes PCRE fail in "u" mode, so strip bad bytes first.
		if (PCRE_UTF8 && !self::utf8_compliant($subject)) {
			$subject = self::utf8_bad_strip($subject);
		}
		return preg_match($pattern . PCRE_UTF8, $subject);
	}

	/**
	 * @see https://www.php.net/manual/en/function.preg-match.php
	 * @param $pattern string Regular expression
	 * @param $subject string String to apply regular expression to
	 * @param $matches array Reference to receive matches
	 * @return int|boolean Returns 1 if the pattern matches given subject, 0 if it does not, or FALSE if an error occurred.
*/
	static function regexp_match_get($pattern, $subject, &$matches) {
		// NOTE: This function was created since PHP < 5.x does not support optional reference parameters
		$subjectIsDirty = PCRE_UTF8 && !self::utf8_compliant($subject);
		if ($subjectIsDirty) {
			$subject = self::utf8_bad_strip($subject);
		}
		$fullPattern = $pattern . PCRE_UTF8;
		return preg_match($fullPattern, $subject, $matches);
	}

	/**
	 * @see https://www.php.net/manual/en/function.preg-match-all.php
	 * @param $pattern string Regular expression
	 * @param $subject string String to apply regular expression to
	 * @param $matches array Reference to receive matches
	 * @return int|boolean Returns number of full matches of given subject, or FALSE if an error occurred.
	 */
	static function regexp_match_all($pattern, $subject, &$matches) {
		$subjectIsDirty = PCRE_UTF8 && !self::utf8_compliant($subject);
		if ($subjectIsDirty) {
			$subject = self::utf8_bad_strip($subject);
		}
		$fullPattern = $pattern . PCRE_UTF8;
		return preg_match_all($fullPattern, $subject, $matches);
	}

	/**
	 * @see https://www.php.net/manual/en/function.preg-replace.php
	 * @param $pattern string Regular expression
	 * @param $replacement string String to replace matches in $subject with
	 * @param $subject string String to apply regular expression to
	 * @param $limit int Number of replacements to perform, maximum, or -1 for no limit.
	 * @return mixed
	 */
	static function regexp_replace($pattern, $replacement, $subject, $limit = -1) {
		$subjectIsDirty = PCRE_UTF8 && !self::utf8_compliant($subject);
		if ($subjectIsDirty) {
			$subject = self::utf8_bad_strip($subject);
		}
		$fullPattern = $pattern . PCRE_UTF8;
		return preg_replace($fullPattern, $replacement, $subject, $limit);
	}

	/**
	 * @see https://www.php.net/manual/en/function.preg-replace-callback.php
	 * @param $pattern string Regular expression
	 * @param $callback callback PHP callback to generate content to replace matches with
	 * @param $subject string String to apply regular expression to
	 * @param $limit int Number of replacements to perform, maximum, or -1 for no limit.
* @return mixed
	 */
	static function regexp_replace_callback($pattern, $callback, $subject, $limit = -1) {
		// Invalid UTF-8 makes PCRE fail in "u" mode, so strip bad bytes first.
		if (PCRE_UTF8 && !self::utf8_compliant($subject)) {
			$subject = self::utf8_bad_strip($subject);
		}
		return preg_replace_callback($pattern . PCRE_UTF8, $callback, $subject, $limit);
	}

	/**
	 * @see https://www.php.net/manual/en/function.preg-split.php
	 * @param $pattern string Regular expression
	 * @param $subject string String to apply regular expression to
	 * @param $limit int Number of times to match; -1 for unlimited
	 * @return array Resulting string segments
	 */
	static function regexp_split($pattern, $subject, $limit = -1) {
		if (PCRE_UTF8 && !self::utf8_compliant($subject)) {
			$subject = self::utf8_bad_strip($subject);
		}
		return preg_split($pattern . PCRE_UTF8, $subject, $limit);
	}

	/**
	 * Detect the MIME type of a file.
	 *
	 * Tries, in order: the fileinfo extension, PHP's mime_content_type(),
	 * and finally the external "file" command. The detected type is then
	 * checked against a table of known-ambiguous (extension, type) pairs.
	 * @see https://www.php.net/manual/en/function.mime-content-type.php
	 * @param $filename string Filename to test.
	 * @param $suggestedExtension string Suggested file extension (used for common misconfigurations)
	 * @return string Detected MIME type
	 */
	static function mime_content_type($filename, $suggestedExtension = '') {
		$result = null;

		if (function_exists('finfo_open')) {
			// The fileinfo resource is cached in the registry (by reference).
			$fi =& Registry::get('fileInfo', true, null);
			if ($fi === null) {
				$fi = finfo_open(FILEINFO_MIME, Config::getVar('finfo', 'mime_database_path'));
			}
			if ($fi !== false) {
				// Keep only the type; drop any "; charset=..." suffix.
				$result = strtok(finfo_file($fi, $filename), ' ;');
			}
		}

		if (!$result && function_exists('mime_content_type')) {
			$result = mime_content_type($filename);
			// mime_content_type appears to return a charset
			// (erroneously?) in recent versions of PHP5
			if (($i = strpos($result, ';')) !== false) {
				$result = trim(substr($result, 0, $i));
			}
		}

		if (!$result) {
			// Fall back on an external "file" tool
			$f = escapeshellarg($filename);
			$result = trim(`file --brief --mime $f`);
			// Make sure we just return the mime type.
			if (($i = strpos($result, ';')) !== false) {
				$result = trim(substr($result, 0, $i));
			}
		}

		// Check ambiguous mimetypes against extension.
		// array_pop() takes its argument by reference, so the exploded
		// parts must be held in a variable first.
		if ($suggestedExtension) {
			$ext = $suggestedExtension;
		} else {
			$filenameParts = explode('.', $filename);
			$ext = array_pop($filenameParts);
		}

		// SUGGESTED_EXTENSION:DETECTED_MIME_TYPE => OVERRIDE_MIME_TYPE
		$ambiguities = array(
			'css:text/x-c' => 'text/css',
			'css:text/plain' => 'text/css',
			'xlsx:application/zip' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
			'xltx:application/zip' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
			'potx:application/zip' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
			'ppsx:application/zip' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
			'pptx:application/zip' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
			'sldx:application/zip' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
			'docx:application/zip' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
			'dotx:application/zip' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
		);
		$ambiguityKey = strtolower($ext.':'.$result);
		if (isset($ambiguities[$ambiguityKey])) {
			$result = $ambiguities[$ambiguityKey];
		}

		return $result;
	}

	/**
	 * Strip unsafe HTML from the input text. Covers XSS attacks like scripts,
	 * onclick(...) attributes, javascript: urls, and special characters.
* @param $input string input string
	 * @return string
	 */
	static function stripUnsafeHtml($input) {
		// The purifier is configured once and reused across calls.
		static $purifier;
		if (!isset($purifier)) {
			$config = HTMLPurifier_Config::createDefault();
			$config->set('Core.Encoding', Config::getVar('i18n', 'client_charset'));
			$config->set('HTML.Doctype', 'HTML 4.01 Transitional');
			$config->set('HTML.Allowed', Config::getVar('security', 'allowed_html'));
			$config->set('Cache.SerializerPath', 'cache');
			$purifier = new HTMLPurifier($config);
		}
		return $purifier->purify($input);
	}

	/**
	 * Convert limited HTML into a string.
	 *
	 * Paragraph tags, closing list items and line breaks become newlines,
	 * opening list items become bullets; all remaining tags are stripped
	 * and HTML entities are decoded as UTF-8.
	 * @param $html string
	 * @return string
	 */
	static function html2text($html) {
		$html = self::regexp_replace('/<[\/]?p>/', "\n", $html);
		// NOTE(review): the two tag patterns below (<li> and <br>) were
		// restored from garbled source text; confirm against the upstream file.
		$html = self::regexp_replace('/<li>/', '&bull; ', $html);
		$html = self::regexp_replace('/<\/li>/', "\n", $html);
		$html = self::regexp_replace('/<br[ ]?[\/]?>/', "\n", $html);
		$html = html_entity_decode(strip_tags($html), ENT_COMPAT, 'UTF-8');
		return $html;
	}

	//
	// Wrappers for UTF-8 validation routines
	// See the phputf8 documentation for usage.
	//

	/**
	 * Check a string for UTF-8 validity using phputf8's utf8_is_valid()
	 * (see lib/phputf8/utils/validation.php for the exact rules).
	 * @param $str string input string
	 * @return boolean
	 */
	static function utf8_is_valid($str) {
		require_once './lib/pkp/lib/phputf8/utils/validation.php';
		return utf8_is_valid($str);
	}

	/**
	 * Tests whether a string complies as UTF-8; faster and less strict than utf8_is_valid
	 * see lib/phputf8/utils/validation.php for more details
	 * @param $str string input string
	 * @return boolean
	 */
	static function utf8_compliant($str) {
		require_once './lib/pkp/lib/phputf8/utils/validation.php';
		return utf8_compliant($str);
	}

	/**
	 * Locates the first bad byte in a UTF-8 string, returning its byte index in the string
	 * @param $str string input string
	 * @return mixed Result of phputf8's utf8_bad_find() (presumably a byte index, or false when no bad byte exists -- verify against the library)
	 */
	static function utf8_bad_find($str) {
		require_once './lib/pkp/lib/phputf8/utils/bad.php';
		return utf8_bad_find($str);
	}

	/**
	 * Strips out any bad bytes from a UTF-8 string and returns the rest
	 * @param $str string input string
	 * @return string
	 */
	static function utf8_bad_strip($str) {
		require_once './lib/pkp/lib/phputf8/utils/bad.php';
		return utf8_bad_strip($str);
	}

	/**
	 * Replace bad bytes with an alternative character - ASCII character
	 * @param $str string input string
	 * @param $replace string optional replacement character (defaults to '?')
	 * @return string
	 */
	static function utf8_bad_replace($str, $replace = '?') {
		require_once './lib/pkp/lib/phputf8/utils/bad.php';
		return utf8_bad_replace($str, $replace);
	}

	/**
	 * Wrapper for phputf8's utf8_strip_ascii_ctrl() (utils/ascii.php);
	 * presumably removes ASCII control characters -- verify against the library.
	 * @param $str string input string
	 * @return string
	 */
	static function utf8_strip_ascii_ctrl($str) {
		require_once './lib/pkp/lib/phputf8/utils/ascii.php';
		return utf8_strip_ascii_ctrl($str);
	}

	/**
	 *
Normalize a string in an unknown (non-UTF8) encoding into a valid UTF-8 sequence * @param $str string input string * @return string */ static function utf8_normalize($str) { import('lib.pkp.classes.core.Transcoder'); if (self::hasMBString()) { // NB: CP-1252 often segfaults; we've left it out here but it will detect as 'ISO-8859-1' $mb_encoding_order = 'UTF-8, UTF-7, ASCII, ISO-8859-1, EUC-JP, SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP'; $detected_encoding = mb_detect_encoding($str, $mb_encoding_order, false); } elseif (function_exists('iconv') && strlen(iconv('CP1252', 'UTF-8', $str)) != strlen(iconv('ISO-8859-1', 'UTF-8', $str))) { // use iconv to detect CP-1252, assuming default ISO-8859-1 $detected_encoding = 'CP1252'; } else { // assume ISO-8859-1, PHP default $detected_encoding = 'ISO-8859-1'; } // transcode CP-1252/ISO-8859-1 into HTML entities; this works because CP-1252 is mapped onto ISO-8859-1 if ('ISO-8859-1' == $detected_encoding || 'CP1252' == $detected_encoding) { $trans = new Transcoder('CP1252', 'HTML-ENTITIES'); $str = $trans->trans($str); } // transcode from detected encoding to to UTF-8 $trans = new Transcoder($detected_encoding, 'UTF-8'); $str = $trans->trans($str); return $str; } /** * US-ASCII transliterations of Unicode text * @param $str string input string * @return string */ static function utf8_to_ascii($str) { require_once('./lib/pkp/lib/phputf8/utf8_to_ascii.php'); return utf8_to_ascii($str); } /** * Return an associative array of named->numeric HTML entities * Required to support HTML functions without objects in PHP4/PHP5 * From php.net: function.get-html-translation-table.php * @return string */ static function getHTMLEntities () { // define the conversion table $html_entities = array( "Á" => "Á", "á" => "á", "Â" => "Â", "â" => "â", "´" => "´", "Æ" => "Æ", "æ" => "æ", "À" => "À", "à" => "à", "ℵ" => "ℵ","Α" => "Α", "α" => "α", "&" => "&", "∧" => "∧", "∠" => "∠", "'" => "'", "Å" => "Å", "å" => "å", "≈" => "≈", "Ã" => "Ã", "ã" => "ã", 
"Ä" => "Ä", "ä" => "ä", "„" => "„", "Β" => "Β", "β" => "β", "¦" => "¦", "•" => "•", "∩" => "∩", "Ç" => "Ç", "ç" => "ç", "¸" => "¸", "¢" => "¢", "Χ" => "Χ", "χ" => "χ", "ˆ" => "^", "♣" => "♣", "≅" => "≅", "©" => "©", "↵" => "↵", "∪" => "∪", "¤" => "¤", "†" => "†","‡" => "‡", "↓" => "↓", "⇓" => "⇓", "°" => "°", "Δ" => "Δ", "δ" => "δ", "♦" => "♦", "÷" => "÷", "É" => "É", "é" => "é", "Ê" => "Ê", "ê" => "ê", "È" => "È", "è" => "è", "∅" => "∅", " " => " ", " " => " ", "Ε" => "Ε","ε" => "ε","≡" => "≡", "Η" => "Η", "η" => "η", "Ð" => "Ð", "ð" => "ð", "Ë" => "Ë", "ë" => "ë", "€" => "€", "∃" => "∃", "ƒ" => "ƒ", "∀" => "∀","½" => "½", "¼" => "¼", "¾" => "¾", "⁄" => "⁄", "Γ" => "Γ", "γ" => "γ", "≥" => "≥", ">" => ">", "↔" => "↔", "⇔" => "⇔", "♥" => "♥", "…" => "…","Í" => "Í", "í" => "í", "Î" => "Î", "î" => "î", "¡" => "¡", "Ì" => "Ì", "ì" => "ì", "ℑ" => "ℑ", "∞" => "∞", "∫" => "∫", "Ι" => "Ι", "ι" => "ι", "¿" => "¿", "∈" => "∈", "Ï" => "Ï", "ï" => "ï", "Κ" => "Κ", "κ" => "κ", "Λ" => "Λ", "λ" => "λ", "⟨" => "〈", "«" => "«", "←" => "←", "⇐" => "⇐", "⌈" => "⌈", "“" => "“", "≤" => "≤", "⌊" => "⌊", "∗" => "∗","◊" => "◊", "‎" => "‎", "‹" => "‹","‘" => "‘", "<" => "<", "¯" => "¯", "—" => "—", "µ" => "µ", "·" => "·", "−" => "-", "Μ" => "Μ", "μ" => "μ", "∇" => "∇", " " => " ", "–" => "–", "≠" => "≠", "∋" => "∋", "¬" => "¬", "∉" => "∉", "⊄" => "⊄", "Ñ" => "Ñ", "ñ" => "ñ", "Ν" => "Ν", "ν" => "ν", "Ó" => "Ó", "ó" => "ó", "Ô" => "Ô", "ô" => "ô", "Œ" => "Œ", "œ" => "œ", "Ò" => "Ò", "ò" => "ò", "‾" => "‾", "Ω" => "Ω", "ω" => "ω", "Ο" => "Ο","ο" => "ο","⊕" => "⊕", "∨" => "∨", "ª" => "ª", "º" => "º", "Ø" => "Ø", "ø" => "ø", "Õ" => "Õ", "õ" => "õ", "⊗" => "⊗","Ö" => "Ö", "ö" => "ö", "¶" => "¶", "∂" => "∂", "‰" => "‰","⊥" => "⊥", "Φ" => "Φ", "φ" => "φ", "Π" => "Π", "π" => "π", "ϖ" => "ϖ", "±" => "±", "£" => "£", "′" => "′", "″" => "″", "∏" => "∏", "∝" => "∝", "Ψ" => "Ψ", "ψ" => "ψ", """ => """, "√" => "√", "⟩" => "〉", "»" => "»", "→" => "→", "⇒" => "⇒", "⌉" => "⌉", "”" => "”", "ℜ" => "ℜ", "®" => 
"®", "⌋" => "⌋","Ρ" => "Ρ", "ρ" => "ρ", "‏" => "‏", "›" => "›", "’" => "’", "‚" => "‚", "Š" => "Š", "š" => "š", "⋅" => "⋅", "§" => "§", "­" => "­", "Σ" => "Σ", "σ" => "σ", "ς" => "ς", "∼" => "∼", "♠" => "♠", "⊂" => "⊂", "⊆" => "⊆", "∑" => "∑", "¹" => "¹", "²" => "²", "³" => "³", "⊃" => "⊃", "⊇" => "⊇", "ß" => "ß", "Τ" => "Τ", "τ" => "τ", "∴" => "∴", "Θ" => "Θ", "θ" => "θ", "ϑ" => "ϑ", " " => " ","Þ" => "Þ", "þ" => "þ", "˜" => "~", "×" => "×", "™" => "™", "Ú" => "Ú", "ú" => "ú", "↑" => "↑", "⇑" => "⇑", "Û" => "Û", "û" => "û", "Ù" => "Ù", "ù" => "ù", "¨" => "¨", "ϒ" => "ϒ", "Υ" => "Υ","υ" => "υ", "Ü" => "Ü", "ü" => "ü", "℘" => "℘", "Ξ" => "Ξ", "ξ" => "ξ", "Ý" => "Ý", "ý" => "ý", "¥" => "¥", "ÿ" => "ÿ", "Ÿ" => "Ÿ", "Ζ" => "Ζ", "ζ" => "ζ", "‍" => "‍", "‌" => "‌" ); return $html_entities; } /** * Trim punctuation from a string * @param $string string input string * @return string the trimmed string */ static function trimPunctuation($string) { return trim($string, ' ,.;:!?&()[]\\/'); } /** * Convert a string to proper title case * @param $title string * @return string */ static function titleCase($title) { AppLocale::requireComponents(LOCALE_COMPONENT_PKP_COMMON); $smallWords = explode(' ', __('common.titleSmallWords')); $words = explode(' ', $title); foreach ($words as $key => $word) { if ($key == 0 or !in_array(self::strtolower($word), $smallWords)) { $words[$key] = ucfirst(self::strtolower($word)); } else { $words[$key] = self::strtolower($word); } } $newTitle = implode(' ', $words); return $newTitle; } /** * Joins two title string fragments (in $fields) either with a * space or a colon. * @param $fields array * @return string the joined string */ static function concatTitleFields($fields) { // Set the characters that will avoid the use of // a semicolon between title and subtitle. $avoidColonChars = array('?', '!', '/', '&'); // if the first field ends in a character in $avoidColonChars, // concat with a space, otherwise use a colon. 
// Check for any of these characters in // the last position of current full title value. if (in_array(substr($fields[0], -1, 1), $avoidColonChars)) { $fullTitle = join(' ', $fields); } else { $fullTitle = join(': ', $fields); } return $fullTitle; } /** * Iterate over an array of delimiters and see whether * it exists in the given input string. If so, then use * it to explode the string into an array. * @param $delimiters array * @param $input string * @return array */ static function iterativeExplode($delimiters, $input) { // Run through the delimiters and try them out // one by one. foreach($delimiters as $delimiter) { if (strstr($input, $delimiter) !== false) { return explode($delimiter, $input); } } // If none of the delimiters works then return // the original string as an array. return (array($input)); } /** * Transform "handler-class" to "HandlerClass" * and "my-op" to "myOp". * @param $string input string * @param $type which kind of camel case? * @return string the string in camel case */ static function camelize($string, $type = CAMEL_CASE_HEAD_UP) { assert($type == CAMEL_CASE_HEAD_UP || $type == CAMEL_CASE_HEAD_DOWN); // Transform "handler-class" to "HandlerClass" and "my-op" to "MyOp" $string = str_replace(' ', '', ucwords(str_replace('-', ' ', $string))); // Transform "MyOp" to "myOp" if ($type == CAMEL_CASE_HEAD_DOWN) { // lcfirst() is PHP>5.3, so use workaround $string = strtolower(substr($string, 0, 1)).substr($string, 1); } return $string; } /** * Transform "HandlerClass" to "handler-class" * and "myOp" to "my-op". 
* @param $string
	 * @return string
	 */
	static function uncamelize($string) {
		assert(!empty($string));

		// Transform "myOp" to "MyOp"
		$string = ucfirst($string);

		// Insert hyphens between words and return the string in lowercase
		$words = array();
		self::regexp_match_all('/[A-Z][a-z0-9]*/', $string, $words);
		// The matched words must cover the whole input, i.e. the string
		// may only consist of ASCII letters and digits.
		assert(isset($words[0]) && !empty($words[0]) && strlen(implode('', $words[0])) == strlen($string));
		return strtolower(implode('-', $words[0]));
	}

	/**
	 * Calculate the differences between two strings and
	 * produce an array with three types of entries: added
	 * substrings, deleted substrings and unchanged substrings.
	 *
	 * The calculation is optimized to identify the common
	 * largest substring. The strings before and after that
	 * substring are then diffed recursively.
	 *
	 * The return value is an array of the following format:
	 *
	 * array(
	 *   array( diff-type => substring ),
	 *   array(...)
	 * )
	 *
	 * whereby diff-type can be one of:
	 *   -1 = deletion
	 *   0 = common substring
	 *   1 = addition
	 *
	 * @param $originalString string
	 * @param $editedString string
	 * @return array
	 */
	static function diff($originalString, $editedString) {
		// Split strings into character arrays (multi-byte compatible).
		// The "s" modifier lets "." match newlines as well; without it
		// newlines would be dropped and the array offsets would no longer
		// line up with the substr() offsets used further down.
		$matches = array();
		self::regexp_match_all('/./s', $originalString, $matches);
		$originalStringCharacters = isset($matches[0]) ? $matches[0] : array();

		$matches = array();
		self::regexp_match_all('/./s', $editedString, $matches);
		$editedStringCharacters = isset($matches[0]) ? $matches[0] : array();

		// Determine the length of the strings.
		$originalStringLength = count($originalStringCharacters);
		$editedStringLength = count($editedStringCharacters);

		// Is there anything to compare?
		if ($originalStringLength == 0 && $editedStringLength == 0) return array();

		// Is the original string empty?
		if ($originalStringLength == 0) {
			// Return the edited string as addition.
			return array(array(1 => $editedString));
		}

		// Is the edited string empty?
		if ($editedStringLength == 0) {
			// Return the original string as deletion.
			return array(array(-1 => $originalString));
		}

		// Initialize the local indices:
		// 1) Create a character index for the edited string
		//    (character => list of positions where it occurs).
		$characterIndex = array();
		for ($characterPosition = 0; $characterPosition < $editedStringLength; $characterPosition++) {
			$characterIndex[$editedStringCharacters[$characterPosition]][] = $characterPosition;
		}

		// 2) Initialize the substring and the length index.
		//    Both are keyed by "<original end position>-<edited end position>";
		//    the length index is additionally grouped by substring length.
		$substringIndex = $lengthIndex = array();

		// Iterate over the original string to identify
		// the largest common string.
		for ($originalPosition = 0; $originalPosition < $originalStringLength; $originalPosition++) {
			// Find all occurrences of the original character
			// in the target string.
			$comparedCharacter = $originalStringCharacters[$originalPosition];

			// Do we have a commonality between the original string
			// and the edited string?
			if (!isset($characterIndex[$comparedCharacter])) continue;

			// Loop over all commonalities.
			foreach ($characterIndex[$comparedCharacter] as $editedPosition) {
				// Calculate the current and the preceding position
				// ids for indexation.
				$currentPosition = $originalPosition . '-' . $editedPosition;
				$previousPosition = ($originalPosition-1) . '-' . ($editedPosition-1);

				// Does the occurrence in the target string continue
				// an existing common substring or does it start
				// a new one?
				if (isset($substringIndex[$previousPosition])) {
					// This is a continuation of an existing common
					// substring...
					$newSubstring = $substringIndex[$previousPosition].$comparedCharacter;
					$newSubstringLength = self::strlen($newSubstring);

					// Move the substring in the substring index.
					$substringIndex[$currentPosition] = $newSubstring;
					unset($substringIndex[$previousPosition]);

					// Move the substring in the length index.
					$lengthIndex[$newSubstringLength][$currentPosition] = $newSubstring;
					unset($lengthIndex[$newSubstringLength - 1][$previousPosition]);
				} else {
					// Start a new common substring...
					// Add the substring to the substring index.
					$substringIndex[$currentPosition] = $comparedCharacter;

					// Add the substring to the length index.
					$lengthIndex[1][$currentPosition] = $comparedCharacter;
				}
			}
		}

		// If we have no commonalities at all then mark the original
		// string as deleted and the edited string as added and
		// return.
		if (empty($lengthIndex)) {
			return array(
				array( -1 => $originalString ),
				array( 1 => $editedString )
			);
		}

		// Pop the largest common substrings from the length index.
		// Length keys are created in ascending order, so the last key
		// is the largest length.
		end($lengthIndex);
		$largestSubstringLength = key($lengthIndex);

		// Take the first common substring if we have more than
		// one substring with the same length.
		// FIXME: Find a better heuristic for this decision.
		reset($lengthIndex[$largestSubstringLength]);
		$largestSubstringPosition = key($lengthIndex[$largestSubstringLength]);
		list($largestSubstringEndOriginal, $largestSubstringEndEdited) = explode('-', $largestSubstringPosition);
		$largestSubstring = $lengthIndex[$largestSubstringLength][$largestSubstringPosition];

		// Add the largest common substring to the result set
		$diffResult = array(array( 0 => $largestSubstring ));

		// Prepend the diff of the substrings before the common substring
		// to the result diff (by recursion).
		$precedingSubstringOriginal = self::substr($originalString, 0, $largestSubstringEndOriginal-$largestSubstringLength+1);
		$precedingSubstringEdited = self::substr($editedString, 0, $largestSubstringEndEdited-$largestSubstringLength+1);
		$diffResult = array_merge(self::diff($precedingSubstringOriginal, $precedingSubstringEdited), $diffResult);

		// Append the diff of the substrings after the common substring
		// to the result diff (by recursion).
		$succeedingSubstringOriginal = self::substr($originalString, $largestSubstringEndOriginal+1);
		$succeedingSubstringEdited = self::substr($editedString, $largestSubstringEndEdited+1);
		$diffResult = array_merge($diffResult, self::diff($succeedingSubstringOriginal, $succeedingSubstringEdited));

		// Return the array representing the diff.
		return $diffResult;
	}

	/**
	 * Get a letter $steps places after 'A'
	 * @param $steps int
	 * @return string Letter
	 */
	static function enumerateAlphabetically($steps) {
		return chr(ord('A') + $steps);
	}

	/**
	 * Create a new UUID (version 4)
	 *
	 * The result is an upper-case, hyphenated 8-4-4-4-12 hex string.
	 * Random data comes from random_bytes() where available; the global
	 * Mersenne Twister is no longer re-seeded as a side effect.
	 * @return string
	 */
	static function generateUUID() {
		if (function_exists('random_bytes')) {
			$bytes = random_bytes(16);
		} else {
			// Fallback for PHP versions without random_bytes().
			$bytes = '';
			for ($i = 0; $i < 16; $i++) {
				$bytes .= chr(mt_rand(0, 255));
			}
		}

		// Force the version nibble to 4 and the variant bits to binary 10,
		// so the first variant digit is one of 8, 9, A or B.
		$bytes[6] = chr((ord($bytes[6]) & 0x0f) | 0x40);
		$bytes[8] = chr((ord($bytes[8]) & 0x3f) | 0x80);

		$charid = strtoupper(bin2hex($bytes));
		$hyphen = '-';
		return substr($charid, 0, 8).$hyphen
			.substr($charid, 8, 4).$hyphen
			.substr($charid, 12, 4).$hyphen
			.substr($charid, 16, 4).$hyphen
			.substr($charid, 20, 12);
	}

	/**
	 * Matches each symbol of PHP strftime format string
	 * to jQuery Datepicker widget date format.
	 *
	 * Symbols without a Datepicker equivalent (and the '%' marker
	 * itself) are dropped; characters not in the table are copied
	 * through; backslash-escaped characters are emitted as quoted
	 * literal text.
	 * @param $phpFormat string
	 * @return string
	 */
	static function dateformatPHP2JQueryDatepicker($phpFormat) {
		$symbols = array(
			// Day
			'a' => 'D',	// date() format 'D'
			'A' => 'DD',	// date() format 'DD'
			'd' => 'dd',	// date() format 'd'
			'e' => 'd',	// date() format 'j'
			'j' => 'oo',	// date() format none
			'u' => '',	// date() format 'N'
			'w' => '',	// date() format 'w'

			// Week
			'U' => '',	// date() format none
			'V' => '',	// date() format none
			'W' => '',	// date() format 'W'

			// Month
			'b' => 'M',	// date() format 'M'
			'h' => 'M',	// date() format 'M'
			'B' => 'MM',	// date() format 'F'
			'm' => 'mm',	// date() format 'm'

			// Year
			'C' => '',	// date() format none
			'g' => 'y',	// date() format none
			'G' => 'yy',	// date() format 'o'
			'y' => 'y',	// date() format 'y'
			'Y' => 'yy',	// date() format 'Y'

			// Time
			'H' => '',	// date() format 'H'
			'k' => '',	// date() format none
			'I' => '',	// date() format 'h'
			'l' => '',	// date() format 'g'
			'P' => '',	// date() format 'a'
			'p' => '',	// date() format 'A'
			'M' => '',	// date() format 'i'
			'S' => '',	// date() format 's'
			's' => '',	// date() format 'u'

			// Timezone
			'z' => '',	// date() format 'O'
			'Z' => '',	// date() format 'T'

			// Full Date/Time
			'r' => '',	// date() format none
			'R' => '',	// date() format none
			'X' => '',	// date() format none
			'D' => '',	// date() format none
			'F' => '',	// date() format none
			'x' => '',	// date() format none
			'c' => '',	// date() format none

			// Other
			'%' => ''
		);

		$datepickerFormat = "";
		$escaping = false;
		$formatLength = strlen($phpFormat);

		for ($i = 0; $i < $formatLength; $i++) {
			$char = $phpFormat[$i];
			if ($char === '\\') {
				// Consume the escaped character. A trailing backslash has
				// nothing after it, so guard against reading past the end.
				$i++;
				$escapedChar = ($i < $formatLength) ? $phpFormat[$i] : '';
				$datepickerFormat .= $escaping ? $escapedChar : '\'' . $escapedChar;
				$escaping = true;
			} else {
				if ($escaping) {
					// Close the quoted literal opened by the escape(s).
					$datepickerFormat .= "'";
					$escaping = false;
				}
				$datepickerFormat .= isset($symbols[$char]) ? $symbols[$char] : $char;
			}
		}

		return $datepickerFormat;
	}
}

?>