| [ Index ] |
PHP Cross Reference of Joomla 2.5.4 DE |
[Summary view] [Print] [Text view]
<?php
/**
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 *
 * @copyright   Copyright (C) 2005 - 2012 Open Source Matters, Inc. All rights reserved.
 * @license     GNU General Public License version 2 or later; see LICENSE
 */

defined('_JEXEC') or die;

// Register dependent classes.
JLoader::register('FinderIndexerStemmer', dirname(__FILE__) . '/stemmer.php');
JLoader::register('FinderIndexerToken', dirname(__FILE__) . '/token.php');

/**
 * Helper class for the Finder indexer package.
 *
 * Every method is static. Several of them memoise their results in
 * function-level static variables for the lifetime of the request.
 *
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 * @since       2.5
 */
class FinderIndexerHelper
{
	/**
	 * The token stemmer object. The stemmer is set by whatever class
	 * wishes to use it but it must be an instance of FinderIndexerStemmer.
	 *
	 * @var		FinderIndexerStemmer
	 * @since	2.5
	 */
	public static $stemmer;

	/**
	 * Method to parse input into plain text.
	 *
	 * @param   string  $input   The raw input.
	 * @param   string  $format  The format of the input. [optional]
	 *
	 * @return  string  The parsed input.
	 *
	 * @since   2.5
	 * @throws  Exception on invalid parser.
	 */
	public static function parse($input, $format = 'html')
	{
		// Get a parser for the specified format and parse the input.
		return FinderIndexerParser::getInstance($format)->parse($input);
	}

	/**
	 * Method to tokenize a text string.
	 *
	 * The input is lower-cased, normalized with a series of regular
	 * expressions and split on spaces. Unless the input is handled as a
	 * phrase, one token is created per term and derived two and three word
	 * phrase tokens are appended after them. Results for inputs shorter than
	 * 128 characters are cached per input/language/phrase combination.
	 *
	 * @param   string   $input   The input to tokenize.
	 * @param   string   $lang    The language of the input.
	 * @param   boolean  $phrase  Flag to indicate whether input could be a phrase. [optional]
	 *
	 * @return  mixed  An array of FinderIndexerToken objects when more than one
	 *                 token was produced, otherwise the single FinderIndexerToken
	 *                 (or null when there is none).
	 *
	 * @since   2.5
	 */
	public static function tokenize($input, $lang, $phrase = false)
	{
		static $cache;
		$store = JString::strlen($input) < 128 ? md5($input . '::' . $lang . '::' . $phrase) : null;

		// Check if the string has been tokenized already.
		if ($store && isset($cache[$store]))
		{
			return $cache[$store];
		}

		$tokens = array();

		// Left and right single quotation marks plus the ASCII apostrophe.
		$quotes = html_entity_decode('&#8216;&#8217;&#39;', ENT_QUOTES, 'UTF-8');

		// Get the simple language key.
		$lang = self::getPrimaryLanguage($lang);

		/*
		 * Parsing the string input into terms is a multi-step process.
		 *
		 * Note: inside a character class "+-." is a range that spans the
		 * characters plus, comma, dash and period.
		 *
		 * Regexes:
		 *  1. Remove everything except letters, numbers, quotes, apostrophe, plus, dash, period, and comma.
		 *  2. Remove plus, dash, period, and comma characters located before letter characters.
		 *  3. Remove plus, dash, period, and comma characters located after other characters.
		 *  4. Remove plus, period, and comma characters enclosed in alphabetical characters. Ungreedy.
		 *  5. Remove orphaned apostrophe, plus, dash, period, and comma characters.
		 *  6. Remove orphaned quote characters.
		 *  7. Replace the assorted single quotation marks with the ASCII standard single quotation.
		 *  8. Remove multiple space characters and replaces with a single space.
		 */
		$input = JString::strtolower($input);
		$input = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,]+#mui', ' ', $input);

		// Keep the word ($2); only the punctuation in front of it is dropped.
		$input = preg_replace('#(^|\s)[+-.,]+([\pL\pM]+)#mui', ' $2', $input);
		$input = preg_replace('#([\pL\pM\pN]+)[+-.,]+(\s|$)#mui', '$1 ', $input);
		$input = preg_replace('#([\pL\pM]+)[+.,]+([\pL\pM]+)#muiU', '$1 $2', $input); // Ungreedy
		$input = preg_replace('#(^|\s)[\'+-.,]+(\s|$)#mui', ' ', $input);
		$input = preg_replace('#(^|\s)[\p{Pi}\p{Pf}]+(\s|$)#mui', ' ', $input);
		$input = preg_replace('#[' . $quotes . ']+#mui', '\'', $input);
		$input = preg_replace('#\s+#mui', ' ', $input);
		$input = JString::trim($input);

		// Explode the normalized string to get the terms.
		$terms = explode(' ', $input);

		/*
		 * If we have Unicode support and are dealing with Chinese text, Chinese
		 * has to be handled specially because there are not necessarily any spaces
		 * between the "words". So, we have to test if the words belong to the Chinese
		 * character set and if so, explode them into single glyphs or "words".
		 */
		if ($lang === 'zh')
		{
			// Iterate through the terms and test if they contain Chinese.
			for ($i = 0, $n = count($terms); $i < $n; $i++)
			{
				$charMatches = array();
				$charCount = preg_match_all('#[\p{Han}]#mui', $terms[$i], $charMatches);

				if (!$charCount)
				{
					continue;
				}

				// Work on a copy so the term is never read after it has been unset.
				$remainder = $terms[$i];

				// Split apart any groups of Chinese characters.
				for ($j = 0; $j < $charCount; $j++)
				{
					$remainder = JString::str_ireplace($charMatches[0][$j], '', $remainder, false);

					// Each glyph becomes a term of its own.
					$terms[] = $charMatches[0][$j];
				}

				// Keep whatever non-Chinese text is left, otherwise drop the term.
				if ((string) $remainder !== '')
				{
					$terms[$i] = $remainder;
				}
				else
				{
					unset($terms[$i]);
				}
			}

			// Reset array keys.
			$terms = array_values($terms);
		}

		/*
		 * If we have to handle the input as a phrase, that means we don't
		 * tokenize the individual terms and we do not create the two and three
		 * term combinations. The phrase must contain more than one word!
		 */
		if ($phrase === true && count($terms) > 1)
		{
			// Create tokens from the phrase.
			$tokens[] = new FinderIndexerToken($terms, $lang);
		}
		else
		{
			// Chinese glyphs are joined without a separator.
			$spacer = $lang === 'zh' ? '' : ' ';

			// Create tokens from the terms.
			for ($i = 0, $n = count($terms); $i < $n; $i++)
			{
				$tokens[] = new FinderIndexerToken($terms[$i], $lang);
			}

			// Create two and three word phrase tokens from the individual words.
			// $n is fixed before the loop so the derived tokens appended below are not revisited.
			for ($i = 0, $n = count($tokens); $i < $n; $i++)
			{
				// Setup the phrase positions.
				$i2 = $i + 1;
				$i3 = $i + 2;

				// Create the two word phrase.
				if ($i2 < $n && isset($tokens[$i2]))
				{
					// Tokenize the two word phrase.
					$token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term), $lang, $spacer);
					$token->derived = true;

					// Add the token to the stack.
					$tokens[] = $token;
				}

				// Create the three word phrase.
				if ($i3 < $n && isset($tokens[$i3]))
				{
					// Tokenize the three word phrase.
					$token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term, $tokens[$i3]->term), $lang, $spacer);
					$token->derived = true;

					// Add the token to the stack.
					$tokens[] = $token;
				}
			}
		}

		// A lone token is returned as an object rather than a one element array.
		$result = count($tokens) > 1 ? $tokens : array_shift($tokens);

		if ($store)
		{
			$cache[$store] = $result;
		}

		return $result;
	}

	/**
	 * Method to get the base word of a token. This method uses the public
	 * {@link FinderIndexerHelper::$stemmer} object if it is set. If no stemmer is set,
	 * the token is returned with only the apostrophe handling applied.
	 *
	 * @param   string  $token  The token to stem.
	 * @param   string  $lang   The language of the token.
	 *
	 * @return  string  The root token.
	 *
	 * @since   2.5
	 */
	public static function stem($token, $lang)
	{
		// Trim apostrophes at either end of the token.
		$token = JString::trim($token, '\'');

		// Trim everything after any apostrophe in the token.
		if (($pos = JString::strpos($token, '\'')) !== false)
		{
			$token = JString::substr($token, 0, $pos);
		}

		// Stem the token if we have a valid stemmer to use.
		if (self::$stemmer instanceof FinderIndexerStemmer)
		{
			return self::$stemmer->stem($token, $lang);
		}

		return $token;
	}

	/**
	 * Method to add a content type to the database.
	 *
	 * The known types are loaded once and cached by title. A type that is
	 * already known is not inserted again; its id is returned instead.
	 *
	 * @param   string  $title  The type of content. For example: PDF
	 * @param   string  $mime   The mime type of the content. For example: PDF [optional]
	 *
	 * @return  integer  The id of the content type.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	public static function addContentType($title, $mime = null)
	{
		static $types;

		$db = JFactory::getDBO();
		$query = $db->getQuery(true);

		// Check if the types are loaded.
		if (empty($types))
		{
			// Build the query to get the types.
			$query->select('*');
			$query->from($db->quoteName('#__finder_types'));

			// Get the types.
			$db->setQuery($query);
			$types = $db->loadObjectList('title');

			// Check for a database error.
			if ($db->getErrorNum())
			{
				// Throw database error exception.
				throw new Exception($db->getErrorMsg(), 500);
			}
		}

		// Check if the type already exists.
		if (isset($types[$title]))
		{
			return (int) $types[$title]->id;
		}

		// Add the type.
		$query->clear();
		$query->insert($db->quoteName('#__finder_types'));
		$query->columns(array($db->quoteName('title'), $db->quoteName('mime')));
		$query->values($db->quote($title) . ', ' . $db->quote($mime));
		$db->setQuery($query);
		$db->query();

		// Check for a database error.
		if ($db->getErrorNum())
		{
			// Throw database error exception.
			throw new Exception($db->getErrorMsg(), 500);
		}

		$id = (int) $db->insertid();

		// Remember the new type so a later call with the same title does not insert it again.
		if (!is_array($types))
		{
			$types = array();
		}

		$type = new stdClass;
		$type->id = $id;
		$type->title = $title;
		$type->mime = $mime;
		$types[$title] = $type;

		// Return the new id.
		return $id;
	}

	/**
	 * Method to check if a token is common in a language.
	 *
	 * The common terms of each language are loaded once and then cached.
	 *
	 * @param   string  $token  The token to test.
	 * @param   string  $lang   The language to reference.
	 *
	 * @return  boolean  True if common, false otherwise.
	 *
	 * @since   2.5
	 */
	public static function isCommon($token, $lang)
	{
		static $data;

		// Load the common tokens for the language if necessary.
		if (!isset($data[$lang]))
		{
			$data[$lang] = self::getCommonWords($lang);
		}

		// Compare strictly so numeric-looking terms are matched as text, not as numbers.
		return in_array((string) $token, $data[$lang], true);
	}

	/**
	 * Method to get an array of common terms for a language.
	 *
	 * @param   string  $lang  The language to use.
	 *
	 * @return  array  Array of common terms.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	public static function getCommonWords($lang)
	{
		$db = JFactory::getDBO();

		// Create the query to load all the common terms for the language.
		$query = $db->getQuery(true);
		$query->select($db->quoteName('term'));
		$query->from($db->quoteName('#__finder_terms_common'));
		$query->where($db->quoteName('language') . ' = ' . $db->quote($lang));

		// Load all of the common terms for the language.
		$db->setQuery($query);
		$results = $db->loadColumn();

		// Check for a database error.
		if ($db->getErrorNum())
		{
			// Throw database error exception.
			throw new Exception($db->getErrorMsg(), 500);
		}

		return $results;
	}

	/**
	 * Method to get the default language for the site.
	 *
	 * The value is read from the com_languages parameters ('site', falling
	 * back to en-GB) on first use and cached afterwards.
	 *
	 * @return  string  The default language string.
	 *
	 * @since   2.5
	 */
	public static function getDefaultLanguage()
	{
		static $lang;

		// Get the default language.
		if (empty($lang))
		{
			$lang = JComponentHelper::getParams('com_languages')->get('site', 'en-GB');
		}

		return $lang;
	}

	/**
	 * Method to parse a language/locale key and return a simple language string.
	 *
	 * @param   string  $lang  The language/locale key. For example: en-GB
	 *
	 * @return  string  The simple language string. For example: en
	 *
	 * @since   2.5
	 */
	public static function getPrimaryLanguage($lang)
	{
		static $data;

		// Only parse the identifier if necessary.
		if (!isset($data[$lang]))
		{
			if (is_callable(array('Locale', 'getPrimaryLanguage')))
			{
				// Get the language key using the Locale package.
				$data[$lang] = Locale::getPrimaryLanguage($lang);
			}
			else
			{
				// Get the language key using string position.
				$pos = JString::strpos($lang, '-');

				// A key without a region part (for example "en") already is the simple language string.
				$data[$lang] = ($pos === false) ? $lang : JString::substr($lang, 0, $pos);
			}
		}

		return $data[$lang];
	}

	/**
	 * Method to get the path (SEF route) for a content item.
	 *
	 * @param   string  $url  The non-SEF route to the content item.
	 *
	 * @return  string  The path for the content item, relative to the base path.
	 *
	 * @since   2.5
	 */
	public static function getContentPath($url)
	{
		static $router;

		// Only get the router once.
		if (!($router instanceof JRouter))
		{
			jimport('joomla.application.router');
			include_once JPATH_SITE . '/includes/application.php';

			// Get and configure the site router.
			$config = JFactory::getConfig();
			$router = JRouter::getInstance('site');
			$router->setMode($config->get('sef', 1));
		}

		// Build the relative route.
		$uri = $router->build($url);
		$route = $uri->toString(array('path', 'query', 'fragment'));

		// Strip the base path only where it prefixes the route; the rest of the route is left alone.
		$base = JURI::base(true) . '/';

		if (strpos($route, $base) === 0)
		{
			$route = (string) substr($route, strlen($base));
		}

		return $route;
	}

	/**
	 * Method to get extra data for a content before being indexed. This is how
	 * we add Comments, Tags, Labels, etc. that should be available to Finder.
	 *
	 * @param   FinderIndexerResult  &$item  The item to index as an FinderIndexerResult object.
	 *
	 * @return  boolean  True on success. Failure is reported by an exception.
	 *
	 * @since   2.5
	 * @throws  Exception if a plugin throws or returns false.
	 */
	public static function getContentExtras(FinderIndexerResult &$item)
	{
		// Get the event dispatcher.
		$dispatcher = JDispatcher::getInstance();

		// Load the finder plugin group.
		JPluginHelper::importPlugin('finder');

		// Trigger the event. Exceptions thrown by plugins propagate to the caller.
		$results = $dispatcher->trigger('onPrepareFinderContent', array(&$item));

		// Check the returned results. This is for plugins that don't throw
		// exceptions when they encounter serious errors. Only a real boolean
		// false counts as a failure.
		if (in_array(false, $results, true))
		{
			throw new Exception($dispatcher->getError(), 500);
		}

		return true;
	}

	/**
	 * Method to process content text using the onContentPrepare event trigger.
	 *
	 * @param   string     $text    The content to process.
	 * @param   JRegistry  $params  The parameters object. Any other value is
	 *                              loaded into a new JRegistry. [optional]
	 *
	 * @return  string  The processed content.
	 *
	 * @since   2.5
	 */
	public static function prepareContent($text, $params = null)
	{
		static $loaded;

		// Get the dispatcher.
		$dispatcher = JDispatcher::getInstance();

		// Load the content plugins if necessary and remove any problematic ones.
		if (empty($loaded))
		{
			JPluginHelper::importPlugin('content');
			$loaded = true;

			// Create an array of problematic plugins
			$conflicts = array('plgContentEmailCloak', 'plgContentLoadmodule');

			// Check if we can access the observers
			if (isset($dispatcher->_observers))
			{
				// Remove problematic plugins.
				foreach ($dispatcher->_observers as $key => $handler)
				{
					// Remove any function based event handlers that conflict with Finder.
					if (is_array($handler) && isset($handler['handler']) && in_array($handler['handler'], $conflicts))
					{
						unset($dispatcher->_observers[$key]);
					}
					// Remove any object based event handlers that conflict with Finder.
					elseif (is_object($handler) && method_exists($handler, 'update') && in_array(get_class($handler), $conflicts))
					{
						unset($dispatcher->_observers[$key]);
					}
				}
			}
		}

		// Instantiate the parameter object if necessary.
		if (!($params instanceof JRegistry))
		{
			$registry = new JRegistry;
			$registry->loadString($params);
			$params = $registry;
		}

		// Create a mock content object.
		$content = JTable::getInstance('Content');
		$content->text = $text;

		// Fire the onContentPrepare event.
		$dispatcher->trigger('onContentPrepare', array('com_finder.indexer', &$content, &$params, 0));

		return $content->text;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Tue Apr 3 11:40:28 2012 | Cross-referenced by PHPXref 0.7.1 |