| [ Index ] |
PHP Cross Reference of Joomla 2.5.4 DE |
[Summary view] [Print] [Text view]
<?php
/**
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 *
 * @copyright   Copyright (C) 2005 - 2012 Open Source Matters, Inc. All rights reserved.
 * @license     GNU General Public License version 2 or later; see LICENSE
 */

defined('_JEXEC') or die;

// Register dependent classes.
JLoader::register('FinderIndexerHelper', dirname(__FILE__) . '/helper.php');
JLoader::register('FinderIndexerParser', dirname(__FILE__) . '/parser.php');
JLoader::register('FinderIndexerStemmer', dirname(__FILE__) . '/stemmer.php');
JLoader::register('FinderIndexerTaxonomy', dirname(__FILE__) . '/taxonomy.php');
JLoader::register('FinderIndexerToken', dirname(__FILE__) . '/token.php');

jimport('joomla.filesystem.file');

/**
 * Main indexer class for the Finder indexer package.
 *
 * The indexer class provides the core functionality of the Finder
 * search engine. It is responsible for adding and updating the
 * content links table; extracting and scoring tokens; and maintaining
 * all referential information for the content.
 *
 * Note: All exceptions thrown from within this class should be caught
 * by the controller.
 *
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 * @since       2.5
 */
class FinderIndexer
{
	/**
	 * The title context identifier.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	const TITLE_CONTEXT = 1;

	/**
	 * The text context identifier.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	const TEXT_CONTEXT = 2;

	/**
	 * The meta context identifier.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	const META_CONTEXT = 3;

	/**
	 * The path context identifier.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	const PATH_CONTEXT = 4;

	/**
	 * The misc context identifier.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	const MISC_CONTEXT = 5;

	/**
	 * The indexer state object.
	 *
	 * @var    object
	 * @since  2.5
	 */
	public static $state;

	/**
	 * The indexer profiler object. Only set when the application runs in debug mode.
	 *
	 * @var    object
	 * @since  2.5
	 */
	public static $profiler;

	/**
	 * Method to get the indexer state.
	 *
	 * The state is taken from the internal cache when available, then from the
	 * session, and is otherwise built from the com_finder component parameters.
	 * As a side effect the profiler (debug mode only) and the stemmer are set up
	 * whenever the state is not served from the internal cache.
	 *
	 * @return  object  The indexer state object.
	 *
	 * @since   2.5
	 */
	public static function getState()
	{
		// First, try to load from the internal state.
		if (!empty(self::$state))
		{
			return self::$state;
		}

		// If we couldn't load from the internal state, try the session.
		$data = JFactory::getSession()->get('_finder.state', null);

		// If the state is empty, load the values for the first time.
		if (empty($data))
		{
			$data = new JObject;

			// Load the default configuration options.
			$data->options = JComponentHelper::getParams('com_finder');

			// Setup the weight lookup information.
			$data->weights = array(
				self::TITLE_CONTEXT => round($data->options->get('title_multiplier', 1.7), 2),
				self::TEXT_CONTEXT  => round($data->options->get('text_multiplier', 0.7), 2),
				self::META_CONTEXT  => round($data->options->get('meta_multiplier', 1.2), 2),
				self::PATH_CONTEXT  => round($data->options->get('path_multiplier', 2.0), 2),
				self::MISC_CONTEXT  => round($data->options->get('misc_multiplier', 0.3), 2)
			);

			// Set the current time as the start time.
			$data->startTime = JFactory::getDate()->toSQL();

			// Set the remaining default values.
			$data->batchSize   = (int) $data->options->get('batch_size', 50);
			$data->batchOffset = 0;
			$data->totalItems  = 0;
			$data->pluginState = array();
		}

		// Setup the profiler if debugging is enabled.
		if (JFactory::getApplication()->getCfg('debug'))
		{
			jimport('joomla.error.profiler');
			self::$profiler = JProfiler::getInstance('FinderIndexer');
		}

		// Setup the stemmer.
		if ($data->options->get('stem', 1) && $data->options->get('stemmer', 'porter_en'))
		{
			FinderIndexerHelper::$stemmer = FinderIndexerStemmer::getInstance($data->options->get('stemmer', 'porter_en'));
		}

		// Set the state.
		self::$state = $data;

		return self::$state;
	}

	/**
	 * Method to set the indexer state.
	 *
	 * The state is stored both in the internal cache and in the session.
	 *
	 * @param   object  $data  A new indexer state object. Must be a JObject.
	 *
	 * @return  boolean  True on success, false when $data is not a JObject.
	 *
	 * @since   2.5
	 */
	public static function setState($data)
	{
		// Check the state object. Empty values are never JObject instances.
		if (!($data instanceof JObject))
		{
			return false;
		}

		// Set the new internal state.
		self::$state = $data;

		// Set the new session state.
		JFactory::getSession()->set('_finder.state', $data);

		return true;
	}

	/**
	 * Method to reset the indexer state, both internally and in the session.
	 *
	 * @return  void
	 *
	 * @since   2.5
	 */
	public static function resetState()
	{
		// Reset the internal state to null.
		self::$state = null;

		// Reset the session state to null.
		JFactory::getSession()->set('_finder.state', null);
	}

	/**
	 * Method to index a content item.
	 *
	 * The item is looked up by URL in the links table. When it already exists
	 * with an identical signature nothing is done. Otherwise its old term and
	 * taxonomy maps are flushed, the link row is inserted or updated, the item
	 * is tokenized into the temporary token tables, the tokens are aggregated,
	 * missing terms are created, and the link => term maps are written.
	 *
	 * @param   FinderIndexerResult  $item    The content item to index.
	 * @param   string               $format  The format of the content. [optional]
	 *
	 * @return  integer  The ID of the record in the links table.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	public static function index($item, $format = 'html')
	{
		// Get the indexer state first: getState() is what creates the profiler,
		// so calling it later would silently drop the first mark below.
		$state = self::getState();

		// Mark beforeIndexing in the profiler.
		self::$profiler ? self::$profiler->mark('beforeIndexing') : null;

		$db = JFactory::getDBO();
		$nd = $db->getNullDate();

		// Check if the item is in the database.
		$query = $db->getQuery(true);
		$query->select($db->quoteName('link_id') . ', ' . $db->quoteName('md5sum'));
		$query->from($db->quoteName('#__finder_links'));
		$query->where($db->quoteName('url') . ' = ' . $db->quote($item->url));

		// Load the item from the database.
		$db->setQuery($query);
		$link = $db->loadObject();
		self::throwOnDatabaseError($db);

		// Get the signatures of the item.
		$curSig = self::getSignature($item);
		$oldSig = isset($link->md5sum) ? $link->md5sum : null;

		// Get the other item information.
		$isNew  = empty($link->link_id);
		$linkId = $isNew ? null : $link->link_id;

		// Check the signatures. If they match, the item is up to date.
		if (!$isNew && $curSig == $oldSig)
		{
			return $linkId;
		}

		/*
		 * If the link already exists, flush all the term maps for the item.
		 * Maps are stored in 16 tables so we need to iterate through and flush
		 * each table one at a time.
		 */
		if (!$isNew)
		{
			for ($i = 0; $i <= 15; $i++)
			{
				// Flush the maps for the link.
				$query->clear();
				$query->delete();
				$query->from($db->quoteName('#__finder_links_terms' . dechex($i)));
				$query->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
				$db->setQuery($query);
				$db->query();
				self::throwOnDatabaseError($db);
			}

			// Remove the taxonomy maps.
			FinderIndexerTaxonomy::removeMaps($linkId);
		}

		// Mark afterUnmapping in the profiler.
		self::$profiler ? self::$profiler->mark('afterUnmapping') : null;

		// Perform cleanup on the item data: empty dates become the database null date.
		$item->publish_start_date = intval($item->publish_start_date) != 0 ? $item->publish_start_date : $nd;
		$item->publish_end_date   = intval($item->publish_end_date) != 0 ? $item->publish_end_date : $nd;
		$item->start_date         = intval($item->start_date) != 0 ? $item->start_date : $nd;
		$item->end_date           = intval($item->end_date) != 0 ? $item->end_date : $nd;

		// Prepare the item description.
		$item->description = FinderIndexerHelper::parse($item->summary);

		/*
		 * Now, we need to enter the item into the links table. If the item
		 * already exists in the database, we need to use an UPDATE query.
		 * Otherwise, we need to use an INSERT to get the link id back.
		 */
		$query->clear();

		if ($isNew)
		{
			$columnsArray = array(
				$db->quoteName('url'), $db->quoteName('route'), $db->quoteName('title'), $db->quoteName('description'),
				$db->quoteName('indexdate'), $db->quoteName('published'), $db->quoteName('state'), $db->quoteName('access'),
				$db->quoteName('language'), $db->quoteName('type_id'), $db->quoteName('object'), $db->quoteName('publish_start_date'),
				$db->quoteName('publish_end_date'), $db->quoteName('start_date'), $db->quoteName('end_date'), $db->quoteName('list_price'),
				$db->quoteName('sale_price')
			);

			// Insert the link.
			$query->insert($db->quoteName('#__finder_links'));
			$query->columns($columnsArray);
			$query->values(
				$db->quote($item->url) . ', '
				. $db->quote($item->route) . ', '
				. $db->quote($item->title) . ', '
				. $db->quote($item->description) . ', '
				. $query->currentTimestamp() . ', '
				. '1, '
				. (int) $item->state . ', '
				. (int) $item->access . ', '
				. $db->quote($item->language) . ', '
				. (int) $item->type_id . ', '
				. $db->quote(serialize($item)) . ', '
				. $db->quote($item->publish_start_date) . ', '
				. $db->quote($item->publish_end_date) . ', '
				. $db->quote($item->start_date) . ', '
				. $db->quote($item->end_date) . ', '
				. $db->quote($item->list_price) . ', '
				. $db->quote($item->sale_price)
			);
			$db->setQuery($query);
			$db->query();
			self::throwOnDatabaseError($db);

			// Get the link id.
			$linkId = (int) $db->insertid();
		}
		else
		{
			// Update the link.
			$query->update($db->quoteName('#__finder_links'));
			$query->set($db->quoteName('route') . ' = ' . $db->quote($item->route));
			$query->set($db->quoteName('title') . ' = ' . $db->quote($item->title));
			$query->set($db->quoteName('description') . ' = ' . $db->quote($item->description));
			$query->set($db->quoteName('indexdate') . ' = ' . $query->currentTimestamp());
			$query->set($db->quoteName('state') . ' = ' . (int) $item->state);
			$query->set($db->quoteName('access') . ' = ' . (int) $item->access);
			$query->set($db->quoteName('language') . ' = ' . $db->quote($item->language));
			$query->set($db->quoteName('type_id') . ' = ' . (int) $item->type_id);
			$query->set($db->quoteName('object') . ' = ' . $db->quote(serialize($item)));
			$query->set($db->quoteName('publish_start_date') . ' = ' . $db->quote($item->publish_start_date));
			$query->set($db->quoteName('publish_end_date') . ' = ' . $db->quote($item->publish_end_date));
			$query->set($db->quoteName('start_date') . ' = ' . $db->quote($item->start_date));
			$query->set($db->quoteName('end_date') . ' = ' . $db->quote($item->end_date));
			$query->set($db->quoteName('list_price') . ' = ' . $db->quote($item->list_price));
			$query->set($db->quoteName('sale_price') . ' = ' . $db->quote($item->sale_price));
			$query->where('link_id = ' . (int) $linkId);
			$db->setQuery($query);
			$db->query();
			self::throwOnDatabaseError($db);
		}

		// Running total of tokens written to the token table for this item.
		$count = 0;

		// Threshold above which the token tables are toggled (see toggleTables()).
		$memoryLimit = $state->options->get('memory_table_limit', 30000);

		// Mark afterLinking in the profiler.
		self::$profiler ? self::$profiler->mark('afterLinking') : null;

		// Truncate the tokens tables.
		$db->truncateTable('#__finder_tokens');
		self::throwOnDatabaseError($db);

		// Truncate the tokens aggregate table.
		$db->truncateTable('#__finder_tokens_aggregate');
		self::throwOnDatabaseError($db);

		/*
		 * Process the item's content. The items can customize their
		 * processing instructions to define extra properties to process
		 * or rearrange how properties are weighted.
		 */
		foreach ($item->getInstructions() as $group => $properties)
		{
			// Iterate through the properties of the group.
			foreach ($properties as $property)
			{
				// Check if the property exists in the item.
				if (empty($item->$property))
				{
					continue;
				}

				// Tokenize the property.
				if (is_array($item->$property))
				{
					// Tokenize an array of content and add it to the database.
					foreach ($item->$property as $ip)
					{
						// If the group is path, we need a few extra processing
						// steps to strip the extension and convert slashes and
						// dashes to spaces.
						if ($group === self::PATH_CONTEXT)
						{
							$ip = str_replace(array('/', '-'), ' ', JFile::stripExt($ip));
						}

						// Tokenize a string of content and add it to the database.
						$count += self::tokenizeToDB($ip, $group, $item->language, $format);

						// Check if we're approaching the memory limit of the token table.
						if ($count > $memoryLimit)
						{
							self::toggleTables(false);
						}
					}
				}
				else
				{
					// Same path clean-up as above. Note that the cleaned value
					// is written back onto the item.
					if ($group === self::PATH_CONTEXT)
					{
						$item->$property = str_replace(array('/', '-'), ' ', JFile::stripExt($item->$property));
					}

					// Tokenize a string of content and add it to the database.
					$count += self::tokenizeToDB($item->$property, $group, $item->language, $format);

					// Check if we're approaching the memory limit of the token table.
					if ($count > $memoryLimit)
					{
						self::toggleTables(false);
					}
				}
			}
		}

		/*
		 * Process the item's taxonomy. The items can customize their
		 * taxonomy mappings to define extra properties to map.
		 */
		foreach ($item->getTaxonomy() as $branch => $nodes)
		{
			// Iterate through the nodes and map them to the branch.
			foreach ($nodes as $node)
			{
				// Add the node to the tree.
				$nodeId = FinderIndexerTaxonomy::addNode($branch, $node->title, $node->state, $node->access);

				// Add the link => node map.
				FinderIndexerTaxonomy::addMap($linkId, $nodeId);

				// Tokenize the node title and add them to the database.
				$count += self::tokenizeToDB($node->title, self::META_CONTEXT, $item->language, $format);
			}
		}

		// Mark afterProcessing in the profiler.
		self::$profiler ? self::$profiler->mark('afterProcessing') : null;

		/*
		 * At this point, all of the item's content has been parsed, tokenized
		 * and inserted into the #__finder_tokens table. Now, we need to
		 * aggregate all the data in that table into a more usable form. The
		 * aggregated data will be inserted into the #__finder_tokens_aggregate
		 * table.
		 *
		 * The string is an sprintf() template: %F is the context multiplier and
		 * both %d placeholders are the context identifier.
		 */
		$aggregateSql = 'INSERT INTO ' . $db->quoteName('#__finder_tokens_aggregate') .
			' (' . $db->quoteName('term_id') .
			', ' . $db->quoteName('term') .
			', ' . $db->quoteName('stem') .
			', ' . $db->quoteName('common') .
			', ' . $db->quoteName('phrase') .
			', ' . $db->quoteName('term_weight') .
			', ' . $db->quoteName('context') .
			', ' . $db->quoteName('context_weight') . ')' .
			' SELECT' .
			' t.term_id, t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context,' .
			' ROUND( t1.weight * COUNT( t2.term ) * %F, 8 ) AS context_weight' .
			' FROM (' .
			' SELECT DISTINCT t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context' .
			' FROM ' . $db->quoteName('#__finder_tokens') . ' AS t1' .
			' WHERE t1.context = %d' .
			' ) AS t1' .
			' JOIN ' . $db->quoteName('#__finder_tokens') . ' AS t2 ON t2.term = t1.term' .
			' LEFT JOIN ' . $db->quoteName('#__finder_terms') . ' AS t ON t.term = t1.term' .
			' WHERE t2.context = %d' .
			' GROUP BY t1.term' .
			' ORDER BY t1.term DESC';

		// Iterate through the contexts and aggregate the tokens per context.
		foreach ($state->weights as $context => $multiplier)
		{
			// Run the query to aggregate the tokens for this context.
			$db->setQuery(sprintf($aggregateSql, $multiplier, $context, $context));
			$db->query();
			self::throwOnDatabaseError($db);
		}

		// Mark afterAggregating in the profiler.
		self::$profiler ? self::$profiler->mark('afterAggregating') : null;

		/*
		 * When we pulled down all of the aggregate data, we did a LEFT JOIN
		 * over the terms table to try to find all the term ids that
		 * already exist for our tokens. If any of the rows in the aggregate
		 * table have a term of 0, then no term record exists for that
		 * term so we need to add it to the terms table.
		 *
		 * NOTE: an earlier attempt to replace INSERT IGNORE with a portable
		 * "INSERT ... SELECT ... WHERE 1 NOT IN (...)" plus UPDATE fallback was
		 * disabled here because it caused the indexer to fail.
		 */
		//@TODO: PostgreSQL doesn't support INSERT IGNORE INTO
		//@TODO: PostgreSQL doesn't support SOUNDEX out of the box
		$db->setQuery(
			'INSERT IGNORE INTO ' . $db->quoteName('#__finder_terms') .
			' (' . $db->quoteName('term') .
			', ' . $db->quoteName('stem') .
			', ' . $db->quoteName('common') .
			', ' . $db->quoteName('phrase') .
			', ' . $db->quoteName('weight') .
			', ' . $db->quoteName('soundex') . ')' .
			' SELECT ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term)' .
			' FROM ' . $db->quoteName('#__finder_tokens_aggregate') . ' AS ta' .
			' WHERE ta.term_id = 0' .
			' GROUP BY ta.term'
		);
		$db->query();
		self::throwOnDatabaseError($db);

		/*
		 * Now, we just inserted a bunch of new records into the terms table
		 * so we need to go back and update the aggregate table with all the
		 * new term ids.
		 */
		$query = $db->getQuery(true);
		$query->update($db->quoteName('#__finder_tokens_aggregate') . ' AS ta');
		$query->join('INNER', $db->quoteName('#__finder_terms') . ' AS t ON t.term = ta.term');
		$query->set('ta.term_id = t.term_id');
		$query->where('ta.term_id = 0');
		$db->setQuery($query);
		$db->query();
		self::throwOnDatabaseError($db);

		// Mark afterTerms in the profiler.
		self::$profiler ? self::$profiler->mark('afterTerms') : null;

		/*
		 * After we've made sure that all of the terms are in the terms table
		 * and the aggregate table has the correct term ids, we need to update
		 * the links counter for each term by one.
		 */
		$query->clear();
		$query->update($db->quoteName('#__finder_terms') . ' AS t');
		$query->join('INNER', $db->quoteName('#__finder_tokens_aggregate') . ' AS ta ON ta.term_id = t.term_id');
		$query->set('t.' . $db->quoteName('links') . ' = t.links + 1');
		$db->setQuery($query);
		$db->query();
		self::throwOnDatabaseError($db);

		// Mark afterTerms in the profiler (the label is recorded a second time here).
		self::$profiler ? self::$profiler->mark('afterTerms') : null;

		/*
		 * Before we can insert all of the mapping rows, we have to figure out
		 * which mapping table the rows need to be inserted into. The mapping
		 * table for each term is based on the first character of the md5 of
		 * the first character of the term. In php, it would be expressed as
		 * substr(md5(substr($token, 0, 1)), 0, 1)
		 */
		$query->clear();
		$query->update($db->quoteName('#__finder_tokens_aggregate'));
		$query->set($db->quoteName('map_suffix') . ' = SUBSTR(MD5(SUBSTR(' . $db->quoteName('term') . ', 1, 1)), 1, 1)');
		$db->setQuery($query);
		$db->query();
		self::throwOnDatabaseError($db);

		/*
		 * At this point, the aggregate table contains a record for each
		 * term in each context. So, we're going to pull down all of that
		 * data while grouping the records by term and add all of the
		 * sub-totals together to arrive at the final total for each token for
		 * this link. Then, we insert all of that data into the appropriate
		 * mapping table.
		 */
		for ($i = 0; $i <= 15; $i++)
		{
			// Get the mapping table suffix.
			$suffix = dechex($i);

			/*
			 * We have to run this query 16 times, one for each link => term
			 * mapping table.
			 */
			//@TODO: Convert to JDatabaseQuery
			$db->setQuery(
				'INSERT INTO ' . $db->quoteName('#__finder_links_terms' . $suffix) .
				' (' . $db->quoteName('link_id') .
				', ' . $db->quoteName('term_id') .
				', ' . $db->quoteName('weight') . ')' .
				' SELECT ' . (int) $linkId . ', ' . $db->quoteName('term_id') . ',' .
				' ROUND(SUM(' . $db->quoteName('context_weight') . '), 8)' .
				' FROM ' . $db->quoteName('#__finder_tokens_aggregate') .
				' WHERE ' . $db->quoteName('map_suffix') . ' = ' . $db->quote($suffix) .
				' GROUP BY ' . $db->quoteName('term') .
				' ORDER BY ' . $db->quoteName('term') . ' DESC'
			);
			$db->query();
			self::throwOnDatabaseError($db);
		}

		// Mark afterMapping in the profiler.
		self::$profiler ? self::$profiler->mark('afterMapping') : null;

		// Update the signature.
		$query->clear();
		$query->update($db->quoteName('#__finder_links'));
		$query->set($db->quoteName('md5sum') . ' = ' . $db->quote($curSig));
		$query->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
		$db->setQuery($query);
		$db->query();
		self::throwOnDatabaseError($db);

		// Mark afterSigning in the profiler.
		self::$profiler ? self::$profiler->mark('afterSigning') : null;

		// Truncate the tokens tables.
		$db->truncateTable('#__finder_tokens');
		self::throwOnDatabaseError($db);

		// Truncate the tokens aggregate table.
		$db->truncateTable('#__finder_tokens_aggregate');
		self::throwOnDatabaseError($db);

		// Toggle the token tables back to memory tables.
		self::toggleTables(true);

		// Mark afterTruncating in the profiler.
		self::$profiler ? self::$profiler->mark('afterTruncating') : null;

		return $linkId;
	}

	/**
	 * Method to remove a link from the index.
	 *
	 * Decrements the link counter of every term mapped to the link, deletes the
	 * link's rows from the 16 mapping tables, deletes terms whose counter has
	 * dropped to zero or below, deletes the link itself and finally cleans up
	 * the taxonomy maps and orphaned taxonomy nodes.
	 *
	 * @param   integer  $linkId  The id of the link.
	 *
	 * @return  boolean  True on success.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	public static function remove($linkId)
	{
		$db    = JFactory::getDBO();
		$query = $db->getQuery(true);

		// The id is always embedded as a plain integer. Casting the result of
		// $db->quote() instead (a string that starts with a quote character)
		// evaluates to 0 and makes the statements below match nothing.
		$linkId = (int) $linkId;

		// Initialise the indexer state (kept for its set-up side effects).
		self::getState();

		// Update the link counts and remove the mapping records.
		for ($i = 0; $i <= 15; $i++)
		{
			$mapTable = $db->quoteName('#__finder_links_terms' . dechex($i));

			// Update the link counts for the terms. The builder must be cleared
			// first: from the second iteration on it still holds the DELETE
			// parts of the previous iteration.
			$query->clear();
			$query->update($db->quoteName('#__finder_terms') . ' AS t');
			$query->join('INNER', $mapTable . ' AS m ON m.term_id = t.term_id');
			$query->set($db->quoteName('t') . '.' . $db->quoteName('links') . ' = ' . $db->quoteName('t') . '.' . $db->quoteName('links') . ' - 1');
			$query->where($db->quoteName('m') . '.' . $db->quoteName('link_id') . ' = ' . $linkId);
			$db->setQuery($query);
			$db->query();
			self::throwOnDatabaseError($db);

			// Remove all records from the mapping tables.
			$query->clear();
			$query->delete();
			$query->from($mapTable);
			$query->where($db->quoteName('link_id') . ' = ' . $linkId);
			$db->setQuery($query);
			$db->query();
			self::throwOnDatabaseError($db);
		}

		// Delete all orphaned terms.
		$query->clear();
		$query->delete();
		$query->from($db->quoteName('#__finder_terms'));
		$query->where($db->quoteName('links') . ' <= 0');
		$db->setQuery($query);
		$db->query();
		self::throwOnDatabaseError($db);

		// Delete the link from the index.
		$query->clear();
		$query->delete();
		$query->from($db->quoteName('#__finder_links'));
		$query->where($db->quoteName('link_id') . ' = ' . $linkId);
		$db->setQuery($query);
		$db->query();
		self::throwOnDatabaseError($db);

		// Remove the taxonomy maps.
		FinderIndexerTaxonomy::removeMaps($linkId);

		// Remove the orphaned taxonomy nodes.
		FinderIndexerTaxonomy::removeOrphanNodes();

		return true;
	}

	/**
	 * Method to optimize the index. We use this method to remove unused terms
	 * and any other optimizations that might be necessary.
	 *
	 * OPTIMIZE TABLE is only issued when the database driver name starts with
	 * "mysql"; other drivers only get the orphan clean-up.
	 *
	 * @return  boolean  True on success.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	public static function optimize()
	{
		// Initialise the indexer state (kept for its set-up side effects).
		self::getState();

		// Get the database object.
		$db    = JFactory::getDBO();
		$query = $db->getQuery(true);

		// Delete all orphaned terms.
		$query->delete();
		$query->from($db->quoteName('#__finder_terms'));
		$query->where($db->quoteName('links') . ' <= 0');
		$db->setQuery($query);
		$db->query();
		self::throwOnDatabaseError($db);

		//@TODO: PostgreSQL doesn't support OPTIMIZE TABLE
		// Temporary workaround for non-MySQL solutions
		$isMySql = (strpos($db->name, 'mysql') === 0);

		if ($isMySql)
		{
			// The links table, the 16 terms mapping tables, then the
			// un-suffixed terms mapping table name, in that order.
			$tables = array('#__finder_links');

			for ($i = 0; $i <= 15; $i++)
			{
				$tables[] = '#__finder_links_terms' . dechex($i);
			}

			$tables[] = '#__finder_links_terms';

			foreach ($tables as $table)
			{
				$db->setQuery('OPTIMIZE TABLE ' . $db->quoteName($table));
				$db->query();
				self::throwOnDatabaseError($db);
			}
		}

		// Remove the orphaned taxonomy nodes.
		FinderIndexerTaxonomy::removeOrphanNodes();

		// Optimize the taxonomy mapping table.
		if ($isMySql)
		{
			$db->setQuery('OPTIMIZE TABLE ' . $db->quoteName('#__finder_taxonomy_map'));
			$db->query();
			self::throwOnDatabaseError($db);
		}

		return true;
	}

	/**
	 * Method to get a content item's signature.
	 *
	 * The signature is the md5 of the serialized item together with the
	 * context weights and the stemming options, so a change to either the
	 * item or that configuration yields a different signature.
	 *
	 * @param   object  $item  The content item to index.
	 *
	 * @return  string  The content item's signature.
	 *
	 * @since   2.5
	 */
	protected static function getSignature($item)
	{
		// Get the indexer state.
		$state = self::getState();

		// Get the relevant configuration variables.
		$config = array(
			$state->weights,
			$state->options->get('stem', 1),
			$state->options->get('stemmer', 'porter_en')
		);

		return md5(serialize(array($item, $config)));
	}

	/**
	 * Throws an exception when the database object reports an error.
	 *
	 * @param   object  $db  The database object that ran the last statement.
	 *
	 * @return  void
	 *
	 * @since   2.5
	 * @throws  Exception with code 500 and the database error message.
	 */
	private static function throwOnDatabaseError($db)
	{
		if ($db->getErrorNum())
		{
			throw new Exception($db->getErrorMsg(), 500);
		}
	}

	/**
	 * Method to parse input, tokenize it, and then add it to the database.
	 *
	 * @param   mixed    $input    String or resource to use as input. A resource
	 *                             input will automatically be chunked to conserve
	 *                             memory. Strings will be chunked if longer than
	 *                             2K in size.
	 * @param   integer  $context  The context of the input. See context constants.
	 * @param   string   $lang     The language of the input.
	 * @param   string   $format   The format of the input.
	 *
	 * @return  integer  The number of tokens extracted from the input.
	 *
	 * @since   2.5
	 */
	protected static function tokenizeToDB($input, $context, $lang, $format)
	{
		$count = 0;
		$buffer = null;

		// If the input is a resource, batch the process out.
		if (is_resource($input))
		{
			// Batch the process out to avoid memory limits.
			while (!feof($input))
			{
				// Read into the buffer.
				$buffer .= fread($input, 2048);

				// If we haven't reached the end of the file, seek to the last
				// space character and drop whatever is after that to make sure
				// we didn't truncate a term while reading the input.
				if (!feof($input))
				{
					// Find the last space character.
					$ls = strrpos($buffer, ' ');

					// Adjust string based on the last space character.
					if ($ls)
					{
						// Truncate the string to the last space character.
						$string = substr($buffer, 0, $ls);

						// Adjust the buffer based on the last space for the
						// next iteration and trim.
						$buffer = JString::trim(substr($buffer, $ls));
					}
					// No space character was found.
					else
					{
						$string = $buffer;
					}
				}
				// We've reached the end of the file, so parse whatever remains.
				else
				{
					$string = $buffer;
				}

				// Parse the input.
				$string = FinderIndexerHelper::parse($string, $format);

				// Check the input.
				if (empty($string))
				{
					continue;
				}

				// Tokenize the input.
				$tokens = FinderIndexerHelper::tokenize($string, $lang);

				// Add the tokens to the database.
				$count += FinderIndexer::addTokensToDB($tokens, $context);

				// Check if we're approaching the memory limit of the token table.
				if ($count > self::$state->options->get('memory_table_limit', 30000))
				{
					FinderIndexer::toggleTables(false);
				}

				unset($string);
				unset($tokens);
			}
		}
		// If the input is greater than 2K in size, it is more efficient to
		// batch out the operation into smaller chunks of work.
		elseif (strlen($input) > 2048)
		{
			$start = 0;
			$end = strlen($input);
			$chunk = 2048;

			// As it turns out, the complex regular expressions we use for
			// sanitizing input are not very efficient when given large
			// strings. It is much faster to process lots of short strings.
			while ($start < $end)
			{
				// Setup the string.
				$string = substr($input, $start, $chunk);

				// Find the last space character if we aren't at the end.
				$ls = (($start + $chunk) < $end ? strrpos($string, ' ') : false);

				// Truncate to the last space character.
				if ($ls !== false)
				{
					$string = substr($string, 0, $ls);
				}

				// Adjust the start position for the next iteration.
				$start += ($ls !== false ? ($ls + 1 - $chunk) + $chunk : $chunk);

				// Parse the input.
				$string = FinderIndexerHelper::parse($string, $format);

				// Check the input.
				if (empty($string))
				{
					continue;
				}

				// Tokenize the input.
1157 $tokens = FinderIndexerHelper::tokenize($string, $lang); 1158 1159 // Add the tokens to the database. 1160 $count += FinderIndexer::addTokensToDB($tokens, $context); 1161 1162 // Check if we're approaching the memory limit of the token table. 1163 if ($count > self::$state->options->get('memory_table_limit', 30000)) 1164 { 1165 FinderIndexer::toggleTables(false); 1166 } 1167 } 1168 } 1169 else 1170 { 1171 // Parse the input. 1172 $input = FinderIndexerHelper::parse($input, $format); 1173 1174 // Check the input. 1175 if (empty($input)) 1176 { 1177 return $count; 1178 } 1179 1180 // Tokenize the input. 1181 $tokens = FinderIndexerHelper::tokenize($input, $lang); 1182 1183 // Add the tokens to the database. 1184 $count = FinderIndexer::addTokensToDB($tokens, $context); 1185 } 1186 1187 return $count; 1188 } 1189 1190 /** 1191 * Method to add a set of tokens to the database. 1192 * 1193 * @param mixed $tokens An array or single FinderIndexerToken object. 1194 * @param mixed $context The context of the tokens. See context constants. [optional] 1195 * 1196 * @return integer The number of tokens inserted into the database. 1197 * 1198 * @since 2.5 1199 * @throws Exception on database error. 1200 */ 1201 protected static function addTokensToDB($tokens, $context = '') 1202 { 1203 // Get the database object. 1204 $db = JFactory::getDBO(); 1205 $query = $db->getQuery(true); 1206 1207 // Force tokens to an array. 1208 $tokens = is_array($tokens) ? $tokens : array($tokens); 1209 1210 // Count the number of token values. 1211 $values = 0; 1212 1213 // Iterate through the tokens to create SQL value sets. 1214 foreach ($tokens as $token) 1215 { 1216 $query->values( 1217 $db->quote($token->term) . ', ' 1218 . $db->quote($token->stem) . ', ' 1219 . (int) $token->common . ', ' 1220 . (int) $token->phrase . ', ' 1221 . (float) $token->weight . ', ' 1222 . (int) $context 1223 ); 1224 $values++; 1225 } 1226 1227 // Insert the tokens into the database. 
1228 $query->insert($db->quoteName('#__finder_tokens')); 1229 $query->columns( 1230 array( 1231 $db->quoteName('term'), 1232 $db->quoteName('stem'), 1233 $db->quoteName('common'), 1234 $db->quoteName('phrase'), 1235 $db->quoteName('weight'), 1236 $db->quoteName('context') 1237 ) 1238 ); 1239 $db->setQuery($query); 1240 $db->query(); 1241 1242 // Check for a database error. 1243 if ($db->getErrorNum()) 1244 { 1245 // Throw database error exception. 1246 throw new Exception($db->getErrorMsg(), 500); 1247 } 1248 1249 return $values; 1250 } 1251 1252 /** 1253 * Method to switch the token tables from Memory tables to MyISAM tables 1254 * when they are close to running out of memory. 1255 * 1256 * @param boolean $memory Flag to control how they should be toggled. 1257 * 1258 * @return boolean True on success. 1259 * 1260 * @since 2.5 1261 * @throws Exception on database error. 1262 * @todo PostgreSQL doesn't support setting ENGINEs, determine how to handle setting tables 1263 */ 1264 protected static function toggleTables($memory) 1265 { 1266 static $state; 1267 1268 // Get the database adapter. 1269 $db = JFactory::getDBO(); 1270 1271 // Temporary workaround for non-MySQL solutions 1272 if (strpos($db->name, 'mysql') !== 0) 1273 { 1274 return true; 1275 } 1276 1277 // Check if we are setting the tables to the Memory engine. 1278 if ($memory === true && $state !== true) 1279 { 1280 // Set the tokens table to Memory. 1281 $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens') . ' ENGINE = MEMORY'); 1282 $db->query(); 1283 1284 // Check for a database error. 1285 if ($db->getErrorNum()) 1286 { 1287 // Throw database error exception. 1288 throw new Exception($db->getErrorMsg(), 500); 1289 } 1290 1291 // Set the tokens aggregate table to Memory. 1292 $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens_aggregate') . ' ENGINE = MEMORY'); 1293 $db->query(); 1294 1295 // Check for a database error. 
1296 if ($db->getErrorNum()) 1297 { 1298 // Throw database error exception. 1299 throw new Exception($db->getErrorMsg(), 500); 1300 } 1301 1302 // Set the internal state. 1303 $state = $memory; 1304 } 1305 // We must be setting the tables to the MyISAM engine. 1306 elseif ($memory === false && $state !== false) 1307 { 1308 // Set the tokens table to MyISAM. 1309 $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens') . ' ENGINE = MYISAM'); 1310 $db->query(); 1311 1312 // Check for a database error. 1313 if ($db->getErrorNum()) 1314 { 1315 // Throw database error exception. 1316 throw new Exception($db->getErrorMsg(), 500); 1317 } 1318 1319 // Set the tokens aggregate table to MyISAM. 1320 $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens_aggregate') . ' ENGINE = MYISAM'); 1321 $db->query(); 1322 1323 // Check for a database error. 1324 if ($db->getErrorNum()) 1325 { 1326 // Throw database error exception. 1327 throw new Exception($db->getErrorMsg(), 500); 1328 } 1329 1330 // Set the internal state. 1331 $state = $memory; 1332 } 1333 1334 return true; 1335 } 1336 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Tue Apr 3 11:40:28 2012 | Cross-referenced by PHPXref 0.7.1 |