| [ Index ] |
PHP Cross Reference of Joomla 2.5.4 DE |
[Summary view] [Print] [Text view]
1 <?php 2 /** 3 * @package Joomla.Administrator 4 * @subpackage com_finder 5 * 6 * @copyright Copyright (C) 2005 - 2012 Open Source Matters, Inc. All rights reserved. 7 * @license GNU General Public License version 2 or later; see LICENSE 8 */ 9 10 defined('_JEXEC') or die; 11 12 // Register dependent classes. 13 JLoader::register('FinderIndexerHelper', dirname(__FILE__) . '/helper.php'); 14 JLoader::register('FinderIndexerTaxonomy', dirname(__FILE__) . '/taxonomy.php'); 15 JLoader::register('FinderHelperRoute', JPATH_SITE . '/components/com_finder/helpers/route.php'); 16 JLoader::register('FinderHelperLanguage', JPATH_ADMINISTRATOR . '/components/com_finder/helpers/language.php'); 17 18 /** 19 * Query class for the Finder indexer package. 20 * 21 * @package Joomla.Administrator 22 * @subpackage com_finder 23 * @since 2.5 24 */ 25 class FinderIndexerQuery 26 { 27 /** 28 * Flag to show whether the query can return results. 29 * 30 * @var boolean 31 * @since 2.5 32 */ 33 public $search; 34 35 /** 36 * The query input string. 37 * 38 * @var string 39 * @since 2.5 40 */ 41 public $input; 42 43 /** 44 * The language of the query. 45 * 46 * @var string 47 * @since 2.5 48 */ 49 public $language; 50 51 /** 52 * The query string matching mode. 53 * 54 * @var string 55 * @since 2.5 56 */ 57 public $mode; 58 59 /** 60 * The included tokens. 61 * 62 * @var array 63 * @since 2.5 64 */ 65 public $included = array(); 66 67 /** 68 * The excluded tokens. 69 * 70 * @var array 71 * @since 2.5 72 */ 73 public $excluded = array(); 74 75 /** 76 * The tokens to ignore because no matches exist. 77 * 78 * @var array 79 * @since 2.5 80 */ 81 public $ignored = array(); 82 83 /** 84 * The operators used in the query input string. 85 * 86 * @var array 87 * @since 2.5 88 */ 89 public $operators = array(); 90 91 /** 92 * The terms to highlight as matches. 93 * 94 * @var array 95 * @since 2.5 96 */ 97 public $highlight = array(); 98 99 /** 100 * The number of matching terms for the query input. 
101 * 102 * @var integer 103 * @since 2.5 104 */ 105 public $terms; 106 107 /** 108 * The static filter id. 109 * 110 * @var string 111 * @since 2.5 112 */ 113 public $filter; 114 115 /** 116 * The taxonomy filters. This is a multi-dimensional array of taxonomy 117 * branches as the first level and then the taxonomy nodes as the values. 118 * 119 * For example: 120 * $filters = array( 121 * 'Type' = array(10, 32, 29, 11, ...); 122 * 'Label' = array(20, 314, 349, 91, 82, ...); 123 * ... 124 * ); 125 * 126 * @var array 127 * @since 2.5 128 */ 129 public $filters = array(); 130 131 /** 132 * The start date filter. 133 * 134 * @var string 135 * @since 2.5 136 */ 137 public $date1; 138 139 /** 140 * The end date filter. 141 * 142 * @var string 143 * @since 2.5 144 */ 145 public $date2; 146 147 /** 148 * The start date filter modifier. 149 * 150 * @var string 151 * @since 2.5 152 */ 153 public $when1; 154 155 /** 156 * The end date filter modifier. 157 * 158 * @var string 159 * @since 2.5 160 */ 161 public $when2; 162 163 /** 164 * Method to instantiate the query object. 165 * 166 * @param array $options An array of query options. 167 * 168 * @since 2.5 169 * @throws Exception on database error. 170 */ 171 public function __construct($options) 172 { 173 // Get the input string. 174 $this->input = isset($options['input']) ? $options['input'] : null; 175 176 // Get the empty query setting. 177 $this->empty = isset($options['empty']) ? (bool) $options['empty'] : false; 178 179 // Get the input language. 180 $this->language = !empty($options['language']) ? $options['language'] : FinderIndexerHelper::getDefaultLanguage(); 181 $this->language = FinderIndexerHelper::getPrimaryLanguage($this->language); 182 183 // Get the matching mode. 184 $this->mode = 'AND'; 185 186 // Initialize the temporary date storage. 187 $this->dates = new JRegistry; 188 189 // Populate the temporary date storage. 
190 if (isset($options['date1']) && !empty($options['date1'])) 191 { 192 $this->dates->set('date1', $options['date1']); 193 } 194 if (isset($options['date2']) && !empty($options['date2'])) /* the end date is tested against its own value, so it is kept even when no start date is given */ 195 { 196 $this->dates->set('date2', $options['date2']); 197 } 198 if (isset($options['when1']) && !empty($options['date1'])) /* a modifier is only stored when the date it modifies is present */ 199 { 200 $this->dates->set('when1', $options['when1']); 201 } 202 if (isset($options['when2']) && !empty($options['date2'])) /* the end date modifier depends on the end date, not on the start date */ 203 { 204 $this->dates->set('when2', $options['when2']); 205 } 206 207 // Process the static taxonomy filters. 208 if (isset($options['filter']) && !empty($options['filter'])) 209 { 210 $this->processStaticTaxonomy($options['filter']); 211 } 212 213 // Process the dynamic taxonomy filters. 214 if (isset($options['filters']) && !empty($options['filters'])) 215 { 216 $this->processDynamicTaxonomy($options['filters']); 217 } 218 219 // Get the date filters. 220 $d1 = $this->dates->get('date1'); 221 $d2 = $this->dates->get('date2'); 222 $w1 = $this->dates->get('when1'); 223 $w2 = $this->dates->get('when2'); 224 225 // Process the date filters. 226 if (!empty($d1) || !empty($d2)) 227 { 228 $this->processDates($d1, $d2, $w1, $w2); 229 } 230 231 // Process the input string. 232 $this->processString($this->input, $this->language, $this->mode); 233 234 // Get the number of matching terms. 235 foreach ($this->included as $token) 236 { 237 $this->terms += count($token->matches); 238 } 239 240 // Remove the temporary date storage. 241 unset($this->dates); 242 243 /* 244 * Lastly, determine whether this query can return a result set. 245 */ 246 // Check if we have a query string. 247 if (!empty($this->input)) 248 { 249 $this->search = true; 250 } 251 // Check if we can search without a query string. 252 elseif ($this->empty && (!empty($this->filter) || !empty($this->filters) || !empty($this->date1) || !empty($this->date2))) 253 { 254 $this->search = true; 255 } 256 // We do not have a valid search query. 
257 else 258 { 259 $this->search = false; 260 } 261 } 262 263 /** 264 * Method to convert the query object into a URI string. 265 * 266 * @param string $base The base URI. [optional] 267 * 268 * @return string The complete query URI. 269 * 270 * @since 2.5 271 */ 272 public function toURI($base = null) 273 { 274 // Set the base if not specified. 275 if (empty($base)) 276 { 277 $base = 'index.php?option=com_finder&view=search'; 278 } 279 280 // Get the base URI. 281 $uri = JURI::getInstance($base); 282 283 // Add the static taxonomy filter if present. 284 if (!empty($this->filter)) 285 { 286 $uri->setVar('f', $this->filter); 287 } 288 289 // Get the filters in the request. 290 $input = JFactory::getApplication()->input; 291 $t = $input->request->get('t', array(), 'array'); 292 293 // Add the dynamic taxonomy filters if present. 294 if (!empty($this->filters)) 295 { 296 foreach ($this->filters as $nodes) 297 { 298 foreach ($nodes as $node) 299 { 300 if (!in_array($node, $t)) 301 { 302 continue; 303 } 304 $uri->setVar('t[]', $node); 305 } 306 } 307 } 308 309 // Add the input string if present. 310 if (!empty($this->input)) 311 { 312 $uri->setVar('q', $this->input); 313 } 314 315 // Add the start date if present. 316 if (!empty($this->date1)) 317 { 318 $uri->setVar('d1', $this->date1); 319 } 320 321 // Add the end date if present. 322 if (!empty($this->date2)) 323 { 324 $uri->setVar('d2', $this->date2); 325 } 326 327 // Add the start date modifier if present. 328 if (!empty($this->when1)) 329 { 330 $uri->setVar('w1', $this->when1); 331 } 332 333 // Add the end date modifier if present. 334 if (!empty($this->when2)) 335 { 336 $uri->setVar('w2', $this->when2); 337 } 338 339 // Add a menu item id if one is not present. 340 if (!$uri->getVar('Itemid')) 341 { 342 // Get the menu item id. 
343 $query = array( 344 'view' => $uri->getVar('view'), 345 'f' => $uri->getVar('f'), 346 'q' => $uri->getVar('q') 347 ); 348 $item = FinderHelperRoute::getItemid($query); 349 350 // Add the menu item id if present. 351 if ($item !== null) 352 { 353 $uri->setVar('Itemid', $item); 354 } 355 } 356 357 return $uri->toString(array('path', 'query')); 358 } 359 360 /** 361 * Method to get a list of excluded search term ids. 362 * 363 * @return array An array of unique, integer-cast excluded term ids. 364 * 365 * @since 2.5 366 */ 367 public function getExcludedTermIds() 368 { 369 $results = array(); 370 371 // Iterate through the excluded tokens and compile the matching terms. 372 for ($i = 0, $c = count($this->excluded); $i < $c; $i++) 373 { 374 $results = array_merge($results, $this->excluded[$i]->matches); 375 } 376 377 // Sanitize the terms. 378 // array_unique() returns a new array, so the result has to be assigned back before casting. 379 $results = array_unique($results); 380 JArrayHelper::toInteger($results); 381 382 return $results; 383 } 384 385 /** 386 * Method to get a list of included search term ids. 387 * 388 * @return array An array of included term ids. 389 * 390 * @since 2.5 391 */ 392 public function getIncludedTermIds() 393 { 394 $results = array(); 395 396 // Iterate through the included tokens and compile the matching terms. 397 for ($i = 0, $c = count($this->included); $i < $c; $i++) 398 { 399 // Check if we have any terms. 400 if (empty($this->included[$i]->matches)) 401 { 402 continue; 403 } 404 405 // Get the term. 406 $term = $this->included[$i]->term; 407 408 // Prepare the container for the term if necessary. 409 if (!array_key_exists($term, $results)) 410 { 411 $results[$term] = array(); 412 } 413 414 // Add the matches to the stack. 415 $results[$term] = array_merge($results[$term], $this->included[$i]->matches); 416 } 417 418 // Sanitize the terms. 
419 foreach ($results as $key => $value) 420 { 421 $results[$key] = array_unique($results[$key]); 422 JArrayHelper::toInteger($results[$key]); 423 } 424 425 return $results; 426 } 427 428 /** 429 * Method to get a list of required search term ids. 430 * 431 * @return array An array of required term ids. 432 * 433 * @since 2.5 434 */ 435 public function getRequiredTermIds() 436 { 437 $results = array(); 438 439 // Iterate through the included tokens and compile the matching terms. 440 for ($i = 0, $c = count($this->included); $i < $c; $i++) 441 { 442 // Check if the token is required. 443 if ($this->included[$i]->required) 444 { 445 // Get the term. 446 $term = $this->included[$i]->term; 447 448 // Prepare the container for the term if necessary. 449 if (!array_key_exists($term, $results)) 450 { 451 $results[$term] = array(); 452 } 453 454 // Add the matches to the stack. 455 $results[$term] = array_merge($results[$term], $this->included[$i]->matches); 456 } 457 } 458 459 // Sanitize the terms. 460 foreach ($results as $key => $value) 461 { 462 $results[$key] = array_unique($results[$key]); 463 JArrayHelper::toInteger($results[$key]); 464 } 465 466 return $results; 467 } 468 469 /** 470 * Method to process the static taxonomy input. The static taxonomy input 471 * comes in the form of a pre-defined search filter that is assigned to the 472 * search form. 473 * 474 * @param integer $filterId The id of static filter. 475 * 476 * @return boolean True on success, false on failure. 477 * 478 * @since 2.5 479 * @throws Exception on database error. 480 */ 481 protected function processStaticTaxonomy($filterId) 482 { 483 // Get the database object. 484 $db = JFactory::getDBO(); 485 486 // Initialize user variables 487 $user = JFactory::getUser(); 488 $groups = implode(',', $user->getAuthorisedViewLevels()); 489 490 // Load the predefined filter. 491 $query = $db->getQuery(true); 492 $query->select('f.' . $db->quoteName('data') . ', f.' . 
$db->quoteName('params')); 493 $query->from($db->quoteName('#__finder_filters') . ' AS f'); 494 $query->where('f.' . $db->quoteName('filter_id') . ' = ' . (int) $filterId); 495 496 $db->setQuery($query); 497 $return = $db->loadObject(); 498 499 // Check for a database error. 500 if ($db->getErrorNum()) 501 { 502 // Throw database error exception. 503 throw new Exception($db->getErrorMsg(), 500); 504 } 505 506 // Check the returned filter. 507 if (empty($return)) 508 { 509 return false; 510 } 511 512 // Set the filter. 513 $this->filter = (int) $filterId; 514 515 // Get a parameter object for the filter date options. 516 $registry = new JRegistry; 517 $registry->loadString($return->params); 518 $params = $registry; 519 520 // Set the dates if not already set. 521 $this->dates->def('d1', $params->get('d1')); 522 $this->dates->def('d2', $params->get('d2')); 523 $this->dates->def('w1', $params->get('w1')); 524 $this->dates->def('w2', $params->get('w2')); 525 526 // Remove duplicates and sanitize. 527 $filters = explode(',', $return->data); 528 $filters = array_unique($filters); 529 JArrayHelper::toInteger($filters); 530 531 // Remove any values of zero. 532 if (array_search(0, $filters, true) !== false) 533 { 534 unset($filters[array_search(0, $filters, true)]); 535 } 536 537 // Check if we have any real input. 538 if (empty($filters)) 539 { 540 return true; 541 } 542 543 /* 544 * Create the query to get filters from the database. We do this for 545 * two reasons: one, it allows us to ensure that the filters being used 546 * are real; two, we need to sort the filters by taxonomy branch. 547 */ 548 $query->clear(); 549 $query->select('t1.id, t1.title, t2.title AS branch'); 550 $query->from($db->quoteName('#__finder_taxonomy') . ' AS t1'); 551 $query->join('INNER', $db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.id = t1.parent_id'); 552 $query->where('t1.state = 1'); 553 $query->where('t1.' . $db->quoteName('access') . ' IN (' . $groups . 
')'); 554 $query->where('t1.id IN (' . implode(',', $filters) . ')'); 555 $query->where('t2.state = 1'); 556 $query->where('t2.' . $db->quoteName('access') . ' IN (' . $groups . ')'); 557 558 // Load the filters. 559 $db->setQuery($query); 560 $results = $db->loadObjectList(); 561 562 // Check for a database error. 563 if ($db->getErrorNum()) 564 { 565 // Throw database error exception. 566 throw new Exception($db->getErrorMsg(), 500); 567 } 568 569 // Sort the filter ids by branch. 570 foreach ($results as $result) 571 { 572 $this->filters[$result->branch][$result->title] = (int) $result->id; 573 } 574 575 return true; 576 } 577 578 /** 579 * Method to process the dynamic taxonomy input. The dynamic taxonomy input 580 * comes in the form of select fields that the user chooses from. The 581 * dynamic taxonomy input is processed AFTER the static taxonomy input 582 * because the dynamic options can be used to further narrow a static 583 * taxonomy filter. 584 * 585 * @param array $filters An array of taxonomy node ids. 586 * 587 * @return boolean True on success. 588 * 589 * @since 2.5 590 * @throws Exception on database error. 591 */ 592 protected function processDynamicTaxonomy($filters) 593 { 594 // Initialize user variables 595 $user = JFactory::getUser(); 596 $groups = implode(',', $user->getAuthorisedViewLevels()); 597 598 // Remove duplicates and sanitize. 599 $filters = array_unique($filters); 600 JArrayHelper::toInteger($filters); 601 602 // Remove any values of zero. 603 if (array_search(0, $filters, true) !== false) 604 { 605 unset($filters[array_search(0, $filters, true)]); 606 } 607 608 // Check if we have any real input. 609 if (empty($filters)) 610 { 611 return true; 612 } 613 614 // Get the database object. 615 $db = JFactory::getDBO(); 616 $query = $db->getQuery(true); 617 618 /* 619 * Create the query to get filters from the database. 
We do this for 620 * two reasons: one, it allows us to ensure that the filters being used 621 * are real; two, we need to sort the filters by taxonomy branch. 622 */ 623 $query->select('t1.id, t1.title, t2.title AS branch'); 624 $query->from($db->quoteName('#__finder_taxonomy') . ' AS t1'); 625 $query->join('INNER', $db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.id = t1.parent_id'); 626 $query->where('t1.state = 1'); 627 $query->where('t1.' . $db->quoteName('access') . ' IN (' . $groups . ')'); 628 $query->where('t1.id IN (' . implode(',', $filters) . ')'); 629 $query->where('t2.state = 1'); 630 $query->where('t2.' . $db->quoteName('access') . ' IN (' . $groups . ')'); 631 632 // Load the filters. 633 $db->setQuery($query); 634 $results = $db->loadObjectList(); 635 636 // Check for a database error. 637 if ($db->getErrorNum()) 638 { 639 // Throw database error exception. 640 throw new Exception($db->getErrorMsg(), 500); 641 } 642 643 // Cleared filter branches. 644 $cleared = array(); 645 646 /* 647 * Sort the filter ids by branch. Because these filters are designed to 648 * override and further narrow the items selected in the static filter, 649 * we will clear the values from the static filter on a branch by 650 * branch basis before adding the dynamic filters. So, if the static 651 * filter defines a type filter of "articles" and three "category" 652 * filters but the user only limits the category further, the category 653 * filters will be flushed but the type filters will not. 654 */ 655 foreach ($results as $result) 656 { 657 // Check if the branch has been cleared. 658 if (!in_array($result->branch, $cleared)) 659 { 660 // Clear the branch. 661 $this->filters[$result->branch] = array(); 662 663 // Add the branch to the cleared list. 664 $cleared[] = $result->branch; 665 } 666 667 // Add the filter to the list. 
668 $this->filters[$result->branch][$result->title] = (int) $result->id; 669 } 670 671 return true; 672 } 673 674 /** 675 * Method to process the query date filters to determine start and end 676 * date limitations. 677 * 678 * @param string $date1 The first date filter. 679 * @param string $date2 The second date filter. 680 * @param string $when1 The first date modifier. 681 * @param string $when2 The second date modifier. 682 * 683 * @return boolean True on success. 684 * 685 * @since 2.5 686 */ 687 protected function processDates($date1, $date2, $when1, $when2) 688 { 689 // Clean up the inputs. 690 $date1 = JString::trim(JString::strtolower($date1)); 691 $date2 = JString::trim(JString::strtolower($date2)); 692 $when1 = JString::trim(JString::strtolower($when1)); 693 $when2 = JString::trim(JString::strtolower($when2)); 694 695 // Get the time offset. 696 $offset = JFactory::getApplication()->getCfg('offset'); 697 698 // Array of allowed when values. 699 $whens = array('before', 'after', 'exact'); 700 701 // The value of 'today' is a special case that we need to handle. 702 if ($date1 === JString::strtolower(JText::_('COM_FINDER_QUERY_FILTER_TODAY'))) 703 { 704 $today = JFactory::getDate('now', $offset); 705 $date1 = $today->format('Y-m-d'); /* JDate::format() takes date()-style characters; the strftime()-style '%Y-%m-%d' produced an unparseable string */ 706 } 707 708 // Try to parse the date string. 709 $date = JFactory::getDate($date1, $offset); 710 711 // Check if the date was parsed successfully. 712 if ($date->toUnix() !== null) 713 { 714 // Set the date filter. 715 $this->date1 = $date->toSQL(); 716 $this->when1 = in_array($when1, $whens) ? $when1 : 'before'; 717 } 718 719 // The value of 'today' is a special case that we need to handle. 720 if ($date2 === JString::strtolower(JText::_('COM_FINDER_QUERY_FILTER_TODAY'))) 721 { 722 $today = JFactory::getDate('now', $offset); 723 $date2 = $today->format('Y-m-d'); /* same date()-style format as for the start date */ 724 } 725 726 // Try to parse the date string. 727 $date = JFactory::getDate($date2, $offset); 728 729 // Check if the date was parsed successfully. 
730 if ($date->toUnix() !== null) 731 { 732 // Set the date filter. 733 $this->date2 = $date->toSQL(); 734 $this->when2 = in_array($when2, $whens) ? $when2 : 'before'; 735 } 736 737 return true; 738 } 739 740 /** 741 * Method to process the query input string and extract required, optional, 742 * and excluded tokens; taxonomy filters; and date filters. 743 * 744 * @param string $input The query input string. 745 * @param string $lang The query input language. 746 * @param string $mode The query matching mode. 747 * 748 * @return boolean True on success. 749 * 750 * @since 2.5 751 * @throws Exception on database error. 752 */ 753 protected function processString($input, $lang, $mode) 754 { 755 // Clean up the input string. 756 $input = html_entity_decode($input, ENT_QUOTES, 'UTF-8'); 757 $input = JString::strtolower($input); 758 $input = preg_replace('#\s+#mi', ' ', $input); 759 $input = JString::trim($input); 760 $debug = JFactory::getConfig()->get('debug_lang'); 761 762 /* 763 * First, we need to handle string based modifiers. String based 764 * modifiers could potentially include things like "category:blah" or 765 * "before:2009-10-21" or "type:article", etc. 766 */ 767 $patterns = array( 768 'before' => JText::_('COM_FINDER_FILTER_WHEN_BEFORE'), 769 'after' => JText::_('COM_FINDER_FILTER_WHEN_AFTER') 770 ); 771 772 // Add the taxonomy branch titles to the possible patterns. 773 foreach (FinderIndexerTaxonomy::getBranchTitles() as $branch) 774 { 775 // Add the pattern. 776 $patterns[$branch] = JString::strtolower(JText::_(FinderHelperLanguage::branchSingular($branch))); 777 } 778 779 // Container for search terms and phrases. 780 $terms = array(); 781 $phrases = array(); 782 783 // Cleared filter branches. 784 $cleared = array(); 785 786 /* 787 * Compile the suffix pattern. This is used to match the values of the 788 * filter input string. Single words can be input directly, multi-word 789 * values have to be wrapped in double quotes. 
790 */ 791 $quotes = "\xE2\x80\x98\xE2\x80\x99'"; /* UTF-8 bytes of the curly single quotes U+2018 and U+2019, followed by the ASCII apostrophe */ 792 $suffix = '(([\w\d' . $quotes . '-]+)|\"([\w\d\s' . $quotes . '-]+)\")'; 793 794 /* 795 * Iterate through the possible filter patterns and search for matches. 796 * We need to match the key, colon, and a value pattern for the match 797 * to be valid. 798 */ 799 foreach ($patterns as $modifier => $pattern) 800 { 801 $matches = array(); 802 803 if ($debug) 804 { 805 $pattern = substr($pattern, 2, -2); 806 } 807 808 // Check if the filter pattern is in the input string. The translated label is escaped so that it is matched literally. 809 if (preg_match('#' . preg_quote($pattern, '#') . '\s*:\s*' . $suffix . '#mi', $input, $matches)) 810 { 811 // Get the value given to the modifier. 812 $value = isset($matches[3]) ? $matches[3] : $matches[1]; 813 814 // Now we have to handle the filter string. 815 switch ($modifier) 816 { 817 // Handle a before and after date filters. 818 case 'before': 819 case 'after': 820 { 821 // Get the time offset. 822 $offset = JFactory::getApplication()->getCfg('offset'); 823 824 // Array of allowed when values. 825 $whens = array('before', 'after', 'exact'); 826 827 // The value of 'today' is a special case that we need to handle. 828 if ($value === JString::strtolower(JText::_('COM_FINDER_QUERY_FILTER_TODAY'))) 829 { 830 $today = JFactory::getDate('now', $offset); 831 $value = $today->format('Y-m-d'); /* JDate::format() takes date()-style characters, not strftime()-style ones */ 832 } 833 834 // Try to parse the date string. 835 $date = JFactory::getDate($value, $offset); 836 837 // Check if the date was parsed successfully. 838 if ($date->toUnix() !== null) 839 { 840 // Set the date filter. 841 $this->date1 = $date->toSQL(); 842 $this->when1 = in_array($modifier, $whens) ? $modifier : 'before'; 843 } 844 845 break; 846 } 847 848 // Handle a taxonomy branch filter. 849 default: 850 { 851 // Try to find the node id. 852 $return = FinderIndexerTaxonomy::getNodeByTitle($modifier, $value); 853 854 // Check if the node id was found. 855 if ($return) 856 { 857 // Check if the branch has been cleared. 
858 if (!in_array($modifier, $cleared)) 859 { 860 // Clear the branch. 861 $this->filters[$modifier] = array(); 862 863 // Add the branch to the cleared list. 864 $cleared[] = $modifier; 865 } 866 867 // Add the filter to the list. 868 $this->filters[$modifier][$return->title] = (int) $return->id; 869 } 870 871 break; 872 } 873 } 874 875 // Clean up the input string again. 876 $input = str_replace($matches[0], '', $input); 877 $input = preg_replace('#\s+#mi', ' ', $input); 878 $input = JString::trim($input); 879 } 880 } 881 882 /* 883 * Extract the tokens enclosed in double quotes so that we can handle 884 * them as phrases. 885 */ 886 if (JString::strpos($input, '"') !== false) 887 { 888 $matches = array(); 889 890 // Extract the tokens enclosed in double quotes. 891 if (preg_match_all('#\"([^"]+)\"#mi', $input, $matches)) 892 { 893 /* 894 * One or more phrases were found so we need to iterate through 895 * them, tokenize them as phrases, and remove them from the raw 896 * input string before we move on to the next processing step. 897 */ 898 foreach ($matches[1] as $key => $match) 899 { 900 // Find the complete phrase in the input string. 901 $pos = JString::strpos($input, $matches[0][$key]); 902 $len = JString::strlen($matches[0][$key]); 903 904 // Add any terms that are before this phrase to the stack. 905 if (JString::trim(JString::substr($input, 0, $pos))) 906 { 907 $terms = array_merge($terms, explode(' ', JString::trim(JString::substr($input, 0, $pos)))); 908 } 909 910 // Strip out everything up to and including the phrase. 911 $input = JString::substr($input, $pos + $len); 912 913 // Clean up the input string again. 914 $input = preg_replace('#\s+#mi', ' ', $input); 915 $input = JString::trim($input); 916 917 // Get the number of words in the phrase. 918 $parts = explode(' ', $match); 919 920 // Check if the phrase is longer than three words. 
921 if (count($parts) > 3) 922 { 923 /* 924 * If the phrase is longer than three words, we need to 925 * break it down into smaller chunks of phrases that 926 * are less than or equal to three words. We overlap 927 * the chunks so that we can ensure that a match is 928 * found for the complete phrase and not just portions 929 * of it. 930 */ 931 for ($i = 0, $c = count($parts); $i < $c; $i += 2) 932 { 933 // Set up the chunk. 934 $chunk = array(); 935 936 // The chunk has to be assembled based on how many 937 // pieces are available to use. 938 switch ($c - $i) 939 { 940 // If only one word is left, we can break from 941 // the switch and loop because the last word 942 // was already used at the end of the last 943 // chunk. 944 case 1: 945 break 2; 946 947 // If there words are left, we use them both as 948 // the last chunk of the phrase and we're done. 949 case 2: 950 $chunk[] = $parts[$i]; 951 $chunk[] = $parts[$i + 1]; 952 break; 953 954 // If there are three or more words left, we 955 // build a three word chunk and continue on. 956 default: 957 $chunk[] = $parts[$i]; 958 $chunk[] = $parts[$i + 1]; 959 $chunk[] = $parts[$i + 2]; 960 break; 961 } 962 963 // If the chunk is not empty, add it as a phrase. 964 if (count($chunk)) 965 { 966 $phrases[] = implode(' ', $chunk); 967 $terms[] = implode(' ', $chunk); 968 } 969 } 970 } 971 else 972 { 973 // The phrase is <= 3 words so we can use it as is. 974 $phrases[] = $match; 975 $terms[] = $match; 976 } 977 } 978 } 979 } 980 981 // Add the remaining terms if present. 982 if (!empty($input)) 983 { 984 $terms = array_merge($terms, explode(' ', $input)); 985 } 986 987 // An array of our boolean operators. 
$operator => $translation 988 $operators = array( 989 'AND' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_AND')), 990 'OR' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_OR')), 991 'NOT' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_NOT')) 992 ); 993 // If language debugging is enabled you need to ignore the debug strings in matching. 994 if (JDEBUG) 995 { 996 $debugStrings = array('**', '??'); 997 $operators = str_replace($debugStrings, '', $operators); 998 } 999 /* 1000 * Iterate through the terms and perform any sorting that needs to be 1001 * done based on boolean search operators. Terms that are before an 1002 * and/or/not modifier have to be handled in relation to their operator. 1003 */ 1004 for ($i = 0, $c = count($terms); $i < $c; $i++) 1005 { 1006 1007 // Check if the term is followed by an operator that we understand. 1008 if (isset($terms[$i + 1]) && in_array($terms[$i + 1], $operators)) 1009 { 1010 // Get the operator mode. 1011 $op = array_search($terms[$i + 1], $operators); 1012 1013 // Handle the AND operator. 1014 if ($op === 'AND' && isset($terms[$i + 2])) 1015 { 1016 // Tokenize the current term. 1017 $token = FinderIndexerHelper::tokenize($terms[$i], $lang, true); 1018 $token = $this->getTokenData($token); 1019 1020 // Set the required flag. 1021 $token->required = true; 1022 1023 // Add the current token to the stack. 1024 $this->included[] = $token; 1025 $this->highlight = array_merge($this->highlight, array_keys($token->matches)); 1026 1027 // Skip the next token (the mode operator). 1028 $this->operators[] = $terms[$i + 1]; 1029 1030 // Tokenize the term after the next term (current plus two). 1031 $other = FinderIndexerHelper::tokenize($terms[$i + 2], $lang, true); 1032 $other = $this->getTokenData($other); 1033 1034 // Set the required flag. 1035 $other->required = true; 1036 1037 // Add the token after the next token to the stack. 
1038 $this->included[] = $other; 1039 $this->highlight = array_merge($this->highlight, array_keys($other->matches)); 1040 1041 // Remove the processed phrases if possible. 1042 if (($pk = array_search($terms[$i], $phrases)) !== false) 1043 { 1044 unset($phrases[$pk]); 1045 } 1046 if (($pk = array_search($terms[$i + 2], $phrases)) !== false) 1047 { 1048 unset($phrases[$pk]); 1049 } 1050 1051 // Remove the processed terms. 1052 unset($terms[$i]); 1053 unset($terms[$i + 1]); 1054 unset($terms[$i + 2]); 1055 1056 // Adjust the loop. 1057 $i += 2; 1058 continue; 1059 } 1060 // Handle the OR operator. 1061 elseif ($op === 'OR' && isset($terms[$i + 2])) 1062 { 1063 // Tokenize the current term. 1064 $token = FinderIndexerHelper::tokenize($terms[$i], $lang, true); 1065 $token = $this->getTokenData($token); 1066 1067 // Set the required flag. 1068 $token->required = false; 1069 1070 // Add the current token to the stack. 1071 if (count($token->matches)) 1072 { 1073 $this->included[] = $token; 1074 $this->highlight = array_merge($this->highlight, array_keys($token->matches)); 1075 } 1076 else 1077 { 1078 $this->ignored[] = $token; 1079 } 1080 1081 // Skip the next token (the mode operator). 1082 $this->operators[] = $terms[$i + 1]; 1083 1084 // Tokenize the term after the next term (current plus two). 1085 $other = FinderIndexerHelper::tokenize($terms[$i + 2], $lang, true); 1086 $other = $this->getTokenData($other); 1087 1088 // Set the required flag. 1089 $other->required = false; 1090 1091 // Add the token after the next token to the stack. 1092 if (count($other->matches)) 1093 { 1094 $this->included[] = $other; 1095 $this->highlight = array_merge($this->highlight, array_keys($other->matches)); 1096 } 1097 else 1098 { 1099 $this->ignored[] = $other; 1100 } 1101 1102 // Remove the processed phrases if possible. 
1103 if (($pk = array_search($terms[$i], $phrases)) !== false) 1104 { 1105 unset($phrases[$pk]); 1106 } 1107 if (($pk = array_search($terms[$i + 2], $phrases)) !== false) 1108 { 1109 unset($phrases[$pk]); 1110 } 1111 1112 // Remove the processed terms. 1113 unset($terms[$i]); 1114 unset($terms[$i + 1]); 1115 unset($terms[$i + 2]); 1116 1117 // Adjust the loop. 1118 $i += 2; 1119 continue; 1120 } 1121 } 1122 // Handle an orphaned OR operator. 1123 elseif (isset($terms[$i + 1]) && array_search($terms[$i], $operators) === 'OR') 1124 { 1125 // Skip the next token (the mode operator). 1126 $this->operators[] = $terms[$i]; 1127 1128 // Tokenize the next term (current plus one). 1129 $other = FinderIndexerHelper::tokenize($terms[$i + 1], $lang, true); 1130 $other = $this->getTokenData($other); 1131 1132 // Set the required flag. 1133 $other->required = false; 1134 1135 // Add the token after the next token to the stack. 1136 if (count($other->matches)) 1137 { 1138 $this->included[] = $other; 1139 $this->highlight = array_merge($this->highlight, array_keys($other->matches)); 1140 } 1141 else 1142 { 1143 $this->ignored[] = $other; 1144 } 1145 1146 // Remove the processed phrase if possible. 1147 if (($pk = array_search($terms[$i + 1], $phrases)) !== false) 1148 { 1149 unset($phrases[$pk]); 1150 } 1151 1152 // Remove the processed terms. 1153 unset($terms[$i]); 1154 unset($terms[$i + 1]); 1155 1156 // Adjust the loop. 1157 $i += 1; 1158 continue; 1159 } 1160 // Handle the NOT operator. 1161 elseif (isset($terms[$i + 1]) && array_search($terms[$i], $operators) === 'NOT') 1162 { 1163 // Skip the next token (the mode operator). 1164 $this->operators[] = $terms[$i]; 1165 1166 // Tokenize the next term (current plus one). 1167 $other = FinderIndexerHelper::tokenize($terms[$i + 1], $lang, true); 1168 $other = $this->getTokenData($other); 1169 1170 // Set the required flag. 1171 $other->required = false; 1172 1173 // Add the next token to the stack. 
1174 if (count($other->matches)) 1175 { 1176 $this->excluded[] = $other; 1177 } 1178 else 1179 { 1180 $this->ignored[] = $other; 1181 } 1182 1183 // Remove the processed phrase if possible. 1184 if (($pk = array_search($terms[$i + 1], $phrases)) !== false) 1185 { 1186 unset($phrases[$pk]); 1187 } 1188 1189 // Remove the processed terms. 1190 unset($terms[$i]); 1191 unset($terms[$i + 1]); 1192 1193 // Adjust the loop. 1194 $i += 1; 1195 continue; 1196 } 1197 } 1198 1199 /* 1200 * Iterate through any search phrases and tokenize them. We handle 1201 * phrases as autonomous units and do not break them down into two and 1202 * three word combinations. 1203 */ 1204 for ($i = 0, $c = count($phrases); $i < $c; $i++) 1205 { 1206 // Tokenize the phrase. 1207 $token = FinderIndexerHelper::tokenize($phrases[$i], $lang, true); 1208 $token = $this->getTokenData($token); 1209 1210 // Set the required flag. 1211 $token->required = true; 1212 1213 // Add the current token to the stack. 1214 $this->included[] = $token; 1215 $this->highlight = array_merge($this->highlight, array_keys($token->matches)); 1216 1217 // Remove the processed term if possible. 1218 if (($pk = array_search($phrases[$i], $terms)) !== false) 1219 { 1220 unset($terms[$pk]); 1221 } 1222 1223 // Remove the processed phrase. 1224 unset($phrases[$i]); 1225 } 1226 1227 /* 1228 * Handle any remaining tokens using the standard processing mechanism. 1229 */ 1230 if (!empty($terms)) 1231 { 1232 // Tokenize the terms. 1233 $terms = implode(' ', $terms); 1234 $tokens = FinderIndexerHelper::tokenize($terms, $lang, false); 1235 1236 // Make sure we are working with an array. 1237 $tokens = is_array($tokens) ? $tokens : array($tokens); 1238 1239 // Get the token data and required state for all the tokens. 1240 foreach ($tokens as $token) 1241 { 1242 // Get the token data. 1243 $token = $this->getTokenData($token); 1244 1245 // Set the required flag for the token. 1246 $token->required = $mode === 'AND' ? ($token->phrase ? 
false : true) : false;

				// Add the token to the appropriate stack.
				if (count($token->matches) || $token->required)
				{
					$this->included[] = $token;
					$this->highlight = array_merge($this->highlight, array_keys($token->matches));
				}
				else
				{
					$this->ignored[] = $token;
				}
			}
		}

		return true;
	}

	/**
	 * Method to get the base and similar term ids and, if necessary, suggested
	 * term data from the database. Terms are looked up in #__finder_terms by
	 * exact term (phrases) or by exact term or common stem (single words). If
	 * no term ids could be found, then we know that we will not be able to
	 * return any results for that term, so we look for a similar sounding
	 * term that can be suggested to the user as an alternative search query.
	 *
	 * On return, $token->matches is always an array mapping each matched term
	 * to its integer term id (empty when nothing matched). $token->suggestion
	 * is only set when nothing matched and a close enough similar term exists.
	 *
	 * @param   FinderIndexerToken  $token  A FinderIndexerToken object.
	 *
	 * @return  FinderIndexerToken  The same token object, with match and suggestion data added.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	protected function getTokenData($token)
	{
		// Get the database object.
		$db = JFactory::getDBO();

		// Create a database query to match the token.
		$query = $db->getQuery(true);
		$query->select('t.term, t.term_id');
		$query->from('#__finder_terms AS t');

		/*
		 * If the token is a phrase, only an exact match on the phrase counts.
		 * If the token is a word, a row matches when either its term or its
		 * stem equals the token's. Both conditions are checked in a single
		 * query using OR.
		 */
		if ($token->phrase)
		{
			// Add the phrase to the query.
			$query->where('t.term = ' . $db->quote($token->term));
			$query->where('t.phrase = 1');
		}
		else
		{
			// Add the term and the stem to the query.
			$query->where('(t.term = ' . $db->quote($token->term) . ' OR t.stem = ' . $db->quote($token->stem) . ')');
			$query->where('t.phrase = 0');
		}

		// Get the terms.
		$db->setQuery($query);
		$matches = $db->loadObjectList();

		// Check for a database error.
		if ($db->getErrorNum())
		{
			// Throw database error exception.
			throw new Exception($db->getErrorMsg(), 500);
		}

		// Setup the container.
		$token->matches = array();

		// Add the matches to the token, keyed by term.
		if (!empty($matches))
		{
			foreach ($matches as $match)
			{
				$token->matches[$match->term] = (int) $match->term_id;
			}
		}

		// If matches were found there is no need to look for a suggestion.
		if (!empty($token->matches))
		{
			return $token;
		}

		// Create a database query to get the similar terms.
		//@TODO: PostgreSQL doesn't support SOUNDEX out of the box
		$query->clear();
		$query->select('DISTINCT t.term_id AS id, t.term AS term');
		$query->from('#__finder_terms AS t');
		$query->where('t.soundex = SOUNDEX(' . $db->quote($token->term) . ')');
		$query->where('t.phrase = ' . (int) $token->phrase);

		// Get the terms.
		$db->setQuery($query);
		$results = $db->loadObjectList();

		// Check for a database error.
		if ($db->getErrorNum())
		{
			// Throw database error exception.
			throw new Exception($db->getErrorMsg(), 500);
		}

		// Check if any similar terms were found.
		if (empty($results))
		{
			return $token;
		}

		/*
		 * Pick the similar term with the smallest levenshtein distance to the
		 * searched term. Only distances below 50 qualify; on a tie the first
		 * qualifying result is kept.
		 */
		$closest  = null;
		$shortest = 50;

		foreach ($results as $result)
		{
			// Get the levenshtein distance between terms.
			$distance = levenshtein($result->term, $token->term);

			/*
			 * Before PHP 8, levenshtein() returns -1 when one of the strings is
			 * longer than 255 characters. Such a result is not a real distance
			 * and must not win the comparison.
			 */
			if ($distance >= 0 && $distance < $shortest)
			{
				$shortest = $distance;
				$closest  = $result->term;
			}
		}

		// Add the suggested term, but only if a candidate was close enough.
		if ($closest !== null)
		{
			$token->suggestion = $closest;
		}

		return $token;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Tue Apr 3 11:40:28 2012 | Cross-referenced by PHPXref 0.7.1 |