[ Index ]

PHP Cross Reference of Joomla 2.5.4 DE

title

Body

[close]

/administrator/components/com_finder/helpers/indexer/ -> query.php (source)

   1  <?php
   2  /**
   3   * @package     Joomla.Administrator
   4   * @subpackage  com_finder
   5   *
   6   * @copyright   Copyright (C) 2005 - 2012 Open Source Matters, Inc. All rights reserved.
   7   * @license     GNU General Public License version 2 or later; see LICENSE
   8   */
   9  
  10  defined('_JEXEC') or die;
  11  
  12  // Register dependent classes.
  13  JLoader::register('FinderIndexerHelper', dirname(__FILE__) . '/helper.php');
  14  JLoader::register('FinderIndexerTaxonomy', dirname(__FILE__) . '/taxonomy.php');
  15  JLoader::register('FinderHelperRoute', JPATH_SITE . '/components/com_finder/helpers/route.php');
  16  JLoader::register('FinderHelperLanguage', JPATH_ADMINISTRATOR . '/components/com_finder/helpers/language.php');
  17  
  18  /**
  19   * Query class for the Finder indexer package.
  20   *
  21   * @package     Joomla.Administrator
  22   * @subpackage  com_finder
  23   * @since       2.5
  24   */
  25  class FinderIndexerQuery
  26  {
  27      /**
  28       * Flag to show whether the query can return results.
  29       *
  30       * @var    boolean
  31       * @since  2.5
  32       */
  33      public $search;
  34  
  35      /**
  36       * The query input string.
  37       *
  38       * @var    string
  39       * @since  2.5
  40       */
  41      public $input;
  42  
  43      /**
  44       * The language of the query.
  45       *
  46       * @var    string
  47       * @since  2.5
  48       */
  49      public $language;
  50  
  51      /**
  52       * The query string matching mode.
  53       *
  54       * @var    string
  55       * @since  2.5
  56       */
  57      public $mode;
  58  
  59      /**
  60       * The included tokens.
  61       *
  62       * @var    array
  63       * @since  2.5
  64       */
  65      public $included = array();
  66  
  67      /**
  68       * The excluded tokens.
  69       *
  70       * @var    array
  71       * @since  2.5
  72       */
  73      public $excluded = array();
  74  
  75      /**
  76       * The tokens to ignore because no matches exist.
  77       *
  78       * @var    array
  79       * @since  2.5
  80       */
  81      public $ignored = array();
  82  
  83      /**
  84       * The operators used in the query input string.
  85       *
  86       * @var    array
  87       * @since  2.5
  88       */
  89      public $operators = array();
  90  
  91      /**
  92       * The terms to highlight as matches.
  93       *
  94       * @var    array
  95       * @since  2.5
  96       */
  97      public $highlight = array();
  98  
  99      /**
 100       * The number of matching terms for the query input.
 101       *
 102       * @var    integer
 103       * @since  2.5
 104       */
 105      public $terms;
 106  
 107      /**
 108       * The static filter id.
 109       *
 110       * @var    string
 111       * @since  2.5
 112       */
 113      public $filter;
 114  
 115      /**
 116       * The taxonomy filters. This is a multi-dimensional array of taxonomy
 117       * branches as the first level and then the taxonomy nodes as the values.
 118       *
 119       * For example:
 120       * $filters = array(
 121       *     'Type' = array(10, 32, 29, 11, ...);
 122       *     'Label' = array(20, 314, 349, 91, 82, ...);
 123       *         ...
 124       * );
 125       *
 126       * @var    array
 127       * @since  2.5
 128       */
 129      public $filters = array();
 130  
 131      /**
 132       * The start date filter.
 133       *
 134       * @var    string
 135       * @since  2.5
 136       */
 137      public $date1;
 138  
 139      /**
 140       * The end date filter.
 141       *
 142       * @var    string
 143       * @since  2.5
 144       */
 145      public $date2;
 146  
 147      /**
 148       * The start date filter modifier.
 149       *
 150       * @var    string
 151       * @since  2.5
 152       */
 153      public $when1;
 154  
 155      /**
 156       * The end date filter modifier.
 157       *
 158       * @var    string
 159       * @since  2.5
 160       */
 161      public $when2;
 162  
 163      /**
 164       * Method to instantiate the query object.
 165       *
 166       * @param   array  $options  An array of query options.
 167       *
 168       * @since   2.5
 169       * @throws  Exception on database error.
 170       */
 171  	public function __construct($options)
 172      {
 173          // Get the input string.
 174          $this->input = isset($options['input']) ? $options['input'] : null;
 175  
 176          // Get the empty query setting.
 177          $this->empty = isset($options['empty']) ? (bool) $options['empty'] : false;
 178  
 179          // Get the input language.
 180          $this->language = !empty($options['language']) ? $options['language'] : FinderIndexerHelper::getDefaultLanguage();
 181          $this->language = FinderIndexerHelper::getPrimaryLanguage($this->language);
 182  
 183          // Get the matching mode.
 184          $this->mode = 'AND';
 185  
 186          // Initialize the temporary date storage.
 187          $this->dates = new JRegistry;
 188  
 189          // Populate the temporary date storage.
 190          if (isset($options['date1']) && !empty($options['date1']))
 191          {
 192              $this->dates->set('date1', $options['date1']);
 193          }
 194          if (isset($options['date2']) && !empty($options['date1']))
 195          {
 196              $this->dates->set('date2', $options['date2']);
 197          }
 198          if (isset($options['when1']) && !empty($options['date1']))
 199          {
 200              $this->dates->set('when1', $options['when1']);
 201          }
 202          if (isset($options['when2']) && !empty($options['date1']))
 203          {
 204              $this->dates->set('when2', $options['when2']);
 205          }
 206  
 207          // Process the static taxonomy filters.
 208          if (isset($options['filter']) && !empty($options['filter']))
 209          {
 210              $this->processStaticTaxonomy($options['filter']);
 211          }
 212  
 213          // Process the dynamic taxonomy filters.
 214          if (isset($options['filters']) && !empty($options['filters']))
 215          {
 216              $this->processDynamicTaxonomy($options['filters']);
 217          }
 218  
 219          // Get the date filters.
 220          $d1 = $this->dates->get('date1');
 221          $d2 = $this->dates->get('date2');
 222          $w1 = $this->dates->get('when1');
 223          $w2 = $this->dates->get('when2');
 224  
 225          // Process the date filters.
 226          if (!empty($d1) || !empty($d2))
 227          {
 228              $this->processDates($d1, $d2, $w1, $w2);
 229          }
 230  
 231          // Process the input string.
 232          $this->processString($this->input, $this->language, $this->mode);
 233  
 234          // Get the number of matching terms.
 235          foreach ($this->included as $token)
 236          {
 237              $this->terms += count($token->matches);
 238          }
 239  
 240          // Remove the temporary date storage.
 241          unset($this->dates);
 242  
 243          /*
 244           * Lastly, determine whether this query can return a result set.
 245           */
 246          // Check if we have a query string.
 247          if (!empty($this->input))
 248          {
 249              $this->search = true;
 250          }
 251          // Check if we can search without a query string.
 252          elseif ($this->empty && (!empty($this->filter) || !empty($this->filters) || !empty($this->date1) || !empty($this->date2)))
 253          {
 254              $this->search = true;
 255          }
 256          // We do not have a valid search query.
 257          else
 258          {
 259              $this->search = false;
 260          }
 261      }
 262  
 263      /**
 264       * Method to convert the query object into a URI string.
 265       *
 266       * @param   string  $base  The base URI. [optional]
 267       *
 268       * @return  string  The complete query URI.
 269       *
 270       * @since   2.5
 271       */
 272  	public function toURI($base = null)
 273      {
 274          // Set the base if not specified.
 275          if (empty($base))
 276          {
 277              $base = 'index.php?option=com_finder&view=search';
 278          }
 279  
 280          // Get the base URI.
 281          $uri = JURI::getInstance($base);
 282  
 283          // Add the static taxonomy filter if present.
 284          if (!empty($this->filter))
 285          {
 286              $uri->setVar('f', $this->filter);
 287          }
 288  
 289          // Get the filters in the request.
 290          $input = JFactory::getApplication()->input;
 291          $t = $input->request->get('t', array(), 'array');
 292  
 293          // Add the dynamic taxonomy filters if present.
 294          if (!empty($this->filters))
 295          {
 296              foreach ($this->filters as $nodes)
 297              {
 298                  foreach ($nodes as $node)
 299                  {
 300                      if (!in_array($node, $t))
 301                      {
 302                          continue;
 303                      }
 304                      $uri->setVar('t[]', $node);
 305                  }
 306              }
 307          }
 308  
 309          // Add the input string if present.
 310          if (!empty($this->input))
 311          {
 312              $uri->setVar('q', $this->input);
 313          }
 314  
 315          // Add the start date if present.
 316          if (!empty($this->date1))
 317          {
 318              $uri->setVar('d1', $this->date1);
 319          }
 320  
 321          // Add the end date if present.
 322          if (!empty($this->date2))
 323          {
 324              $uri->setVar('d2', $this->date2);
 325          }
 326  
 327          // Add the start date modifier if present.
 328          if (!empty($this->when1))
 329          {
 330              $uri->setVar('w1', $this->when1);
 331          }
 332  
 333          // Add the end date modifier if present.
 334          if (!empty($this->when2))
 335          {
 336              $uri->setVar('w2', $this->when2);
 337          }
 338  
 339          // Add a menu item id if one is not present.
 340          if (!$uri->getVar('Itemid'))
 341          {
 342              // Get the menu item id.
 343              $query = array(
 344                  'view' => $uri->getVar('view'),
 345                  'f' => $uri->getVar('f'),
 346                  'q' => $uri->getVar('q')
 347              );
 348              $item = FinderHelperRoute::getItemid($query);
 349  
 350              // Add the menu item id if present.
 351              if ($item !== null)
 352              {
 353                  $uri->setVar('Itemid', $item);
 354              }
 355          }
 356  
 357          return $uri->toString(array('path', 'query'));
 358      }
 359  
 360      /**
 361       * Method to get a list of excluded search term ids.
 362       *
 363       * @return  array  An array of excluded term ids.
 364       *
 365       * @since   2.5
 366       */
 367  	public function getExcludedTermIds()
 368      {
 369          $results = array();
 370  
 371          // Iterate through the excluded tokens and compile the matching terms.
 372          for ($i = 0, $c = count($this->excluded); $i < $c; $i++)
 373          {
 374              $results = array_merge($results, $this->excluded[$i]->matches);
 375          }
 376  
 377          // Sanitize the terms.
 378          //@TODO: Should toInteger use $return?
 379          $return = array_unique($results);
 380          JArrayHelper::toInteger($results);
 381  
 382          return $results;
 383      }
 384  
 385      /**
 386       * Method to get a list of included search term ids.
 387       *
 388       * @return  array  An array of included term ids.
 389       *
 390       * @since   2.5
 391       */
 392  	public function getIncludedTermIds()
 393      {
 394          $results = array();
 395  
 396          // Iterate through the included tokens and compile the matching terms.
 397          for ($i = 0, $c = count($this->included); $i < $c; $i++)
 398          {
 399              // Check if we have any terms.
 400              if (empty($this->included[$i]->matches))
 401              {
 402                  continue;
 403              }
 404  
 405              // Get the term.
 406              $term = $this->included[$i]->term;
 407  
 408              // Prepare the container for the term if necessary.
 409              if (!array_key_exists($term, $results))
 410              {
 411                  $results[$term] = array();
 412              }
 413  
 414              // Add the matches to the stack.
 415              $results[$term] = array_merge($results[$term], $this->included[$i]->matches);
 416          }
 417  
 418          // Sanitize the terms.
 419          foreach ($results as $key => $value)
 420          {
 421              $results[$key] = array_unique($results[$key]);
 422              JArrayHelper::toInteger($results[$key]);
 423          }
 424  
 425          return $results;
 426      }
 427  
 428      /**
 429       * Method to get a list of required search term ids.
 430       *
 431       * @return  array  An array of required term ids.
 432       *
 433       * @since   2.5
 434       */
 435  	public function getRequiredTermIds()
 436      {
 437          $results = array();
 438  
 439          // Iterate through the included tokens and compile the matching terms.
 440          for ($i = 0, $c = count($this->included); $i < $c; $i++)
 441          {
 442              // Check if the token is required.
 443              if ($this->included[$i]->required)
 444              {
 445                  // Get the term.
 446                  $term = $this->included[$i]->term;
 447  
 448                  // Prepare the container for the term if necessary.
 449                  if (!array_key_exists($term, $results))
 450                  {
 451                      $results[$term] = array();
 452                  }
 453  
 454                  // Add the matches to the stack.
 455                  $results[$term] = array_merge($results[$term], $this->included[$i]->matches);
 456              }
 457          }
 458  
 459          // Sanitize the terms.
 460          foreach ($results as $key => $value)
 461          {
 462              $results[$key] = array_unique($results[$key]);
 463              JArrayHelper::toInteger($results[$key]);
 464          }
 465  
 466          return $results;
 467      }
 468  
 469      /**
 470       * Method to process the static taxonomy input. The static taxonomy input
 471       * comes in the form of a pre-defined search filter that is assigned to the
 472       * search form.
 473       *
 474       * @param   integer  $filterId  The id of static filter.
 475       *
 476       * @return  boolean  True on success, false on failure.
 477       *
 478       * @since   2.5
 479       * @throws  Exception on database error.
 480       */
 481  	protected function processStaticTaxonomy($filterId)
 482      {
 483          // Get the database object.
 484          $db = JFactory::getDBO();
 485  
 486          // Initialize user variables
 487          $user = JFactory::getUser();
 488          $groups = implode(',', $user->getAuthorisedViewLevels());
 489  
 490          // Load the predefined filter.
 491          $query = $db->getQuery(true);
 492          $query->select('f.' . $db->quoteName('data') . ', f.' . $db->quoteName('params'));
 493          $query->from($db->quoteName('#__finder_filters') . ' AS f');
 494          $query->where('f.' . $db->quoteName('filter_id') . ' = ' . (int) $filterId);
 495  
 496          $db->setQuery($query);
 497          $return = $db->loadObject();
 498  
 499          // Check for a database error.
 500          if ($db->getErrorNum())
 501          {
 502              // Throw database error exception.
 503              throw new Exception($db->getErrorMsg(), 500);
 504          }
 505  
 506          // Check the returned filter.
 507          if (empty($return))
 508          {
 509              return false;
 510          }
 511  
 512          // Set the filter.
 513          $this->filter = (int) $filterId;
 514  
 515          // Get a parameter object for the filter date options.
 516          $registry = new JRegistry;
 517          $registry->loadString($return->params);
 518          $params = $registry;
 519  
 520          // Set the dates if not already set.
 521          $this->dates->def('d1', $params->get('d1'));
 522          $this->dates->def('d2', $params->get('d2'));
 523          $this->dates->def('w1', $params->get('w1'));
 524          $this->dates->def('w2', $params->get('w2'));
 525  
 526          // Remove duplicates and sanitize.
 527          $filters = explode(',', $return->data);
 528          $filters = array_unique($filters);
 529          JArrayHelper::toInteger($filters);
 530  
 531          // Remove any values of zero.
 532          if (array_search(0, $filters, true) !== false)
 533          {
 534              unset($filters[array_search(0, $filters, true)]);
 535          }
 536  
 537          // Check if we have any real input.
 538          if (empty($filters))
 539          {
 540              return true;
 541          }
 542  
 543          /*
 544           * Create the query to get filters from the database. We do this for
 545           * two reasons: one, it allows us to ensure that the filters being used
 546           * are real; two, we need to sort the filters by taxonomy branch.
 547           */
 548          $query->clear();
 549          $query->select('t1.id, t1.title, t2.title AS branch');
 550          $query->from($db->quoteName('#__finder_taxonomy') . ' AS t1');
 551          $query->join('INNER', $db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.id = t1.parent_id');
 552          $query->where('t1.state = 1');
 553          $query->where('t1.' . $db->quoteName('access') . ' IN (' . $groups . ')');
 554          $query->where('t1.id IN (' . implode(',', $filters) . ')');
 555          $query->where('t2.state = 1');
 556          $query->where('t2.' . $db->quoteName('access') . ' IN (' . $groups . ')');
 557  
 558          // Load the filters.
 559          $db->setQuery($query);
 560          $results = $db->loadObjectList();
 561  
 562          // Check for a database error.
 563          if ($db->getErrorNum())
 564          {
 565              // Throw database error exception.
 566              throw new Exception($db->getErrorMsg(), 500);
 567          }
 568  
 569          // Sort the filter ids by branch.
 570          foreach ($results as $result)
 571          {
 572              $this->filters[$result->branch][$result->title] = (int) $result->id;
 573          }
 574  
 575          return true;
 576      }
 577  
 578      /**
 579       * Method to process the dynamic taxonomy input. The dynamic taxonomy input
 580       * comes in the form of select fields that the user chooses from. The
 581       * dynamic taxonomy input is processed AFTER the static taxonomy input
 582       * because the dynamic options can be used to further narrow a static
 583       * taxonomy filter.
 584       *
 585       * @param   array  $filters  An array of taxonomy node ids.
 586       *
 587       * @return  boolean  True on success.
 588       *
 589       * @since   2.5
 590       * @throws  Exception on database error.
 591       */
 592  	protected function processDynamicTaxonomy($filters)
 593      {
 594          // Initialize user variables
 595          $user = JFactory::getUser();
 596          $groups = implode(',', $user->getAuthorisedViewLevels());
 597  
 598          // Remove duplicates and sanitize.
 599          $filters = array_unique($filters);
 600          JArrayHelper::toInteger($filters);
 601  
 602          // Remove any values of zero.
 603          if (array_search(0, $filters, true) !== false)
 604          {
 605              unset($filters[array_search(0, $filters, true)]);
 606          }
 607  
 608          // Check if we have any real input.
 609          if (empty($filters))
 610          {
 611              return true;
 612          }
 613  
 614          // Get the database object.
 615          $db = JFactory::getDBO();
 616          $query = $db->getQuery(true);
 617  
 618          /*
 619           * Create the query to get filters from the database. We do this for
 620           * two reasons: one, it allows us to ensure that the filters being used
 621           * are real; two, we need to sort the filters by taxonomy branch.
 622           */
 623          $query->select('t1.id, t1.title, t2.title AS branch');
 624          $query->from($db->quoteName('#__finder_taxonomy') . ' AS t1');
 625          $query->join('INNER', $db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.id = t1.parent_id');
 626          $query->where('t1.state = 1');
 627          $query->where('t1.' . $db->quoteName('access') . ' IN (' . $groups . ')');
 628          $query->where('t1.id IN (' . implode(',', $filters) . ')');
 629          $query->where('t2.state = 1');
 630          $query->where('t2.' . $db->quoteName('access') . ' IN (' . $groups . ')');
 631  
 632          // Load the filters.
 633          $db->setQuery($query);
 634          $results = $db->loadObjectList();
 635  
 636          // Check for a database error.
 637          if ($db->getErrorNum())
 638          {
 639              // Throw database error exception.
 640              throw new Exception($db->getErrorMsg(), 500);
 641          }
 642  
 643          // Cleared filter branches.
 644          $cleared = array();
 645  
 646          /*
 647           * Sort the filter ids by branch. Because these filters are designed to
 648           * override and further narrow the items selected in the static filter,
 649           * we will clear the values from the static filter on a branch by
 650           * branch basis before adding the dynamic filters. So, if the static
 651           * filter defines a type filter of "articles" and three "category"
 652           * filters but the user only limits the category further, the category
 653           * filters will be flushed but the type filters will not.
 654           */
 655          foreach ($results as $result)
 656          {
 657              // Check if the branch has been cleared.
 658              if (!in_array($result->branch, $cleared))
 659              {
 660                  // Clear the branch.
 661                  $this->filters[$result->branch] = array();
 662  
 663                  // Add the branch to the cleared list.
 664                  $cleared[] = $result->branch;
 665              }
 666  
 667              // Add the filter to the list.
 668              $this->filters[$result->branch][$result->title] = (int) $result->id;
 669          }
 670  
 671          return true;
 672      }
 673  
 674      /**
 675       * Method to process the query date filters to determine start and end
 676       * date limitations.
 677       *
 678       * @param   string  $date1  The first date filter.
 679       * @param   string  $date2  The second date filter.
 680       * @param   string  $when1  The first date modifier.
 681       * @param   string  $when2  The second date modifier.
 682       *
 683       * @return  boolean  True on success.
 684       *
 685       * @since   2.5
 686       */
 687  	protected function processDates($date1, $date2, $when1, $when2)
 688      {
 689          // Clean up the inputs.
 690          $date1 = JString::trim(JString::strtolower($date1));
 691          $date2 = JString::trim(JString::strtolower($date2));
 692          $when1 = JString::trim(JString::strtolower($when1));
 693          $when2 = JString::trim(JString::strtolower($when2));
 694  
 695          // Get the time offset.
 696          $offset = JFactory::getApplication()->getCfg('offset');
 697  
 698          // Array of allowed when values.
 699          $whens = array('before', 'after', 'exact');
 700  
 701          // The value of 'today' is a special case that we need to handle.
 702          if ($date1 === JString::strtolower(JText::_('COM_FINDER_QUERY_FILTER_TODAY')))
 703          {
 704              $today = JFactory::getDate('now', $offset);
 705              $date1 = $today->format('%Y-%m-%d');
 706          }
 707  
 708          // Try to parse the date string.
 709          $date = JFactory::getDate($date1, $offset);
 710  
 711          // Check if the date was parsed successfully.
 712          if ($date->toUnix() !== null)
 713          {
 714              // Set the date filter.
 715              $this->date1 = $date->toSQL();
 716              $this->when1 = in_array($when1, $whens) ? $when1 : 'before';
 717          }
 718  
 719          // The value of 'today' is a special case that we need to handle.
 720          if ($date2 === JString::strtolower(JText::_('COM_FINDER_QUERY_FILTER_TODAY')))
 721          {
 722              $today = JFactory::getDate('now', $offset);
 723              $date2 = $today->format('%Y-%m-%d');
 724          }
 725  
 726          // Try to parse the date string.
 727          $date = JFactory::getDate($date2, $offset);
 728  
 729          // Check if the date was parsed successfully.
 730          if ($date->toUnix() !== null)
 731          {
 732              // Set the date filter.
 733              $this->date2 = $date->toSQL();
 734              $this->when2 = in_array($when2, $whens) ? $when2 : 'before';
 735          }
 736  
 737          return true;
 738      }
 739  
 740      /**
 741       * Method to process the query input string and extract required, optional,
 742       * and excluded tokens; taxonomy filters; and date filters.
 743       *
 744       * @param   string  $input  The query input string.
 745       * @param   string  $lang   The query input language.
 746       * @param   string  $mode   The query matching mode.
 747       *
 748       * @return  boolean  True on success.
 749       *
 750       * @since   2.5
 751       * @throws  Exception on database error.
 752       */
 753  	protected function processString($input, $lang, $mode)
 754      {
 755          // Clean up the input string.
 756          $input = html_entity_decode($input, ENT_QUOTES, 'UTF-8');
 757          $input = JString::strtolower($input);
 758          $input = preg_replace('#\s+#mi', ' ', $input);
 759          $input = JString::trim($input);
 760          $debug = JFactory::getConfig()->get('debug_lang');
 761  
 762          /*
 763           * First, we need to handle string based modifiers. String based
 764           * modifiers could potentially include things like "category:blah" or
 765           * "before:2009-10-21" or "type:article", etc.
 766           */
 767          $patterns = array(
 768              'before' => JText::_('COM_FINDER_FILTER_WHEN_BEFORE'),
 769              'after' => JText::_('COM_FINDER_FILTER_WHEN_AFTER')
 770          );
 771  
 772          // Add the taxonomy branch titles to the possible patterns.
 773          foreach (FinderIndexerTaxonomy::getBranchTitles() as $branch)
 774          {
 775              // Add the pattern.
 776              $patterns[$branch] = JString::strtolower(JText::_(FinderHelperLanguage::branchSingular($branch)));
 777          }
 778  
 779          // Container for search terms and phrases.
 780          $terms = array();
 781          $phrases = array();
 782  
 783          // Cleared filter branches.
 784          $cleared = array();
 785  
 786          /*
 787           * Compile the suffix pattern. This is used to match the values of the
 788           * filter input string. Single words can be input directly, multi-word
 789           * values have to be wrapped in double quotes.
 790           */
 791          $quotes = html_entity_decode('&#8216;&#8217;&#39;', ENT_QUOTES, 'UTF-8');
 792          $suffix = '(([\w\d' . $quotes . '-]+)|\"([\w\d\s' . $quotes . '-]+)\")';
 793  
 794          /*
 795           * Iterate through the possible filter patterns and search for matches.
 796           * We need to match the key, colon, and a value pattern for the match
 797           * to be valid.
 798           */
 799          foreach ($patterns as $modifier => $pattern)
 800          {
 801              $matches = array();
 802  
 803              if ($debug)
 804              {
 805                  $pattern = substr($pattern, 2, -2);
 806              }
 807  
 808              // Check if the filter pattern is in the input string.
 809              if (preg_match('#' . $pattern . '\s*:\s*' . $suffix . '#mi', $input, $matches))
 810              {
 811                  // Get the value given to the modifier.
 812                  $value = isset($matches[3]) ? $matches[3] : $matches[1];
 813  
 814                  // Now we have to handle the filter string.
 815                  switch ($modifier)
 816                  {
 817                      // Handle a before and after date filters.
 818                      case 'before':
 819                      case 'after':
 820                      {
 821                          // Get the time offset.
 822                          $offset = JFactory::getApplication()->getCfg('offset');
 823  
 824                          // Array of allowed when values.
 825                          $whens = array('before', 'after', 'exact');
 826  
 827                          // The value of 'today' is a special case that we need to handle.
 828                          if ($value === JString::strtolower(JText::_('COM_FINDER_QUERY_FILTER_TODAY')))
 829                          {
 830                              $today = JFactory::getDate('now', $offset);
 831                              $value = $today->format('%Y-%m-%d');
 832                          }
 833  
 834                          // Try to parse the date string.
 835                          $date = JFactory::getDate($value, $offset);
 836  
 837                          // Check if the date was parsed successfully.
 838                          if ($date->toUnix() !== null)
 839                          {
 840                              // Set the date filter.
 841                              $this->date1 = $date->toSQL();
 842                              $this->when1 = in_array($modifier, $whens) ? $modifier : 'before';
 843                          }
 844  
 845                          break;
 846                      }
 847  
 848                      // Handle a taxonomy branch filter.
 849                      default:
 850                      {
 851                          // Try to find the node id.
 852                          $return = FinderIndexerTaxonomy::getNodeByTitle($modifier, $value);
 853  
 854                          // Check if the node id was found.
 855                          if ($return)
 856                          {
 857                              // Check if the branch has been cleared.
 858                              if (!in_array($modifier, $cleared))
 859                              {
 860                                  // Clear the branch.
 861                                  $this->filters[$modifier] = array();
 862  
 863                                  // Add the branch to the cleared list.
 864                                  $cleared[] = $modifier;
 865                              }
 866  
 867                              // Add the filter to the list.
 868                              $this->filters[$modifier][$return->title] = (int) $return->id;
 869                          }
 870  
 871                          break;
 872                      }
 873                  }
 874  
 875                  // Clean up the input string again.
 876                  $input = str_replace($matches[0], '', $input);
 877                  $input = preg_replace('#\s+#mi', ' ', $input);
 878                  $input = JString::trim($input);
 879              }
 880          }
 881  
 882          /*
 883           * Extract the tokens enclosed in double quotes so that we can handle
 884           * them as phrases.
 885           */
 886          if (JString::strpos($input, '"') !== false)
 887          {
 888              $matches = array();
 889  
 890              // Extract the tokens enclosed in double quotes.
 891              if (preg_match_all('#\"([^"]+)\"#mi', $input, $matches))
 892              {
 893                  /*
 894                   * One or more phrases were found so we need to iterate through
 895                   * them, tokenize them as phrases, and remove them from the raw
 896                   * input string before we move on to the next processing step.
 897                   */
 898                  foreach ($matches[1] as $key => $match)
 899                  {
 900                      // Find the complete phrase in the input string.
 901                      $pos = JString::strpos($input, $matches[0][$key]);
 902                      $len = JString::strlen($matches[0][$key]);
 903  
 904                      // Add any terms that are before this phrase to the stack.
 905                      if (JString::trim(JString::substr($input, 0, $pos)))
 906                      {
 907                          $terms = array_merge($terms, explode(' ', JString::trim(JString::substr($input, 0, $pos))));
 908                      }
 909  
 910                      // Strip out everything up to and including the phrase.
 911                      $input = JString::substr($input, $pos + $len);
 912  
 913                      // Clean up the input string again.
 914                      $input = preg_replace('#\s+#mi', ' ', $input);
 915                      $input = JString::trim($input);
 916  
 917                      // Get the number of words in the phrase.
 918                      $parts = explode(' ', $match);
 919  
 920                      // Check if the phrase is longer than three words.
 921                      if (count($parts) > 3)
 922                      {
 923                          /*
 924                           * If the phrase is longer than three words, we need to
 925                           * break it down into smaller chunks of phrases that
 926                           * are less than or equal to three words. We overlap
 927                           * the chunks so that we can ensure that a match is
 928                           * found for the complete phrase and not just portions
 929                           * of it.
 930                           */
 931                          for ($i = 0, $c = count($parts); $i < $c; $i += 2)
 932                          {
 933                              // Set up the chunk.
 934                              $chunk = array();
 935  
 936                              // The chunk has to be assembled based on how many
 937                              // pieces are available to use.
 938                              switch ($c - $i)
 939                              {
 940                                  // If only one word is left, we can break from
 941                                  // the switch and loop because the last word
 942                                  // was already used at the end of the last
 943                                  // chunk.
 944                                  case 1:
 945                                      break 2;
 946  
 947                                  // If two words are left, we use them both as
 948                                  // the last chunk of the phrase and we're done.
 949                                  case 2:
 950                                      $chunk[] = $parts[$i];
 951                                      $chunk[] = $parts[$i + 1];
 952                                      break;
 953  
 954                                  // If there are three or more words left, we
 955                                  // build a three word chunk and continue on.
 956                                  default:
 957                                      $chunk[] = $parts[$i];
 958                                      $chunk[] = $parts[$i + 1];
 959                                      $chunk[] = $parts[$i + 2];
 960                                      break;
 961                              }
 962  
 963                              // If the chunk is not empty, add it as a phrase.
 964                              if (count($chunk))
 965                              {
 966                                  $phrases[] = implode(' ', $chunk);
 967                                  $terms[] = implode(' ', $chunk);
 968                              }
 969                          }
 970                      }
 971                      else
 972                      {
 973                          // The phrase is <= 3 words so we can use it as is.
 974                          $phrases[] = $match;
 975                          $terms[] = $match;
 976                      }
 977                  }
 978              }
 979          }
 980  
 981          // Add the remaining terms if present.
 982          if (!empty($input))
 983          {
 984              $terms = array_merge($terms, explode(' ', $input));
 985          }
 986  
 987          // An array of our boolean operators. $operator => $translation
 988          $operators = array(
 989              'AND' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_AND')),
 990              'OR' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_OR')),
 991              'NOT' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_NOT'))
 992          );
 993          // If language debugging is enabled you need to ignore the debug strings in matching.
 994          if (JDEBUG)
 995          {
 996              $debugStrings = array('**', '??');
 997              $operators = str_replace($debugStrings, '', $operators);
 998          }
 999          /*
1000           * Iterate through the terms and perform any sorting that needs to be
1001           * done based on boolean search operators. Terms that are before an
1002           * and/or/not modifier have to be handled in relation to their operator.
1003           */
1004          for ($i = 0, $c = count($terms); $i < $c; $i++)
1005          {
1006  
1007              // Check if the term is followed by an operator that we understand.
1008              if (isset($terms[$i + 1]) && in_array($terms[$i + 1], $operators))
1009              {
1010                  // Get the operator mode.
1011                  $op = array_search($terms[$i + 1], $operators);
1012  
1013                  // Handle the AND operator.
1014                  if ($op === 'AND' && isset($terms[$i + 2]))
1015                  {
1016                      // Tokenize the current term.
1017                      $token = FinderIndexerHelper::tokenize($terms[$i], $lang, true);
1018                      $token = $this->getTokenData($token);
1019  
1020                      // Set the required flag.
1021                      $token->required = true;
1022  
1023                      // Add the current token to the stack.
1024                      $this->included[] = $token;
1025                      $this->highlight = array_merge($this->highlight, array_keys($token->matches));
1026  
1027                      // Skip the next token (the mode operator).
1028                      $this->operators[] = $terms[$i + 1];
1029  
1030                      // Tokenize the term after the next term (current plus two).
1031                      $other = FinderIndexerHelper::tokenize($terms[$i + 2], $lang, true);
1032                      $other = $this->getTokenData($other);
1033  
1034                      // Set the required flag.
1035                      $other->required = true;
1036  
1037                      // Add the token after the next token to the stack.
1038                      $this->included[] = $other;
1039                      $this->highlight = array_merge($this->highlight, array_keys($other->matches));
1040  
1041                      // Remove the processed phrases if possible.
1042                      if (($pk = array_search($terms[$i], $phrases)) !== false)
1043                      {
1044                          unset($phrases[$pk]);
1045                      }
1046                      if (($pk = array_search($terms[$i + 2], $phrases)) !== false)
1047                      {
1048                          unset($phrases[$pk]);
1049                      }
1050  
1051                      // Remove the processed terms.
1052                      unset($terms[$i]);
1053                      unset($terms[$i + 1]);
1054                      unset($terms[$i + 2]);
1055  
1056                      // Adjust the loop.
1057                      $i += 2;
1058                      continue;
1059                  }
1060                  // Handle the OR operator.
1061                  elseif ($op === 'OR' && isset($terms[$i + 2]))
1062                  {
1063                      // Tokenize the current term.
1064                      $token = FinderIndexerHelper::tokenize($terms[$i], $lang, true);
1065                      $token = $this->getTokenData($token);
1066  
1067                      // Set the required flag.
1068                      $token->required = false;
1069  
1070                      // Add the current token to the stack.
1071                      if (count($token->matches))
1072                      {
1073                          $this->included[] = $token;
1074                          $this->highlight = array_merge($this->highlight, array_keys($token->matches));
1075                      }
1076                      else
1077                      {
1078                          $this->ignored[] = $token;
1079                      }
1080  
1081                      // Skip the next token (the mode operator).
1082                      $this->operators[] = $terms[$i + 1];
1083  
1084                      // Tokenize the term after the next term (current plus two).
1085                      $other = FinderIndexerHelper::tokenize($terms[$i + 2], $lang, true);
1086                      $other = $this->getTokenData($other);
1087  
1088                      // Set the required flag.
1089                      $other->required = false;
1090  
1091                      // Add the token after the next token to the stack.
1092                      if (count($other->matches))
1093                      {
1094                          $this->included[] = $other;
1095                          $this->highlight = array_merge($this->highlight, array_keys($other->matches));
1096                      }
1097                      else
1098                      {
1099                          $this->ignored[] = $other;
1100                      }
1101  
1102                      // Remove the processed phrases if possible.
1103                      if (($pk = array_search($terms[$i], $phrases)) !== false)
1104                      {
1105                          unset($phrases[$pk]);
1106                      }
1107                      if (($pk = array_search($terms[$i + 2], $phrases)) !== false)
1108                      {
1109                          unset($phrases[$pk]);
1110                      }
1111  
1112                      // Remove the processed terms.
1113                      unset($terms[$i]);
1114                      unset($terms[$i + 1]);
1115                      unset($terms[$i + 2]);
1116  
1117                      // Adjust the loop.
1118                      $i += 2;
1119                      continue;
1120                  }
1121              }
1122              // Handle an orphaned OR operator.
1123              elseif (isset($terms[$i + 1]) && array_search($terms[$i], $operators) === 'OR')
1124              {
1125                  // Skip the next token (the mode operator).
1126                  $this->operators[] = $terms[$i];
1127  
1128                  // Tokenize the next term (current plus one).
1129                  $other = FinderIndexerHelper::tokenize($terms[$i + 1], $lang, true);
1130                  $other = $this->getTokenData($other);
1131  
1132                  // Set the required flag.
1133                  $other->required = false;
1134  
1135                  // Add the token after the next token to the stack.
1136                  if (count($other->matches))
1137                  {
1138                      $this->included[] = $other;
1139                      $this->highlight = array_merge($this->highlight, array_keys($other->matches));
1140                  }
1141                  else
1142                  {
1143                      $this->ignored[] = $other;
1144                  }
1145  
1146                  // Remove the processed phrase if possible.
1147                  if (($pk = array_search($terms[$i + 1], $phrases)) !== false)
1148                  {
1149                      unset($phrases[$pk]);
1150                  }
1151  
1152                  // Remove the processed terms.
1153                  unset($terms[$i]);
1154                  unset($terms[$i + 1]);
1155  
1156                  // Adjust the loop.
1157                  $i += 1;
1158                  continue;
1159              }
1160              // Handle the NOT operator.
1161              elseif (isset($terms[$i + 1]) && array_search($terms[$i], $operators) === 'NOT')
1162              {
1163                  // Skip the next token (the mode operator).
1164                  $this->operators[] = $terms[$i];
1165  
1166                  // Tokenize the next term (current plus one).
1167                  $other = FinderIndexerHelper::tokenize($terms[$i + 1], $lang, true);
1168                  $other = $this->getTokenData($other);
1169  
1170                  // Set the required flag.
1171                  $other->required = false;
1172  
1173                  // Add the next token to the stack.
1174                  if (count($other->matches))
1175                  {
1176                      $this->excluded[] = $other;
1177                  }
1178                  else
1179                  {
1180                      $this->ignored[] = $other;
1181                  }
1182  
1183                  // Remove the processed phrase if possible.
1184                  if (($pk = array_search($terms[$i + 1], $phrases)) !== false)
1185                  {
1186                      unset($phrases[$pk]);
1187                  }
1188  
1189                  // Remove the processed terms.
1190                  unset($terms[$i]);
1191                  unset($terms[$i + 1]);
1192  
1193                  // Adjust the loop.
1194                  $i += 1;
1195                  continue;
1196              }
1197          }
1198  
1199          /*
1200           * Iterate through any search phrases and tokenize them. We handle
1201           * phrases as autonomous units and do not break them down into two and
1202           * three word combinations.
1203           */
1204          for ($i = 0, $c = count($phrases); $i < $c; $i++)
1205          {
1206              // Tokenize the phrase.
1207              $token = FinderIndexerHelper::tokenize($phrases[$i], $lang, true);
1208              $token = $this->getTokenData($token);
1209  
1210              // Set the required flag.
1211              $token->required = true;
1212  
1213              // Add the current token to the stack.
1214              $this->included[] = $token;
1215              $this->highlight = array_merge($this->highlight, array_keys($token->matches));
1216  
1217              // Remove the processed term if possible.
1218              if (($pk = array_search($phrases[$i], $terms)) !== false)
1219              {
1220                  unset($terms[$pk]);
1221              }
1222  
1223              // Remove the processed phrase.
1224              unset($phrases[$i]);
1225          }
1226  
1227          /*
1228           * Handle any remaining tokens using the standard processing mechanism.
1229           */
1230          if (!empty($terms))
1231          {
1232              // Tokenize the terms.
1233              $terms = implode(' ', $terms);
1234              $tokens = FinderIndexerHelper::tokenize($terms, $lang, false);
1235  
1236              // Make sure we are working with an array.
1237              $tokens = is_array($tokens) ? $tokens : array($tokens);
1238  
1239              // Get the token data and required state for all the tokens.
1240              foreach ($tokens as $token)
1241              {
1242                  // Get the token data.
1243                  $token = $this->getTokenData($token);
1244  
1245                  // Set the required flag for the token.
1246                  $token->required = $mode === 'AND' ? ($token->phrase ? false : true) : false;
1247  
1248                  // Add the token to the appropriate stack.
1249                  if (count($token->matches) || $token->required)
1250                  {
1251                      $this->included[] = $token;
1252                      $this->highlight = array_merge($this->highlight, array_keys($token->matches));
1253                  }
1254                  else
1255                  {
1256                      $this->ignored[] = $token;
1257                  }
1258              }
1259          }
1260  
1261          return true;
1262      }
1263  
1264      /**
1265       * Method to get the base and similar term ids and, if necessary, suggested
1266       * term data from the database. The terms ids are identified based on a
1267       * 'like' match in MySQL and/or a common stem. If no term ids could be
1268       * found, then we know that we will not be able to return any results for
1269       * that term and we should try to find a similar term to use that we can
1270       * match so that we can suggest the alternative search query to the user.
1271       *
1272       * @param   FinderIndexerToken  $token  A FinderIndexerToken object.
1273       *
1274       * @return  FinderIndexerToken  A FinderIndexerToken object.
1275       *
1276       * @since   2.5
1277       * @throws  Exception on database error.
1278       */
1279  	protected function getTokenData($token)
1280      {
1281          // Get the database object.
1282          $db = JFactory::getDBO();
1283  
1284          // Create a database query to build match the token.
1285          $query = $db->getQuery(true);
1286          $query->select('t.term, t.term_id');
1287          $query->from('#__finder_terms AS t');
1288  
1289          /*
1290           * If the token is a phrase, the lookup process is fairly simple. If
1291           * the token is a word, it is a little more complicated. We have to
1292           * create two queries to lookup the term and the stem respectively,
1293           * then union the result sets together. This is MUCH faster than using
1294           * an or condition in the database query.
1295           */
1296          if ($token->phrase)
1297          {
1298              // Add the phrase to the query.
1299              $query->where('t.term = ' . $db->quote($token->term));
1300              $query->where('t.phrase = 1');
1301          }
1302          else
1303          {
1304              // Add the term to the query.
1305  //            $query->where('t.term = ' . $db->quote($token->term));
1306  //            $query->where('t.phrase = 0');
1307  //
1308  //            // Clone the query, replace the WHERE clause.
1309  //            $sub = clone($query);
1310  //            $sub->clear('where');
1311  //            $sub->where('t.stem = '.$db->quote($token->stem));
1312  //            $sub->where('t.phrase = 0');
1313  //
1314  //            // Union the two queries.
1315  //            $query->union($sub);
1316  
1317              $query->where('(t.term = ' . $db->quote($token->term) . ' OR t.stem = ' . $db->quote($token->stem) . ')');
1318              $query->where('t.phrase = 0');
1319          }
1320  
1321          // Get the terms.
1322          $db->setQuery($query);
1323          $matches = $db->loadObjectList();
1324  
1325          // Check for a database error.
1326          if ($db->getErrorNum())
1327          {
1328              // Throw database error exception.
1329              throw new Exception($db->getErrorMsg(), 500);
1330          }
1331  
1332          // Setup the container.
1333          $token->matches = array();
1334  
1335          // Check the matching terms.
1336          if (!empty($matches))
1337          {
1338              // Add the matches to the token.
1339              for ($i = 0, $c = count($matches); $i < $c; $i++)
1340              {
1341                  $token->matches[$matches[$i]->term] = (int) $matches[$i]->term_id;
1342              }
1343          }
1344  
1345          // If no matches were found, try to find a similar but better token.
1346          if (empty($token->matches))
1347          {
1348              // Create a database query to get the similar terms.
1349              //@TODO: PostgreSQL doesn't support SOUNDEX out of the box
1350              $query->clear();
1351              $query->select('DISTINCT t.term_id AS id, t.term AS term');
1352              $query->from('#__finder_terms AS t');
1353              //$query->where('t.soundex = ' . soundex($db->quote($token->term)));
1354              $query->where('t.soundex = SOUNDEX(' . $db->quote($token->term) . ')');
1355              $query->where('t.phrase = ' . (int) $token->phrase);
1356  
1357              // Get the terms.
1358              $db->setQuery($query);
1359              $results = $db->loadObjectList();
1360  
1361              // Check for a database error.
1362              if ($db->getErrorNum())
1363              {
1364                  // Throw database error exception.
1365                  throw new Exception($db->getErrorMsg(), 500);
1366              }
1367  
1368              // Check if any similar terms were found.
1369              if (empty($results))
1370              {
1371                  return $token;
1372              }
1373  
1374              // Stack for sorting the similar terms.
1375              $suggestions = array();
1376  
1377              // Get the levnshtein distance for all suggested terms.
1378              foreach ($results as $sk => $st)
1379              {
1380                  // Get the levenshtein distance between terms.
1381                  $distance = levenshtein($st->term, $token->term);
1382  
1383                  // Make sure the levenshtein distance isn't over 50.
1384                  if ($distance < 50)
1385                  {
1386                      $suggestions[$sk] = $distance;
1387                  }
1388              }
1389  
1390              // Sort the suggestions.
1391              asort($suggestions, SORT_NUMERIC);
1392  
1393              // Get the closest match.
1394              $keys = array_keys($suggestions);
1395              $key = $keys[0];
1396  
1397              // Add the suggested term.
1398              $token->suggestion = $results[$key]->term;
1399          }
1400  
1401          return $token;
1402      }
1403  }


Generated: Tue Apr 3 11:40:28 2012 Cross-referenced by PHPXref 0.7.1