primo commit
This commit is contained in:
942
administrator/components/com_finder/src/Indexer/Adapter.php
Normal file
942
administrator/components/com_finder/src/Indexer/Adapter.php
Normal file
@ -0,0 +1,942 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer;
|
||||
|
||||
use Joomla\CMS\Factory;
|
||||
use Joomla\CMS\Plugin\CMSPlugin;
|
||||
use Joomla\CMS\Table\Table;
|
||||
use Joomla\Database\DatabaseInterface;
|
||||
use Joomla\Database\QueryInterface;
|
||||
use Joomla\Event\DispatcherInterface;
|
||||
use Joomla\Utilities\ArrayHelper;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
|
||||
* Prototype adapter class for the Finder indexer package.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
abstract class Adapter extends CMSPlugin
|
||||
{
|
||||
/**
|
||||
* The context is somewhat arbitrary but it must be unique or there will be
|
||||
* conflicts when managing plugin/indexer state. A good best practice is to
|
||||
* use the plugin name suffix as the context. For example, if the plugin is
|
||||
* named 'plgFinderContent', the context could be 'Content'.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $context;
|
||||
|
||||
/**
|
||||
* The extension name.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $extension;
|
||||
|
||||
/**
|
||||
* The sublayout to use when rendering the results.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $layout;
|
||||
|
||||
/**
|
||||
* The mime type of the content the adapter indexes.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $mime;
|
||||
|
||||
/**
|
||||
* The access level of an item before save.
|
||||
*
|
||||
* @var integer
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $old_access;
|
||||
|
||||
/**
|
||||
* The access level of a category before save.
|
||||
*
|
||||
* @var integer
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $old_cataccess;
|
||||
|
||||
/**
|
||||
* The type of content the adapter indexes.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $type_title;
|
||||
|
||||
/**
|
||||
* The type id of the content.
|
||||
*
|
||||
* @var integer
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $type_id;
|
||||
|
||||
/**
|
||||
* The database object.
|
||||
*
|
||||
* @var DatabaseInterface
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $db;
|
||||
|
||||
/**
|
||||
* The table name.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $table;
|
||||
|
||||
/**
|
||||
* The indexer object.
|
||||
*
|
||||
* @var Indexer
|
||||
* @since 3.0
|
||||
*/
|
||||
protected $indexer;
|
||||
|
||||
/**
|
||||
* The field the published state is stored in.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $state_field = 'state';
|
||||
|
||||
/**
 * Builds the adapter: resolves (or registers) the content type, applies an
 * optional layout override from the plugin parameters and creates the indexer.
 *
 * @param   DispatcherInterface  $dispatcher  The object to observe.
 * @param   array                $config      An array that holds the plugin configuration.
 *
 * @since   2.5
 */
public function __construct(DispatcherInterface $dispatcher, array $config)
{
    parent::__construct($dispatcher, $config);

    // Look the type id up first; a new content type is only registered
    // when the lookup came back empty and a type title is configured.
    $this->type_id = $this->getTypeId();

    if (empty($this->type_id) && !empty($this->type_title)) {
        $this->type_id = Helper::addContentType($this->type_title, $this->mime);
    }

    // A truthy 'layout' plugin parameter replaces the default layout.
    $layoutOverride = $this->params->get('layout');

    if ($layoutOverride) {
        $this->layout = $layoutOverride;
    }

    // The indexer shares the adapter's database connection.
    $this->indexer = new Indexer($this->db);
}
|
||||
|
||||
/**
 * Lists the events this subscriber handles. Every event is served by the
 * method of the same name.
 *
 * @return  array  Map of event name => handler method name.
 *
 * @since   5.0.0
 */
public static function getSubscribedEvents(): array
{
    $handlers = [];

    foreach (['onBeforeIndex', 'onBuildIndex', 'onFinderGarbageCollection', 'onStartIndex'] as $eventName) {
        $handlers[$eventName] = $eventName;
    }

    return $handlers;
}
|
||||
|
||||
/**
 * Registers this adapter with the shared indexer state: adds its item count
 * to the overall total and initialises its own 'total' and 'offset' entries.
 *
 * @return  void
 *
 * @since   2.5
 * @throws  \Exception on error.
 */
public function onStartIndex()
{
    $state = Indexer::getState();
    $count = (int) $this->getContentCount();

    // Grow the global total by what this adapter contributes.
    $state->totalItems += $count;

    // Per-adapter bookkeeping, keyed by the adapter context; indexing starts at offset 0.
    $state->pluginState[$this->context]['total']  = $count;
    $state->pluginState[$this->context]['offset'] = 0;

    Indexer::setState($state);
}
|
||||
|
||||
/**
 * Prepares the adapter for an indexing run. setup() is only invoked while
 * there is still room in the current batch and this adapter has items left.
 *
 * @return  boolean  True on success.
 *
 * @since   2.5
 * @throws  \Exception on error.
 */
public function onBeforeIndex()
{
    $indexerState = Indexer::getState();
    $adapterState = $indexerState->pluginState[$this->context];

    // Work remains only if the batch is not full AND the adapter has not reached its total.
    if ($indexerState->batchOffset != $indexerState->batchSize && $adapterState['offset'] != $adapterState['total']) {
        return $this->setup();
    }

    // Nothing to prepare for this adapter in this batch.
    return true;
}
|
||||
|
||||
/**
 * Indexes one batch of content items. The indexer may call this many times;
 * each call continues from the adapter's stored offset and fills whatever is
 * left of the current batch, then writes the updated progress back.
 *
 * @return  boolean  True on success.
 *
 * @since   2.5
 * @throws  \Exception on error.
 */
public function onBuildIndex()
{
    $indexerState = Indexer::getState();
    $adapterState = $indexerState->pluginState[$this->context];

    // Stop early when the batch is already full or this adapter is finished.
    if ($indexerState->batchOffset == $indexerState->batchSize || $adapterState['offset'] == $adapterState['total']) {
        return true;
    }

    // Continue at the stored offset and take at most the free slots of the batch.
    $position  = (int) $adapterState['offset'];
    $remaining = (int) ($indexerState->batchSize - $indexerState->batchOffset);

    foreach ($this->getItems($position, $remaining) as $entry) {
        $this->index($entry);

        // One more item done: advance both offsets, shrink the outstanding total.
        ++$position;
        ++$indexerState->batchOffset;
        --$indexerState->totalItems;
    }

    // Persist the progress for the next call.
    $adapterState['offset'] = $position;
    $indexerState->pluginState[$this->context] = $adapterState;
    Indexer::setState($indexerState);

    return true;
}
|
||||
|
||||
/**
 * Removes index entries of this adapter's type whose URL no longer matches
 * any row id in the adapter's source table.
 *
 * @return  integer  Number of link ids handed to the indexer for removal.
 *
 * @since   4.2.0
 */
public function onFinderGarbageCollection()
{
    $db     = $this->db;
    $typeId = $this->getTypeId();

    // URL without an id (used as CONCAT prefix) and URL with a wildcard id (used for LIKE).
    $urlPrefix  = $db->quote($this->getUrl('', $this->extension, $this->layout));
    $urlPattern = $db->quote($this->getUrl('%', $this->extension, $this->layout));

    // All URLs that still have a backing row in the source table.
    $existingUrls = $db->getQuery(true)
        ->select('CONCAT(' . $urlPrefix . ', id)')
        ->from($db->quoteName($this->table));

    // Links of this type that look like ours but are missing from the list above.
    $orphans = $db->getQuery(true)
        ->select($db->quoteName('l.link_id'))
        ->from($db->quoteName('#__finder_links', 'l'))
        ->where($db->quoteName('l.type_id') . ' = ' . $typeId)
        ->where($db->quoteName('l.url') . ' LIKE ' . $urlPattern)
        ->where($db->quoteName('l.url') . ' NOT IN (' . $existingUrls . ')');

    $db->setQuery($orphans);
    $linkIds = $db->loadColumn();

    foreach ($linkIds as $linkId) {
        $this->indexer->remove($linkId);
    }

    return \count($linkIds);
}
|
||||
|
||||
/**
 * Writes a new 'state' or 'access' value to the links-table row(s) whose URL
 * belongs to the given content item. Any other property name is ignored.
 *
 * @param   string   $id        The ID of the item to change.
 * @param   string   $property  The property that is being changed.
 * @param   integer  $value     The new value of that property.
 *
 * @return  boolean  True on success.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
protected function change($id, $property, $value)
{
    // Only these two columns are handled; everything else is a silent no-op.
    if (!\in_array($property, ['state', 'access'], true)) {
        return true;
    }

    $db = $this->db;

    // The link row is located through the item's URL.
    $url = $db->quote($this->getUrl($id, $this->extension, $this->layout));

    $update = $db->getQuery(true)
        ->update($db->quoteName('#__finder_links'))
        ->set($db->quoteName($property) . ' = ' . (int) $value)
        ->where($db->quoteName('url') . ' = ' . $url);

    $db->setQuery($update);
    $db->execute();

    return true;
}
|
||||
|
||||
/**
 * Indexes a single item. Concrete adapters supply the implementation.
 *
 * @param   Result  $item  The item to index as a Result object.
 *
 * @return  boolean  True on success.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
abstract protected function index(Result $item);
|
||||
|
||||
/**
 * Re-indexes one item: runs setup(), loads the item, indexes it and then
 * asks the taxonomy to drop orphaned nodes.
 *
 * @param   integer  $id  The ID of the item to reindex.
 *
 * @return  void
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
protected function reindex($id)
{
    $this->setup();

    // Load the item and pass it straight to the indexing routine.
    $this->index($this->getItem($id));

    Taxonomy::removeOrphanNodes();
}
|
||||
|
||||
/**
 * Removes every index link that points at the given item's URL. When no link
 * exists, the 'onFinderIndexAfterDelete' event is triggered instead.
 *
 * @param   string  $id                The ID of the item to remove.
 * @param   bool    $removeTaxonomies  Remove empty taxonomies
 *
 * @return  boolean  True on success.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
protected function remove($id, $removeTaxonomies = true)
{
    $db  = $this->db;
    $url = $db->quote($this->getUrl($id, $this->extension, $this->layout));

    // Find the link ids stored for that URL.
    $lookup = $db->getQuery(true)
        ->select($db->quoteName('link_id'))
        ->from($db->quoteName('#__finder_links'))
        ->where($db->quoteName('url') . ' = ' . $url);

    $db->setQuery($lookup);
    $linkIds = $db->loadColumn();

    if (!empty($linkIds)) {
        foreach ($linkIds as $linkId) {
            $this->indexer->remove($linkId, $removeTaxonomies);
        }
    } else {
        // Nothing indexed for this item: only fire the after-delete event.
        Factory::getApplication()->triggerEvent('onFinderIndexAfterDelete', [$id]);
    }

    return true;
}
|
||||
|
||||
/**
 * Prepares the adapter before indexing. Concrete adapters supply the
 * implementation.
 *
 * @return  boolean  True on success, false on failure.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
abstract protected function setup();
|
||||
|
||||
/**
 * Propagates a category access level change to the index: every item in the
 * category gets the higher of its own and the category's access value.
 *
 * @param   Table  $row  A Table object
 *
 * @return  void
 *
 * @since   2.5
 */
protected function categoryAccessChange($row)
{
    // Restrict the state query to items of the changed category.
    $stateQuery = clone $this->getStateQuery();
    $stateQuery->where('c.id = ' . (int) $row->id);

    $this->db->setQuery($stateQuery);

    foreach ($this->db->loadObjectList() as $entry) {
        // The larger of the two access values is written to the index.
        $this->change((int) $entry->id, 'access', max($entry->access, $row->access));
    }
}
|
||||
|
||||
/**
 * Propagates a category published-state change to the index entries of all
 * items inside the affected categories.
 *
 * @param   array    $pks    A list of primary key ids of the content that has changed state.
 * @param   integer  $value  The value of the state that the content has been changed to.
 *
 * @return  void
 *
 * @since   2.5
 */
protected function categoryStateChange($pks, $value)
{
    // An item's indexed state depends on its own state and on the category
    // state, so each item's current state is loaded before anything is changed.
    foreach ($pks as $categoryId) {
        $stateQuery = clone $this->getStateQuery();
        $stateQuery->where('c.id = ' . (int) $categoryId);

        $this->db->setQuery($stateQuery);

        foreach ($this->db->loadObjectList() as $entry) {
            // Combine the item's state with the new category state.
            $this->change($entry->id, 'state', $this->translateState($entry->state, $value));
        }
    }
}
|
||||
|
||||
/**
 * Reads the access level currently stored for a category and keeps it in
 * $old_cataccess so a later change can be detected.
 *
 * @param   Table  $row  A Table object
 *
 * @return  void
 *
 * @since   2.5
 */
protected function checkCategoryAccess($row)
{
    $db = $this->db;

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('access'))
        ->from($db->quoteName('#__categories'))
        ->where($db->quoteName('id') . ' = ' . (int) $row->id);

    $db->setQuery($lookup);

    // Remember the stored value for comparison after the save.
    $this->old_cataccess = $db->loadResult();
}
|
||||
|
||||
/**
 * Reads the access level currently stored for an item in the adapter's table
 * and keeps it in $old_access so a later change can be detected.
 *
 * @param   Table  $row  A Table object
 *
 * @return  void
 *
 * @since   2.5
 */
protected function checkItemAccess($row)
{
    $db = $this->db;

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('access'))
        ->from($db->quoteName($this->table))
        ->where($db->quoteName('id') . ' = ' . (int) $row->id);

    $db->setQuery($lookup);

    // Remember the stored value for comparison after the save.
    $this->old_access = $db->loadResult();
}
|
||||
|
||||
/**
 * Counts the content items available to index, based on the list query.
 *
 * @return  integer  The number of content items available to index.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
protected function getContentCount()
{
    $listQuery = $this->getListQuery();

    // No usable query means nothing to count.
    if (empty($listQuery)) {
        return 0;
    }

    // For query objects, count on a copy: swap the select list for COUNT(*)
    // and drop the ordering, leaving the original query untouched.
    if ($listQuery instanceof QueryInterface) {
        $countQuery = clone $listQuery;
        $countQuery->clear('select');
        $countQuery->select('COUNT(*)');
        $countQuery->clear('order');

        $listQuery = $countQuery;
    }

    $this->db->setQuery($listQuery);

    return (int) $this->db->loadResult();
}
|
||||
|
||||
/**
 * Loads a single content item through the list query and returns it as a
 * Result object carrying the adapter's type id and layout.
 *
 * @param   integer  $id  The id of the content item.
 *
 * @return  Result  A Result object.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
protected function getItem($id)
{
    // Narrow the list query down to the requested id.
    $itemQuery = $this->getListQuery();
    $itemQuery->where('a.id = ' . (int) $id);

    $this->db->setQuery($itemQuery);
    $row = $this->db->loadAssoc();

    // The array cast keeps the conversion working when no row was returned.
    $result = ArrayHelper::toObject((array) $row, Result::class);

    $result->type_id = $this->type_id;
    $result->layout  = $this->layout;

    return $result;
}
|
||||
|
||||
/**
 * Loads one page of content items and returns them as Result objects
 * carrying the adapter's type id, mime type and layout.
 *
 * @param   integer         $offset  The list offset.
 * @param   integer         $limit   The list limit.
 * @param   QueryInterface  $query   A QueryInterface object. [optional]
 *
 * @return  Result[]  An array of Result objects.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
protected function getItems($offset, $limit, $query = null)
{
    // Page the list query and fetch the raw rows.
    $this->db->setQuery($this->getListQuery($query)->setLimit($limit, $offset));
    $items = $this->db->loadAssocList();

    // Replace each row by key instead of iterating by reference, so no
    // reference to the last element is left behind after the loop.
    foreach ($items as $key => $row) {
        $result = ArrayHelper::toObject($row, Result::class);

        $result->type_id = $this->type_id;
        $result->mime    = $this->mime;
        $result->layout  = $this->layout;

        $items[$key] = $result;
    }

    return $items;
}
|
||||
|
||||
/**
 * Returns the query used to retrieve the list of content items: the supplied
 * query when it is a QueryInterface, otherwise a new empty query.
 *
 * @param   mixed  $query  A QueryInterface object. [optional]
 *
 * @return  QueryInterface  A database object.
 *
 * @since   2.5
 */
protected function getListQuery($query = null)
{
    if ($query instanceof QueryInterface) {
        return $query;
    }

    // Anything else is discarded in favour of a fresh query.
    return $this->db->getQuery(true);
}
|
||||
|
||||
/**
 * Looks up the 'element' of the finder-folder extension with the given id.
 *
 * @param   integer  $id  The plugin ID
 *
 * @return  string|null  The plugin type
 *
 * @since   2.5
 */
protected function getPluginType($id)
{
    $db = $this->db;

    // Only rows in the 'finder' plugin folder are considered.
    $lookup = $db->getQuery(true)
        ->select($db->quoteName('element'))
        ->from($db->quoteName('#__extensions'))
        ->where($db->quoteName('folder') . ' = ' . $db->quote('finder'))
        ->where($db->quoteName('extension_id') . ' = ' . (int) $id);

    $db->setQuery($lookup);

    return $db->loadResult();
}
|
||||
|
||||
/**
 * Builds the query that loads an item's id, published state and access level
 * together with the published state and access level of its category.
 *
 * @return  QueryInterface  A database object.
 *
 * @since   2.5
 */
protected function getStateQuery()
{
    // The item's state column name is configurable through $state_field.
    $stateColumns = 'a.' . $this->state_field . ' AS state, c.published AS cat_state';

    return $this->db->getQuery(true)
        ->select('a.id')
        ->select($stateColumns)
        ->select('a.access, c.access AS cat_access')
        ->from($this->table . ' AS a')
        ->join('LEFT', '#__categories AS c ON c.id = a.catid');
}
|
||||
|
||||
/**
 * Returns a query that carries only a WHERE clause selecting items modified
 * at or after the given time.
 *
 * @param   string  $time  The modified timestamp.
 *
 * @return  QueryInterface  A database object.
 *
 * @since   2.5
 */
protected function getUpdateQueryByTime($time)
{
    $timeFilter = $this->db->getQuery(true);
    $timeFilter->where('a.modified >= ' . $this->db->quote($time));

    return $timeFilter;
}
|
||||
|
||||
/**
 * Returns a query that carries only a WHERE clause selecting items by id.
 *
 * The ids are forced to integers before they are placed in the SQL string,
 * and an empty list yields a clause that matches nothing instead of the
 * invalid "IN()".
 *
 * @param   array  $ids  The ids to load.
 *
 * @return  QueryInterface  A database object.
 *
 * @since   2.5
 */
protected function getUpdateQueryByIds($ids)
{
    // Nothing but integers may reach the hand-built IN list.
    $ids   = ArrayHelper::toInteger((array) $ids);
    $query = $this->db->getQuery(true);

    if ($ids === []) {
        // "a.id IN()" is a syntax error; no ids means no rows.
        return $query->where('1 = 0');
    }

    return $query->where('a.id IN(' . implode(',', $ids) . ')');
}
|
||||
|
||||
/**
 * Looks up the id of the finder type whose title equals $type_title.
 *
 * @return  integer  The numeric type id for the content.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
protected function getTypeId()
{
    $db = $this->db;

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('id'))
        ->from($db->quoteName('#__finder_types'))
        ->where($db->quoteName('title') . ' = ' . $db->quote($this->type_title));

    $db->setQuery($lookup);

    // The result is cast to int, so a missing row yields 0.
    return (int) $db->loadResult();
}
|
||||
|
||||
/**
 * Builds the non-SEF URL of an item. This URL is the key used to look the
 * item's link up in the Finder index.
 *
 * @param   integer  $id         The id of the item.
 * @param   string   $extension  The extension the category is in.
 * @param   string   $view       The view for the URL.
 *
 * @return  string  The URL of the item.
 *
 * @since   2.5
 */
protected function getUrl($id, $extension, $view)
{
    $parts = ['index.php?option=', $extension, '&view=', $view, '&id=', $id];

    return implode('', $parts);
}
|
||||
|
||||
/**
 * Returns the 'page_title' parameter of a published menu item whose link
 * equals the given URL and whose access level is among the current user's
 * authorised view levels.
 *
 * @param   string  $url  The URL of the item.
 *
 * @return  mixed  The title on success, null if not found.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
protected function getItemMenuTitle($url)
{
    $db = $this->db;

    // Comma-separated view levels of the current user for the IN clause.
    $viewLevels = implode(',', Factory::getUser()->getAuthorisedViewLevels());

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('params'))
        ->from($db->quoteName('#__menu'))
        ->where($db->quoteName('link') . ' = ' . $db->quote($url))
        ->where($db->quoteName('published') . ' = 1')
        ->where($db->quoteName('access') . ' IN (' . $viewLevels . ')');

    $db->setQuery($lookup);
    $rawParams = $db->loadResult();

    // No matching menu item, or one without stored params.
    if (empty($rawParams)) {
        return null;
    }

    // The params column holds JSON.
    $decoded = json_decode($rawParams);

    // Only a set, non-empty page title counts.
    if (!empty($decoded->page_title)) {
        return $decoded->page_title;
    }

    return null;
}
|
||||
|
||||
/**
 * Propagates an item access level change to the index. The indexed value is
 * the higher of the item's new access level and its category's access level.
 *
 * @param   Table  $row  A Table object
 *
 * @return  void
 *
 * @since   2.5
 */
protected function itemAccessChange($row)
{
    // Load the stored state row for this item, including its category's access level.
    $query = clone $this->getStateQuery();
    $query->where('a.id = ' . (int) $row->id);

    $this->db->setQuery($query);
    $item = $this->db->loadObject();

    // loadObject() gives null when no row matches, and cat_access is null
    // when the LEFT JOIN finds no category; in both cases only the item's
    // own access level decides.
    $categoryAccess = $item->cat_access ?? 0;

    $this->change((int) $row->id, 'access', max($row->access, $categoryAccess));
}
|
||||
|
||||
/**
 * Propagates item published-state changes to the index, taking the
 * published state of each item's category into account.
 *
 * @param   array    $pks    A list of primary key ids of the content that has changed state.
 * @param   integer  $value  The value of the state that the content has been changed to.
 *
 * @return  void
 *
 * @since   2.5
 */
protected function itemStateChange($pks, $value)
{
    // An item's indexed state depends on its category's state as well, so
    // the stored states are looked up for every id before changing anything.
    foreach ($pks as $pk) {
        $query = clone $this->getStateQuery();
        $query->where('a.id = ' . (int) $pk);

        $this->db->setQuery($query);
        $item = $this->db->loadObject();

        // loadObject() gives null when no row matches; then there is no
        // category state to factor in and translateState() skips that check.
        $categoryState = $item->cat_state ?? null;

        $this->change($pk, 'state', $this->translateState($value, $categoryState));
    }
}
|
||||
|
||||
/**
 * Removes this adapter's items from the index when its own plugin is among
 * the plugins being disabled.
 *
 * @param   array  $pks  A list of primary key ids of the content that has changed state.
 *
 * @return  void
 *
 * @since   2.5
 */
protected function pluginDisable($pks)
{
    // Several plugins can be disabled at once; only react to the one whose
    // element matches this adapter's lower-cased context.
    foreach ($pks as $pk) {
        if ($this->getPluginType($pk) != strtolower($this->context)) {
            continue;
        }

        // The first column of the state query holds the ids to unindex.
        $this->db->setQuery(clone $this->getStateQuery());

        foreach ($this->db->loadColumn() as $itemId) {
            $this->remove($itemId);
        }

        // The matching plugin was handled; the remaining ids are irrelevant.
        break;
    }
}
|
||||
|
||||
/**
 * Translates the native content state (and optionally the category state)
 * into the 0/1 state stored by the indexer.
 *
 * @param   integer  $item      The item state.
 * @param   integer  $category  The category state. [optional]
 *
 * @return  integer  The translated indexer state.
 *
 * @since   2.5
 */
protected function translateState($item, $category = null)
{
    // A supplied category state equal to 0 forces the item state to 0.
    if ($category !== null && $category == 0) {
        $item = 0;
    }

    // Published items are always indexed as published.
    if ($item == 1) {
        return 1;
    }

    // Archived items count as published unless 'search_archived' is switched off.
    if ($item == 2) {
        return $this->params->get('search_archived', 1) == 0 ? 0 : 1;
    }

    // Every other state is treated as unpublished.
    return 0;
}
|
||||
}
|
||||
969
administrator/components/com_finder/src/Indexer/DebugAdapter.php
Normal file
969
administrator/components/com_finder/src/Indexer/DebugAdapter.php
Normal file
@ -0,0 +1,969 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2022 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer;
|
||||
|
||||
use Joomla\CMS\Plugin\CMSPlugin;
|
||||
use Joomla\CMS\Table\Table;
|
||||
use Joomla\Database\DatabaseInterface;
|
||||
use Joomla\Database\QueryInterface;
|
||||
use Joomla\Event\DispatcherInterface;
|
||||
use Joomla\Utilities\ArrayHelper;
|
||||
|
||||
/**
|
||||
* Prototype debug adapter class for the Finder indexer package.
|
||||
* THIS CLASS IS ONLY TO BE USED FOR DEBUGGING PURPOSES! DON'T
|
||||
* USE IT FOR PRODUCTIVE USE!
|
||||
*
|
||||
* @since 5.0.0
|
||||
* @internal
|
||||
*/
|
||||
abstract class DebugAdapter extends CMSPlugin
|
||||
{
|
||||
/**
|
||||
* The context is somewhat arbitrary but it must be unique or there will be
|
||||
* conflicts when managing plugin/indexer state. A good best practice is to
|
||||
* use the plugin name suffix as the context. For example, if the plugin is
|
||||
* named 'plgFinderContent', the context could be 'Content'.
|
||||
*
|
||||
* @var string
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $context;
|
||||
|
||||
/**
|
||||
* The extension name.
|
||||
*
|
||||
* @var string
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $extension;
|
||||
|
||||
/**
|
||||
* The sublayout to use when rendering the results.
|
||||
*
|
||||
* @var string
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $layout;
|
||||
|
||||
/**
|
||||
* The mime type of the content the adapter indexes.
|
||||
*
|
||||
* @var string
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $mime;
|
||||
|
||||
/**
|
||||
* The access level of an item before save.
|
||||
*
|
||||
* @var integer
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $old_access;
|
||||
|
||||
/**
|
||||
* The access level of a category before save.
|
||||
*
|
||||
* @var integer
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $old_cataccess;
|
||||
|
||||
/**
|
||||
* The type of content the adapter indexes.
|
||||
*
|
||||
* @var string
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $type_title;
|
||||
|
||||
/**
|
||||
* The type id of the content.
|
||||
*
|
||||
* @var integer
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $type_id;
|
||||
|
||||
/**
|
||||
* The database object.
|
||||
*
|
||||
* @var DatabaseInterface
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $db;
|
||||
|
||||
/**
|
||||
* The table name.
|
||||
*
|
||||
* @var string
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $table;
|
||||
|
||||
/**
|
||||
* The indexer object.
|
||||
*
|
||||
* @var Indexer
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $indexer;
|
||||
|
||||
/**
|
||||
* The field the published state is stored in.
|
||||
*
|
||||
* @var string
|
||||
* @since 5.0.0
|
||||
*/
|
||||
protected $state_field = 'state';
|
||||
|
||||
/**
 * Builds the debug adapter: resolves (or registers) the content type, applies
 * an optional layout override from the plugin parameters and creates the
 * indexer.
 *
 * @param   DispatcherInterface  $dispatcher  The object to observe.
 * @param   array                $config      An array that holds the plugin configuration.
 *
 * @since   5.0.0
 */
public function __construct(DispatcherInterface $dispatcher, array $config)
{
    parent::__construct($dispatcher, $config);

    // Look the type id up first; a new content type is only registered
    // when the lookup came back empty and a type title is configured.
    $this->type_id = $this->getTypeId();

    if (empty($this->type_id) && !empty($this->type_title)) {
        $this->type_id = Helper::addContentType($this->type_title, $this->mime);
    }

    // A truthy 'layout' plugin parameter replaces the default layout.
    $layoutOverride = $this->params->get('layout');

    if ($layoutOverride) {
        $this->layout = $layoutOverride;
    }

    // The indexer shares the adapter's database connection.
    $this->indexer = new Indexer($this->db);
}
|
||||
|
||||
/**
 * Lists the events this subscriber listens to. Every event is handled by
 * the method of the same name.
 *
 * @return  array  Map of event name to handler method name.
 *
 * @since   5.0.0
 */
public static function getSubscribedEvents(): array
{
    $events = [
        'onBeforeIndex',
        'onBuildIndex',
        'onFinderGarbageCollection',
        'onStartIndex',
    ];

    // Event name and handler name are identical, so mirror the list onto itself.
    return array_combine($events, $events);
}
|
||||
|
||||
/**
 * Registers this adapter with the indexer state: adds its item count to
 * the overall total and resets the adapter's own progress.
 *
 * @return  void
 *
 * @since   5.0.0
 * @throws  \Exception on error.
 */
public function onStartIndex()
{
    $indexerState = Indexer::getState();
    $itemCount    = (int) $this->getContentCount();

    // Grow the global total by the number of items this adapter contributes.
    $indexerState->totalItems += $itemCount;

    // Record the adapter's total and start it at the beginning.
    $indexerState->pluginState[$this->context]['total']  = $itemCount;
    $indexerState->pluginState[$this->context]['offset'] = 0;

    Indexer::setState($indexerState);
}
|
||||
|
||||
/**
 * Prepares the adapter for an indexing run. The setup is skipped when the
 * current batch is already full or the adapter has no items left.
 *
 * @return  boolean  True on success.
 *
 * @since   5.0.0
 * @throws  \Exception on error.
 */
public function onBeforeIndex()
{
    $indexerState = Indexer::getState();
    $adapterState = $indexerState->pluginState[$this->context];

    // Nothing to prepare when the batch is full or this adapter is finished.
    $nothingToDo = $indexerState->batchOffset == $indexerState->batchSize
        || $adapterState['offset'] == $adapterState['total'];

    return $nothingToDo ? true : $this->setup();
}
|
||||
|
||||
/**
 * Indexes one batch of content items. The indexer may call this many times
 * during a run, so the adapter offset, the batch offset and the remaining
 * total are updated for every indexed item.
 *
 * @return  boolean  True on success.
 *
 * @since   5.0.0
 * @throws  \Exception on error.
 */
public function onBuildIndex()
{
    $indexerState = Indexer::getState();
    $adapterState = $indexerState->pluginState[$this->context];

    // Stop early when the batch is full or this adapter has indexed everything.
    if ($indexerState->batchOffset == $indexerState->batchSize || $adapterState['offset'] == $adapterState['total']) {
        return true;
    }

    // Continue where the adapter stopped and fill what is left of the batch.
    $position  = (int) $adapterState['offset'];
    $remaining = (int) ($indexerState->batchSize - $indexerState->batchOffset);

    foreach ($this->getItems($position, $remaining) as $entry) {
        $this->index($entry);

        // One more item done: advance both offsets, shrink the outstanding total.
        $position++;
        $indexerState->batchOffset++;
        $indexerState->totalItems--;
    }

    // Persist the progress.
    $adapterState['offset']                    = $position;
    $indexerState->pluginState[$this->context] = $adapterState;
    Indexer::setState($indexerState);

    return true;
}
|
||||
|
||||
/**
 * Removes index entries of this adapter's type whose URL no longer
 * corresponds to a row in the adapter's table.
 *
 * @return  integer  The number of removed links.
 *
 * @since   5.0.0
 */
public function onFinderGarbageCollection()
{
    $db     = $this->db;
    $typeId = $this->getTypeId();

    // URL without an id (prefix for CONCAT) and with a wildcard id (LIKE pattern).
    $urlPrefix  = $db->quote($this->getUrl('', $this->extension, $this->layout));
    $urlPattern = $db->quote($this->getUrl('%', $this->extension, $this->layout));

    // URLs of all rows that currently exist in the content table.
    $existingUrls = $db->getQuery(true)
        ->select('CONCAT(' . $urlPrefix . ', id)')
        ->from($db->quoteName($this->table));

    // Links of this type that match the URL pattern but are not among the existing URLs.
    $staleLinks = $db->getQuery(true)
        ->select($db->quoteName('l.link_id'))
        ->from($db->quoteName('#__finder_links', 'l'))
        ->where($db->quoteName('l.type_id') . ' = ' . $typeId)
        ->where($db->quoteName('l.url') . ' LIKE ' . $urlPattern)
        ->where($db->quoteName('l.url') . ' NOT IN (' . $existingUrls . ')');

    $db->setQuery($staleLinks);
    $linkIds = $db->loadColumn();

    foreach ($linkIds as $linkId) {
        $this->indexer->remove($linkId);
    }

    return \count($linkIds);
}
|
||||
|
||||
/**
 * Updates the 'state' or 'access' column of an item's row in the links
 * table. Used to synchronise published and access states that change
 * without the item itself being edited. Other properties are ignored.
 *
 * @param   string   $id        The ID of the item to change.
 * @param   string   $property  The property that is being changed.
 * @param   integer  $value     The new value of that property.
 *
 * @return  boolean  True on success.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function change($id, $property, $value)
{
    // Only these two properties are mirrored in the links table.
    if (!\in_array($property, ['state', 'access'], true)) {
        return true;
    }

    $db = $this->db;

    // The link row is located through the item's URL.
    $quotedUrl = $db->quote($this->getUrl($id, $this->extension, $this->layout));

    $update = $db->getQuery(true)
        ->update($db->quoteName('#__finder_links'))
        ->set($db->quoteName($property) . ' = ' . (int) $value)
        ->where($db->quoteName('url') . ' = ' . $quotedUrl);

    $db->setQuery($update);
    $db->execute();

    return true;
}
|
||||
|
||||
/**
 * Indexes a single item. Concrete adapters implement the actual indexing.
 *
 * @param   Result  $item  The item to index as a Result object.
 *
 * @return  boolean  True on success.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
abstract protected function index(Result $item);
|
||||
|
||||
/**
 * Re-indexes an item: removes its current index entry, loads the item
 * again, indexes it and finally cleans up orphaned taxonomy nodes.
 *
 * @param   integer  $id  The ID of the item to reindex.
 *
 * @return  void
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function reindex($id)
{
    $this->setup();

    // Taxonomies are kept here (second argument false); orphans are removed once at the end.
    $this->remove($id, false);

    // Load the item fresh and index it.
    $this->index($this->getItem($id));

    Taxonomy::removeOrphanNodes();
}
|
||||
|
||||
/**
 * Removes an item from the index. When no link exists for the item, the
 * 'onFinderIndexAfterDelete' event is triggered instead.
 *
 * @param   string  $id                The ID of the item to remove.
 * @param   bool    $removeTaxonomies  Remove empty taxonomies
 *
 * @return  boolean  True on success.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function remove($id, $removeTaxonomies = true)
{
    $db = $this->db;

    // Links are located through the item's URL.
    $quotedUrl = $db->quote($this->getUrl($id, $this->extension, $this->layout));

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('link_id'))
        ->from($db->quoteName('#__finder_links'))
        ->where($db->quoteName('url') . ' = ' . $quotedUrl);

    $db->setQuery($lookup);
    $linkIds = $db->loadColumn();

    if (!empty($linkIds)) {
        // Hand every matching link to the indexer for removal.
        foreach ($linkIds as $linkId) {
            $this->indexer->remove($linkId, $removeTaxonomies);
        }

        return true;
    }

    // Nothing was indexed for this item; only announce the deletion.
    $this->getApplication()->triggerEvent('onFinderIndexAfterDelete', [$id]);

    return true;
}
|
||||
|
||||
/**
 * Sets up the adapter before indexing. Concrete adapters implement the
 * actual preparation.
 *
 * @return  boolean  True on success, false on failure.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
abstract protected function setup();
|
||||
|
||||
/**
 * Updates the indexed access level of every item in a category after the
 * category's access level changed. Each item receives the higher of its
 * own and the category's access value.
 *
 * @param   Table  $row  A Table object
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function categoryAccessChange($row)
{
    // Load the state rows of all items that belong to the category.
    $stateQuery = clone $this->getStateQuery();
    $stateQuery->where('c.id = ' . (int) $row->id);

    $this->db->setQuery($stateQuery);
    $records = $this->db->loadObjectList();

    foreach ($records as $record) {
        $this->change((int) $record->id, 'access', max($record->access, $row->access));
    }
}
|
||||
|
||||
/**
 * Updates the indexed published state of the items in the given categories
 * after the categories' published state changed.
 *
 * @param   array    $pks    A list of primary key ids of the content that has changed state.
 * @param   integer  $value  The value of the state that the content has been changed to.
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function categoryStateChange($pks, $value)
{
    /*
     * An item's indexed state depends on both its own state and the state
     * of its category, so the item states are looked up per category
     * before anything is changed.
     */
    foreach ($pks as $categoryId) {
        $stateQuery = clone $this->getStateQuery();
        $stateQuery->where('c.id = ' . (int) $categoryId);

        $this->db->setQuery($stateQuery);
        $records = $this->db->loadObjectList();

        foreach ($records as $record) {
            // Combine the item's own state with the new category state.
            $this->change($record->id, 'state', $this->translateState($record->state, $value));
        }
    }
}
|
||||
|
||||
/**
 * Reads the currently stored access level of a category and remembers it
 * in $old_cataccess, so a later change can be detected.
 *
 * @param   Table  $row  A Table object
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function checkCategoryAccess($row)
{
    $db = $this->db;

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('access'))
        ->from($db->quoteName('#__categories'))
        ->where($db->quoteName('id') . ' = ' . (int) $row->id);

    $db->setQuery($lookup);

    // Keep the stored value for comparison with the new one.
    $this->old_cataccess = $db->loadResult();
}
|
||||
|
||||
/**
 * Reads the currently stored access level of an item from the adapter's
 * table and remembers it in $old_access, so a later change can be detected.
 *
 * @param   Table  $row  A Table object
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function checkItemAccess($row)
{
    $db = $this->db;

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('access'))
        ->from($db->quoteName($this->table))
        ->where($db->quoteName('id') . ' = ' . (int) $row->id);

    $db->setQuery($lookup);

    // Keep the stored value for comparison with the new one.
    $this->old_access = $db->loadResult();
}
|
||||
|
||||
/**
 * Counts the content items that are available to index, based on the
 * adapter's list query.
 *
 * @return  integer  The number of content items available to index.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function getContentCount()
{
    $listQuery = $this->getListQuery();

    // Without a usable query there is nothing to count.
    if (empty($listQuery)) {
        return 0;
    }

    // Query objects are turned into a plain COUNT(*) on a copy: no columns, no ordering.
    if ($listQuery instanceof QueryInterface) {
        $listQuery = clone $listQuery;
        $listQuery->clear('select')
            ->select('COUNT(*)')
            ->clear('order');
    }

    $this->db->setQuery($listQuery);

    return (int) $this->db->loadResult();
}
|
||||
|
||||
/**
 * Loads a single content item through the list query and converts it into
 * a Result object carrying the adapter's type id and layout.
 *
 * @param   integer  $id  The id of the content item.
 *
 * @return  Result  A Result object.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function getItem($id)
{
    // Restrict the list query to the requested id.
    $listQuery = $this->getListQuery();
    $listQuery->where('a.id = ' . (int) $id);

    $this->db->setQuery($listQuery);
    $row = $this->db->loadAssoc();

    // The array cast lets a missing row still be converted into a Result.
    $result = ArrayHelper::toObject((array) $row, Result::class);

    $result->type_id = $this->type_id;
    $result->layout  = $this->layout;

    return $result;
}
|
||||
|
||||
/**
 * Loads a page of content items through the list query and converts each
 * row into a Result object carrying the adapter's type id, mime type and
 * layout.
 *
 * @param   integer         $offset  The list offset.
 * @param   integer         $limit   The list limit.
 * @param   QueryInterface  $query   A QueryInterface object. [optional]
 *
 * @return  Result[]  An array of Result objects.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function getItems($offset, $limit, $query = null)
{
    // Get the content items to index.
    $this->db->setQuery($this->getListQuery($query)->setLimit($limit, $offset));
    $items = $this->db->loadAssocList();

    // Replace each row by key instead of iterating by reference, so no
    // dangling reference to the last element survives the loop.
    foreach ($items as $key => $row) {
        $result = ArrayHelper::toObject($row, Result::class);

        $result->type_id = $this->type_id;
        $result->mime    = $this->mime;
        $result->layout  = $this->layout;

        $items[$key] = $result;
    }

    return $items;
}
|
||||
|
||||
/**
 * Returns the SQL query used to retrieve the list of content items. A
 * supplied query object is returned as is; otherwise a new empty query
 * is created.
 *
 * @param   mixed  $query  A QueryInterface object. [optional]
 *
 * @return  QueryInterface  A database object.
 *
 * @since   5.0.0
 */
protected function getListQuery($query = null)
{
    if ($query instanceof QueryInterface) {
        return $query;
    }

    // Anything else is ignored in favour of a fresh query.
    return $this->db->getQuery(true);
}
|
||||
|
||||
/**
 * Returns the 'element' value of the extension with the given id.
 *
 * @param   integer  $id  The plugin ID
 *
 * @return  string  The plugin type
 *
 * @since   5.0.0
 */
protected function getPluginType($id)
{
    $db = $this->db;

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('element'))
        ->from($db->quoteName('#__extensions'))
        ->where($db->quoteName('extension_id') . ' = ' . (int) $id);

    $db->setQuery($lookup);

    return $db->loadResult();
}
|
||||
|
||||
/**
 * Builds the query that loads the id, published state and access level of
 * the items in the adapter's table together with the published state and
 * access level of their category.
 *
 * @return  QueryInterface  A database object.
 *
 * @since   5.0.0
 */
protected function getStateQuery()
{
    return $this->db->getQuery(true)
        // Item ID
        ->select('a.id')
        // Item and category published state
        ->select('a.' . $this->state_field . ' AS state, c.published AS cat_state')
        // Item and category access levels
        ->select('a.access, c.access AS cat_access')
        ->from($this->table . ' AS a')
        ->join('LEFT', '#__categories AS c ON c.id = a.catid');
}
|
||||
|
||||
/**
 * Builds a query that only carries the clause selecting items modified at
 * or after the given time.
 *
 * @param   string  $time  The modified timestamp.
 *
 * @return  QueryInterface  A database object.
 *
 * @since   5.0.0
 */
protected function getUpdateQueryByTime($time)
{
    $quotedTime = $this->db->quote($time);

    return $this->db->getQuery(true)
        ->where('a.modified >= ' . $quotedTime);
}
|
||||
|
||||
/**
 * Builds a query that only carries the clause selecting items by id.
 *
 * The ids are cast to integers before they are embedded in the SQL string.
 * An empty id list yields a clause that matches no rows instead of the
 * syntactically invalid "IN()".
 *
 * @param   array  $ids  The ids to load.
 *
 * @return  QueryInterface  A database object.
 *
 * @since   5.0.0
 */
protected function getUpdateQueryByIds($ids)
{
    // Force every id to an integer so the values are safe to concatenate.
    $ids = ArrayHelper::toInteger($ids);

    $query = $this->db->getQuery(true);

    // No ids means no matching items; avoid generating "a.id IN()".
    if (empty($ids)) {
        return $query->where('1 = 0');
    }

    return $query->where('a.id IN(' . implode(',', $ids) . ')');
}
|
||||
|
||||
/**
 * Looks up the id of the content type whose title equals the adapter's
 * type title.
 *
 * @return  integer  The numeric type id for the content.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function getTypeId()
{
    $db = $this->db;

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('id'))
        ->from($db->quoteName('#__finder_types'))
        ->where($db->quoteName('title') . ' = ' . $db->quote($this->type_title));

    $db->setQuery($lookup);

    // The result is cast, so a missing type yields 0.
    return (int) $db->loadResult();
}
|
||||
|
||||
/**
 * Builds the URL for an item. The URL is how the link is looked up in the
 * Finder index.
 *
 * @param   integer  $id         The id of the item.
 * @param   string   $extension  The extension the category is in.
 * @param   string   $view       The view for the URL.
 *
 * @return  string  The URL of the item.
 *
 * @since   5.0.0
 */
protected function getUrl($id, $extension, $view)
{
    return "index.php?option={$extension}&view={$view}&id={$id}";
}
|
||||
|
||||
/**
 * Returns the page title of a published menu item that links to the given
 * URL and is visible to the current user, if such a title is set.
 *
 * @param   string  $url  The URL of the item.
 *
 * @return  mixed  The title on success, null if not found.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function getItemMenuTitle($url)
{
    $db = $this->db;

    // View levels the current user is authorised for.
    $viewLevels = implode(',', $this->getApplication()->getIdentity()->getAuthorisedViewLevels());

    // Look for the params of a published, accessible menu item with this link.
    $lookup = $db->getQuery(true)
        ->select($db->quoteName('params'))
        ->from($db->quoteName('#__menu'))
        ->where($db->quoteName('link') . ' = ' . $db->quote($url))
        ->where($db->quoteName('published') . ' = 1')
        ->where($db->quoteName('access') . ' IN (' . $viewLevels . ')');

    $db->setQuery($lookup);
    $rawParams = $db->loadResult();

    // No menu item, or one without params.
    if (empty($rawParams)) {
        return null;
    }

    $menuParams = json_decode($rawParams);

    // Only a non-empty page title counts.
    if (isset($menuParams->page_title) && $menuParams->page_title) {
        return $menuParams->page_title;
    }

    return null;
}
|
||||
|
||||
/**
 * Updates the indexed access level of an item after its access level
 * changed. The item receives the higher of its new access value and the
 * access value of its category.
 *
 * @param   Table  $row  A Table object
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function itemAccessChange($row)
{
    // Load the stored state row of the item, including its category's access level.
    $stateQuery = clone $this->getStateQuery();
    $stateQuery->where('a.id = ' . (int) $row->id);

    $this->db->setQuery($stateQuery);
    $record = $this->db->loadObject();

    $this->change((int) $row->id, 'access', max($row->access, $record->cat_access));
}
|
||||
|
||||
/**
 * Updates the indexed published state of the given items after their
 * published state changed.
 *
 * @param   array    $pks    A list of primary key ids of the content that has changed state.
 * @param   integer  $value  The value of the state that the content has been changed to.
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function itemStateChange($pks, $value)
{
    /*
     * An item's indexed state depends on both its own state and the state
     * of its category, so the category state is looked up for every item
     * before anything is changed.
     */
    foreach ($pks as $pk) {
        $stateQuery = clone $this->getStateQuery();
        $stateQuery->where('a.id = ' . (int) $pk);

        $this->db->setQuery($stateQuery);
        $record = $this->db->loadObject();

        // Combine the new item state with the category's published state.
        $this->change($pk, 'state', $this->translateState($value, $record->cat_state));
    }
}
|
||||
|
||||
/**
 * Removes all of the adapter's items from the index when the plugin that
 * belongs to this adapter's context is disabled.
 *
 * @param   array  $pks  A list of primary key ids of the content that has changed state.
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function pluginDisable($pks)
{
    // Several plugins may be disabled at once; only react to the one matching this context.
    foreach ($pks as $pk) {
        if ($this->getPluginType($pk) != strtolower($this->context)) {
            continue;
        }

        // Load the first column of the state query (the item ids).
        $stateQuery = clone $this->getStateQuery();
        $this->db->setQuery($stateQuery);
        $itemIds = $this->db->loadColumn();

        foreach ($itemIds as $itemId) {
            $this->remove($itemId);
        }
    }
}
|
||||
|
||||
/**
 * Translates the native content states into the two states the indexer
 * uses: 1 (published) and 0 (unpublished).
 *
 * @param   integer  $item      The item state.
 * @param   integer  $category  The category state. [optional]
 *
 * @return  integer  The translated indexer state.
 *
 * @since   5.0.0
 */
protected function translateState($item, $category = null)
{
    // A given category state of 0 overrides the item's own state.
    if ($category !== null && $category == 0) {
        $item = 0;
    }

    // Published (1) and archived (2) items count as published, everything else as unpublished.
    return ($item == 1 || $item == 2) ? 1 : 0;
}
|
||||
|
||||
/**
 * Debug method that replaces the indexer used by this adapter.
 *
 * @param   Indexer  $indexer  Indexer object
 *
 * @return  void
 *
 * @since   5.0.0
 */
public function setIndexer(Indexer $indexer)
{
    // Subsequent index and remove operations of the adapter use this instance.
    $this->indexer = $indexer;
}
|
||||
|
||||
/**
 * Debug method that runs the adapter for one item: sets the adapter up,
 * loads the item and passes it to index(). The resulting object can then
 * be inspected through the indexer for further debugging.
 *
 * @param   mixed  $id  ID to index
 *
 * @return  void
 *
 * @since   5.0.0
 */
public function debug($id)
{
    $this->setup();

    // Load the item and hand it straight to the indexing routine.
    $this->index($this->getItem($id));
}
|
||||
}
|
||||
@ -0,0 +1,44 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2022 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer;
|
||||
|
||||
/**
 * Debugging indexer for the Finder indexer package. Instead of indexing,
 * it keeps the item it was asked to index so it can be inspected.
 *
 * @since  5.0.0
 * @internal
 */
class DebugIndexer extends Indexer
{
    /**
     * The Result object passed to the most recent call of index().
     *
     * @var Result
     *
     * @since  5.0.0
     */
    public static $item;

    /**
     * Stub for index() of the indexer class: stores the item, indexes nothing.
     *
     * @param   Result  $item    Result object to index
     * @param   string  $format  Format to index
     *
     * @return  void
     *
     * @since   5.0.0
     */
    public function index($item, $format = 'html')
    {
        // $format is accepted for signature compatibility only.
        self::$item = $item;
    }
}
|
||||
492
administrator/components/com_finder/src/Indexer/Helper.php
Normal file
492
administrator/components/com_finder/src/Indexer/Helper.php
Normal file
@ -0,0 +1,492 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer;
|
||||
|
||||
use Joomla\CMS\Component\ComponentHelper;
|
||||
use Joomla\CMS\Event\Finder\PrepareContentEvent;
|
||||
use Joomla\CMS\Factory;
|
||||
use Joomla\CMS\Language\Multilanguage;
|
||||
use Joomla\CMS\Plugin\PluginHelper;
|
||||
use Joomla\CMS\Table\Table;
|
||||
use Joomla\Component\Fields\Administrator\Helper\FieldsHelper;
|
||||
use Joomla\Registry\Registry;
|
||||
use Joomla\String\StringHelper;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
|
||||
* Helper class for the Finder indexer package.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
class Helper
|
||||
{
|
||||
public const CUSTOMFIELDS_DONT_INDEX = 0;
|
||||
public const CUSTOMFIELDS_ADD_TO_INDEX = 1;
|
||||
public const CUSTOMFIELDS_ADD_TO_TAXONOMY = 2;
|
||||
public const CUSTOMFIELDS_ADD_TO_BOTH = 3;
|
||||
|
||||
/**
 * Parses input into plain text using the parser for the given format.
 *
 * @param   string  $input   The raw input.
 * @param   string  $format  The format of the input. [optional]
 *
 * @return  string  The parsed input.
 *
 * @since   2.5
 * @throws  \Exception on invalid parser.
 */
public static function parse($input, $format = 'html')
{
    $parser = Parser::getInstance($format);

    return $parser->parse($input);
}
|
||||
|
||||
/**
 * Tokenizes a text string into Token objects. Unless the input is handled
 * as a phrase, one token is created per term and, depending on the
 * configured tuple count, additional derived tokens for word sequences.
 * Tokens are cached per language between calls.
 *
 * @param   string   $input   The input to tokenize.
 * @param   string   $lang    The language of the input.
 * @param   boolean  $phrase  Flag to indicate whether input could be a phrase. [optional]
 *
 * @return  Token[]  An array of Token objects.
 *
 * @since   2.5
 */
public static function tokenize($input, $lang, $phrase = false)
{
    static $cache = [], $tuplecount;
    static $multilingual;
    static $defaultLanguage;

    // Read the tuple count from the component configuration once.
    if (!$tuplecount) {
        $tuplecount = ComponentHelper::getParams('com_finder')->get('tuplecount', 1);
    }

    // Determine the multilanguage status and the default language once.
    if ($multilingual === null) {
        $multilingual    = Multilanguage::isEnabled();
        $languageSetting = ComponentHelper::getParams('com_finder')->get('language_default', '');

        if ($languageSetting == '') {
            $defaultCode = '*';
        } elseif ($languageSetting == '-1') {
            $defaultCode = self::getDefaultLanguage();
        } else {
            $defaultCode = $languageSetting;
        }

        /*
         * The default language always has the language code '*'. The
         * language object is cloned so that the code of the shared
         * instance is not overwritten.
         */
        $defaultLanguage           = clone Language::getInstance($defaultCode);
        $defaultLanguage->language = '*';
    }

    $language = (!$multilingual || $lang == '*') ? $defaultLanguage : Language::getInstance($lang);

    if (!isset($cache[$lang])) {
        $cache[$lang] = [];
    }

    $tokens = [];

    // @todo: array_filter removes any number 0's from the terms. Not sure this is entirely intended
    $terms = array_values(array_filter($language->tokenise($input)));

    /*
     * A phrase is not split into individual term tokens and no two and
     * three term combinations are created for it. The phrase must contain
     * more than one word!
     */
    if ($phrase === true && \count($terms) > 1) {
        $tokens[] = new Token($terms, $language->language, $language->spacer);
    } else {
        // One token per term, reusing cached tokens where possible.
        foreach ($terms as $term) {
            if (!isset($cache[$lang][$term])) {
                $cache[$lang][$term] = new Token($term, $language->language);
            }

            $tokens[] = $cache[$lang][$term];
        }

        // Derive multi-word tokens from the single-word tokens.
        if ($tuplecount > 1) {
            // Fixed before derived tokens are appended, so only single words start a tuple.
            $wordCount = \count($tokens);

            for ($start = 0; $start < $wordCount; $start++) {
                $words = [$tokens[$start]->term];

                // Create tokens for 2 to $tuplecount length phrases
                for ($extra = 1; $extra < $tuplecount; $extra++) {
                    $next = $start + $extra;

                    if ($next >= $wordCount || !isset($tokens[$next])) {
                        break;
                    }

                    $words[] = $tokens[$next]->term;
                    $key     = implode('::', $words);

                    if (!isset($cache[$lang][$key])) {
                        $derived          = new Token($words, $language->language, $language->spacer);
                        $derived->derived = true;

                        $cache[$lang][$key] = $derived;
                    }

                    $tokens[] = $cache[$lang][$key];
                }
            }
        }
    }

    /*
     * Keep the cache from filling up the memory. The most common words
     * tend to appear early in the text, so the entries that were cached
     * last are dropped first.
     */
    while (\count($cache[$lang]) > 1024) {
        array_pop($cache[$lang]);
    }

    return $tokens;
}
|
||||
|
||||
/**
 * Returns the base word of a token, using the stemmer of the token's
 * language or the configured default language.
 *
 * @param   string  $token  The token to stem.
 * @param   string  $lang   The language of the token.
 *
 * @return  string  The root token.
 *
 * @since   2.5
 */
public static function stem($token, $lang)
{
    static $multilingual;
    static $defaultStemmer;

    // Determine the multilanguage status and the default stemmer once.
    if ($multilingual === null) {
        $multilingual    = Multilanguage::isEnabled();
        $languageSetting = ComponentHelper::getParams('com_finder')->get('language_default', '');

        if ($languageSetting == '') {
            $defaultCode = '*';
        } elseif ($languageSetting == '-1') {
            $defaultCode = self::getDefaultLanguage();
        } else {
            $defaultCode = $languageSetting;
        }

        $defaultStemmer = Language::getInstance($defaultCode);
    }

    // The default stemmer is used without multilanguage or for language-neutral tokens.
    $language = (!$multilingual || $lang == '*') ? $defaultStemmer : Language::getInstance($lang);

    return $language->stem($token);
}
|
||||
|
||||
/**
 * Method to add a content type to the database.
 *
 * Known types are loaded from #__finder_types once and kept in a static map keyed by title.
 * When the title is already known its id is returned; otherwise a new row is inserted and
 * added to the map.
 *
 * @param   string  $title  The type of content. For example: PDF
 * @param   string  $mime   The mime type of the content. For example: PDF [optional]
 *                          A null value is stored as an empty string.
 *
 * @return  integer  The id of the content type.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
public static function addContentType($title, $mime = null)
{
    static $types;

    $db    = Factory::getDbo();
    $query = $db->getQuery(true);

    // Populate the title => type map while it is still empty.
    if (empty($types)) {
        $query->select('*')
            ->from($db->quoteName('#__finder_types'));

        $db->setQuery($query);
        $types = $db->loadObjectList('title');
    }

    // Unknown title: insert it and remember the new row.
    if (!isset($types[$title])) {
        $mimeValue = $mime ?? '';

        $query->clear()
            ->insert($db->quoteName('#__finder_types'))
            ->columns([$db->quoteName('title'), $db->quoteName('mime')])
            ->values($db->quote($title) . ', ' . $db->quote($mimeValue));
        $db->setQuery($query);
        $db->execute();

        $record        = new \stdClass();
        $record->title = $title;
        $record->mime  = $mimeValue;
        $record->id    = (int) $db->insertid();

        $types[$title] = $record;
    }

    return (int) $types[$title]->id;
}
|
||||
|
||||
/**
 * Method to check if a token is common in a language.
 *
 * The multilingual flag and the default language are resolved on the first call. The default
 * comes from the com_finder 'language_default' parameter: empty means '*', '-1' means the
 * primary language of the site default language, and any other value is reduced to its
 * primary language. Common word lists are loaded once per language and cached.
 *
 * @param   string  $token  The token to test.
 * @param   string  $lang   The language to reference.
 *
 * @return  boolean  True if common, false otherwise.
 *
 * @since   2.5
 */
public static function isCommon($token, $lang)
{
    static $data = [], $default, $multilingual;

    if ($multilingual === null) {
        $multilingual = Multilanguage::isEnabled();
        $configured   = ComponentHelper::getParams('com_finder')->get('language_default', '');

        if ($configured == '') {
            $default = '*';
        } elseif ($configured == '-1') {
            $default = self::getPrimaryLanguage(self::getDefaultLanguage());
        } else {
            $default = self::getPrimaryLanguage($configured);
        }
    }

    // Single-language sites and language-neutral tokens are checked against the default language.
    $listKey = (!$multilingual || $lang == '*') ? $default : $lang;

    // Fetch the common word list for this language the first time it is needed.
    if (!isset($data[$listKey])) {
        $data[$listKey] = self::getCommonWords($listKey);
    }

    return \in_array($token, $data[$listKey], true);
}
|
||||
|
||||
/**
 * Method to get an array of common terms for a language.
 *
 * Reads the 'term' column of #__finder_terms_common for rows whose 'language' equals $lang.
 *
 * @param   string  $lang  The language to use.
 *
 * @return  array  Array of common terms.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
public static function getCommonWords($lang)
{
    $db = Factory::getDbo();

    // Restrict the result to the requested language.
    $languageFilter = $db->quoteName('language') . ' = ' . $db->quote($lang);

    $query = $db->getQuery(true)
        ->select($db->quoteName('term'))
        ->from($db->quoteName('#__finder_terms_common'))
        ->where($languageFilter);

    $db->setQuery($query);

    return $db->loadColumn();
}
|
||||
|
||||
/**
 * Method to get the default language for the site.
 *
 * The value is read from the com_languages 'site' parameter (falling back to 'en-GB') and
 * kept in a static variable for later calls.
 *
 * @return  string  The default language string.
 *
 * @since   2.5
 */
public static function getDefaultLanguage()
{
    static $lang;

    if (!empty($lang)) {
        return $lang;
    }

    // com_languages holds the site default language, which is the best available guess.
    $lang = ComponentHelper::getParams('com_languages')->get('site', 'en-GB');

    return $lang;
}
|
||||
|
||||
/**
 * Method to parse a language/locale key and return a simple language string.
 *
 * Results are cached per input key. When \Locale::getPrimaryLanguage() is callable it is
 * used; otherwise the part of the key before the first '-' is returned. A key without a '-'
 * is returned unchanged by the fallback.
 *
 * @param   string  $lang  The language/locale key. For example: en-GB
 *
 * @return  string  The simple language string. For example: en
 *
 * @since   2.5
 */
public static function getPrimaryLanguage($lang)
{
    static $data = [];

    // Only parse the identifier if necessary.
    if (isset($data[$lang])) {
        return $data[$lang];
    }

    if (\is_callable(['Locale', 'getPrimaryLanguage'])) {
        // Get the language key using the Locale package.
        $data[$lang] = \Locale::getPrimaryLanguage($lang);
    } else {
        // Get the language key using string position.
        $separator = StringHelper::strpos($lang, '-');

        // Handle a missing separator explicitly instead of handing `false` to substr() as
        // the length, so the outcome does not depend on how the string library treats it.
        $data[$lang] = $separator === false
            ? $lang
            : StringHelper::substr($lang, 0, $separator);
    }

    return $data[$lang];
}
|
||||
|
||||
/**
 * Method to get extra data for a content before being indexed. This is how
 * we add Comments, Tags, Labels, etc. that should be available to Finder.
 *
 * Imports the 'finder' plugin group on the application dispatcher and dispatches an
 * 'onPrepareFinderContent' event whose subject is the given item.
 *
 * @param   Result  $item  The item to index as a Result object.
 *
 * @return  boolean  Always true.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
public static function getContentExtras(Result $item)
{
    $dispatcher = Factory::getApplication()->getDispatcher();

    // Make sure the finder plugins are registered on this dispatcher.
    PluginHelper::importPlugin('finder', null, true, $dispatcher);

    $event = new PrepareContentEvent('onPrepareFinderContent', [
        'subject' => $item,
    ]);

    $dispatcher->dispatch('onPrepareFinderContent', $event);

    return true;
}
|
||||
|
||||
/**
 * Add custom fields for the item to the Result object
 *
 * Does nothing when the component named by the part of $context before the first '.' has
 * 'custom_fields_enable' switched off. Otherwise each field's 'searchindex' parameter decides
 * whether its value is added to the index, added as a taxonomy, or both.
 *
 * @param   Result  $item     Result object to add the custom fields to
 * @param   string  $context  Context of the item in the custom fields
 *
 * @return  void
 *
 * @since   5.0.0
 */
public static function addCustomFields(Result $item, $context)
{
    $component = strstr($context, '.', true);

    if (!ComponentHelper::getParams($component)->get('custom_fields_enable', 1)) {
        return;
    }

    // The fields helper only needs an object carrying the item id.
    $stub     = new \stdClass();
    $stub->id = $item->id;

    foreach (FieldsHelper::getFields($context, $stub, true) as $field) {
        $mode = $field->params->get('searchindex', 0);

        $addToIndex    = $mode == self::CUSTOMFIELDS_ADD_TO_INDEX || $mode == self::CUSTOMFIELDS_ADD_TO_BOTH;
        $addToTaxonomy = $mode == self::CUSTOMFIELDS_ADD_TO_TAXONOMY || $mode == self::CUSTOMFIELDS_ADD_TO_BOTH;

        // Expose the value on the item under a prefixed property and register it for indexing.
        if ($addToIndex) {
            $property        = 'jsfield_' . $field->name;
            $item->$property = $field->value;
            $item->addInstruction(Indexer::META_CONTEXT, $property);
        }

        // Fields with an empty value are not added as a taxonomy.
        if ($addToTaxonomy && $field->value) {
            $item->addTaxonomy($field->title, $field->value, $field->state, $field->access, $field->language);
        }
    }
}
|
||||
|
||||
/**
 * Method to process content text using the onContentPrepare event trigger.
 *
 * The text is placed on a 'Content' table object, optionally populated from the given item,
 * and passed to the 'onContentPrepare' event with the context 'com_finder.indexer'. The
 * content plugin group is imported on the first call only.
 *
 * @param   string    $text    The content to process.
 * @param   Registry  $params  The parameters object. Any other value is wrapped in a new Registry. [optional]
 * @param   ?Result   $item    The item which get prepared. [optional]
 *
 * @return  string  The processed content.
 *
 * @since   2.5
 */
public static function prepareContent($text, $params = null, ?Result $item = null)
{
    static $loaded;

    // Import the content plugins once.
    if (empty($loaded)) {
        PluginHelper::importPlugin('content');
        $loaded = true;
    }

    // Make sure the event always receives a Registry.
    if (!($params instanceof Registry)) {
        $params = new Registry($params);
    }

    // Build the stand-in content object handed to the plugins.
    $content       = Table::getInstance('Content');
    $content->text = $text;

    if ($item) {
        $content->bind((array) $item);
        $content->bind($item->getElements());

        if (!empty($item->context)) {
            $content->context = $item->context;
        }
    }

    // Fire the onContentPrepare event.
    Factory::getApplication()->triggerEvent('onContentPrepare', ['com_finder.indexer', &$content, &$params, 0]);

    return $content->text;
}
|
||||
}
|
||||
1023
administrator/components/com_finder/src/Indexer/Indexer.php
Normal file
1023
administrator/components/com_finder/src/Indexer/Indexer.php
Normal file
File diff suppressed because it is too large
Load Diff
182
administrator/components/com_finder/src/Indexer/Language.php
Normal file
182
administrator/components/com_finder/src/Indexer/Language.php
Normal file
@ -0,0 +1,182 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2018 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer;
|
||||
|
||||
use Joomla\String\StringHelper;
|
||||
use Wamania\Snowball\NotFoundException;
|
||||
use Wamania\Snowball\Stemmer\Stemmer;
|
||||
use Wamania\Snowball\StemmerFactory;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
 * Language support class for the Finder indexer package.
 *
 * Provides generic tokenising and stemming. Language specific behaviour is supplied by
 * subclasses in the Language sub-namespace, which getInstance() prefers when they exist.
 *
 * @since  4.0.0
 */
class Language
{
    /**
     * Language support instances container, keyed by the language passed to getInstance().
     *
     * @var    Language[]
     * @since  4.0.0
     */
    protected static $instances = [];

    /**
     * Language locale of the class
     *
     * @var    string
     * @since  4.0.0
     */
    public $language;

    /**
     * Spacer to use between terms
     *
     * @var    string
     * @since  4.0.0
     */
    public $spacer = ' ';

    /**
     * The stemmer object. Stays null when no stemmer is available for the language.
     *
     * @var    Stemmer
     * @since  4.0.0
     */
    protected $stemmer = null;

    /**
     * Method to construct the language object.
     *
     * @param   string|null  $locale  The language code to use. When null, the value already set
     *                                on the $language property is kept, falling back to '*'.
     *
     * @since   4.0.0
     */
    public function __construct($locale = null)
    {
        if ($locale !== null) {
            $this->language = $locale;
        }

        // Use our generic language handler if no language is set
        if ($this->language === null) {
            $this->language = '*';
        }

        try {
            // Only ask the factory for a stemmer when it lists this language code.
            foreach (StemmerFactory::LANGS as $classname => $isoCodes) {
                if (\in_array($this->language, $isoCodes)) {
                    $this->stemmer = StemmerFactory::create($this->language);
                    break;
                }
            }
        } catch (NotFoundException $e) {
            // We don't have a stemmer for the language
        }
    }

    /**
     * Method to get a language support object.
     *
     * Instances are cached per requested language. For anything other than '*', a class named
     * after the primary language in the Language sub-namespace is used when it exists;
     * otherwise a generic instance for the primary language is created.
     *
     * @param   string  $language  The language of the support object.
     *
     * @return  Language  A Language instance.
     *
     * @since   4.0.0
     */
    public static function getInstance($language)
    {
        if (isset(self::$instances[$language])) {
            return self::$instances[$language];
        }

        $locale = '*';

        if ($language !== '*') {
            $locale = Helper::getPrimaryLanguage($language);
            $class  = '\\Joomla\\Component\\Finder\\Administrator\\Indexer\\Language\\' . ucfirst($locale);

            if (class_exists($class)) {
                self::$instances[$language] = new $class();

                return self::$instances[$language];
            }
        }

        self::$instances[$language] = new self($locale);

        return self::$instances[$language];
    }

    /**
     * Method to tokenise a text string.
     *
     * @param   string  $input  The input to tokenise.
     *
     * @return  array  An array of term strings.
     *
     * @since   4.0.0
     */
    public function tokenise($input)
    {
        // The quote characters are written as numeric entities so the pattern source stays ASCII.
        $quotes = html_entity_decode('&#8216;&#8217;&#39;', ENT_QUOTES, 'UTF-8');

        /*
         * Parsing the string input into terms is a multi-step process.
         *
         * Regexes:
         *  1. Remove everything except letters, numbers, quotes, apostrophe, plus, dash, period, and comma.
         *  2. Remove plus and comma characters located before letter characters, keeping the letters.
         *     NOTE(review): the class [+-,] is a range from '+' to ',' and therefore does not
         *     contain the dash - confirm whether a leading dash should be stripped as well.
         *  3. Remove plus, dash, period, and comma characters located after other characters.
         *  4. Remove plus, period, and comma characters enclosed in alphabetical characters. Ungreedy.
         *  5. Remove orphaned apostrophe, plus, dash, period, and comma characters.
         *  6. Remove orphaned quote characters.
         *  7. Replace the assorted single quotation marks with the ASCII standard single quotation.
         *  8. Remove multiple space characters and replaces with a single space.
         */
        $input = StringHelper::strtolower($input);
        $input = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,]+#mui', ' ', $input);

        // Re-emit the captured word ($2); emitting the boundary ($1) would delete the word itself.
        $input = preg_replace('#(^|\s)[+-,]+([\pL\pM]+)#mui', ' $2', $input);
        $input = preg_replace('#([\pL\pM\pN]+)[+-.,]+(\s|$)#mui', '$1 ', $input);
        $input = preg_replace('#([\pL\pM]+)[+.,]+([\pL\pM]+)#muiU', '$1 $2', $input);
        $input = preg_replace('#(^|\s)[\'+-.,]+(\s|$)#mui', ' ', $input);
        $input = preg_replace('#(^|\s)[\p{Pi}\p{Pf}]+(\s|$)#mui', ' ', $input);
        $input = preg_replace('#[' . $quotes . ']+#mui', '\'', $input);
        $input = preg_replace('#\s+#mui', ' ', $input);
        $input = trim($input);

        // Explode the normalized string to get the terms.
        $terms = explode(' ', $input);

        return $terms;
    }

    /**
     * Method to stem a token.
     *
     * @param   string  $token  The token to stem.
     *
     * @return  string  The stemmed token, or the token unchanged when no stemmer is loaded.
     *
     * @since   4.0.0
     */
    public function stem($token)
    {
        if ($this->stemmer !== null) {
            return $this->stemmer->stem($token);
        }

        return $token;
    }
}
|
||||
934
administrator/components/com_finder/src/Indexer/Language/El.php
Normal file
934
administrator/components/com_finder/src/Indexer/Language/El.php
Normal file
@ -0,0 +1,934 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2018 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*
|
||||
* The Greek stemmer was adapted for Joomla! 4 by Nicholas K. Dionysopoulos <nicholas@akeebabackup.com>. This is
|
||||
* derivative work, based on the Greek stemmer for Drupal, see
|
||||
* https://github.com/magaras/greek_stemmer/blob/master/mod_stemmer.php
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer\Language;
|
||||
|
||||
use Joomla\Component\Finder\Administrator\Indexer\Language;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
|
||||
* Greek language support class for the Finder indexer package.
|
||||
*
|
||||
* @since 4.0.0
|
||||
*/
|
||||
class El extends Language
|
||||
{
|
||||
/**
|
||||
* Language locale of the class
|
||||
*
|
||||
* @var string
|
||||
* @since 4.0.0
|
||||
*/
|
||||
public $language = 'el';
|
||||
|
||||
/**
 * Method to construct the language object.
 *
 * Intentionally empty: it replaces the parent constructor because this class does not
 * need an external stemmer to be loaded.
 *
 * @param   string|null  $locale  Not used by this class.
 *
 * @since   4.0.0
 */
public function __construct($locale = null)
{
    // Nothing to do here: the parent constructor is deliberately not called, no external stemmer is required
}
|
||||
|
||||
/**
 * Method to tokenise a text string. It takes into account the odd punctuation commonly used in Greek text, mapping
 * it to ASCII punctuation.
 *
 * The substitutions are applied one after another in the order listed, and the result is
 * then handed to the parent tokeniser.
 *
 * Reference: http://www.teicrete.gr/users/kutrulis/Glosika/Stixi.htm
 *
 * @param   string  $input  The input to tokenise.
 *
 * @return  array  An array of term strings.
 *
 * @since   4.0.0
 */
public function tokenise($input)
{
    // Each rule is [search, replacement].
    $rules = [
        // Greek calligraphic double quotes (various styles) become dumb double quotes
        [['“', '”', '„', '«' ,'»'], '"'],

        // Greek calligraphic single quotes (various styles) become dumb single quotes
        [['‘','’','‚'], "'"],

        // The middle dot (ano teleia) becomes a comma, adequate for the purpose of stemming
        ['·', ','],

        // Dot and dash (τελεία και παύλα), used to denote the end of a context at the end of a paragraph
        ['.–', '.'],

        // Ellipsis, two styles (separate dots or single glyph)
        [['...', '…'], '.'],

        // Cross. Marks the death date of a person. Removed.
        ['†', ''],

        // Star. Reference, supposition word (in philology), birth date of a person. Removed.
        ['*', ''],

        // Paragraph. Indicates change of subject.
        ['§', '.'],

        // Plus/minus. Shows approximation. Not relevant for the stemmer, hence its conversion to a space.
        ['±', ' '],
    ];

    foreach ($rules as $rule) {
        $input = str_replace($rule[0], $rule[1], $input);
    }

    return parent::tokenise($input);
}
|
||||
|
||||
/**
|
||||
* Method to stem a token.
|
||||
*
|
||||
* @param string $token The token to stem.
|
||||
*
|
||||
* @return string The stemmed token.
|
||||
*
|
||||
* @since 4.0.0
|
||||
*/
|
||||
public function stem($token)
|
||||
{
|
||||
$token = $this->toUpperCase($token, $wCase);
|
||||
|
||||
// Stop-word removal
|
||||
$stop_words = '/^(ΕΚΟ|ΑΒΑ|ΑΓΑ|ΑΓΗ|ΑΓΩ|ΑΔΗ|ΑΔΩ|ΑΕ|ΑΕΙ|ΑΘΩ|ΑΙ|ΑΙΚ|ΑΚΗ|ΑΚΟΜΑ|ΑΚΟΜΗ|ΑΚΡΙΒΩΣ|ΑΛΑ|ΑΛΗΘΕΙΑ|ΑΛΗΘΙΝΑ|ΑΛΛΑΧΟΥ|ΑΛΛΙΩΣ|ΑΛΛΙΩΤΙΚΑ|'
|
||||
. 'ΑΛΛΟΙΩΣ|ΑΛΛΟΙΩΤΙΚΑ|ΑΛΛΟΤΕ|ΑΛΤ|ΑΛΩ|ΑΜΑ|ΑΜΕ|ΑΜΕΣΑ|ΑΜΕΣΩΣ|ΑΜΩ|ΑΝ|ΑΝΑ|ΑΝΑΜΕΣΑ|ΑΝΑΜΕΤΑΞΥ|ΑΝΕΥ|ΑΝΤΙ|ΑΝΤΙΠΕΡΑ|ΑΝΤΙΣ|ΑΝΩ|ΑΝΩΤΕΡΩ|ΑΞΑΦΝΑ|'
|
||||
. 'ΑΠ|ΑΠΕΝΑΝΤΙ|ΑΠΟ|ΑΠΟΨΕ|ΑΠΩ|ΑΡΑ|ΑΡΑΓΕ|ΑΡΕ|ΑΡΚ|ΑΡΚΕΤΑ|ΑΡΛ|ΑΡΜ|ΑΡΤ|ΑΡΥ|ΑΡΩ|ΑΣ|ΑΣΑ|ΑΣΟ|ΑΤΑ|ΑΤΕ|ΑΤΗ|ΑΤΙ|ΑΤΜ|ΑΤΟ|ΑΥΡΙΟ|ΑΦΗ|ΑΦΟΤΟΥ|ΑΦΟΥ|'
|
||||
. 'ΑΧ|ΑΧΕ|ΑΧΟ|ΑΨΑ|ΑΨΕ|ΑΨΗ|ΑΨΥ|ΑΩΕ|ΑΩΟ|ΒΑΝ|ΒΑΤ|ΒΑΧ|ΒΕΑ|ΒΕΒΑΙΟΤΑΤΑ|ΒΗΞ|ΒΙΑ|ΒΙΕ|ΒΙΗ|ΒΙΟ|ΒΟΗ|ΒΟΩ|ΒΡΕ|ΓΑ|ΓΑΒ|ΓΑΡ|ΓΕΝ|ΓΕΣ||ΓΗ|ΓΗΝ|ΓΙ|ΓΙΑ|'
|
||||
. 'ΓΙΕ|ΓΙΝ|ΓΙΟ|ΓΚΙ|ΓΙΑΤΙ|ΓΚΥ|ΓΟΗ|ΓΟΟ|ΓΡΗΓΟΡΑ|ΓΡΙ|ΓΡΥ|ΓΥΗ|ΓΥΡΩ|ΔΑ|ΔΕ|ΔΕΗ|ΔΕΙ|ΔΕΝ|ΔΕΣ|ΔΗ|ΔΗΘΕΝ|ΔΗΛΑΔΗ|ΔΗΩ|ΔΙ|ΔΙΑ|ΔΙΑΡΚΩΣ|ΔΙΟΛΟΥ|ΔΙΣ|'
|
||||
. 'ΔΙΧΩΣ|ΔΟΛ|ΔΟΝ|ΔΡΑ|ΔΡΥ|ΔΡΧ|ΔΥΕ|ΔΥΟ|ΔΩ|ΕΑΜ|ΕΑΝ|ΕΑΡ|ΕΘΗ|ΕΙ|ΕΙΔΕΜΗ|ΕΙΘΕ|ΕΙΜΑΙ|ΕΙΜΑΣΤΕ|ΕΙΝΑΙ|ΕΙΣ|ΕΙΣΑΙ|ΕΙΣΑΣΤΕ|ΕΙΣΤΕ|ΕΙΤΕ|ΕΙΧΑ|ΕΙΧΑΜΕ|'
|
||||
. 'ΕΙΧΑΝ|ΕΙΧΑΤΕ|ΕΙΧΕ|ΕΙΧΕΣ|ΕΚ|ΕΚΕΙ|ΕΛΑ|ΕΛΙ|ΕΜΠ|ΕΝ|ΕΝΤΕΛΩΣ|ΕΝΤΟΣ|ΕΝΤΩΜΕΤΑΞΥ|ΕΝΩ|ΕΞ|ΕΞΑΦΝΑ|ΕΞΙ|ΕΞΙΣΟΥ|ΕΞΩ|ΕΟΚ|ΕΠΑΝΩ|ΕΠΕΙΔΗ|ΕΠΕΙΤΑ|ΕΠΗ|'
|
||||
. 'ΕΠΙ|ΕΠΙΣΗΣ|ΕΠΟΜΕΝΩΣ|ΕΡΑ|ΕΣ|ΕΣΑΣ|ΕΣΕ|ΕΣΕΙΣ|ΕΣΕΝΑ|ΕΣΗ|ΕΣΤΩ|ΕΣΥ|ΕΣΩ|ΕΤΙ|ΕΤΣΙ|ΕΥ|ΕΥΑ|ΕΥΓΕ|ΕΥΘΥΣ|ΕΥΤΥΧΩΣ|ΕΦΕ|ΕΦΕΞΗΣ|ΕΦΤ|ΕΧΕ|ΕΧΕΙ|'
|
||||
. 'ΕΧΕΙΣ|ΕΧΕΤΕ|ΕΧΘΕΣ|ΕΧΟΜΕ|ΕΧΟΥΜΕ|ΕΧΟΥΝ|ΕΧΤΕΣ|ΕΧΩ|ΕΩΣ|ΖΕΑ|ΖΕΗ|ΖΕΙ|ΖΕΝ|ΖΗΝ|ΖΩ|Η|ΗΔΗ|ΗΔΥ|ΗΘΗ|ΗΛΟ|ΗΜΙ|ΗΠΑ|ΗΣΑΣΤΕ|ΗΣΟΥΝ|ΗΤΑ|ΗΤΑΝ|ΗΤΑΝΕ|'
|
||||
. 'ΗΤΟΙ|ΗΤΤΟΝ|ΗΩ|ΘΑ|ΘΥΕ|ΘΩΡ|Ι|ΙΑ|ΙΒΟ|ΙΔΗ|ΙΔΙΩΣ|ΙΕ|ΙΙ|ΙΙΙ|ΙΚΑ|ΙΛΟ|ΙΜΑ|ΙΝΑ|ΙΝΩ|ΙΞΕ|ΙΞΟ|ΙΟ|ΙΟΙ|ΙΣΑ|ΙΣΑΜΕ|ΙΣΕ|ΙΣΗ|ΙΣΙΑ|ΙΣΟ|ΙΣΩΣ|ΙΩΒ|ΙΩΝ|'
|
||||
. 'ΙΩΣ|ΙΑΝ|ΚΑΘ|ΚΑΘΕ|ΚΑΘΕΤΙ|ΚΑΘΟΛΟΥ|ΚΑΘΩΣ|ΚΑΙ|ΚΑΝ|ΚΑΠΟΤΕ|ΚΑΠΟΥ|ΚΑΠΩΣ|ΚΑΤ|ΚΑΤΑ|ΚΑΤΙ|ΚΑΤΙΤΙ|ΚΑΤΟΠΙΝ|ΚΑΤΩ|ΚΑΩ|ΚΒΟ|ΚΕΑ|ΚΕΙ|ΚΕΝ|ΚΙ|ΚΙΜ|'
|
||||
. 'ΚΙΟΛΑΣ|ΚΙΤ|ΚΙΧ|ΚΚΕ|ΚΛΙΣΕ|ΚΛΠ|ΚΟΚ|ΚΟΝΤΑ|ΚΟΧ|ΚΤΛ|ΚΥΡ|ΚΥΡΙΩΣ|ΚΩ|ΚΩΝ|ΛΑ|ΛΕΑ|ΛΕΝ|ΛΕΟ|ΛΙΑ|ΛΙΓΑΚΙ|ΛΙΓΟΥΛΑΚΙ|ΛΙΓΟ|ΛΙΓΩΤΕΡΟ|ΛΙΟ|ΛΙΡ|ΛΟΓΩ|'
|
||||
. 'ΛΟΙΠΑ|ΛΟΙΠΟΝ|ΛΟΣ|ΛΣ|ΛΥΩ|ΜΑ|ΜΑΖΙ|ΜΑΚΑΡΙ|ΜΑΛΙΣΤΑ|ΜΑΛΛΟΝ|ΜΑΝ|ΜΑΞ|ΜΑΣ|ΜΑΤ|ΜΕ|ΜΕΘΑΥΡΙΟ|ΜΕΙ|ΜΕΙΟΝ|ΜΕΛ|ΜΕΛΕΙ|ΜΕΛΛΕΤΑΙ|ΜΕΜΙΑΣ|ΜΕΝ|ΜΕΣ|'
|
||||
. 'ΜΕΣΑ|ΜΕΤ|ΜΕΤΑ|ΜΕΤΑΞΥ|ΜΕΧΡΙ|ΜΗ|ΜΗΔΕ|ΜΗΝ|ΜΗΠΩΣ|ΜΗΤΕ|ΜΙ|ΜΙΞ|ΜΙΣ|ΜΜΕ|ΜΝΑ|ΜΟΒ|ΜΟΛΙΣ|ΜΟΛΟΝΟΤΙ|ΜΟΝΑΧΑ|ΜΟΝΟΜΙΑΣ|ΜΙΑ|ΜΟΥ|ΜΠΑ|ΜΠΟΡΕΙ|'
|
||||
. 'ΜΠΟΡΟΥΝ|ΜΠΡΑΒΟ|ΜΠΡΟΣ|ΜΠΩ|ΜΥ|ΜΥΑ|ΜΥΝ|ΝΑ|ΝΑΕ|ΝΑΙ|ΝΑΟ|ΝΔ|ΝΕΐ|ΝΕΑ|ΝΕΕ|ΝΕΟ|ΝΙ|ΝΙΑ|ΝΙΚ|ΝΙΛ|ΝΙΝ|ΝΙΟ|ΝΤΑ|ΝΤΕ|ΝΤΙ|ΝΤΟ|ΝΥΝ|ΝΩΕ|ΝΩΡΙΣ|ΞΑΝΑ|'
|
||||
. 'ΞΑΦΝΙΚΑ|ΞΕΩ|ΞΙ|Ο|ΟΑ|ΟΑΠ|ΟΔΟ|ΟΕ|ΟΖΟ|ΟΗΕ|ΟΙ|ΟΙΑ|ΟΙΗ|ΟΚΑ|ΟΛΟΓΥΡΑ|ΟΛΟΝΕΝ|ΟΛΟΤΕΛΑ|ΟΛΩΣΔΙΟΛΟΥ|ΟΜΩΣ|ΟΝ|ΟΝΕ|ΟΝΟ|ΟΠΑ|ΟΠΕ|ΟΠΗ|ΟΠΟ|'
|
||||
. 'ΟΠΟΙΑΔΗΠΟΤΕ|ΟΠΟΙΑΝΔΗΠΟΤΕ|ΟΠΟΙΑΣΔΗΠΟΤΕ|ΟΠΟΙΔΗΠΟΤΕ|ΟΠΟΙΕΣΔΗΠΟΤΕ|ΟΠΟΙΟΔΗΠΟΤΕ|ΟΠΟΙΟΝΔΗΠΟΤΕ|ΟΠΟΙΟΣΔΗΠΟΤΕ|ΟΠΟΙΟΥΔΗΠΟΤΕ|ΟΠΟΙΟΥΣΔΗΠΟΤΕ|'
|
||||
. 'ΟΠΟΙΩΝΔΗΠΟΤΕ|ΟΠΟΤΕΔΗΠΟΤΕ|ΟΠΟΥ|ΟΠΟΥΔΗΠΟΤΕ|ΟΠΩΣ|ΟΡΑ|ΟΡΕ|ΟΡΗ|ΟΡΟ|ΟΡΦ|ΟΡΩ|ΟΣΑ|ΟΣΑΔΗΠΟΤΕ|ΟΣΕ|ΟΣΕΣΔΗΠΟΤΕ|ΟΣΗΔΗΠΟΤΕ|ΟΣΗΝΔΗΠΟΤΕ|'
|
||||
. 'ΟΣΗΣΔΗΠΟΤΕ|ΟΣΟΔΗΠΟΤΕ|ΟΣΟΙΔΗΠΟΤΕ|ΟΣΟΝΔΗΠΟΤΕ|ΟΣΟΣΔΗΠΟΤΕ|ΟΣΟΥΔΗΠΟΤΕ|ΟΣΟΥΣΔΗΠΟΤΕ|ΟΣΩΝΔΗΠΟΤΕ|ΟΤΑΝ|ΟΤΕ|ΟΤΙ|ΟΤΙΔΗΠΟΤΕ|ΟΥ|ΟΥΔΕ|ΟΥΚ|ΟΥΣ|'
|
||||
. 'ΟΥΤΕ|ΟΥΦ|ΟΧΙ|ΟΨΑ|ΟΨΕ|ΟΨΗ|ΟΨΙ|ΟΨΟ|ΠΑ|ΠΑΛΙ|ΠΑΝ|ΠΑΝΤΟΤΕ|ΠΑΝΤΟΥ|ΠΑΝΤΩΣ|ΠΑΠ|ΠΑΡ|ΠΑΡΑ|ΠΕΙ|ΠΕΡ|ΠΕΡΑ|ΠΕΡΙ|ΠΕΡΙΠΟΥ|ΠΕΡΣΙ|ΠΕΡΥΣΙ|ΠΕΣ|ΠΙ|'
|
||||
. 'ΠΙΑ|ΠΙΘΑΝΟΝ|ΠΙΚ|ΠΙΟ|ΠΙΣΩ|ΠΙΤ|ΠΙΩ|ΠΛΑΙ|ΠΛΕΟΝ|ΠΛΗΝ|ΠΛΩ|ΠΜ|ΠΟΑ|ΠΟΕ|ΠΟΛ|ΠΟΛΥ|ΠΟΠ|ΠΟΤΕ|ΠΟΥ|ΠΟΥΘΕ|ΠΟΥΘΕΝΑ|ΠΡΕΠΕΙ|ΠΡΙ|ΠΡΙΝ|ΠΡΟ|'
|
||||
. 'ΠΡΟΚΕΙΜΕΝΟΥ|ΠΡΟΚΕΙΤΑΙ|ΠΡΟΠΕΡΣΙ|ΠΡΟΣ|ΠΡΟΤΟΥ|ΠΡΟΧΘΕΣ|ΠΡΟΧΤΕΣ|ΠΡΩΤΥΤΕΡΑ|ΠΥΑ|ΠΥΞ|ΠΥΟ|ΠΥΡ|ΠΧ|ΠΩ|ΠΩΛ|ΠΩΣ|ΡΑ|ΡΑΙ|ΡΑΠ|ΡΑΣ|ΡΕ|ΡΕΑ|ΡΕΕ|ΡΕΙ|'
|
||||
. 'ΡΗΣ|ΡΘΩ|ΡΙΟ|ΡΟ|ΡΟΐ|ΡΟΕ|ΡΟΖ|ΡΟΗ|ΡΟΘ|ΡΟΙ|ΡΟΚ|ΡΟΛ|ΡΟΝ|ΡΟΣ|ΡΟΥ|ΣΑΙ|ΣΑΝ|ΣΑΟ|ΣΑΣ|ΣΕ|ΣΕΙΣ|ΣΕΚ|ΣΕΞ|ΣΕΡ|ΣΕΤ|ΣΕΦ|ΣΗΜΕΡΑ|ΣΙ|ΣΙΑ|ΣΙΓΑ|ΣΙΚ|'
|
||||
. 'ΣΙΧ|ΣΚΙ|ΣΟΙ|ΣΟΚ|ΣΟΛ|ΣΟΝ|ΣΟΣ|ΣΟΥ|ΣΡΙ|ΣΤΑ|ΣΤΗ|ΣΤΗΝ|ΣΤΗΣ|ΣΤΙΣ|ΣΤΟ|ΣΤΟΝ|ΣΤΟΥ|ΣΤΟΥΣ|ΣΤΩΝ|ΣΥ|ΣΥΓΧΡΟΝΩΣ|ΣΥΝ|ΣΥΝΑΜΑ|ΣΥΝΕΠΩΣ|ΣΥΝΗΘΩΣ|'
|
||||
. 'ΣΧΕΔΟΝ|ΣΩΣΤΑ|ΤΑ|ΤΑΔΕ|ΤΑΚ|ΤΑΝ|ΤΑΟ|ΤΑΥ|ΤΑΧΑ|ΤΑΧΑΤΕ|ΤΕ|ΤΕΙ|ΤΕΛ|ΤΕΛΙΚΑ|ΤΕΛΙΚΩΣ|ΤΕΣ|ΤΕΤ|ΤΖΟ|ΤΗ|ΤΗΛ|ΤΗΝ|ΤΗΣ|ΤΙ|ΤΙΚ|ΤΙΜ|ΤΙΠΟΤΑ|ΤΙΠΟΤΕ|'
|
||||
. 'ΤΙΣ|ΤΝΤ|ΤΟ|ΤΟΙ|ΤΟΚ|ΤΟΜ|ΤΟΝ|ΤΟΠ|ΤΟΣ|ΤΟΣ?Ν|ΤΟΣΑ|ΤΟΣΕΣ|ΤΟΣΗ|ΤΟΣΗΝ|ΤΟΣΗΣ|ΤΟΣΟ|ΤΟΣΟΙ|ΤΟΣΟΝ|ΤΟΣΟΣ|ΤΟΣΟΥ|ΤΟΣΟΥΣ|ΤΟΤΕ|ΤΟΥ|ΤΟΥΛΑΧΙΣΤΟ|'
|
||||
. 'ΤΟΥΛΑΧΙΣΤΟΝ|ΤΟΥΣ|ΤΣ|ΤΣΑ|ΤΣΕ|ΤΥΧΟΝ|ΤΩ|ΤΩΝ|ΤΩΡΑ|ΥΑΣ|ΥΒΑ|ΥΒΟ|ΥΙΕ|ΥΙΟ|ΥΛΑ|ΥΛΗ|ΥΝΙ|ΥΠ|ΥΠΕΡ|ΥΠΟ|ΥΠΟΨΗ|ΥΠΟΨΙΝ|ΥΣΤΕΡΑ|ΥΦΗ|ΥΨΗ|ΦΑ|ΦΑΐ|ΦΑΕ|'
|
||||
. 'ΦΑΝ|ΦΑΞ|ΦΑΣ|ΦΑΩ|ΦΕΖ|ΦΕΙ|ΦΕΤΟΣ|ΦΕΥ|ΦΙ|ΦΙΛ|ΦΙΣ|ΦΟΞ|ΦΠΑ|ΦΡΙ|ΧΑ|ΧΑΗ|ΧΑΛ|ΧΑΝ|ΧΑΦ|ΧΕ|ΧΕΙ|ΧΘΕΣ|ΧΙ|ΧΙΑ|ΧΙΛ|ΧΙΟ|ΧΛΜ|ΧΜ|ΧΟΗ|ΧΟΛ|ΧΡΩ|ΧΤΕΣ|'
|
||||
. 'ΧΩΡΙΣ|ΧΩΡΙΣΤΑ|ΨΕΣ|ΨΗΛΑ|ΨΙ|ΨΙΤ|Ω|ΩΑ|ΩΑΣ|ΩΔΕ|ΩΕΣ|ΩΘΩ|ΩΜΑ|ΩΜΕ|ΩΝ|ΩΟ|ΩΟΝ|ΩΟΥ|ΩΣ|ΩΣΑΝ|ΩΣΗ|ΩΣΟΤΟΥ|ΩΣΠΟΥ|ΩΣΤΕ|ΩΣΤΟΣΟ|ΩΤΑ|ΩΧ|ΩΩΝ)$/';
|
||||
|
||||
if (preg_match($stop_words, $token)) {
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
// Vowels
|
||||
$v = '(Α|Ε|Η|Ι|Ο|Υ|Ω)';
|
||||
|
||||
// Vowels without Y
|
||||
$v2 = '(Α|Ε|Η|Ι|Ο|Ω)';
|
||||
|
||||
$test1 = true;
|
||||
|
||||
// Step S1. 14 stems
|
||||
$re = '/^(.+?)(ΙΖΑ|ΙΖΕΣ|ΙΖΕ|ΙΖΑΜΕ|ΙΖΑΤΕ|ΙΖΑΝ|ΙΖΑΝΕ|ΙΖΩ|ΙΖΕΙΣ|ΙΖΕΙ|ΙΖΟΥΜΕ|ΙΖΕΤΕ|ΙΖΟΥΝ|ΙΖΟΥΝΕ)$/';
|
||||
$exceptS1 = '/^(ΑΝΑΜΠΑ|ΕΜΠΑ|ΕΠΑ|ΞΑΝΑΠΑ|ΠΑ|ΠΕΡΙΠΑ|ΑΘΡΟ|ΣΥΝΑΘΡΟ|ΔΑΝΕ)$/';
|
||||
$exceptS2 = '/^(ΜΑΡΚ|ΚΟΡΝ|ΑΜΠΑΡ|ΑΡΡ|ΒΑΘΥΡΙ|ΒΑΡΚ|Β|ΒΟΛΒΟΡ|ΓΚΡ|ΓΛΥΚΟΡ|ΓΛΥΚΥΡ|ΙΜΠ|Λ|ΛΟΥ|ΜΑΡ|Μ|ΠΡ|ΜΠΡ|ΠΟΛΥΡ|Π|Ρ|ΠΙΠΕΡΟΡ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1];
|
||||
|
||||
if (preg_match($exceptS1, $token)) {
|
||||
$token .= 'I';
|
||||
}
|
||||
|
||||
if (preg_match($exceptS2, $token)) {
|
||||
$token .= 'IΖ';
|
||||
}
|
||||
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
// Step S2. 7 stems
|
||||
$re = '/^(.+?)(ΩΘΗΚΑ|ΩΘΗΚΕΣ|ΩΘΗΚΕ|ΩΘΗΚΑΜΕ|ΩΘΗΚΑΤΕ|ΩΘΗΚΑΝ|ΩΘΗΚΑΝΕ)$/';
|
||||
$exceptS1 = '/^(ΑΛ|ΒΙ|ΕΝ|ΥΨ|ΛΙ|ΖΩ|Σ|Χ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1];
|
||||
|
||||
if (preg_match($exceptS1, $token)) {
|
||||
$token .= 'ΩΝ';
|
||||
}
|
||||
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
// Step S3. 7 stems
|
||||
$re = '/^(.+?)(ΙΣΑ|ΙΣΕΣ|ΙΣΕ|ΙΣΑΜΕ|ΙΣΑΤΕ|ΙΣΑΝ|ΙΣΑΝΕ)$/';
|
||||
$exceptS1 = '/^(ΑΝΑΜΠΑ|ΑΘΡΟ|ΕΜΠΑ|ΕΣΕ|ΕΣΩΚΛΕ|ΕΠΑ|ΞΑΝΑΠΑ|ΕΠΕ|ΠΕΡΙΠΑ|ΑΘΡΟ|ΣΥΝΑΘΡΟ|ΔΑΝΕ|ΚΛΕ|ΧΑΡΤΟΠΑ|ΕΞΑΡΧΑ|ΜΕΤΕΠΕ|ΑΠΟΚΛΕ|ΑΠΕΚΛΕ|ΕΚΛΕ|ΠΕ|ΠΕΡΙΠΑ)$/';
|
||||
$exceptS2 = '/^(ΑΝ|ΑΦ|ΓΕ|ΓΙΓΑΝΤΟΑΦ|ΓΚΕ|ΔΗΜΟΚΡΑΤ|ΚΟΜ|ΓΚ|Μ|Π|ΠΟΥΚΑΜ|ΟΛΟ|ΛΑΡ)$/';
|
||||
|
||||
if ($token == "ΙΣΑ") {
|
||||
$token = "ΙΣ";
|
||||
|
||||
return $token;
|
||||
}
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1];
|
||||
|
||||
if (preg_match($exceptS1, $token)) {
|
||||
$token .= 'Ι';
|
||||
}
|
||||
|
||||
if (preg_match($exceptS2, $token)) {
|
||||
$token .= 'ΙΣ';
|
||||
}
|
||||
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
// Step S4. 7 stems
|
||||
$re = '/^(.+?)(ΙΣΩ|ΙΣΕΙΣ|ΙΣΕΙ|ΙΣΟΥΜΕ|ΙΣΕΤΕ|ΙΣΟΥΝ|ΙΣΟΥΝΕ)$/';
|
||||
$exceptS1 = '/^(ΑΝΑΜΠΑ|ΕΜΠΑ|ΕΣΕ|ΕΣΩΚΛΕ|ΕΠΑ|ΞΑΝΑΠΑ|ΕΠΕ|ΠΕΡΙΠΑ|ΑΘΡΟ|ΣΥΝΑΘΡΟ|ΔΑΝΕ|ΚΛΕ|ΧΑΡΤΟΠΑ|ΕΞΑΡΧΑ|ΜΕΤΕΠΕ|ΑΠΟΚΛΕ|ΑΠΕΚΛΕ|ΕΚΛΕ|ΠΕ|ΠΕΡΙΠΑ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1];
|
||||
|
||||
if (preg_match($exceptS1, $token)) {
|
||||
$token .= 'Ι';
|
||||
}
|
||||
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
// Step S5. 11 stems
|
||||
$re = '/^(.+?)(ΙΣΤΟΣ|ΙΣΤΟΥ|ΙΣΤΟ|ΙΣΤΕ|ΙΣΤΟΙ|ΙΣΤΩΝ|ΙΣΤΟΥΣ|ΙΣΤΗ|ΙΣΤΗΣ|ΙΣΤΑ|ΙΣΤΕΣ)$/';
|
||||
$exceptS1 = '/^(Μ|Π|ΑΠ|ΑΡ|ΗΔ|ΚΤ|ΣΚ|ΣΧ|ΥΨ|ΦΑ|ΧΡ|ΧΤ|ΑΚΤ|ΑΟΡ|ΑΣΧ|ΑΤΑ|ΑΧΝ|ΑΧΤ|ΓΕΜ|ΓΥΡ|ΕΜΠ|ΕΥΠ|ΕΧΘ|ΗΦΑ|ΚΑΘ|ΚΑΚ|ΚΥΛ|ΛΥΓ|ΜΑΚ|ΜΕΓ|ΤΑΧ|ΦΙΛ|ΧΩΡ)$/';
|
||||
$exceptS2 = '/^(ΔΑΝΕ|ΣΥΝΑΘΡΟ|ΚΛΕ|ΣΕ|ΕΣΩΚΛΕ|ΑΣΕ|ΠΛΕ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1];
|
||||
|
||||
if (preg_match($exceptS1, $token)) {
|
||||
$token .= 'ΙΣΤ';
|
||||
}
|
||||
|
||||
if (preg_match($exceptS2, $token)) {
|
||||
$token .= 'Ι';
|
||||
}
|
||||
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
// Step S6. 6 stems
|
||||
$re = '/^(.+?)(ΙΣΜΟ|ΙΣΜΟΙ|ΙΣΜΟΣ|ΙΣΜΟΥ|ΙΣΜΟΥΣ|ΙΣΜΩΝ)$/';
|
||||
$exceptS1 = '/^(ΑΓΝΩΣΤΙΚ|ΑΤΟΜΙΚ|ΓΝΩΣΤΙΚ|ΕΘΝΙΚ|ΕΚΛΕΚΤΙΚ|ΣΚΕΠΤΙΚ|ΤΟΠΙΚ)$/';
|
||||
$exceptS2 = '/^(ΣΕ|ΜΕΤΑΣΕ|ΜΙΚΡΟΣΕ|ΕΓΚΛΕ|ΑΠΟΚΛΕ)$/';
|
||||
$exceptS3 = '/^(ΔΑΝΕ|ΑΝΤΙΔΑΝΕ)$/';
|
||||
$exceptS4 = '/^(ΑΛΕΞΑΝΔΡΙΝ|ΒΥΖΑΝΤΙΝ|ΘΕΑΤΡΙΝ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1];
|
||||
|
||||
if (preg_match($exceptS1, $token)) {
|
||||
$token = str_replace('ΙΚ', "", $token);
|
||||
}
|
||||
|
||||
if (preg_match($exceptS2, $token)) {
|
||||
$token .= "ΙΣΜ";
|
||||
}
|
||||
|
||||
if (preg_match($exceptS3, $token)) {
|
||||
$token .= "Ι";
|
||||
}
|
||||
|
||||
if (preg_match($exceptS4, $token)) {
|
||||
$token = str_replace('ΙΝ', "", $token);
|
||||
}
|
||||
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
// Step S7. 4 stems
|
||||
$re = '/^(.+?)(ΑΡΑΚΙ|ΑΡΑΚΙΑ|ΟΥΔΑΚΙ|ΟΥΔΑΚΙΑ)$/';
|
||||
$exceptS1 = '/^(Σ|Χ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1];
|
||||
|
||||
if (preg_match($exceptS1, $token)) {
|
||||
$token .= "AΡΑΚ";
|
||||
}
|
||||
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
// Step S8. 8 stems
|
||||
$re = '/^(.+?)(ΑΚΙ|ΑΚΙΑ|ΙΤΣΑ|ΙΤΣΑΣ|ΙΤΣΕΣ|ΙΤΣΩΝ|ΑΡΑΚΙ|ΑΡΑΚΙΑ)$/';
|
||||
$exceptS1 = '/^(ΑΝΘΡ|ΒΑΜΒ|ΒΡ|ΚΑΙΜ|ΚΟΝ|ΚΟΡ|ΛΑΒΡ|ΛΟΥΛ|ΜΕΡ|ΜΟΥΣΤ|ΝΑΓΚΑΣ|ΠΛ|Ρ|ΡΥ|Σ|ΣΚ|ΣΟΚ|ΣΠΑΝ|ΤΖ|ΦΑΡΜ|Χ|'
|
||||
. 'ΚΑΠΑΚ|ΑΛΙΣΦ|ΑΜΒΡ|ΑΝΘΡ|Κ|ΦΥΛ|ΚΑΤΡΑΠ|ΚΛΙΜ|ΜΑΛ|ΣΛΟΒ|Φ|ΣΦ|ΤΣΕΧΟΣΛΟΒ)$/';
|
||||
$exceptS2 = '/^(Β|ΒΑΛ|ΓΙΑΝ|ΓΛ|Ζ|ΗΓΟΥΜΕΝ|ΚΑΡΔ|ΚΟΝ|ΜΑΚΡΥΝ|ΝΥΦ|ΠΑΤΕΡ|Π|ΣΚ|ΤΟΣ|ΤΡΙΠΟΛ)$/';
|
||||
|
||||
// For words like ΠΛΟΥΣΙΟΚΟΡΙΤΣΑ, ΠΑΛΙΟΚΟΡΙΤΣΑ etc
|
||||
$exceptS3 = '/(ΚΟΡ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1];
|
||||
|
||||
if (preg_match($exceptS1, $token)) {
|
||||
$token .= "ΑΚ";
|
||||
}
|
||||
|
||||
if (preg_match($exceptS2, $token)) {
|
||||
$token .= "ΙΤΣ";
|
||||
}
|
||||
|
||||
if (preg_match($exceptS3, $token)) {
|
||||
$token .= "ΙΤΣ";
|
||||
}
|
||||
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
// Step S9. 3 stems
|
||||
$re = '/^(.+?)(ΙΔΙΟ|ΙΔΙΑ|ΙΔΙΩΝ)$/';
|
||||
$exceptS1 = '/^(ΑΙΦΝ|ΙΡ|ΟΛΟ|ΨΑΛ)$/';
|
||||
$exceptS2 = '/(Ε|ΠΑΙΧΝ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1];
|
||||
|
||||
if (preg_match($exceptS1, $token)) {
|
||||
$token .= "ΙΔ";
|
||||
}
|
||||
|
||||
if (preg_match($exceptS2, $token)) {
|
||||
$token .= "ΙΔ";
|
||||
}
|
||||
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
// Step S10. 4 stems
|
||||
$re = '/^(.+?)(ΙΣΚΟΣ|ΙΣΚΟΥ|ΙΣΚΟ|ΙΣΚΕ)$/';
|
||||
$exceptS1 = '/^(Δ|ΙΒ|ΜΗΝ|Ρ|ΦΡΑΓΚ|ΛΥΚ|ΟΒΕΛ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1];
|
||||
|
||||
if (preg_match($exceptS1, $token)) {
|
||||
$token .= "ΙΣΚ";
|
||||
}
|
||||
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
// Step 1
|
||||
// step1list is used in Step 1. 41 stems
|
||||
$step1list = [];
|
||||
$step1list["ΦΑΓΙΑ"] = "ΦΑ";
|
||||
$step1list["ΦΑΓΙΟΥ"] = "ΦΑ";
|
||||
$step1list["ΦΑΓΙΩΝ"] = "ΦΑ";
|
||||
$step1list["ΣΚΑΓΙΑ"] = "ΣΚΑ";
|
||||
$step1list["ΣΚΑΓΙΟΥ"] = "ΣΚΑ";
|
||||
$step1list["ΣΚΑΓΙΩΝ"] = "ΣΚΑ";
|
||||
$step1list["ΟΛΟΓΙΟΥ"] = "ΟΛΟ";
|
||||
$step1list["ΟΛΟΓΙΑ"] = "ΟΛΟ";
|
||||
$step1list["ΟΛΟΓΙΩΝ"] = "ΟΛΟ";
|
||||
$step1list["ΣΟΓΙΟΥ"] = "ΣΟ";
|
||||
$step1list["ΣΟΓΙΑ"] = "ΣΟ";
|
||||
$step1list["ΣΟΓΙΩΝ"] = "ΣΟ";
|
||||
$step1list["ΤΑΤΟΓΙΑ"] = "ΤΑΤΟ";
|
||||
$step1list["ΤΑΤΟΓΙΟΥ"] = "ΤΑΤΟ";
|
||||
$step1list["ΤΑΤΟΓΙΩΝ"] = "ΤΑΤΟ";
|
||||
$step1list["ΚΡΕΑΣ"] = "ΚΡΕ";
|
||||
$step1list["ΚΡΕΑΤΟΣ"] = "ΚΡΕ";
|
||||
$step1list["ΚΡΕΑΤΑ"] = "ΚΡΕ";
|
||||
$step1list["ΚΡΕΑΤΩΝ"] = "ΚΡΕ";
|
||||
$step1list["ΠΕΡΑΣ"] = "ΠΕΡ";
|
||||
$step1list["ΠΕΡΑΤΟΣ"] = "ΠΕΡ";
|
||||
|
||||
// Added by Spyros. Also at $re in step1
|
||||
$step1list["ΠΕΡΑΤΗ"] = "ΠΕΡ";
|
||||
$step1list["ΠΕΡΑΤΑ"] = "ΠΕΡ";
|
||||
$step1list["ΠΕΡΑΤΩΝ"] = "ΠΕΡ";
|
||||
$step1list["ΤΕΡΑΣ"] = "ΤΕΡ";
|
||||
$step1list["ΤΕΡΑΤΟΣ"] = "ΤΕΡ";
|
||||
$step1list["ΤΕΡΑΤΑ"] = "ΤΕΡ";
|
||||
$step1list["ΤΕΡΑΤΩΝ"] = "ΤΕΡ";
|
||||
$step1list["ΦΩΣ"] = "ΦΩ";
|
||||
$step1list["ΦΩΤΟΣ"] = "ΦΩ";
|
||||
$step1list["ΦΩΤΑ"] = "ΦΩ";
|
||||
$step1list["ΦΩΤΩΝ"] = "ΦΩ";
|
||||
$step1list["ΚΑΘΕΣΤΩΣ"] = "ΚΑΘΕΣΤ";
|
||||
$step1list["ΚΑΘΕΣΤΩΤΟΣ"] = "ΚΑΘΕΣΤ";
|
||||
$step1list["ΚΑΘΕΣΤΩΤΑ"] = "ΚΑΘΕΣΤ";
|
||||
$step1list["ΚΑΘΕΣΤΩΤΩΝ"] = "ΚΑΘΕΣΤ";
|
||||
$step1list["ΓΕΓΟΝΟΣ"] = "ΓΕΓΟΝ";
|
||||
$step1list["ΓΕΓΟΝΟΤΟΣ"] = "ΓΕΓΟΝ";
|
||||
$step1list["ΓΕΓΟΝΟΤΑ"] = "ΓΕΓΟΝ";
|
||||
$step1list["ΓΕΓΟΝΟΤΩΝ"] = "ΓΕΓΟΝ";
|
||||
|
||||
$re = '/(.*)(ΦΑΓΙΑ|ΦΑΓΙΟΥ|ΦΑΓΙΩΝ|ΣΚΑΓΙΑ|ΣΚΑΓΙΟΥ|ΣΚΑΓΙΩΝ|ΟΛΟΓΙΟΥ|ΟΛΟΓΙΑ|ΟΛΟΓΙΩΝ|ΣΟΓΙΟΥ|ΣΟΓΙΑ|ΣΟΓΙΩΝ|ΤΑΤΟΓΙΑ|ΤΑΤΟΓΙΟΥ|ΤΑΤΟΓΙΩΝ|ΚΡΕΑΣ|ΚΡΕΑΤΟΣ|'
|
||||
. 'ΚΡΕΑΤΑ|ΚΡΕΑΤΩΝ|ΠΕΡΑΣ|ΠΕΡΑΤΟΣ|ΠΕΡΑΤΗ|ΠΕΡΑΤΑ|ΠΕΡΑΤΩΝ|ΤΕΡΑΣ|ΤΕΡΑΤΟΣ|ΤΕΡΑΤΑ|ΤΕΡΑΤΩΝ|ΦΩΣ|ΦΩΤΟΣ|ΦΩΤΑ|ΦΩΤΩΝ|ΚΑΘΕΣΤΩΣ|ΚΑΘΕΣΤΩΤΟΣ|'
|
||||
. 'ΚΑΘΕΣΤΩΤΑ|ΚΑΘΕΣΤΩΤΩΝ|ΓΕΓΟΝΟΣ|ΓΕΓΟΝΟΤΟΣ|ΓΕΓΟΝΟΤΑ|ΓΕΓΟΝΟΤΩΝ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$stem = $match[1];
|
||||
$suffix = $match[2];
|
||||
$token = $stem . (\array_key_exists($suffix, $step1list) ? $step1list[$suffix] : '');
|
||||
$test1 = false;
|
||||
}
|
||||
|
||||
// Step 2a. 2 stems
|
||||
$re = '/^(.+?)(ΑΔΕΣ|ΑΔΩΝ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1];
|
||||
$re = '/(ΟΚ|ΜΑΜ|ΜΑΝ|ΜΠΑΜΠ|ΠΑΤΕΡ|ΓΙΑΓΙ|ΝΤΑΝΤ|ΚΥΡ|ΘΕΙ|ΠΕΘΕΡ)$/';
|
||||
|
||||
if (!preg_match($re, $token)) {
|
||||
$token .= "ΑΔ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2b. 2 stems
|
||||
$re = '/^(.+?)(ΕΔΕΣ|ΕΔΩΝ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$exept2 = '/(ΟΠ|ΙΠ|ΕΜΠ|ΥΠ|ΓΗΠ|ΔΑΠ|ΚΡΑΣΠ|ΜΙΛ)$/';
|
||||
|
||||
if (preg_match($exept2, $token)) {
|
||||
$token .= 'ΕΔ';
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2c
|
||||
$re = '/^(.+?)(ΟΥΔΕΣ|ΟΥΔΩΝ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
|
||||
$exept3 = '/(ΑΡΚ|ΚΑΛΙΑΚ|ΠΕΤΑΛ|ΛΙΧ|ΠΛΕΞ|ΣΚ|Σ|ΦΛ|ΦΡ|ΒΕΛ|ΛΟΥΛ|ΧΝ|ΣΠ|ΤΡΑΓ|ΦΕ)$/';
|
||||
|
||||
if (preg_match($exept3, $token)) {
|
||||
$token .= 'ΟΥΔ';
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2d
|
||||
$re = '/^(.+?)(ΕΩΣ|ΕΩΝ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept4 = '/^(Θ|Δ|ΕΛ|ΓΑΛ|Ν|Π|ΙΔ|ΠΑΡ)$/';
|
||||
|
||||
if (preg_match($exept4, $token)) {
|
||||
$token .= 'Ε';
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3
|
||||
$re = '/^(.+?)(ΙΑ|ΙΟΥ|ΙΩΝ)$/';
|
||||
|
||||
if (preg_match($re, $token, $fp)) {
|
||||
$stem = $fp[1];
|
||||
$token = $stem;
|
||||
$re = '/' . $v . '$/';
|
||||
$test1 = false;
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
$token = $stem . 'Ι';
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4
|
||||
$re = '/^(.+?)(ΙΚΑ|ΙΚΟ|ΙΚΟΥ|ΙΚΩΝ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$re = '/' . $v . '$/';
|
||||
$exept5 = '/^(ΑΛ|ΑΔ|ΕΝΔ|ΑΜΑΝ|ΑΜΜΟΧΑΛ|ΗΘ|ΑΝΗΘ|ΑΝΤΙΔ|ΦΥΣ|ΒΡΩΜ|ΓΕΡ|ΕΞΩΔ|ΚΑΛΠ|ΚΑΛΛΙΝ|ΚΑΤΑΔ|ΜΟΥΛ|ΜΠΑΝ|ΜΠΑΓΙΑΤ|ΜΠΟΛ|ΜΠΟΣ|ΝΙΤ|ΞΙΚ|ΣΥΝΟΜΗΛ|ΠΕΤΣ|'
|
||||
. 'ΠΙΤΣ|ΠΙΚΑΝΤ|ΠΛΙΑΤΣ|ΠΟΣΤΕΛΝ|ΠΡΩΤΟΔ|ΣΕΡΤ|ΣΥΝΑΔ|ΤΣΑΜ|ΥΠΟΔ|ΦΙΛΟΝ|ΦΥΛΟΔ|ΧΑΣ)$/';
|
||||
|
||||
if (preg_match($re, $token) || preg_match($exept5, $token)) {
|
||||
$token .= 'ΙΚ';
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5a
|
||||
$re = '/^(.+?)(ΑΜΕ)$/';
|
||||
$re2 = '/^(.+?)(ΑΓΑΜΕ|ΗΣΑΜΕ|ΟΥΣΑΜΕ|ΗΚΑΜΕ|ΗΘΗΚΑΜΕ)$/';
|
||||
|
||||
if ($token == "ΑΓΑΜΕ") {
|
||||
$token = "ΑΓΑΜ";
|
||||
}
|
||||
|
||||
if (preg_match($re2, $token)) {
|
||||
preg_match($re2, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
}
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept6 = '/^(ΑΝΑΠ|ΑΠΟΘ|ΑΠΟΚ|ΑΠΟΣΤ|ΒΟΥΒ|ΞΕΘ|ΟΥΛ|ΠΕΘ|ΠΙΚΡ|ΠΟΤ|ΣΙΧ|Χ)$/';
|
||||
|
||||
if (preg_match($exept6, $token)) {
|
||||
$token .= "ΑΜ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5b
|
||||
$re2 = '/^(.+?)(ΑΝΕ)$/';
|
||||
$re3 = '/^(.+?)(ΑΓΑΝΕ|ΗΣΑΝΕ|ΟΥΣΑΝΕ|ΙΟΝΤΑΝΕ|ΙΟΤΑΝΕ|ΙΟΥΝΤΑΝΕ|ΟΝΤΑΝΕ|ΟΤΑΝΕ|ΟΥΝΤΑΝΕ|ΗΚΑΝΕ|ΗΘΗΚΑΝΕ)$/';
|
||||
|
||||
if (preg_match($re3, $token)) {
|
||||
preg_match($re3, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$re3 = '/^(ΤΡ|ΤΣ)$/';
|
||||
|
||||
if (preg_match($re3, $token)) {
|
||||
$token .= "ΑΓΑΝ";
|
||||
}
|
||||
}
|
||||
|
||||
if (preg_match($re2, $token)) {
|
||||
preg_match($re2, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$re2 = '/' . $v2 . '$/';
|
||||
$exept7 = '/^(ΒΕΤΕΡ|ΒΟΥΛΚ|ΒΡΑΧΜ|Γ|ΔΡΑΔΟΥΜ|Θ|ΚΑΛΠΟΥΖ|ΚΑΣΤΕΛ|ΚΟΡΜΟΡ|ΛΑΟΠΛ|ΜΩΑΜΕΘ|Μ|ΜΟΥΣΟΥΛΜ|Ν|ΟΥΛ|Π|ΠΕΛΕΚ|ΠΛ|ΠΟΛΙΣ|ΠΟΡΤΟΛ|ΣΑΡΑΚΑΤΣ|ΣΟΥΛΤ|'
|
||||
. 'ΤΣΑΡΛΑΤ|ΟΡΦ|ΤΣΙΓΓ|ΤΣΟΠ|ΦΩΤΟΣΤΕΦ|Χ|ΨΥΧΟΠΛ|ΑΓ|ΟΡΦ|ΓΑΛ|ΓΕΡ|ΔΕΚ|ΔΙΠΛ|ΑΜΕΡΙΚΑΝ|ΟΥΡ|ΠΙΘ|ΠΟΥΡΙΤ|Σ|ΖΩΝΤ|ΙΚ|ΚΑΣΤ|ΚΟΠ|ΛΙΧ|ΛΟΥΘΗΡ|ΜΑΙΝΤ|'
|
||||
. 'ΜΕΛ|ΣΙΓ|ΣΠ|ΣΤΕΓ|ΤΡΑΓ|ΤΣΑΓ|Φ|ΕΡ|ΑΔΑΠ|ΑΘΙΓΓ|ΑΜΗΧ|ΑΝΙΚ|ΑΝΟΡΓ|ΑΠΗΓ|ΑΠΙΘ|ΑΤΣΙΓΓ|ΒΑΣ|ΒΑΣΚ|ΒΑΘΥΓΑΛ|ΒΙΟΜΗΧ|ΒΡΑΧΥΚ|ΔΙΑΤ|ΔΙΑΦ|ΕΝΟΡΓ|'
|
||||
. 'ΘΥΣ|ΚΑΠΝΟΒΙΟΜΗΧ|ΚΑΤΑΓΑΛ|ΚΛΙΒ|ΚΟΙΛΑΡΦ|ΛΙΒ|ΜΕΓΛΟΒΙΟΜΗΧ|ΜΙΚΡΟΒΙΟΜΗΧ|ΝΤΑΒ|ΞΗΡΟΚΛΙΒ|ΟΛΙΓΟΔΑΜ|ΟΛΟΓΑΛ|ΠΕΝΤΑΡΦ|ΠΕΡΗΦ|ΠΕΡΙΤΡ|ΠΛΑΤ|'
|
||||
. 'ΠΟΛΥΔΑΠ|ΠΟΛΥΜΗΧ|ΣΤΕΦ|ΤΑΒ|ΤΕΤ|ΥΠΕΡΗΦ|ΥΠΟΚΟΠ|ΧΑΜΗΛΟΔΑΠ|ΨΗΛΟΤΑΒ)$/';
|
||||
|
||||
if (preg_match($re2, $token) || preg_match($exept7, $token)) {
|
||||
$token .= "ΑΝ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5c
|
||||
$re3 = '/^(.+?)(ΕΤΕ)$/';
|
||||
$re4 = '/^(.+?)(ΗΣΕΤΕ)$/';
|
||||
|
||||
if (preg_match($re4, $token)) {
|
||||
preg_match($re4, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
}
|
||||
|
||||
if (preg_match($re3, $token)) {
|
||||
preg_match($re3, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$re3 = '/' . $v2 . '$/';
|
||||
$exept8 = '/(ΟΔ|ΑΙΡ|ΦΟΡ|ΤΑΘ|ΔΙΑΘ|ΣΧ|ΕΝΔ|ΕΥΡ|ΤΙΘ|ΥΠΕΡΘ|ΡΑΘ|ΕΝΘ|ΡΟΘ|ΣΘ|ΠΥΡ|ΑΙΝ|ΣΥΝΔ|ΣΥΝ|ΣΥΝΘ|ΧΩΡ|ΠΟΝ|ΒΡ|ΚΑΘ|ΕΥΘ|ΕΚΘ|ΝΕΤ|ΡΟΝ|ΑΡΚ|ΒΑΡ|ΒΟΛ|ΩΦΕΛ)$/';
|
||||
$exept9 = '/^(ΑΒΑΡ|ΒΕΝ|ΕΝΑΡ|ΑΒΡ|ΑΔ|ΑΘ|ΑΝ|ΑΠΛ|ΒΑΡΟΝ|ΝΤΡ|ΣΚ|ΚΟΠ|ΜΠΟΡ|ΝΙΦ|ΠΑΓ|ΠΑΡΑΚΑΛ|ΣΕΡΠ|ΣΚΕΛ|ΣΥΡΦ|ΤΟΚ|Υ|Δ|ΕΜ|ΘΑΡΡ|Θ)$/';
|
||||
|
||||
if (preg_match($re3, $token) || preg_match($exept8, $token) || preg_match($exept9, $token)) {
|
||||
$token .= "ΕΤ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5d
|
||||
$re = '/^(.+?)(ΟΝΤΑΣ|ΩΝΤΑΣ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept10 = '/^(ΑΡΧ)$/';
|
||||
$exept11 = '/(ΚΡΕ)$/';
|
||||
|
||||
if (preg_match($exept10, $token)) {
|
||||
$token .= "ΟΝΤ";
|
||||
}
|
||||
|
||||
if (preg_match($exept11, $token)) {
|
||||
$token .= "ΩΝΤ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5e
|
||||
$re = '/^(.+?)(ΟΜΑΣΤΕ|ΙΟΜΑΣΤΕ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept11 = '/^(ΟΝ)$/';
|
||||
|
||||
if (preg_match($exept11, $token)) {
|
||||
$token .= "ΟΜΑΣΤ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5f
|
||||
$re = '/^(.+?)(ΕΣΤΕ)$/';
|
||||
$re2 = '/^(.+?)(ΙΕΣΤΕ)$/';
|
||||
|
||||
if (preg_match($re2, $token)) {
|
||||
preg_match($re2, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$re2 = '/^(Π|ΑΠ|ΣΥΜΠ|ΑΣΥΜΠ|ΑΚΑΤΑΠ|ΑΜΕΤΑΜΦ)$/';
|
||||
|
||||
if (preg_match($re2, $token)) {
|
||||
$token .= "ΙΕΣΤ";
|
||||
}
|
||||
}
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept12 = '/^(ΑΛ|ΑΡ|ΕΚΤΕΛ|Ζ|Μ|Ξ|ΠΑΡΑΚΑΛ|ΠΡΟ|ΝΙΣ)$/';
|
||||
|
||||
if (preg_match($exept12, $token)) {
|
||||
$token .= "ΕΣΤ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5g
|
||||
$re = '/^(.+?)(ΗΚΑ|ΗΚΕΣ|ΗΚΕ)$/';
|
||||
$re2 = '/^(.+?)(ΗΘΗΚΑ|ΗΘΗΚΕΣ|ΗΘΗΚΕ)$/';
|
||||
|
||||
if (preg_match($re2, $token)) {
|
||||
preg_match($re2, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
}
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept13 = '/(ΣΚΩΛ|ΣΚΟΥΛ|ΝΑΡΘ|ΣΦ|ΟΘ|ΠΙΘ)$/';
|
||||
$exept14 = '/^(ΔΙΑΘ|Θ|ΠΑΡΑΚΑΤΑΘ|ΠΡΟΣΘ|ΣΥΝΘ|)$/';
|
||||
|
||||
if (preg_match($exept13, $token) || preg_match($exept14, $token)) {
|
||||
$token .= "ΗΚ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5h
|
||||
$re = '/^(.+?)(ΟΥΣΑ|ΟΥΣΕΣ|ΟΥΣΕ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept15 = '/^(ΦΑΡΜΑΚ|ΧΑΔ|ΑΓΚ|ΑΝΑΡΡ|ΒΡΟΜ|ΕΚΛΙΠ|ΛΑΜΠΙΔ|ΛΕΧ|Μ|ΠΑΤ|Ρ|Λ|ΜΕΔ|ΜΕΣΑΖ|ΥΠΟΤΕΙΝ|ΑΜ|ΑΙΘ|ΑΝΗΚ|ΔΕΣΠΟΖ|ΕΝΔΙΑΦΕΡ|ΔΕ|ΔΕΥΤΕΡΕΥ|ΚΑΘΑΡΕΥ|ΠΛΕ|ΤΣΑ)$/';
|
||||
$exept16 = '/(ΠΟΔΑΡ|ΒΛΕΠ|ΠΑΝΤΑΧ|ΦΡΥΔ|ΜΑΝΤΙΛ|ΜΑΛΛ|ΚΥΜΑΤ|ΛΑΧ|ΛΗΓ|ΦΑΓ|ΟΜ|ΠΡΩΤ)$/';
|
||||
|
||||
if (preg_match($exept15, $token) || preg_match($exept16, $token)) {
|
||||
$token .= "ΟΥΣ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5i
|
||||
$re = '/^(.+?)(ΑΓΑ|ΑΓΕΣ|ΑΓΕ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept17 = '/^(ΨΟΦ|ΝΑΥΛΟΧ)$/';
|
||||
$exept20 = '/(ΚΟΛΛ)$/';
|
||||
$exept18 = '/^(ΑΒΑΣΤ|ΠΟΛΥΦ|ΑΔΗΦ|ΠΑΜΦ|Ρ|ΑΣΠ|ΑΦ|ΑΜΑΛ|ΑΜΑΛΛΙ|ΑΝΥΣΤ|ΑΠΕΡ|ΑΣΠΑΡ|ΑΧΑΡ|ΔΕΡΒΕΝ|ΔΡΟΣΟΠ|ΞΕΦ|ΝΕΟΠ|ΝΟΜΟΤ|ΟΛΟΠ|ΟΜΟΤ|ΠΡΟΣΤ|ΠΡΟΣΩΠΟΠ|'
|
||||
. 'ΣΥΜΠ|ΣΥΝΤ|Τ|ΥΠΟΤ|ΧΑΡ|ΑΕΙΠ|ΑΙΜΟΣΤ|ΑΝΥΠ|ΑΠΟΤ|ΑΡΤΙΠ|ΔΙΑΤ|ΕΝ|ΕΠΙΤ|ΚΡΟΚΑΛΟΠ|ΣΙΔΗΡΟΠ|Λ|ΝΑΥ|ΟΥΛΑΜ|ΟΥΡ|Π|ΤΡ|Μ)$/';
|
||||
$exept19 = '/(ΟΦ|ΠΕΛ|ΧΟΡΤ|ΛΛ|ΣΦ|ΡΠ|ΦΡ|ΠΡ|ΛΟΧ|ΣΜΗΝ)$/';
|
||||
|
||||
if (
|
||||
(preg_match($exept18, $token) || preg_match($exept19, $token))
|
||||
&& !(preg_match($exept17, $token) || preg_match($exept20, $token))
|
||||
) {
|
||||
$token .= "ΑΓ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5j
|
||||
$re = '/^(.+?)(ΗΣΕ|ΗΣΟΥ|ΗΣΑ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept21 = '/^(Ν|ΧΕΡΣΟΝ|ΔΩΔΕΚΑΝ|ΕΡΗΜΟΝ|ΜΕΓΑΛΟΝ|ΕΠΤΑΝ)$/';
|
||||
|
||||
if (preg_match($exept21, $token)) {
|
||||
$token .= "ΗΣ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5k
|
||||
$re = '/^(.+?)(ΗΣΤΕ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept22 = '/^(ΑΣΒ|ΣΒ|ΑΧΡ|ΧΡ|ΑΠΛ|ΑΕΙΜΝ|ΔΥΣΧΡ|ΕΥΧΡ|ΚΟΙΝΟΧΡ|ΠΑΛΙΜΨ)$/';
|
||||
|
||||
if (preg_match($exept22, $token)) {
|
||||
$token .= "ΗΣΤ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5l
|
||||
$re = '/^(.+?)(ΟΥΝΕ|ΗΣΟΥΝΕ|ΗΘΟΥΝΕ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept23 = '/^(Ν|Ρ|ΣΠΙ|ΣΤΡΑΒΟΜΟΥΤΣ|ΚΑΚΟΜΟΥΤΣ|ΕΞΩΝ)$/';
|
||||
|
||||
if (preg_match($exept23, $token)) {
|
||||
$token .= "ΟΥΝ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5m
|
||||
$re = '/^(.+?)(ΟΥΜΕ|ΗΣΟΥΜΕ|ΗΘΟΥΜΕ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
$test1 = false;
|
||||
$exept24 = '/^(ΠΑΡΑΣΟΥΣ|Φ|Χ|ΩΡΙΟΠΛ|ΑΖ|ΑΛΛΟΣΟΥΣ|ΑΣΟΥΣ)$/';
|
||||
|
||||
if (preg_match($exept24, $token)) {
|
||||
$token .= "ΟΥΜ";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 6
|
||||
$re = '/^(.+?)(ΜΑΤΑ|ΜΑΤΩΝ|ΜΑΤΟΣ)$/';
|
||||
$re2 = '/^(.+?)(Α|ΑΓΑΤΕ|ΑΓΑΝ|ΑΕΙ|ΑΜΑΙ|ΑΝ|ΑΣ|ΑΣΑΙ|ΑΤΑΙ|ΑΩ|Ε|ΕΙ|ΕΙΣ|ΕΙΤΕ|ΕΣΑΙ|ΕΣ|ΕΤΑΙ|Ι|ΙΕΜΑΙ|ΙΕΜΑΣΤΕ|ΙΕΤΑΙ|ΙΕΣΑΙ|ΙΕΣΑΣΤΕ|ΙΟΜΑΣΤΑΝ|ΙΟΜΟΥΝ|'
|
||||
. 'ΙΟΜΟΥΝΑ|ΙΟΝΤΑΝ|ΙΟΝΤΟΥΣΑΝ|ΙΟΣΑΣΤΑΝ|ΙΟΣΑΣΤΕ|ΙΟΣΟΥΝ|ΙΟΣΟΥΝΑ|ΙΟΤΑΝ|ΙΟΥΜΑ|ΙΟΥΜΑΣΤΕ|ΙΟΥΝΤΑΙ|ΙΟΥΝΤΑΝ|Η|ΗΔΕΣ|ΗΔΩΝ|ΗΘΕΙ|ΗΘΕΙΣ|ΗΘΕΙΤΕ|'
|
||||
. 'ΗΘΗΚΑΤΕ|ΗΘΗΚΑΝ|ΗΘΟΥΝ|ΗΘΩ|ΗΚΑΤΕ|ΗΚΑΝ|ΗΣ|ΗΣΑΝ|ΗΣΑΤΕ|ΗΣΕΙ|ΗΣΕΣ|ΗΣΟΥΝ|ΗΣΩ|Ο|ΟΙ|ΟΜΑΙ|ΟΜΑΣΤΑΝ|ΟΜΟΥΝ|ΟΜΟΥΝΑ|ΟΝΤΑΙ|ΟΝΤΑΝ|ΟΝΤΟΥΣΑΝ|ΟΣ|'
|
||||
. 'ΟΣΑΣΤΑΝ|ΟΣΑΣΤΕ|ΟΣΟΥΝ|ΟΣΟΥΝΑ|ΟΤΑΝ|ΟΥ|ΟΥΜΑΙ|ΟΥΜΑΣΤΕ|ΟΥΝ|ΟΥΝΤΑΙ|ΟΥΝΤΑΝ|ΟΥΣ|ΟΥΣΑΝ|ΟΥΣΑΤΕ|Υ|ΥΣ|Ω|ΩΝ)$/';
|
||||
|
||||
if (preg_match($re, $token, $match)) {
|
||||
$token = $match[1] . "ΜΑ";
|
||||
}
|
||||
|
||||
if (preg_match($re2, $token) && $test1) {
|
||||
preg_match($re2, $token, $match);
|
||||
$token = $match[1];
|
||||
}
|
||||
|
||||
// Step 7 (ΠΑΡΑΘΕΤΙΚΑ)
|
||||
$re = '/^(.+?)(ΕΣΤΕΡ|ΕΣΤΑΤ|ΟΤΕΡ|ΟΤΑΤ|ΥΤΕΡ|ΥΤΑΤ|ΩΤΕΡ|ΩΤΑΤ)$/';
|
||||
|
||||
if (preg_match($re, $token)) {
|
||||
preg_match($re, $token, $match);
|
||||
$token = $match[1];
|
||||
}
|
||||
|
||||
return $this->toLowerCase($token, $wCase);
|
||||
}
|
||||
|
||||
/**
 * Converts the token to uppercase, suppressing accents and diaeresis, and records in $wCase how each
 * character was converted so that toLowerCase() can restore the original glyphs afterwards.
 *
 * Rule codes written to $wCase (one entry per character position):
 *  - 0: character copied unchanged (it is not a key of the conversion map)
 *  - 1: plain lowercase letter
 *  - 2: vowel with accent (tonos), or final sigma
 *  - 3: vowel with diaeresis (dialytika)
 *  - 4: vowel with both diaeresis and accent
 *
 * @param   string  $token   Token to process
 * @param   array   &$wCase  Map of uppercase rules; overwritten by this method
 *
 * @return  string
 *
 * @since   4.0.0
 */
protected function toUpperCase($token, &$wCase)
{
    // Measure once, with an explicit encoding, so the rule map and the loop agree on the character count.
    $length = mb_strlen($token, 'UTF-8');
    $wCase  = array_fill(0, $length, 0);

    $caseConvert = [
        "α" => 'Α',
        "β" => 'Β',
        "γ" => 'Γ',
        "δ" => 'Δ',
        "ε" => 'Ε',
        "ζ" => 'Ζ',
        "η" => 'Η',
        "θ" => 'Θ',
        "ι" => 'Ι',
        "κ" => 'Κ',
        "λ" => 'Λ',
        "μ" => 'Μ',
        "ν" => 'Ν',
        "ξ" => 'Ξ',
        "ο" => 'Ο',
        "π" => 'Π',
        "ρ" => 'Ρ',
        "σ" => 'Σ',
        "τ" => 'Τ',
        "υ" => 'Υ',
        "φ" => 'Φ',
        "χ" => 'Χ',
        "ψ" => 'Ψ',
        "ω" => 'Ω',
        "ά" => 'Α',
        "έ" => 'Ε',
        "ή" => 'Η',
        "ί" => 'Ι',
        "ό" => 'Ο',
        "ύ" => 'Υ',
        "ώ" => 'Ω',
        "ς" => 'Σ',
        "ϊ" => 'Ι',
        // Upsilon with dialytika must fold to upsilon (it was previously folded to iota by mistake).
        "ϋ" => 'Υ',
        "ΐ" => 'Ι',
        "ΰ" => 'Υ',
    ];

    // Characters whose lowercase glyph cannot be recovered by a plain case fold, with their rule code.
    // Every other key of $caseConvert gets rule 1.
    $specialRules = [
        'ά' => 2,
        'έ' => 2,
        'ή' => 2,
        'ί' => 2,
        'ό' => 2,
        'ύ' => 2,
        'ώ' => 2,
        'ς' => 2,
        'ϊ' => 3,
        'ϋ' => 3,
        'ΐ' => 4,
        'ΰ' => 4,
    ];

    $newToken = '';

    for ($i = 0; $i < $length; $i++) {
        $char = mb_substr($token, $i, 1, 'UTF-8');

        // Not a known lowercase Greek letter: copy it through and leave its rule at 0.
        if (!\array_key_exists($char, $caseConvert)) {
            $newToken .= $char;

            continue;
        }

        $newToken .= $caseConvert[$char];
        $wCase[$i] = isset($specialRules[$char]) ? $specialRules[$char] : 1;
    }

    return $newToken;
}
|
||||
|
||||
/**
 * Converts the suppressed uppercase token back to lowercase, using the $wCase map to add back the accents,
 * diaeresis and handle the special case of final sigma (different lowercase glyph than the regular sigma, only
 * used at the end of words).
 *
 * Positions with no entry in $wCase, with rule 0, or with a rule outside 1-4 are copied unchanged. When a
 * position carries rule 2, 3 or 4 but the letter now at that position has no accented/diaeresis form in the
 * lookup table, the letter is lowercased plainly instead of being dropped.
 *
 * @param   string  $token  Token to process
 * @param   array   $wCase  Map of lowercase rules
 *
 * @return  string
 *
 * @since   4.0.0
 */
protected function toLowerCase($token, $wCase)
{
    // Lowercase glyphs a plain case fold cannot produce, keyed by rule code and then by uppercase letter.
    $restoreMaps = [
        // Rule 2: vowel with accent (tonos); or the special case of final sigma
        2 => [
            'Α' => 'ά',
            'Ε' => 'έ',
            'Η' => 'ή',
            'Ι' => 'ί',
            'Ο' => 'ό',
            'Υ' => 'ύ',
            'Ω' => 'ώ',
            'Σ' => 'ς',
        ],
        // Rule 3: vowels with diaeresis (dialytika)
        3 => [
            'Ι' => 'ϊ',
            'Υ' => 'ϋ',
        ],
        // Rule 4: vowels with both diaeresis (dialytika) and accent (tonos)
        4 => [
            'Ι' => 'ΐ',
            'Υ' => 'ΰ',
        ],
    ];

    $newToken = '';
    $length   = mb_strlen($token, 'UTF-8');

    for ($i = 0; $i < $length; $i++) {
        $char = mb_substr($token, $i, 1, 'UTF-8');
        $rule = isset($wCase[$i]) ? (int) $wCase[$i] : 0;

        // No rule recorded, rule 0 (never case-converted) or an unknown code: keep the character as it is.
        if ($rule < 1 || $rule > 4) {
            $newToken .= $char;

            continue;
        }

        // Rules 2-4: restore the marked glyph when the current letter has one.
        if (isset($restoreMaps[$rule][$char])) {
            $newToken .= $restoreMaps[$rule][$char];

            continue;
        }

        // Rule 1, or a marked position whose letter has no special form: plain lowercase.
        $newToken .= mb_strtolower($char, 'UTF-8');
    }

    return $newToken;
}
|
||||
}
|
||||
@ -0,0 +1,71 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2018 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer\Language;
|
||||
|
||||
use Joomla\Component\Finder\Administrator\Indexer\Language;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
|
||||
* Chinese (simplified) language support class for the Finder indexer package.
|
||||
*
|
||||
* @since 4.0.0
|
||||
*/
|
||||
class Zh extends Language
{
    /**
     * Language locale of the class
     *
     * @var    string
     * @since  4.0.0
     */
    public $language = 'zh';

    /**
     * Spacer between terms
     *
     * @var    string
     * @since  4.0.0
     */
    public $spacer = '';

    /**
     * Method to construct the language object.
     *
     * @param   string|null  $locale  Accepted but ignored by this class.
     *
     * @since   4.0.0
     */
    public function __construct($locale = null)
    {
        // Override parent constructor since we don't need to load an external stemmer
    }

    /**
     * Method to tokenise a text string.
     *
     * Every Han character is surrounded with spaces before the text is handed to the parent tokeniser, so
     * each Chinese character is presented to it as a separate whitespace-delimited piece.
     *
     * @param   string  $input  The input to tokenise.
     *
     * @return  array  An array of term strings.
     *
     * @since   4.0.0
     */
    public function tokenise($input)
    {
        // We first add whitespace around each Chinese character, so that our later code can easily split on this.
        $spaced = preg_replace('#\p{Han}#mui', ' $0 ', $input);

        // preg_replace() returns null on failure (e.g. malformed UTF-8 with the "u" modifier). Keep the
        // original text in that case instead of passing null on and losing the whole input.
        if ($spaced === null) {
            $spaced = $input;
        }

        // Now we split up the input into individual terms
        return parent::tokenise($spaced);
    }
}
|
||||
125
administrator/components/com_finder/src/Indexer/Parser.php
Normal file
125
administrator/components/com_finder/src/Indexer/Parser.php
Normal file
@ -0,0 +1,125 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer;
|
||||
|
||||
use Joomla\CMS\Filter\InputFilter;
|
||||
use Joomla\CMS\Language\Text;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
|
||||
* Parser base class for the Finder indexer package.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
abstract class Parser
{
    /**
     * Parser support instances container.
     *
     * @var    Parser[]
     * @since  4.0.0
     */
    protected static $instances = [];

    /**
     * Method to get a parser, creating it if necessary.
     *
     * The format is filtered as a 'cmd' value, and the parser class name is derived from it with ucfirst().
     * One instance per format is cached in self::$instances.
     *
     * @param   string  $format  The type of parser to load.
     *
     * @return  Parser  A Parser instance.
     *
     * @since   2.5
     * @throws  \Exception on invalid parser.
     */
    public static function getInstance($format)
    {
        $format = InputFilter::getInstance()->clean($format, 'cmd');

        // Only create one parser for each format.
        if (isset(self::$instances[$format])) {
            return self::$instances[$format];
        }

        // Setup the adapter for the parser.
        $class = '\\Joomla\\Component\\Finder\\Administrator\\Indexer\\Parser\\' . ucfirst($format);

        // Check if a parser exists for the format.
        if (class_exists($class)) {
            self::$instances[$format] = new $class();

            return self::$instances[$format];
        }

        // Throw invalid format exception.
        throw new \Exception(Text::sprintf('COM_FINDER_INDEXER_INVALID_PARSER', $format));
    }

    /**
     * Method to parse input and extract the plain text. Because this method is
     * called from both inside and outside the indexer, it needs to be able to
     * batch out its parsing functionality to deal with the inefficiencies of
     * regular expressions. The input is handed to process() in pieces of at most 2KB.
     *
     * A piece normally ends just after the last space it contains. When a piece contains no space, the cut
     * is moved back so that it never falls inside a multi-byte UTF-8 character; otherwise every piece of a
     * long text without spaces would be malformed UTF-8.
     *
     * @param   string  $input  The input to parse.
     *
     * @return  string  The plain text input.
     *
     * @since   2.5
     */
    public function parse($input)
    {
        $chunk = 2048;
        $end   = \strlen($input);

        // If the input is less than 2KB we can parse it in one go.
        if ($end <= $chunk) {
            return $this->process($input);
        }

        // Input is longer than 2Kb so parse it in chunks of 2Kb or less.
        $start  = 0;
        $return = '';

        while ($start < $end) {
            // Setup the string.
            $string = substr($input, $start, $chunk);

            // Only look for a break position if we aren't at the end.
            if (($start + $chunk) < $end) {
                $ls = strrpos($string, ' ');

                if ($ls !== false) {
                    // Truncate to the last space character (but include it in the string).
                    $string = substr($string, 0, $ls + 1);
                } else {
                    // No space to break on. If the byte that would open the next chunk is a UTF-8
                    // continuation byte (10xxxxxx) the cut splits a character, so back up. A character has
                    // at most three continuation bytes, hence the step limit for non UTF-8 data.
                    $length = \strlen($string);
                    $steps  = 3;

                    while ($steps-- > 0 && $length > 1 && (\ord($input[$start + $length]) & 0xC0) === 0x80) {
                        $length--;
                    }

                    $string = substr($string, 0, $length);
                }
            }

            // Adjust the start position for the next iteration.
            $start += \strlen($string);

            // Parse the chunk.
            $return .= $this->process($string);
        }

        return $return;
    }

    /**
     * Method to process input and extract the plain text.
     *
     * @param   string  $input  The input to process.
     *
     * @return  string  The plain text input.
     *
     * @since   2.5
     */
    abstract protected function process($input);
}
|
||||
158
administrator/components/com_finder/src/Indexer/Parser/Html.php
Normal file
158
administrator/components/com_finder/src/Indexer/Parser/Html.php
Normal file
@ -0,0 +1,158 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer\Parser;
|
||||
|
||||
use Joomla\Component\Finder\Administrator\Indexer\Parser;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
|
||||
* HTML Parser class for the Finder indexer package.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
class Html extends Parser
{
    /**
     * Method to parse input and extract the plain text. Because this method is
     * called from both inside and outside the indexer, it needs to be able to
     * batch out its parsing functionality to deal with the inefficiencies of
     * regular expressions. The markup is removed here; the remaining text is then passed to the
     * parent parse() method.
     *
     * @param   string  $input  The input to parse.
     *
     * @return  string  The plain text input.
     *
     * @since   2.5
     */
    public function parse($input)
    {
        // Strip invalid UTF-8 characters. The substitute character is switched off for the conversion
        // and the previous setting is restored right afterwards.
        $oldSetting = \ini_get('mbstring.substitute_character');
        ini_set('mbstring.substitute_character', 'none');
        $input = mb_convert_encoding($input, 'UTF-8', 'UTF-8');
        ini_set('mbstring.substitute_character', $oldSetting);

        // Remove anything between <head> and </head> tags. Do this first
        // because there might be <script> or <style> tags nested inside.
        $input = $this->removeBlocks($input, '<head>', '</head>');

        // Convert <style> and <noscript> tags to <script> tags
        // so we can remove them efficiently.
        $search = [
            '<style', '</style',
            '<noscript', '</noscript',
        ];
        $replace = [
            '<script', '</script',
            '<script', '</script',
        ];

        // Case-insensitive on purpose: removeBlocks() matches tags case-insensitively, so upper or mixed
        // case <STYLE>/<NoScript> blocks have to be rewritten as well or their content would survive.
        $input = str_ireplace($search, $replace, $input);

        // Strip all script blocks.
        $input = $this->removeBlocks($input, '<script', '</script>');

        // Decode HTML entities.
        $input = html_entity_decode($input, ENT_QUOTES, 'UTF-8');

        // Convert entities equivalent to spaces to actual spaces. The entity forms can still be present
        // after decoding when the source was double encoded; the decoded no-break space (U+00A0) is
        // converted too.
        $input = str_replace(['&nbsp;', '&#160;', "\xC2\xA0"], ' ', $input);

        // Add a space before both the OPEN and CLOSE tags of BLOCK and LINE BREAKING elements,
        // e.g. 'all<h1><em>m</em>obile List</h1>' will become 'all mobile List'
        $input = preg_replace('/(<|<\/)(' .
            'address|article|aside|blockquote|br|canvas|dd|div|dl|dt|' .
            'fieldset|figcaption|figure|footer|form|h1|h2|h3|h4|h5|h6|header|hgroup|hr|li|' .
            'main|nav|noscript|ol|output|p|pre|section|table|tfoot|ul|video' .
            ')\b/i', ' $1$2', $input);

        // Strip HTML tags.
        $input = strip_tags($input);

        return parent::parse($input);
    }

    /**
     * Method to process HTML input and extract the plain text.
     *
     * @param   string  $input  The input to process.
     *
     * @return  string  The plain text input.
     *
     * @since   2.5
     */
    protected function process($input)
    {
        // Replace any amount of white space with a single space.
        return preg_replace('#\s+#u', ' ', $input);
    }

    /**
     * Method to remove blocks of text between a start and an end tag.
     * Each block removed is effectively replaced by a single space.
     *
     * Tags are matched case-insensitively. If a start tag has no matching end tag, the text from that
     * start tag onwards is kept.
     *
     * Note: The start tag and the end tag must be different.
     * Note: Blocks must not be nested.
     * Note: This method will function correctly with multi-byte strings.
     *
     * @param   string  $input     String to be processed.
     * @param   string  $startTag  String representing the start tag.
     * @param   string  $endTag    String representing the end tag.
     *
     * @return  string  with blocks removed.
     *
     * @since   3.4
     */
    private function removeBlocks($input, $startTag, $endTag)
    {
        $return         = '';
        $offset         = 0;
        $startTagLength = \strlen($startTag);
        $endTagLength   = \strlen($endTag);

        // Find the first start tag.
        $start = stripos($input, $startTag);

        // If no start tags were found, return the string unchanged.
        if ($start === false) {
            return $input;
        }

        // Look for all blocks defined by the start and end tags.
        while ($start !== false) {
            // Accumulate the substring up to the start tag.
            $return .= substr($input, $offset, $start - $offset) . ' ';

            // Look for an end tag corresponding to the start tag.
            $end = stripos($input, $endTag, $start + $startTagLength);

            // If no corresponding end tag, leave the string alone.
            if ($end === false) {
                // Fix the offset so part of the string is not duplicated.
                $offset = $start;

                break;
            }

            // Advance the start position.
            $offset = $end + $endTagLength;

            // Look for the next start tag and loop.
            $start = stripos($input, $startTag, $offset);
        }

        // Add in the final substring after the last end tag.
        $return .= substr($input, $offset);

        return $return;
    }
}
|
||||
@ -0,0 +1,47 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer\Parser;
|
||||
|
||||
use Joomla\Component\Finder\Administrator\Indexer\Parser;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
|
||||
* RTF Parser class for the Finder indexer package.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
class Rtf extends Parser
{
    /**
     * Method to process RTF input and extract the plain text.
     *
     * Picture groups are removed, group braces are turned into spaces, and backslash-introduced
     * control sequences are replaced by spaces.
     *
     * @param   string  $input  The input to process.
     *
     * @return  string  The plain text input.
     *
     * @since   2.5
     */
    protected function process($input)
    {
        // Remove embedded pictures: everything from "{\pict" up to the next closing brace.
        $text = preg_replace('#{\\\pict[^}]*}#mi', '', $input);

        // Braces become spaces, and a backslash directly followed by a line break becomes a plain line break.
        $text = str_replace(['{', '}', "\\\n"], [' ', ' ', "\n"], $text);

        // Blank out a backslash together with everything up to and including the next semicolon.
        $text = preg_replace('#\\\([^;]+?);#m', ' ', $text);

        // Blank out what is left: a backslash followed by letters, digits or apostrophes.
        return preg_replace('#\\\[\'a-zA-Z0-9]+#mi', ' ', $text);
    }
}
|
||||
@ -0,0 +1,39 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer\Parser;
|
||||
|
||||
use Joomla\Component\Finder\Administrator\Indexer\Parser;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
|
||||
* Text Parser class for the Finder indexer package.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
class Txt extends Parser
{
    /**
     * Method to process Text input and extract the plain text.
     *
     * The input is returned exactly as it was received.
     *
     * @param   string  $input  The input to process.
     *
     * @return  string  The plain text input.
     *
     * @since   2.5
     */
    protected function process($input)
    {
        // Nothing to convert or strip for plain text.
        return $input;
    }
}
|
||||
1340
administrator/components/com_finder/src/Indexer/Query.php
Normal file
1340
administrator/components/com_finder/src/Indexer/Query.php
Normal file
File diff suppressed because it is too large
Load Diff
582
administrator/components/com_finder/src/Indexer/Result.php
Normal file
582
administrator/components/com_finder/src/Indexer/Result.php
Normal file
@ -0,0 +1,582 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer;
|
||||
|
||||
use Joomla\CMS\Component\ComponentHelper;
|
||||
use Joomla\CMS\Tree\ImmutableNodeInterface;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
|
||||
* Result class for the Finder indexer package.
|
||||
*
|
||||
* This class uses magic __get() and __set() methods to prevent properties
|
||||
* being added that might confuse the system. All properties not explicitly
|
||||
* declared will be pushed into the elements array and can be accessed
|
||||
* explicitly using the getElement() method.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
class Result implements \Serializable
|
||||
{
|
||||
/**
|
||||
* An array of extra result properties.
|
||||
*
|
||||
* @var array
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $elements = [];
|
||||
|
||||
/**
|
||||
* This array tells the indexer which properties should be indexed and what
|
||||
* weights to use for those properties.
|
||||
*
|
||||
* @var array
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $instructions = [
|
||||
Indexer::TITLE_CONTEXT => ['title', 'subtitle', 'id'],
|
||||
Indexer::TEXT_CONTEXT => ['summary', 'body'],
|
||||
Indexer::META_CONTEXT => ['meta', 'list_price', 'sale_price'],
|
||||
Indexer::PATH_CONTEXT => ['path', 'alias'],
|
||||
Indexer::MISC_CONTEXT => ['comments'],
|
||||
];
|
||||
|
||||
/**
|
||||
* The indexer will use this data to create taxonomy mapping entries for
|
||||
* the item so that it can be filtered by type, label, category,
|
||||
* or whatever.
|
||||
*
|
||||
* @var array
|
||||
* @since 2.5
|
||||
*/
|
||||
protected $taxonomy = [];
|
||||
|
||||
/**
|
||||
* The content URL.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $url;
|
||||
|
||||
/**
|
||||
* The content route.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $route;
|
||||
|
||||
/**
|
||||
* The content title.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $title;
|
||||
|
||||
/**
|
||||
* The content description.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $description;
|
||||
|
||||
/**
|
||||
* The published state of the result.
|
||||
*
|
||||
* @var integer
|
||||
* @since 2.5
|
||||
*/
|
||||
public $published;
|
||||
|
||||
/**
|
||||
* The content published state.
|
||||
*
|
||||
* @var integer
|
||||
* @since 2.5
|
||||
*/
|
||||
public $state;
|
||||
|
||||
/**
|
||||
* The content access level.
|
||||
*
|
||||
* @var integer
|
||||
* @since 2.5
|
||||
*/
|
||||
public $access;
|
||||
|
||||
/**
|
||||
* The content language.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $language = '*';
|
||||
|
||||
/**
|
||||
* The publishing start date.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $publish_start_date;
|
||||
|
||||
/**
|
||||
* The publishing end date.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $publish_end_date;
|
||||
|
||||
/**
|
||||
* The generic start date.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $start_date;
|
||||
|
||||
/**
|
||||
* The generic end date.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $end_date;
|
||||
|
||||
/**
|
||||
* The item list price.
|
||||
*
|
||||
* @var mixed
|
||||
* @since 2.5
|
||||
*/
|
||||
public $list_price;
|
||||
|
||||
/**
|
||||
* The item sale price.
|
||||
*
|
||||
* @var mixed
|
||||
* @since 2.5
|
||||
*/
|
||||
public $sale_price;
|
||||
|
||||
/**
|
||||
* The content type id. This is set by the adapter.
|
||||
*
|
||||
* @var integer
|
||||
* @since 2.5
|
||||
*/
|
||||
public $type_id;
|
||||
|
||||
/**
|
||||
* The default language for content.
|
||||
*
|
||||
* @var string
|
||||
* @since 3.0.2
|
||||
*/
|
||||
public $defaultLanguage;
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @since 3.0.3
|
||||
*/
|
||||
public function __construct()
{
    // Read the configured site language from the com_languages parameters,
    // using 'en-GB' when no value is stored.
    $languageParams = ComponentHelper::getParams('com_languages');

    $this->defaultLanguage = $languageParams->get('site', 'en-GB');
}
|
||||
|
||||
/**
|
||||
* The magic set method is used to push additional values into the elements
|
||||
* array in order to preserve the cleanliness of the object.
|
||||
*
|
||||
* @param string $name The name of the element.
|
||||
* @param mixed $value The value of the element.
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function __set($name, $value)
{
    // Writes to properties that are not declared on the class end up in the
    // elements array via setElement().
    $this->setElement($name, $value);
}
|
||||
|
||||
/**
|
||||
* The magic get method is used to retrieve additional element values from the elements array.
|
||||
*
|
||||
* @param string $name The name of the element.
|
||||
*
|
||||
* @return mixed The value of the element if set, null otherwise.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function __get($name)
{
    // Reads of undeclared properties are answered by getElement().
    $value = $this->getElement($name);

    return $value;
}
|
||||
|
||||
/**
|
||||
* The magic isset method is used to check the state of additional element values in the elements array.
|
||||
*
|
||||
* @param string $name The name of the element.
|
||||
*
|
||||
* @return boolean True if set, false otherwise.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function __isset($name)
{
    // Same semantics as isset(): a missing element and an element holding
    // null are both reported as "not set".
    return ($this->elements[$name] ?? null) !== null;
}
|
||||
|
||||
/**
|
||||
* The magic unset method is used to unset additional element values in the elements array.
|
||||
*
|
||||
* @param string $name The name of the element.
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function __unset($name)
{
    // unset() on a key that does not exist is a no-op, so no check is needed.
    unset($this->elements[$name]);
}
|
||||
|
||||
/**
|
||||
* Method to retrieve additional element values from the elements array.
|
||||
*
|
||||
* @param string $name The name of the element.
|
||||
*
|
||||
* @return mixed The value of the element if set, null otherwise.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function getElement($name)
{
    // Unknown element names yield null.
    return \array_key_exists($name, $this->elements) ? $this->elements[$name] : null;
}
|
||||
|
||||
/**
|
||||
* Method to retrieve all elements.
|
||||
*
|
||||
* @return array The elements
|
||||
*
|
||||
* @since 3.8.3
|
||||
*/
|
||||
public function getElements()
{
    // The array is returned by value, so changes made by the caller do not
    // affect the stored elements.
    $all = $this->elements;

    return $all;
}
|
||||
|
||||
/**
|
||||
* Method to set additional element values in the elements array.
|
||||
*
|
||||
* @param string $name The name of the element.
|
||||
* @param mixed $value The value of the element.
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function setElement($name, $value)
{
    // An element already stored under the same name is overwritten.
    $this->elements[$name] = $value;
}
|
||||
|
||||
/**
|
||||
* Method to get all processing instructions.
|
||||
*
|
||||
* @return array An array of processing instructions.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function getInstructions()
{
    // Map of context group => list of property names, returned by value.
    $instructions = $this->instructions;

    return $instructions;
}
|
||||
|
||||
/**
|
||||
* Method to add a processing instruction for an item property.
|
||||
*
|
||||
* @param string $group The group to associate the property with.
|
||||
* @param string $property The property to process.
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function addInstruction($group, $property)
{
    // Instructions can only be attached to groups that already exist.
    if (!\array_key_exists($group, $this->instructions)) {
        return;
    }

    // Nothing to do when the property is already registered (strict comparison).
    if (\in_array($property, $this->instructions[$group], true)) {
        return;
    }

    $this->instructions[$group][] = $property;
}
|
||||
|
||||
/**
|
||||
* Method to remove a processing instruction for an item property.
|
||||
*
|
||||
* @param string $group The group to associate the property with.
|
||||
* @param string $property The property to process.
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function removeInstruction($group, $property)
{
    // Instructions can only be removed from groups that exist.
    if (!\array_key_exists($group, $this->instructions)) {
        return;
    }

    // Strict search, matching the strict in_array() check in addInstruction().
    // A loose search could match a different entry (e.g. numeric strings such
    // as '1e1' and '10') and remove the wrong property.
    $key = array_search($property, $this->instructions[$group], true);

    // array_search() returns false when nothing matched; key 0 is a valid hit.
    if ($key !== false) {
        unset($this->instructions[$group][$key]);
    }
}
|
||||
|
||||
/**
|
||||
* Method to get the taxonomy maps for an item.
|
||||
*
|
||||
* @param string $branch The taxonomy branch to get. [optional]
|
||||
*
|
||||
* @return array An array of taxonomy maps.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function getTaxonomy($branch = null)
{
    // No branch requested, or the requested branch is not present:
    // return the complete taxonomy map.
    if ($branch === null || !isset($this->taxonomy[$branch])) {
        return $this->taxonomy;
    }

    // Otherwise return only the nodes of that branch.
    return $this->taxonomy[$branch];
}
|
||||
|
||||
/**
|
||||
* Method to add a taxonomy map for an item.
|
||||
*
|
||||
* @param string $branch The title of the taxonomy branch to add the node to.
|
||||
* @param string $title The title of the taxonomy node.
|
||||
* @param integer $state The published state of the taxonomy node. [optional]
|
||||
* @param integer $access The access level of the taxonomy node. [optional]
|
||||
* @param string $language The language of the taxonomy. [optional]
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function addTaxonomy($branch, $title, $state = 1, $access = 1, $language = '*')
{
    // Reject only titles that are empty after trimming. A plain truthiness
    // test would also reject the valid title "0", and passing null to trim()
    // is deprecated since PHP 8.1, hence the explicit string cast.
    if (trim((string) $title) === '') {
        return;
    }

    // Replace every run of characters outside the allowed set (letters, marks,
    // numbers, initial/final quotes, apostrophe, "+", ",", "-", "." and "_")
    // with a single space.
    $branch = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,_]+#mui', ' ', $branch);

    // Build the flat (non-nested) taxonomy node.
    $node           = new \stdClass();
    $node->title    = $title;
    $node->state    = (int) $state;
    $node->access   = (int) $access;
    $node->language = $language;
    $node->nested   = false;

    // Append the node to its branch; the branch entry is created on first use.
    $this->taxonomy[$branch][] = $node;
}
|
||||
|
||||
/**
|
||||
* Method to add a nested taxonomy map for an item.
|
||||
*
|
||||
* @param string $branch The title of the taxonomy branch to add the node to.
|
||||
* @param ImmutableNodeInterface $contentNode The node object.
|
||||
* @param integer $state The published state of the taxonomy node. [optional]
|
||||
* @param integer $access The access level of the taxonomy node. [optional]
|
||||
* @param string $language The language of the taxonomy. [optional]
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @since 4.0.0
|
||||
*/
|
||||
public function addNestedTaxonomy($branch, ImmutableNodeInterface $contentNode, $state = 1, $access = 1, $language = '*')
{
    // Reject only titles that are empty after trimming. A plain truthiness
    // test would also reject the valid title "0", and passing null to trim()
    // is deprecated since PHP 8.1, hence the explicit string cast.
    if (trim((string) $contentNode->title) === '') {
        return;
    }

    // Replace every run of characters outside the allowed set (letters, marks,
    // numbers, initial/final quotes, apostrophe, "+", ",", "-", "." and "_")
    // with a single space.
    $branch = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,_]+#mui', ' ', $branch);

    // Build the nested taxonomy node; the source node object is kept on it.
    $node           = new \stdClass();
    $node->title    = $contentNode->title;
    $node->state    = (int) $state;
    $node->access   = (int) $access;
    $node->language = $language;
    $node->nested   = true;
    $node->node     = $contentNode;

    // Append the node to its branch; the branch entry is created on first use.
    $this->taxonomy[$branch][] = $node;
}
|
||||
|
||||
/**
|
||||
* Method to set the item language
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @since 3.0
|
||||
*/
|
||||
public function setLanguage()
{
    // Loose comparison: null as well as '' counts as "no language set".
    if ($this->language != '') {
        return;
    }

    // Use the default language determined in the constructor.
    $this->language = $this->defaultLanguage;
}
|
||||
|
||||
/**
|
||||
* Helper function to serialise the data of a Result object
|
||||
*
|
||||
* @return string The serialised data
|
||||
*
|
||||
* @since 4.0.0
|
||||
*/
|
||||
public function serialize()
{
    // Reuse __serialize() so both serialisation paths share one property list.
    $payload = $this->__serialize();

    return serialize($payload);
}
|
||||
|
||||
/**
|
||||
* Helper function to unserialise the data for this object
|
||||
*
|
||||
* @param string $serialized Serialised data to unserialise
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @since 4.0.0
|
||||
*/
|
||||
public function unserialize($serialized): void
{
    // Decode the string, then let __unserialize() restore the properties.
    // NOTE(review): unserialize() should only be given trusted data - confirm
    // where callers obtain $serialized.
    $payload = unserialize($serialized);

    $this->__unserialize($payload);
}
|
||||
|
||||
/**
|
||||
* Magic method used for serializing.
|
||||
*
|
||||
* @since 4.1.3
|
||||
*/
|
||||
public function __serialize(): array
{
    $taxonomy = [];

    foreach ($this->taxonomy as $branch => $nodes) {
        $prepared = [];

        foreach ($nodes as $node) {
            // Flat nodes are stored as they are.
            if (!$node->nested) {
                $prepared[] = $node;

                continue;
            }

            // Nested nodes are cloned so that the attached source node object
            // is dropped from the serialised copy only, not from the live node.
            $copy = clone $node;
            unset($copy->node);
            $prepared[] = $copy;
        }

        $taxonomy[$branch] = $prepared;
    }

    // The position of each value must match the $properties list in
    // self::__unserialize() exactly.
    return [
        $this->access,
        $this->defaultLanguage,
        $this->description,
        $this->elements,
        $this->end_date,
        $this->instructions,
        $this->language,
        $this->list_price,
        $this->publish_end_date,
        $this->publish_start_date,
        $this->published,
        $this->route,
        $this->sale_price,
        $this->start_date,
        $this->state,
        $taxonomy,
        $this->title,
        $this->type_id,
        $this->url,
    ];
}
|
||||
|
||||
/**
|
||||
* Magic method used for unserializing.
|
||||
*
|
||||
* @since 4.1.3
|
||||
*/
|
||||
public function __unserialize(array $serialized): void
{
    // The position of each name must match the array built in
    // self::__serialize() exactly.
    $properties = [
        'access',
        'defaultLanguage',
        'description',
        'elements',
        'end_date',
        'instructions',
        'language',
        'list_price',
        'publish_end_date',
        'publish_start_date',
        'published',
        'route',
        'sale_price',
        'start_date',
        'state',
        'taxonomy',
        'title',
        'type_id',
        'url',
    ];

    foreach ($properties as $k => $v) {
        $this->$v = $serialized[$k];
    }

    // Refresh state and access of every node from the current taxonomy data.
    foreach ($this->taxonomy as $nodes) {
        foreach ($nodes as $node) {
            // addTaxonomy()/addNestedTaxonomy() do not assign an id. Without one,
            // Taxonomy::getTaxonomy() would return the list of all taxonomies
            // instead of a single entry, so keep the serialised values.
            if (empty($node->id)) {
                continue;
            }

            $curTaxonomy = Taxonomy::getTaxonomy($node->id);

            // getTaxonomy() returns false for ids that no longer exist; do not
            // overwrite the node with values read from a non-object.
            if (!\is_object($curTaxonomy)) {
                continue;
            }

            $node->state  = $curTaxonomy->state;
            $node->access = $curTaxonomy->access;
        }
    }
}
|
||||
}
|
||||
514
administrator/components/com_finder/src/Indexer/Taxonomy.php
Normal file
514
administrator/components/com_finder/src/Indexer/Taxonomy.php
Normal file
@ -0,0 +1,514 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer;
|
||||
|
||||
use Joomla\CMS\Factory;
|
||||
use Joomla\CMS\Tree\NodeInterface;
|
||||
use Joomla\Component\Finder\Administrator\Table\MapTable;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
|
||||
* Taxonomy base class for the Finder indexer package.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
class Taxonomy
|
||||
{
|
||||
/**
|
||||
* An internal cache of taxonomy data.
|
||||
*
|
||||
* @var object[]
|
||||
* @since 4.0.0
|
||||
*/
|
||||
public static $taxonomies = [];
|
||||
|
||||
/**
|
||||
* An internal cache of branch data.
|
||||
*
|
||||
* @var object[]
|
||||
* @since 4.0.0
|
||||
*/
|
||||
public static $branches = [];
|
||||
|
||||
/**
|
||||
* An internal cache of taxonomy node data for inserting it.
|
||||
*
|
||||
* @var object[]
|
||||
* @since 2.5
|
||||
*/
|
||||
public static $nodes = [];
|
||||
|
||||
/**
|
||||
* Method to add a branch to the taxonomy tree.
|
||||
*
|
||||
* @param string $title The title of the branch.
|
||||
* @param integer $state The published state of the branch. [optional]
|
||||
* @param integer $access The access state of the branch. [optional]
|
||||
*
|
||||
* @return integer The id of the branch.
|
||||
*
|
||||
* @since 2.5
|
||||
* @throws \RuntimeException on database error.
|
||||
*/
|
||||
public static function addBranch($title, $state = 1, $access = 1)
{
    // Branches are stored under parent id 1 with the wildcard language '*'.
    // NOTE(review): $state is accepted but not used when building the node.
    $branch = (object) [
        'title'     => $title,
        'access'    => $access,
        'parent_id' => 1,
        'language'  => '*',
    ];

    return self::storeNode($branch, 1);
}
|
||||
|
||||
/**
|
||||
* Method to add a node to the taxonomy tree.
|
||||
*
|
||||
* @param string $branch The title of the branch to store the node in.
|
||||
* @param string $title The title of the node.
|
||||
* @param integer $state The published state of the node. [optional]
|
||||
* @param integer $access The access state of the node. [optional]
|
||||
* @param string $language The language of the node. [optional]
|
||||
*
|
||||
* @return integer The id of the node.
|
||||
*
|
||||
* @since 2.5
|
||||
* @throws \RuntimeException on database error.
|
||||
*/
|
||||
public static function addNode($branch, $title, $state = 1, $access = 1, $language = '*')
{
    // Only nodes with state 1 are stored; anything else reports id 0.
    if ($state != 1) {
        return 0;
    }

    // Resolve the id of the branch (addBranch() stores it when necessary).
    $branchId = static::addBranch($branch);

    // The node is stored directly below its branch.
    $child = (object) [
        'title'     => $title,
        'access'    => $access,
        'parent_id' => $branchId,
        'language'  => $language,
    ];

    return self::storeNode($child, $branchId);
}
|
||||
|
||||
/**
|
||||
* Method to add a nested node to the taxonomy tree.
|
||||
*
|
||||
* @param string $branch The title of the branch to store the node in.
|
||||
* @param NodeInterface $node The source-node of the taxonomy node.
|
||||
* @param integer $state The published state of the node. [optional]
|
||||
* @param integer $access The access state of the node. [optional]
|
||||
* @param string $language The language of the node. [optional]
|
||||
* @param integer $branchId ID of a branch if known. [optional]
|
||||
*
|
||||
* @return integer The id of the node.
|
||||
*
|
||||
* @since 4.0.0
|
||||
*/
|
||||
public static function addNestedNode($branch, NodeInterface $node, $state = 1, $access = 1, $language = '*', $branchId = null)
{
    // Only nodes with state 1 are stored; anything else reports id 0.
    if ($state != 1) {
        return 0;
    }

    // Resolve the branch id unless the caller already supplied one.
    $branchId = $branchId ?: static::addBranch($branch);

    $parent = $node->getParent();

    // Values handed to the recursive call for the parent.
    // NOTE(review): these are read from $node, not from $parent - confirm that
    // this is intended.
    $pstate    = $node->state ?? ($node->published ?? $state);
    $paccess   = $node->access ?? $access;
    $planguage = $node->language ?? $language;

    // Without a parent, or below the 'ROOT' node, attach directly to the branch;
    // otherwise store the parent chain first and attach below it.
    $parentId = $branchId;

    if ($parent && $parent->title != 'ROOT') {
        $parentId = self::addNestedNode($branch, $parent, $pstate, $paccess, $planguage, $branchId);
    }

    // The parent could not be stored (id 0), so this node cannot be stored either.
    if (!$parentId) {
        return 0;
    }

    $temp = (object) [
        'title'     => $node->title,
        'access'    => $access,
        'parent_id' => $parentId,
        'language'  => $language,
    ];

    return self::storeNode($temp, $parentId);
}
|
||||
|
||||
/**
|
||||
* A helper method to store a node in the taxonomy
|
||||
*
|
||||
* @param object $node The node data to include
|
||||
* @param integer $parentId The parent id of the node to add.
|
||||
*
|
||||
* @return integer The id of the inserted node.
|
||||
*
|
||||
* @since 4.0.0
|
||||
* @throws \RuntimeException
|
||||
*/
|
||||
protected static function storeNode($node, $parentId)
{
    $cacheKey = $parentId . ':' . $node->title;

    // Serve repeated requests from the static cache.
    if (isset(static::$nodes[$cacheKey])) {
        return static::$nodes[$cacheKey]->id;
    }

    // Look the node up in the table by parent, title and language.
    $db    = Factory::getDbo();
    $query = $db->getQuery(true)
        ->select('*')
        ->from($db->quoteName('#__finder_taxonomy'))
        ->where($db->quoteName('parent_id') . ' = ' . $db->quote($parentId))
        ->where($db->quoteName('title') . ' = ' . $db->quote($node->title))
        ->where($db->quoteName('language') . ' = ' . $db->quote($node->language));

    $db->setQuery($query);

    $existing = $db->loadObject();

    // A stored row with the same access level is reused and cached.
    if ((bool) $existing && $existing->access == $node->access) {
        static::$nodes[$cacheKey] = $existing;

        return static::$nodes[$cacheKey]->id;
    }

    /*
     * Either no row exists yet or the stored access level differs from the
     * requested one. Prepare a table object for the matching case.
     * @todo: use factory?
     */
    $nodeTable = new MapTable($db);

    if (empty($existing)) {
        // No row yet: populate a new node as last child of the parent.
        $nodeTable->title    = $node->title;
        $nodeTable->access   = (int) $node->access;
        $nodeTable->language = $node->language;
        $nodeTable->setLocation((int) $parentId, 'last-child');
    } else {
        // Row exists: populate from the stored row, language from the request.
        $nodeTable->id       = (int) $existing->id;
        $nodeTable->title    = $existing->title;
        $nodeTable->access   = (int) $existing->access;
        $nodeTable->language = $node->language;
        $nodeTable->setLocation($existing->parent_id, 'last-child');
    }

    // Turns a table error into a RuntimeException. Exception errors (as set by
    // \Joomla\CMS\Table\NestedTable) are chained so their details are kept;
    // plain string errors get code 500.
    $fail = static function ($error) {
        if ($error instanceof \Exception) {
            throw new \RuntimeException(
                $error->getMessage(),
                $error->getCode(),
                $error
            );
        }

        throw new \RuntimeException($error, 500);
    };

    // Validate, then persist.
    if (!$nodeTable->check()) {
        $fail($nodeTable->getError());
    }

    if (!$nodeTable->store()) {
        $fail($nodeTable->getError());
    }

    $nodeTable->rebuildPath($nodeTable->id);

    // Cache under the title held by the table object, which is the stored
    // row's title when an existing row was reused.
    $storedKey = $parentId . ':' . $nodeTable->title;

    static::$nodes[$storedKey] = (object) $nodeTable->getProperties();

    return static::$nodes[$storedKey]->id;
}
|
||||
|
||||
/**
|
||||
* Method to add a map entry between a link and a taxonomy node.
|
||||
*
|
||||
* @param integer $linkId The link to map to.
|
||||
* @param integer $nodeId The node to map to.
|
||||
*
|
||||
* @return boolean True on success.
|
||||
*
|
||||
* @since 2.5
|
||||
* @throws \RuntimeException on database error.
|
||||
*/
|
||||
public static function addMap($linkId, $nodeId)
{
    $db = Factory::getDbo();

    // Check whether this link/node pair is already mapped.
    $query = $db->getQuery(true)
        ->select($db->quoteName('link_id'))
        ->from($db->quoteName('#__finder_taxonomy_map'))
        ->where($db->quoteName('link_id') . ' = ' . (int) $linkId)
        ->where($db->quoteName('node_id') . ' = ' . (int) $nodeId);
    $db->setQuery($query);

    // loadResult() runs the query itself. The separate execute() call that used
    // to precede it only sent the same SELECT to the database a second time.
    $id = (int) $db->loadResult();

    // Insert the map only when no row was found.
    if (!$id) {
        $map          = new \stdClass();
        $map->link_id = (int) $linkId;
        $map->node_id = (int) $nodeId;
        $db->insertObject('#__finder_taxonomy_map', $map);
    }

    return true;
}
|
||||
|
||||
/**
|
||||
* Method to get the title of all taxonomy branches.
|
||||
*
|
||||
* @return array An array of branch titles.
|
||||
*
|
||||
* @since 2.5
|
||||
* @throws \RuntimeException on database error.
|
||||
*/
|
||||
public static function getBranchTitles()
{
    $db = Factory::getDbo();

    // Comma separated list of the view levels the current user may see.
    $viewLevels = implode(',', Factory::getUser()->getAuthorisedViewLevels());

    // Branches are the rows with parent id 1; only rows with state 1 that the
    // user is allowed to view are returned.
    $titles = $db->getQuery(true)
        ->select($db->quoteName('title'))
        ->from($db->quoteName('#__finder_taxonomy'))
        ->where($db->quoteName('parent_id') . ' = 1')
        ->where($db->quoteName('state') . ' = 1')
        ->where($db->quoteName('access') . ' IN (' . $viewLevels . ')');

    $db->setQuery($titles);

    return $db->loadColumn();
}
|
||||
|
||||
/**
|
||||
* Method to find a taxonomy node in a branch.
|
||||
*
|
||||
* @param string $branch The branch to search.
|
||||
* @param string $title The title of the node.
|
||||
*
|
||||
* @return mixed Integer id on success, null on no match.
|
||||
*
|
||||
* @since 2.5
|
||||
* @throws \RuntimeException on database error.
|
||||
*/
|
||||
public static function getNodeByTitle($branch, $title)
{
    $db = Factory::getDbo();

    // Comma separated list of the view levels the current user may see.
    $groups = implode(',', Factory::getUser()->getAuthorisedViewLevels());

    // Prefix match on the node title. escape(..., true) also escapes the LIKE
    // wildcards "%" and "_" contained in the title, and quote(..., false)
    // wraps the value without escaping it a second time. The previous
    // quote(escape()) combination escaped the value twice.
    $titlePattern = $db->quote($db->escape($title, true) . '%', false);

    // t1 is the node, t2 its parent branch; both must have state 1 and be
    // viewable by the user.
    $query = $db->getQuery(true)
        ->select('t1.*')
        ->from($db->quoteName('#__finder_taxonomy') . ' AS t1')
        ->join('INNER', $db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.id = t1.parent_id')
        ->where('t1.access IN (' . $groups . ')')
        ->where('t1.state = 1')
        ->where('t1.title LIKE ' . $titlePattern)
        ->where('t2.access IN (' . $groups . ')')
        ->where('t2.state = 1')
        ->where('t2.title = ' . $db->quote($branch));

    // Only the first match is needed.
    $query->setLimit(1);
    $db->setQuery($query);

    // Returns whatever loadObject() yields for the first matching row,
    // i.e. the row data rather than a bare id.
    return $db->loadObject();
}
|
||||
|
||||
/**
|
||||
* Method to remove map entries for a link.
|
||||
*
|
||||
* @param integer $linkId The link to remove.
|
||||
*
|
||||
* @return boolean True on success.
|
||||
*
|
||||
* @since 2.5
|
||||
* @throws \RuntimeException on database error.
|
||||
*/
|
||||
public static function removeMaps($linkId)
{
    $db = Factory::getDbo();

    // Delete every map row that belongs to the given link.
    $delete = $db->getQuery(true)
        ->delete($db->quoteName('#__finder_taxonomy_map'))
        ->where($db->quoteName('link_id') . ' = ' . (int) $linkId);

    $db->setQuery($delete);
    $db->execute();

    return true;
}
|
||||
|
||||
/**
|
||||
* Method to remove orphaned taxonomy maps
|
||||
*
|
||||
* @return integer The number of deleted rows.
|
||||
*
|
||||
* @since 4.2.0
|
||||
* @throws \RuntimeException on database error.
|
||||
*/
|
||||
public static function removeOrphanMaps()
{
    $db = Factory::getDbo();

    // Sub-query listing the ids of all links.
    $linkIds = $db->getQuery(true)
        ->select($db->quoteName('link_id'))
        ->from($db->quoteName('#__finder_links'));

    // Delete every map row whose link id is not in that list.
    $delete = $db->getQuery(true)
        ->delete($db->quoteName('#__finder_taxonomy_map'))
        ->where($db->quoteName('link_id') . ' NOT IN (' . $linkIds . ')');

    $db->setQuery($delete);
    $db->execute();

    return $db->getAffectedRows();
}
|
||||
|
||||
/**
|
||||
* Method to remove orphaned taxonomy nodes and branches.
|
||||
*
|
||||
* @return integer The number of deleted rows.
|
||||
*
|
||||
* @since 2.5
|
||||
* @throws \RuntimeException on database error.
|
||||
*/
|
||||
public static function removeOrphanNodes()
{
    $affectedRows = 0;
    $db           = Factory::getDbo();
    $nodeTable    = new MapTable($db);
    $query        = $db->getQuery(true);

    // Select leaf nodes (lft + 1 = rgt) with a parent id above 1 that have no
    // row in the map table.
    $query->select($db->quoteName('t.id'))
        ->from($db->quoteName('#__finder_taxonomy', 't'))
        ->join('LEFT', $db->quoteName('#__finder_taxonomy_map', 'm') . ' ON ' . $db->quoteName('m.node_id') . '=' . $db->quoteName('t.id'))
        ->where($db->quoteName('t.parent_id') . ' > 1 ')
        ->where('t.lft + 1 = t.rgt')
        ->where($db->quoteName('m.link_id') . ' IS NULL');

    do {
        $db->setQuery($query);
        $nodes = $db->loadColumn();

        $deletedThisPass = 0;

        foreach ($nodes as $node) {
            // Count only nodes that were actually removed. Only an explicit
            // false is treated as a failed delete.
            if ($nodeTable->delete($node) !== false) {
                $deletedThisPass++;
            }
        }

        $affectedRows += $deletedThisPass;

        // The query is repeated after each pass that deleted something. Stop as
        // soon as a pass removes nothing: nodes that cannot be deleted would
        // otherwise be selected again and again and the loop would never end.
    } while ($nodes && $deletedThisPass > 0);

    return $affectedRows;
}
|
||||
|
||||
/**
|
||||
* Get a taxonomy based on its id or all taxonomies
|
||||
*
|
||||
* @param integer $id Id of the taxonomy
|
||||
*
|
||||
* @return object|object[] A taxonomy object or an array of all taxonomies
|
||||
*
|
||||
* @since 4.0.0
|
||||
*/
|
||||
public static function getTaxonomy($id = 0)
{
    // Fill the static cache on first use: all rows ordered by lft, keyed by id.
    if (\count(self::$taxonomies) === 0) {
        $db      = Factory::getDbo();
        $columns = ['id','parent_id','lft','rgt','level','path','title','alias','state','access','language'];

        $query = $db->getQuery(true);
        $query->select($columns)
            ->from($db->quoteName('#__finder_taxonomy'))
            ->order($db->quoteName('lft'));

        $db->setQuery($query);
        self::$taxonomies = $db->loadObjectList('id');
    }

    // An id of 0 (loose comparison) asks for the complete list.
    if ($id == 0) {
        return self::$taxonomies;
    }

    // A single entry, or false when the id is unknown.
    return self::$taxonomies[$id] ?? false;
}
|
||||
|
||||
/**
|
||||
* Get a taxonomy branch object based on its title or all branches
|
||||
*
|
||||
* @param string $title Title of the branch
|
||||
*
|
||||
* @return object|object[] The object with the branch data or an array of all branches
|
||||
*
|
||||
* @since 4.0.0
|
||||
*/
|
||||
public static function getBranch($title = '')
{
    // Fill the branch cache on first use: every taxonomy entry at level 1,
    // keyed by its title.
    if (\count(self::$branches) === 0) {
        foreach (self::getTaxonomy() as $entry) {
            if ($entry->level != 1) {
                continue;
            }

            self::$branches[$entry->title] = $entry;
        }
    }

    // An empty title (loose comparison) asks for all branches.
    if ($title == '') {
        return self::$branches;
    }

    // A single branch, or false when the title is unknown.
    return self::$branches[$title] ?? false;
}
|
||||
}
|
||||
186
administrator/components/com_finder/src/Indexer/Token.php
Normal file
186
administrator/components/com_finder/src/Indexer/Token.php
Normal file
@ -0,0 +1,186 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package Joomla.Administrator
|
||||
* @subpackage com_finder
|
||||
*
|
||||
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
|
||||
* @license GNU General Public License version 2 or later; see LICENSE.txt
|
||||
*/
|
||||
|
||||
namespace Joomla\Component\Finder\Administrator\Indexer;
|
||||
|
||||
use Joomla\String\StringHelper;
|
||||
|
||||
// phpcs:disable PSR1.Files.SideEffects
|
||||
\defined('_JEXEC') or die;
|
||||
// phpcs:enable PSR1.Files.SideEffects
|
||||
|
||||
/**
|
||||
* Token class for the Finder indexer package.
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
class Token
|
||||
{
|
||||
/**
|
||||
* This is the term that will be referenced in the terms table and the
|
||||
* mapping tables.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $term;
|
||||
|
||||
/**
|
||||
* The stem is used to match the root term and produce more potential
|
||||
* matches when searching the index.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $stem;
|
||||
|
||||
/**
|
||||
* If the token is numeric, it is likely to be short and uncommon so the
|
||||
* weight is adjusted to compensate for that situation.
|
||||
*
|
||||
* @var boolean
|
||||
* @since 2.5
|
||||
*/
|
||||
public $numeric;
|
||||
|
||||
/**
|
||||
* If the token is a common term, the weight is adjusted to compensate for
|
||||
* the higher frequency of the term in relation to other terms.
|
||||
*
|
||||
* @var boolean
|
||||
* @since 2.5
|
||||
*/
|
||||
public $common;
|
||||
|
||||
/**
|
||||
* Flag for phrase tokens.
|
||||
*
|
||||
* @var boolean
|
||||
* @since 2.5
|
||||
*/
|
||||
public $phrase;
|
||||
|
||||
/**
|
||||
* The length is used to calculate the weight of the token.
|
||||
*
|
||||
* @var integer
|
||||
* @since 2.5
|
||||
*/
|
||||
public $length;
|
||||
|
||||
/**
|
||||
* The weight is calculated based on token size and whether the token is
|
||||
* considered a common term.
|
||||
*
|
||||
* @var integer
|
||||
* @since 2.5
|
||||
*/
|
||||
public $weight;
|
||||
|
||||
/**
|
||||
* The simple language identifier for the token.
|
||||
*
|
||||
* @var string
|
||||
* @since 2.5
|
||||
*/
|
||||
public $language;
|
||||
|
||||
/**
|
||||
* The container for matches.
|
||||
*
|
||||
* @var array
|
||||
* @since 3.8.12
|
||||
*/
|
||||
public $matches = [];
|
||||
|
||||
/**
|
||||
* Is derived token (from individual words)
|
||||
*
|
||||
* @var boolean
|
||||
* @since 3.8.12
|
||||
*/
|
||||
public $derived;
|
||||
|
||||
/**
|
||||
* The suggested term
|
||||
*
|
||||
* @var string
|
||||
* @since 3.8.12
|
||||
*/
|
||||
public $suggestion;
|
||||
|
||||
/**
|
||||
* The token required flag
|
||||
*
|
||||
* @var boolean
|
||||
* @since 4.3.0
|
||||
*/
|
||||
public $required;
|
||||
|
||||
/**
|
||||
* Method to construct the token object.
|
||||
*
|
||||
* @param mixed $term The term as a string for words or an array for phrases.
|
||||
* @param string $lang The simple language identifier.
|
||||
* @param string $spacer The space separator for phrases. [optional]
|
||||
*
|
||||
* @since 2.5
|
||||
*/
|
||||
public function __construct($term, $lang, $spacer = ' ')
{
    // An empty language identifier falls back to the '*' wildcard.
    $this->language = $lang ? $lang : '*';

    // A non-array term is a single word; an array of words is a phrase.
    if (!\is_array($term)) {
        // Fill in the single-word token.
        $this->term = $term;
        $this->stem = Helper::stem($this->term, $lang);

        // Besides real numbers, strings made only of digits and , . - + count as numeric.
        $this->numeric = is_numeric($this->term) || (bool) preg_match('#^[0-9,.\-\+]+$#', $this->term);

        // Numeric tokens are never looked up as common terms.
        if ($this->numeric) {
            $this->common = false;
        } else {
            $this->common = Helper::isCommon($this->term, $lang);
        }

        $this->phrase = false;
        $this->length = StringHelper::strlen($this->term);

        /*
         * Weight of a single word:
         *
         * 1. Take the length (capped at 15) and divide it by 15.
         * 2. Divide by 8 when the term is common.
         * 3. Multiply by 1.5 when the term is numeric.
         * 4. Round to 4 decimal places.
         */
        $wordWeight = min($this->length, 15) / 15;

        if ($this->common === true) {
            $wordWeight /= 8;
        }

        if ($this->numeric === true) {
            $wordWeight *= 1.5;
        }

        $this->weight = round($wordWeight, 4);

        return;
    }

    // Fill in the phrase token: join the words, then join their stems.
    $this->term = implode($spacer, $term);

    $stems = [];

    foreach ($term as $word) {
        $stems[] = Helper::stem($word, $lang);
    }

    $this->stem    = implode($spacer, $stems);
    $this->numeric = false;
    $this->common  = false;
    $this->phrase  = true;
    $this->length  = StringHelper::strlen($this->term);

    /*
     * Weight of a phrase:
     *
     * 1. Take the length (capped at 30), divide it by 30 and add 1.
     * 2. Round to 4 decimal places.
     */
    $phraseWeight = (min($this->length, 30) / 30) + 1;

    $this->weight = round($phraseWeight, 4);
}
|
||||
}
|
||||
Reference in New Issue
Block a user