primo commit

This commit is contained in:
2024-12-17 17:34:10 +01:00
commit e650f8df99
16435 changed files with 2451012 additions and 0 deletions

View File

@ -0,0 +1,942 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer;
use Joomla\CMS\Factory;
use Joomla\CMS\Plugin\CMSPlugin;
use Joomla\CMS\Table\Table;
use Joomla\Database\DatabaseInterface;
use Joomla\Database\QueryInterface;
use Joomla\Event\DispatcherInterface;
use Joomla\Utilities\ArrayHelper;
// phpcs:disable PSR1.Files.SideEffects
// Terminate immediately unless the _JEXEC constant has been defined.
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
* Prototype adapter class for the Finder indexer package.
*
* @since 2.5
*/
abstract class Adapter extends CMSPlugin
{
/**
 * The context is somewhat arbitrary but it must be unique or there will be
 * conflicts when managing plugin/indexer state. A good best practice is to
 * use the plugin name suffix as the context. For example, if the plugin is
 * named 'plgFinderContent', the context could be 'Content'.
 *
 * It is the key under which this adapter keeps its 'total' and 'offset'
 * progress in the indexer state, and its lower-cased form is compared with
 * the plugin element name in pluginDisable().
 *
 * @var string
 * @since 2.5
 */
protected $context;
/**
 * The extension name. Passed to getUrl(), where it becomes the 'option'
 * value of the item URL.
 *
 * @var string
 * @since 2.5
 */
protected $extension;
/**
 * The sublayout to use when rendering the results. The constructor replaces
 * it with the plugin's 'layout' parameter when that parameter is set. It is
 * also passed to getUrl(), where it becomes the 'view' value of the item URL.
 *
 * @var string
 * @since 2.5
 */
protected $layout;
/**
 * The mime type of the content the adapter indexes. Handed to
 * Helper::addContentType() when the type is registered and copied onto
 * every item returned by getItems().
 *
 * @var string
 * @since 2.5
 */
protected $mime;
/**
 * The access level of an item before save. Populated by checkItemAccess().
 *
 * @var integer
 * @since 2.5
 */
protected $old_access;
/**
 * The access level of a category before save. Populated by
 * checkCategoryAccess().
 *
 * @var integer
 * @since 2.5
 */
protected $old_cataccess;
/**
 * The type of content the adapter indexes. getTypeId() matches it against
 * the 'title' column of #__finder_types.
 *
 * @var string
 * @since 2.5
 */
protected $type_title;
/**
 * The type id of the content. Resolved in the constructor through
 * getTypeId(), or through Helper::addContentType() when no type exists yet
 * and a type title is set.
 *
 * @var integer
 * @since 2.5
 */
protected $type_id;
/**
 * The database object.
 *
 * NOTE(review): never assigned in this class; presumably populated by the
 * parent CMSPlugin constructor - confirm.
 *
 * @var DatabaseInterface
 * @since 2.5
 */
protected $db;
/**
 * The table name. Used in the FROM clause of the state, access and garbage
 * collection queries of this class.
 *
 * @var string
 * @since 2.5
 */
protected $table;
/**
 * The indexer object. Created in the constructor from the adapter's
 * database object.
 *
 * @var Indexer
 * @since 3.0
 */
protected $indexer;
/**
 * The field the published state is stored in. getStateQuery() selects this
 * column of the content table (alias 'a') as 'state'.
 *
 * @var string
 * @since 2.5
 */
protected $state_field = 'state';
/**
 * Creates the adapter and prepares everything the event handlers rely on.
 *
 * After delegating to the parent constructor it resolves the content type
 * id (registering the type through Helper::addContentType() when it is not
 * known yet and a type title is set), applies the optional 'layout' plugin
 * parameter and instantiates the indexer.
 *
 * @param DispatcherInterface $dispatcher The object to observe.
 * @param array $config An array that holds the plugin configuration.
 *
 * @since 2.5
 */
public function __construct(DispatcherInterface $dispatcher, array $config)
{
    parent::__construct($dispatcher, $config);

    // Resolve the type id; an unknown type is only registered when a title exists.
    $this->type_id = $this->getTypeId();

    if (empty($this->type_id)) {
        if (!empty($this->type_title)) {
            $this->type_id = Helper::addContentType($this->type_title, $this->mime);
        }
    }

    // A non-empty 'layout' parameter takes precedence over the default layout.
    $layoutOverride = $this->params->get('layout');

    if ($layoutOverride) {
        $this->layout = $layoutOverride;
    }

    $this->indexer = new Indexer($this->db);
}
/**
 * Lists the events this subscriber listens to.
 *
 * Each event name maps to the handler method of the same name.
 *
 * @return array Event name => handler method name.
 *
 * @since 5.0.0
 */
public static function getSubscribedEvents(): array
{
    $events = [
        'onBeforeIndex',
        'onBuildIndex',
        'onFinderGarbageCollection',
        'onStartIndex',
    ];

    // Keys and values are identical: the handler carries the event's name.
    return \array_combine($events, $events);
}
/**
 * Registers this adapter's workload in the shared indexer state.
 *
 * The number of indexable items is added to the global item total and
 * stored, together with a start offset of 0, under the adapter's context.
 *
 * @return void
 *
 * @since 2.5
 * @throws \Exception on error.
 */
public function onStartIndex()
{
    $state = Indexer::getState();
    $count = (int) $this->getContentCount();

    // Grow the overall total by this adapter's share.
    $state->totalItems += $count;

    // Per-adapter progress: how many items exist and how many are done.
    $state->pluginState[$this->context]['total']  = $count;
    $state->pluginState[$this->context]['offset'] = 0;

    Indexer::setState($state);
}
/**
 * Prepares the adapter for an indexing run.
 *
 * When the current batch is already full, or this adapter has processed
 * all of its items, nothing needs preparing and true is returned.
 * Otherwise the result of setup() is returned.
 *
 * @return boolean True on success.
 *
 * @since 2.5
 * @throws \Exception on error.
 */
public function onBeforeIndex()
{
    $state    = Indexer::getState();
    $progress = $state->pluginState[$this->context];

    // Skip setup when there is no work left for this batch or this adapter.
    $nothingToDo = $state->batchOffset == $state->batchSize
        || $progress['offset'] == $progress['total'];

    return $nothingToDo ? true : $this->setup();
}
/**
 * Indexes one batch of content items.
 *
 * The indexer may call this many times during a run. Each call fetches at
 * most the remaining capacity of the current batch, starting at the
 * adapter's stored offset, indexes every fetched item and writes the
 * updated counters back to the indexer state so progress can be reported.
 *
 * @return boolean True on success.
 *
 * @since 2.5
 * @throws \Exception on error.
 */
public function onBuildIndex()
{
    $state    = Indexer::getState();
    $progress = $state->pluginState[$this->context];

    // Batch already full, or adapter already finished: nothing to index.
    if ($state->batchOffset == $state->batchSize || $progress['offset'] == $progress['total']) {
        return true;
    }

    // Continue where the adapter stopped, limited to the room left in the batch.
    $position  = (int) $progress['offset'];
    $remaining = (int) ($state->batchSize - $state->batchOffset);

    foreach ($this->getItems($position, $remaining) as $entry) {
        $this->index($entry);

        // One more item done for the adapter and the batch, one less overall.
        ++$position;
        ++$state->batchOffset;
        --$state->totalItems;
    }

    // Persist the progress.
    $progress['offset']                 = $position;
    $state->pluginState[$this->context] = $progress;
    Indexer::setState($state);

    return true;
}
/**
 * Removes outdated index entries.
 *
 * Selects the links of this adapter's type whose URL matches the adapter's
 * URL pattern but is not the URL of any row in the content table, and
 * removes each of them through the indexer.
 *
 * @return integer The number of links that were selected for removal.
 *
 * @since 4.2.0
 */
public function onFinderGarbageCollection()
{
    $db     = $this->db;
    $typeId = $this->getTypeId();

    // The id-less URL is the prefix of every item URL; '%' as id gives the LIKE pattern.
    $urlStem    = $db->quote($this->getUrl('', $this->extension, $this->layout));
    $urlPattern = $db->quote($this->getUrl('%', $this->extension, $this->layout));

    // URLs of all rows currently present in the content table.
    $liveUrls = $db->getQuery(true)
        ->select('CONCAT(' . $urlStem . ', id)')
        ->from($db->quoteName($this->table));

    // Links of this type matching the pattern without a live counterpart.
    $staleLinks = $db->getQuery(true)
        ->select($db->quoteName('l.link_id'))
        ->from($db->quoteName('#__finder_links', 'l'))
        ->where($db->quoteName('l.type_id') . ' = ' . $typeId)
        ->where($db->quoteName('l.url') . ' LIKE ' . $urlPattern)
        ->where($db->quoteName('l.url') . ' NOT IN (' . $liveUrls . ')');

    $db->setQuery($staleLinks);
    $linkIds = $db->loadColumn();

    foreach ($linkIds as $linkId) {
        $this->indexer->remove($linkId);
    }

    return \count($linkIds);
}
/**
 * Changes the value of a content item's property in the links table.
 *
 * Used to synchronise published and access states that are changed when
 * not editing an item directly. Only the 'state' and 'access' properties
 * are handled; any other property is ignored and still reported as success.
 *
 * @param string $id The ID of the item to change.
 * @param string $property The property that is being changed.
 * @param integer $value The new value of that property.
 *
 * @return boolean True on success.
 *
 * @since 2.5
 * @throws \Exception on database error.
 */
protected function change($id, $property, $value)
{
    // Anything but 'state' and 'access' is a no-op.
    if (!\in_array($property, ['state', 'access'], true)) {
        return true;
    }

    $db = $this->db;

    // Links are addressed by the item URL.
    $url = $db->quote($this->getUrl($id, $this->extension, $this->layout));

    $update = $db->getQuery(true)
        ->update($db->quoteName('#__finder_links'))
        ->set($db->quoteName($property) . ' = ' . (int) $value)
        ->where($db->quoteName('url') . ' = ' . $url);

    $db->setQuery($update);
    $db->execute();

    return true;
}
/**
 * Method to index an item.
 *
 * Implemented by the concrete adapter. It is called by onBuildIndex() for
 * every item of a batch and by reindex() for a single item; neither caller
 * uses the return value.
 *
 * @param Result $item The item to index as a Result object.
 *
 * @return boolean True on success.
 *
 * @since 2.5
 * @throws \Exception on database error.
 */
abstract protected function index(Result $item);
/**
 * Method to reindex an item.
 *
 * Runs setup(), removes the item's existing index entry, reloads the item
 * through getItem(), indexes it again and finally calls
 * Taxonomy::removeOrphanNodes().
 *
 * @param integer $id The ID of the item to reindex.
 *
 * @return void
 *
 * @since 2.5
 * @throws \Exception on database error.
 */
protected function reindex($id)
{
    // Run the setup method.
    $this->setup();

    // Remove the old item first so the entry is rebuilt from scratch.
    // Taxonomy cleanup is skipped here ($removeTaxonomies = false) and left
    // to the single Taxonomy::removeOrphanNodes() call below.
    $this->remove($id, false);

    // Get the item.
    $item = $this->getItem($id);

    // Index the item.
    $this->index($item);

    Taxonomy::removeOrphanNodes();
}
/**
 * Removes an item from the index.
 *
 * All links stored for the item's URL are removed through the indexer.
 * When no link exists, the 'onFinderIndexAfterDelete' event is triggered
 * with the item id instead.
 *
 * @param string $id The ID of the item to remove.
 * @param bool $removeTaxonomies Remove empty taxonomies
 *
 * @return boolean True on success.
 *
 * @since 2.5
 * @throws \Exception on database error.
 */
protected function remove($id, $removeTaxonomies = true)
{
    $db = $this->db;

    // Links are looked up by the item URL.
    $url = $db->quote($this->getUrl($id, $this->extension, $this->layout));

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('link_id'))
        ->from($db->quoteName('#__finder_links'))
        ->where($db->quoteName('url') . ' = ' . $url);

    $db->setQuery($lookup);
    $linkIds = $db->loadColumn();

    if (!empty($linkIds)) {
        foreach ($linkIds as $linkId) {
            $this->indexer->remove($linkId, $removeTaxonomies);
        }

        return true;
    }

    // Nothing indexed for this item: only announce the deletion.
    Factory::getApplication()->triggerEvent('onFinderIndexAfterDelete', [$id]);

    return true;
}
/**
 * Method to setup the adapter before indexing.
 *
 * Implemented by the concrete adapter. onBeforeIndex() returns this
 * method's result to its caller; reindex() calls it and ignores the result.
 *
 * @return boolean True on success, false on failure.
 *
 * @since 2.5
 * @throws \Exception on database error.
 */
abstract protected function setup();
/**
 * Updates index data after a category's access level changed.
 *
 * For every item in the category, the larger of the item's own access
 * value and the category's access value is written to the index.
 *
 * @param Table $row A Table object
 *
 * @return void
 *
 * @since 2.5
 */
protected function categoryAccessChange($row)
{
    // Load the state/access rows of all items that belong to the category.
    $stateQuery = clone $this->getStateQuery();
    $stateQuery->where('c.id = ' . (int) $row->id);
    $this->db->setQuery($stateQuery);
    $entries = $this->db->loadObjectList();

    foreach ($entries as $entry) {
        $access = max($entry->access, $row->access);

        $this->change((int) $entry->id, 'access', $access);
    }
}
/**
 * Updates index data after the published state of categories changed.
 *
 * An item's indexed state depends on its own state and on the state of
 * its category, so for each category all items are loaded and their state
 * is recalculated with translateState() before it is written to the index.
 *
 * @param array $pks A list of primary key ids of the content that has changed state.
 * @param integer $value The value of the state that the content has been changed to.
 *
 * @return void
 *
 * @since 2.5
 */
protected function categoryStateChange($pks, $value)
{
    foreach ($pks as $categoryId) {
        // Load the states of all items inside this category.
        $stateQuery = clone $this->getStateQuery();
        $stateQuery->where('c.id = ' . (int) $categoryId);
        $this->db->setQuery($stateQuery);
        $entries = $this->db->loadObjectList();

        foreach ($entries as $entry) {
            // Combine the item's own state with the new category state.
            $translated = $this->translateState($entry->state, $value);

            $this->change($entry->id, 'state', $translated);
        }
    }
}
/**
 * Remembers the access level currently stored for a category.
 *
 * The value is read from #__categories and kept in $old_cataccess so a
 * later change can be detected.
 *
 * @param Table $row A Table object
 *
 * @return void
 *
 * @since 2.5
 */
protected function checkCategoryAccess($row)
{
    $db = $this->db;

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('access'))
        ->from($db->quoteName('#__categories'))
        ->where($db->quoteName('id') . ' = ' . (int) $row->id);

    $db->setQuery($lookup);

    // Keep the stored level for a later comparison.
    $this->old_cataccess = $db->loadResult();
}
/**
 * Remembers the access level currently stored for an item.
 *
 * The value is read from the adapter's content table and kept in
 * $old_access so a later change can be detected.
 *
 * @param Table $row A Table object
 *
 * @return void
 *
 * @since 2.5
 */
protected function checkItemAccess($row)
{
    $db = $this->db;

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('access'))
        ->from($db->quoteName($this->table))
        ->where($db->quoteName('id') . ' = ' . (int) $row->id);

    $db->setQuery($lookup);

    // Keep the stored level for a later comparison.
    $this->old_access = $db->loadResult();
}
/**
 * Counts the content items available to index.
 *
 * Returns 0 when getListQuery() yields nothing. A query object is copied
 * and reduced to a COUNT(*) without ordering; any other non-empty query
 * value is executed as it is.
 *
 * @return integer The number of content items available to index.
 *
 * @since 2.5
 * @throws \Exception on database error.
 */
protected function getContentCount()
{
    $listQuery = $this->getListQuery();

    // Without a list query there is nothing to count.
    if (empty($listQuery)) {
        return 0;
    }

    if ($listQuery instanceof QueryInterface) {
        // Work on a copy so the list query itself stays untouched.
        $countQuery = clone $listQuery;
        $countQuery->clear('select')
            ->select('COUNT(*)')
            ->clear('order');

        $listQuery = $countQuery;
    }

    $this->db->setQuery($listQuery);

    return (int) $this->db->loadResult();
}
/**
 * Loads a single content item for indexing.
 *
 * The list query is narrowed to the given id, the row is converted to a
 * Result object and the adapter's type id and layout are attached.
 *
 * @param integer $id The id of the content item.
 *
 * @return Result A Result object.
 *
 * @since 2.5
 * @throws \Exception on database error.
 */
protected function getItem($id)
{
    // Restrict the list query to the requested item.
    $listQuery = $this->getListQuery();
    $listQuery->where('a.id = ' . (int) $id);
    $this->db->setQuery($listQuery);

    // The loaded row is cast to an array before the conversion.
    $result = ArrayHelper::toObject((array) $this->db->loadAssoc(), Result::class);

    $result->type_id = $this->type_id;
    $result->layout  = $this->layout;

    return $result;
}
/**
 * Loads a slice of content items for indexing.
 *
 * Each loaded row is converted to a Result object that carries the
 * adapter's type id, mime type and layout.
 *
 * @param integer $offset The list offset.
 * @param integer $limit The list limit.
 * @param QueryInterface $query A QueryInterface object. [optional]
 *
 * @return Result[] An array of Result objects.
 *
 * @since 2.5
 * @throws \Exception on database error.
 */
protected function getItems($offset, $limit, $query = null)
{
    $listQuery = $this->getListQuery($query)->setLimit($limit, $offset);
    $this->db->setQuery($listQuery);
    $rows = $this->db->loadAssocList();

    // Replace every row by its Result object, keeping the original keys.
    foreach ($rows as $key => $row) {
        $result = ArrayHelper::toObject($row, Result::class);

        $result->type_id = $this->type_id;
        $result->mime    = $this->mime;
        $result->layout  = $this->layout;

        $rows[$key] = $result;
    }

    return $rows;
}
/**
 * Provides the SQL query used to retrieve the list of content items.
 *
 * A supplied query object is returned unchanged; for any other value a
 * new, empty query is created.
 *
 * @param mixed $query A QueryInterface object. [optional]
 *
 * @return QueryInterface A database object.
 *
 * @since 2.5
 */
protected function getListQuery($query = null)
{
    if ($query instanceof QueryInterface) {
        return $query;
    }

    return $this->db->getQuery(true);
}
/**
 * Looks up the element name of a finder plugin.
 *
 * Reads the 'element' column of #__extensions for the given extension id,
 * restricted to the 'finder' folder.
 *
 * @param integer $id The plugin ID
 *
 * @return string|null The plugin type
 *
 * @since 2.5
 */
protected function getPluginType($id)
{
    $db = $this->db;

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('element'))
        ->from($db->quoteName('#__extensions'))
        ->where($db->quoteName('folder') . ' = ' . $db->quote('finder'))
        ->where($db->quoteName('extension_id') . ' = ' . (int) $id);

    $db->setQuery($lookup);

    return $db->loadResult();
}
/**
 * Builds the query that loads the published and access states of an item
 * and of its category.
 *
 * The content table is aliased 'a' and left-joined to #__categories ('c')
 * on the item's catid. The item id is the first selected column.
 *
 * @return QueryInterface A database object.
 *
 * @since 2.5
 */
protected function getStateQuery()
{
    return $this->db->getQuery(true)
        // Item ID
        ->select('a.id')
        // Item and category published state
        ->select('a.' . $this->state_field . ' AS state, c.published AS cat_state')
        // Item and category access levels
        ->select('a.access, c.access AS cat_access')
        ->from($this->table . ' AS a')
        ->join('LEFT', '#__categories AS c ON c.id = a.catid');
}
/**
 * Builds the query clause that selects items to update by time.
 *
 * The returned query only carries a condition on 'a.modified' being at or
 * after the given timestamp.
 *
 * @param string $time The modified timestamp.
 *
 * @return QueryInterface A database object.
 *
 * @since 2.5
 */
protected function getUpdateQueryByTime($time)
{
    $since = $this->db->quote($time);

    return $this->db->getQuery(true)->where('a.modified >= ' . $since);
}
/**
 * Method to get the query clause for getting items to update by id.
 *
 * Every id is cast to an integer before it is placed into the SQL text, so
 * a non-numeric value cannot alter the statement. For an empty list a
 * condition that never matches is used instead of the invalid 'IN()'.
 *
 * @param array $ids The ids to load.
 *
 * @return QueryInterface A database object.
 *
 * @since 2.5
 */
protected function getUpdateQueryByIds($ids)
{
    // Force plain integers; the values end up unquoted in the SQL string.
    $ids = ArrayHelper::toInteger((array) $ids);

    $query = $this->db->getQuery(true);

    // An empty IN() list is a syntax error, so select nothing instead.
    if (empty($ids)) {
        return $query->where('1 = 0');
    }

    // Build an SQL query based on the item ids.
    return $query->where('a.id IN(' . implode(',', $ids) . ')');
}
/**
 * Reads the type id of the adapter's content type.
 *
 * The id is looked up in #__finder_types by the adapter's type title; the
 * result is cast to an integer.
 *
 * @return integer The numeric type id for the content.
 *
 * @since 2.5
 * @throws \Exception on database error.
 */
protected function getTypeId()
{
    $db = $this->db;

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('id'))
        ->from($db->quoteName('#__finder_types'))
        ->where($db->quoteName('title') . ' = ' . $db->quote($this->type_title));

    $db->setQuery($lookup);

    return (int) $db->loadResult();
}
/**
 * Builds the URL for the item. The URL is how we look up the link in the
 * Finder index.
 *
 * @param integer $id The id of the item.
 * @param string $extension The extension the category is in.
 * @param string $view The view for the URL.
 *
 * @return string The URL of the item.
 *
 * @since 2.5
 */
protected function getUrl($id, $extension, $view)
{
    return "index.php?option={$extension}&view={$view}&id={$id}";
}
/**
 * Gets the page title of a menu item that is linked to the content item,
 * if it exists and is set.
 *
 * Only published menu items whose access level is among the current
 * user's authorised view levels are considered. The title is read from the
 * 'page_title' entry of the menu item's JSON params.
 *
 * @param string $url The URL of the item.
 *
 * @return mixed The title on success, null if not found.
 *
 * @since 2.5
 * @throws \Exception on database error.
 */
protected function getItemMenuTitle($url)
{
    // View levels the current user may see, as a comma separated list.
    $viewLevels = implode(',', Factory::getUser()->getAuthorisedViewLevels());

    $db = $this->db;

    $menuQuery = $db->getQuery(true)
        ->select($db->quoteName('params'))
        ->from($db->quoteName('#__menu'))
        ->where($db->quoteName('link') . ' = ' . $db->quote($url))
        ->where($db->quoteName('published') . ' = 1')
        ->where($db->quoteName('access') . ' IN (' . $viewLevels . ')');

    $db->setQuery($menuQuery);
    $rawParams = $db->loadResult();

    // No matching menu item, or one without params.
    if (empty($rawParams)) {
        return null;
    }

    $decoded = json_decode($rawParams);

    // Only a set, non-empty page title counts.
    return !empty($decoded->page_title) ? $decoded->page_title : null;
}
/**
 * Method to update index data on access level changes
 *
 * The larger of the item's new access value and its category's access
 * value is written to the index. When the item's state row cannot be
 * loaded, the item's own access value is used.
 *
 * @param Table $row A Table object
 *
 * @return void
 *
 * @since 2.5
 */
protected function itemAccessChange($row)
{
    $query = clone $this->getStateQuery();
    $query->where('a.id = ' . (int) $row->id);

    // Get the access level.
    $this->db->setQuery($query);
    $item = $this->db->loadObject();

    // No row was loaded: there is no category level to compare against, so
    // avoid reading a property of null and keep the item's own level.
    $temp = $item === null ? $row->access : max($row->access, $item->cat_access);

    // Update the item.
    $this->change((int) $row->id, 'access', $temp);
}
/**
 * Method to update index data on published state changes
 *
 * The item's published state is tied to the category published state, so
 * the category state is looked up for each item before the translated
 * state is written to the index. When the item's state row cannot be
 * loaded, the new state is translated without a category state.
 *
 * @param array $pks A list of primary key ids of the content that has changed state.
 * @param integer $value The value of the state that the content has been changed to.
 *
 * @return void
 *
 * @since 2.5
 */
protected function itemStateChange($pks, $value)
{
    foreach ($pks as $pk) {
        $query = clone $this->getStateQuery();
        $query->where('a.id = ' . (int) $pk);

        // Get the published states.
        $this->db->setQuery($query);
        $item = $this->db->loadObject();

        // No row was loaded: pass null so translateState() ignores the
        // category instead of reading a property of null.
        $categoryState = $item === null ? null : $item->cat_state;

        // Translate the state.
        $temp = $this->translateState($value, $categoryState);

        // Update the item.
        $this->change($pk, 'state', $temp);
    }
}
/**
 * Updates index data when a plugin is disabled.
 *
 * Several plugins may be disabled at once, so the given ids are searched
 * for the one whose element name equals this adapter's lower-cased
 * context. When it is found, every item id returned by the state query is
 * removed from the index and the search stops.
 *
 * @param array $pks A list of primary key ids of the content that has changed state.
 *
 * @return void
 *
 * @since 2.5
 */
protected function pluginDisable($pks)
{
    foreach ($pks as $pk) {
        // Not the plugin belonging to this adapter: keep looking.
        if ($this->getPluginType($pk) != strtolower($this->context)) {
            continue;
        }

        // The first column of the state query is the item id.
        $stateQuery = clone $this->getStateQuery();
        $this->db->setQuery($stateQuery);
        $itemIds = $this->db->loadColumn();

        foreach ($itemIds as $itemId) {
            $this->remove($itemId);
        }

        // The matching plugin has been handled.
        break;
    }
}
/**
 * Translates the native content states into states the indexer can use.
 *
 * A category state that is given and equals 0 forces the result to 0.
 * Otherwise state 1 yields 1, state 2 yields 1 unless the 'search_archived'
 * parameter is 0, and every other state yields 0.
 *
 * @param integer $item The item state.
 * @param integer $category The category state. [optional]
 *
 * @return integer The translated indexer state.
 *
 * @since 2.5
 */
protected function translateState($item, $category = null)
{
    // A category in state 0 overrides whatever the item says.
    if ($category !== null && $category == 0) {
        return 0;
    }

    // Published items should always show up in search results.
    if ($item == 1) {
        return 1;
    }

    // Archived items should only show up when the option is enabled.
    if ($item == 2) {
        return $this->params->get('search_archived', 1) == 0 ? 0 : 1;
    }

    // All other states count as unpublished.
    return 0;
}
}

View File

@ -0,0 +1,969 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2022 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer;
use Joomla\CMS\Plugin\CMSPlugin;
use Joomla\CMS\Table\Table;
use Joomla\Database\DatabaseInterface;
use Joomla\Database\QueryInterface;
use Joomla\Event\DispatcherInterface;
use Joomla\Utilities\ArrayHelper;
/**
* Prototype debug adapter class for the Finder indexer package.
* THIS CLASS IS ONLY TO BE USED FOR DEBUGGING PURPOSES! DON'T
* USE IT FOR PRODUCTIVE USE!
*
* @since 5.0.0
* @internal
*/
abstract class DebugAdapter extends CMSPlugin
{
/**
 * The context is somewhat arbitrary but it must be unique or there will be
 * conflicts when managing plugin/indexer state. A good best practice is to
 * use the plugin name suffix as the context. For example, if the plugin is
 * named 'plgFinderContent', the context could be 'Content'.
 *
 * It is the key under which this adapter keeps its 'total' and 'offset'
 * progress in the indexer state.
 *
 * @var string
 * @since 5.0.0
 */
protected $context;
/**
 * The extension name. Passed to getUrl() whenever an item URL is built.
 *
 * @var string
 * @since 5.0.0
 */
protected $extension;
/**
 * The sublayout to use when rendering the results. The constructor replaces
 * it with the plugin's 'layout' parameter when that parameter is set.
 *
 * @var string
 * @since 5.0.0
 */
protected $layout;
/**
 * The mime type of the content the adapter indexes. Handed to
 * Helper::addContentType() when the type is registered.
 *
 * @var string
 * @since 5.0.0
 */
protected $mime;
/**
 * The access level of an item before save. Populated by checkItemAccess().
 *
 * @var integer
 * @since 5.0.0
 */
protected $old_access;
/**
 * The access level of a category before save. Populated by
 * checkCategoryAccess().
 *
 * @var integer
 * @since 5.0.0
 */
protected $old_cataccess;
/**
 * The type of content the adapter indexes. Used as the title when the
 * constructor registers a missing content type.
 *
 * @var string
 * @since 5.0.0
 */
protected $type_title;
/**
 * The type id of the content. Resolved in the constructor through
 * getTypeId(), or through Helper::addContentType() when no type exists yet
 * and a type title is set.
 *
 * @var integer
 * @since 5.0.0
 */
protected $type_id;
/**
 * The database object.
 *
 * NOTE(review): not assigned in the visible part of this class; presumably
 * populated by the parent CMSPlugin constructor - confirm.
 *
 * @var DatabaseInterface
 * @since 5.0.0
 */
protected $db;
/**
 * The table name. Used in the FROM clause of the item access and garbage
 * collection queries.
 *
 * @var string
 * @since 5.0.0
 */
protected $table;
/**
 * The indexer object. Created in the constructor from the adapter's
 * database object.
 *
 * @var Indexer
 * @since 5.0.0
 */
protected $indexer;
/**
 * The field the published state is stored in.
 *
 * @var string
 * @since 5.0.0
 */
protected $state_field = 'state';
/**
 * Creates the adapter and prepares everything the event handlers rely on.
 *
 * After delegating to the parent constructor it resolves the content type
 * id (registering the type through Helper::addContentType() when it is not
 * known yet and a type title is set), applies the optional 'layout' plugin
 * parameter and instantiates the indexer.
 *
 * @param DispatcherInterface $dispatcher The object to observe.
 * @param array $config An array that holds the plugin configuration.
 *
 * @since 5.0.0
 */
public function __construct(DispatcherInterface $dispatcher, array $config)
{
    parent::__construct($dispatcher, $config);

    // Resolve the type id; an unknown type is only registered when a title exists.
    $this->type_id = $this->getTypeId();

    if (empty($this->type_id)) {
        if (!empty($this->type_title)) {
            $this->type_id = Helper::addContentType($this->type_title, $this->mime);
        }
    }

    // A non-empty 'layout' parameter takes precedence over the default layout.
    $layoutOverride = $this->params->get('layout');

    if ($layoutOverride) {
        $this->layout = $layoutOverride;
    }

    $this->indexer = new Indexer($this->db);
}
/**
 * Lists the events this subscriber listens to.
 *
 * Each event name maps to the handler method of the same name.
 *
 * @return array Event name => handler method name.
 *
 * @since 5.0.0
 */
public static function getSubscribedEvents(): array
{
    $events = [
        'onBeforeIndex',
        'onBuildIndex',
        'onFinderGarbageCollection',
        'onStartIndex',
    ];

    // Keys and values are identical: the handler carries the event's name.
    return \array_combine($events, $events);
}
/**
 * Registers this adapter's workload in the shared indexer state.
 *
 * The number of indexable items is added to the global item total and
 * stored, together with a start offset of 0, under the adapter's context.
 *
 * @return void
 *
 * @since 5.0.0
 * @throws \Exception on error.
 */
public function onStartIndex()
{
    $state = Indexer::getState();
    $count = (int) $this->getContentCount();

    // Grow the overall total by this adapter's share.
    $state->totalItems += $count;

    // Per-adapter progress: how many items exist and how many are done.
    $state->pluginState[$this->context]['total']  = $count;
    $state->pluginState[$this->context]['offset'] = 0;

    Indexer::setState($state);
}
/**
 * Prepares the adapter for an indexing run.
 *
 * When the current batch is already full, or this adapter has processed
 * all of its items, nothing needs preparing and true is returned.
 * Otherwise the result of setup() is returned.
 *
 * @return boolean True on success.
 *
 * @since 5.0.0
 * @throws \Exception on error.
 */
public function onBeforeIndex()
{
    $state    = Indexer::getState();
    $progress = $state->pluginState[$this->context];

    // Skip setup when there is no work left for this batch or this adapter.
    $nothingToDo = $state->batchOffset == $state->batchSize
        || $progress['offset'] == $progress['total'];

    return $nothingToDo ? true : $this->setup();
}
/**
 * Indexes one batch of content items.
 *
 * The indexer may call this many times during a run. Each call fetches at
 * most the remaining capacity of the current batch, starting at the
 * adapter's stored offset, indexes every fetched item and writes the
 * updated counters back to the indexer state so progress can be reported.
 *
 * @return boolean True on success.
 *
 * @since 5.0.0
 * @throws \Exception on error.
 */
public function onBuildIndex()
{
    $state    = Indexer::getState();
    $progress = $state->pluginState[$this->context];

    // Batch already full, or adapter already finished: nothing to index.
    if ($state->batchOffset == $state->batchSize || $progress['offset'] == $progress['total']) {
        return true;
    }

    // Continue where the adapter stopped, limited to the room left in the batch.
    $position  = (int) $progress['offset'];
    $remaining = (int) ($state->batchSize - $state->batchOffset);

    foreach ($this->getItems($position, $remaining) as $entry) {
        $this->index($entry);

        // One more item done for the adapter and the batch, one less overall.
        ++$position;
        ++$state->batchOffset;
        --$state->totalItems;
    }

    // Persist the progress.
    $progress['offset']                 = $position;
    $state->pluginState[$this->context] = $progress;
    Indexer::setState($state);

    return true;
}
/**
 * Removes outdated index entries.
 *
 * Selects the links of this adapter's type whose URL matches the adapter's
 * URL pattern but is not the URL of any row in the content table, and
 * removes each of them through the indexer.
 *
 * @return integer The number of links that were selected for removal.
 *
 * @since 5.0.0
 */
public function onFinderGarbageCollection()
{
    $db     = $this->db;
    $typeId = $this->getTypeId();

    // The id-less URL is the prefix of every item URL; '%' as id gives the LIKE pattern.
    $urlStem    = $db->quote($this->getUrl('', $this->extension, $this->layout));
    $urlPattern = $db->quote($this->getUrl('%', $this->extension, $this->layout));

    // URLs of all rows currently present in the content table.
    $liveUrls = $db->getQuery(true)
        ->select('CONCAT(' . $urlStem . ', id)')
        ->from($db->quoteName($this->table));

    // Links of this type matching the pattern without a live counterpart.
    $staleLinks = $db->getQuery(true)
        ->select($db->quoteName('l.link_id'))
        ->from($db->quoteName('#__finder_links', 'l'))
        ->where($db->quoteName('l.type_id') . ' = ' . $typeId)
        ->where($db->quoteName('l.url') . ' LIKE ' . $urlPattern)
        ->where($db->quoteName('l.url') . ' NOT IN (' . $liveUrls . ')');

    $db->setQuery($staleLinks);
    $linkIds = $db->loadColumn();

    foreach ($linkIds as $linkId) {
        $this->indexer->remove($linkId);
    }

    return \count($linkIds);
}
/**
 * Changes the value of a content item's property in the links table.
 *
 * Used to synchronise published and access states that are changed when
 * not editing an item directly. Only the 'state' and 'access' properties
 * are handled; any other property is ignored and still reported as success.
 *
 * @param string $id The ID of the item to change.
 * @param string $property The property that is being changed.
 * @param integer $value The new value of that property.
 *
 * @return boolean True on success.
 *
 * @since 5.0.0
 * @throws \Exception on database error.
 */
protected function change($id, $property, $value)
{
    // Anything but 'state' and 'access' is a no-op.
    if (!\in_array($property, ['state', 'access'], true)) {
        return true;
    }

    $db = $this->db;

    // Links are addressed by the item URL.
    $url = $db->quote($this->getUrl($id, $this->extension, $this->layout));

    $update = $db->getQuery(true)
        ->update($db->quoteName('#__finder_links'))
        ->set($db->quoteName($property) . ' = ' . (int) $value)
        ->where($db->quoteName('url') . ' = ' . $url);

    $db->setQuery($update);
    $db->execute();

    return true;
}
/**
 * Method to index an item.
 *
 * Implemented by the concrete adapter. It is called by onBuildIndex() for
 * every item of a batch and by reindex() for a single item; neither caller
 * uses the return value.
 *
 * @param Result $item The item to index as a Result object.
 *
 * @return boolean True on success.
 *
 * @since 5.0.0
 * @throws \Exception on database error.
 */
abstract protected function index(Result $item);
/**
 * Rebuilds the index entry of a single item.
 *
 * Runs setup(), removes the existing entry without taxonomy cleanup,
 * reloads the item through getItem(), indexes it and finally calls
 * Taxonomy::removeOrphanNodes().
 *
 * @param integer $id The ID of the item to reindex.
 *
 * @return void
 *
 * @since 5.0.0
 * @throws \Exception on database error.
 */
protected function reindex($id)
{
    $this->setup();

    // Drop the old entry; taxonomy cleanup is left to the call at the end.
    $this->remove($id, false);

    // Load the current version of the item and index it.
    $this->index($this->getItem($id));

    Taxonomy::removeOrphanNodes();
}
/**
 * Removes an item from the index.
 *
 * All links stored for the item's URL are removed through the indexer.
 * When no link exists, the 'onFinderIndexAfterDelete' event is triggered
 * with the item id instead.
 *
 * @param string $id The ID of the item to remove.
 * @param bool $removeTaxonomies Remove empty taxonomies
 *
 * @return boolean True on success.
 *
 * @since 5.0.0
 * @throws \Exception on database error.
 */
protected function remove($id, $removeTaxonomies = true)
{
    $db = $this->db;

    // Links are looked up by the item URL.
    $url = $db->quote($this->getUrl($id, $this->extension, $this->layout));

    $lookup = $db->getQuery(true)
        ->select($db->quoteName('link_id'))
        ->from($db->quoteName('#__finder_links'))
        ->where($db->quoteName('url') . ' = ' . $url);

    $db->setQuery($lookup);
    $linkIds = $db->loadColumn();

    if (!empty($linkIds)) {
        foreach ($linkIds as $linkId) {
            $this->indexer->remove($linkId, $removeTaxonomies);
        }

        return true;
    }

    // Nothing indexed for this item: only announce the deletion.
    $this->getApplication()->triggerEvent('onFinderIndexAfterDelete', [$id]);

    return true;
}
/**
 * Method to setup the adapter before indexing.
 *
 * Implemented by the concrete adapter. onBeforeIndex() returns this
 * method's result to its caller; reindex() calls it and ignores the result.
 *
 * @return boolean True on success, false on failure.
 *
 * @since 5.0.0
 * @throws \Exception on database error.
 */
abstract protected function setup();
/**
 * Updates index data after a category's access level changed.
 *
 * For every item in the category, the larger of the item's own access
 * value and the category's access value is written to the index.
 *
 * @param Table $row A Table object
 *
 * @return void
 *
 * @since 5.0.0
 */
protected function categoryAccessChange($row)
{
    // Load the state/access rows of all items that belong to the category.
    $stateQuery = clone $this->getStateQuery();
    $stateQuery->where('c.id = ' . (int) $row->id);
    $this->db->setQuery($stateQuery);
    $entries = $this->db->loadObjectList();

    foreach ($entries as $entry) {
        $access = max($entry->access, $row->access);

        $this->change((int) $entry->id, 'access', $access);
    }
}
/**
 * Updates index data after the published state of categories changed.
 *
 * An item's indexed state depends on its own state and on the state of
 * its category, so for each category all items are loaded and their state
 * is recalculated with translateState() before it is written to the index.
 *
 * @param array $pks A list of primary key ids of the content that has changed state.
 * @param integer $value The value of the state that the content has been changed to.
 *
 * @return void
 *
 * @since 5.0.0
 */
protected function categoryStateChange($pks, $value)
{
    foreach ($pks as $categoryId) {
        // Load the states of all items inside this category.
        $stateQuery = clone $this->getStateQuery();
        $stateQuery->where('c.id = ' . (int) $categoryId);
        $this->db->setQuery($stateQuery);
        $entries = $this->db->loadObjectList();

        foreach ($entries as $entry) {
            // Combine the item's own state with the new category state.
            $translated = $this->translateState($entry->state, $value);

            $this->change($entry->id, 'state', $translated);
        }
    }
}
/**
 * Remember the access level currently stored for a category.
 *
 * The value read from #__categories is kept in $this->old_cataccess so that a
 * later comparison can tell whether the access level changed.
 *
 * @param   Table  $row  A Table object; only its id is read.
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function checkCategoryAccess($row)
{
    $db = $this->db;

    $accessQuery = $db->getQuery(true)
        ->select($db->quoteName('access'))
        ->from($db->quoteName('#__categories'))
        ->where($db->quoteName('id') . ' = ' . (int) $row->id);

    $db->setQuery($accessQuery);

    // Keep the stored value for a later "did it change?" comparison.
    $this->old_cataccess = $db->loadResult();
}
/**
 * Remember the access level currently stored for an item.
 *
 * The value read from the adapter's table ($this->table) is kept in
 * $this->old_access so that a later comparison can tell whether it changed.
 *
 * @param   Table  $row  A Table object; only its id is read.
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function checkItemAccess($row)
{
    $db = $this->db;

    $accessQuery = $db->getQuery(true)
        ->select($db->quoteName('access'))
        ->from($db->quoteName($this->table))
        ->where($db->quoteName('id') . ' = ' . (int) $row->id);

    $db->setQuery($accessQuery);

    // Keep the stored value for a later "did it change?" comparison.
    $this->old_access = $db->loadResult();
}
/**
 * Count the content items that are available to index.
 *
 * The adapter's list query is reused; when it is a QueryInterface a copy is
 * reduced to a COUNT(*) without ordering before it is executed.
 *
 * @return  integer  The number of content items available to index (0 when there is no list query).
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function getContentCount()
{
    $listQuery = $this->getListQuery();

    // Without a list query there is nothing to count.
    if (empty($listQuery)) {
        return 0;
    }

    if ($listQuery instanceof QueryInterface) {
        // Work on a copy: swap the select list for a count and drop the ordering.
        $listQuery = clone $listQuery;
        $listQuery->clear('select')
            ->select('COUNT(*)')
            ->clear('order');
    }

    $this->db->setQuery($listQuery);

    return (int) $this->db->loadResult();
}
/**
 * Load a single content item as a Result object.
 *
 * The list query is narrowed to the given id, the row is converted to a
 * Result and the adapter's type id and layout are set on it.
 *
 * @param   integer  $id  The id of the content item.
 *
 * @return  Result  A Result object.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function getItem($id)
{
    // Narrow the list query down to the requested row.
    $listQuery = $this->getListQuery();
    $listQuery->where('a.id = ' . (int) $id);

    $this->db->setQuery($listQuery);
    $row = $this->db->loadAssoc();

    // The array cast lets an empty lookup still produce a Result object.
    $result = ArrayHelper::toObject((array) $row, Result::class);

    $result->type_id = $this->type_id;
    $result->layout  = $this->layout;

    return $result;
}
/**
 * Load a page of content items as Result objects.
 *
 * Each row returned by the (limited) list query is converted to a Result and
 * gets the adapter's type id, mime type and layout.
 *
 * @param   integer         $offset  The list offset.
 * @param   integer         $limit   The list limit.
 * @param   QueryInterface  $query   A QueryInterface object. [optional]
 *
 * @return  Result[]  An array of Result objects.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function getItems($offset, $limit, $query = null)
{
    $this->db->setQuery($this->getListQuery($query)->setLimit($limit, $offset));
    $rows = $this->db->loadAssocList();

    // Replace every raw row in place with its Result representation.
    foreach ($rows as $index => $row) {
        $result = ArrayHelper::toObject($row, Result::class);

        $result->type_id = $this->type_id;
        $result->mime    = $this->mime;
        $result->layout  = $this->layout;

        $rows[$index] = $result;
    }

    return $rows;
}
/**
 * Method to get the SQL query used to retrieve the list of content items.
 *
 * This implementation hands back the supplied query when it is a
 * QueryInterface and otherwise creates a new, empty query.
 *
 * @param   mixed  $query  A QueryInterface object. [optional]
 *
 * @return  QueryInterface  A database object.
 *
 * @since   5.0.0
 */
protected function getListQuery($query = null)
{
    // Reuse the caller's query when one was actually supplied.
    if ($query instanceof QueryInterface) {
        return $query;
    }

    return $this->db->getQuery(true);
}
/**
 * Method to get the plugin type.
 *
 * The value returned is the `element` column of the #__extensions row with
 * the given extension id.
 *
 * @param   integer  $id  The plugin ID
 *
 * @return  string  The plugin type
 *
 * @since   5.0.0
 */
protected function getPluginType($id)
{
    $db = $this->db;

    $elementQuery = $db->getQuery(true)
        ->select($db->quoteName('element'))
        ->from($db->quoteName('#__extensions'))
        ->where($db->quoteName('extension_id') . ' = ' . (int) $id);

    $db->setQuery($elementQuery);

    return $db->loadResult();
}
/**
 * Build the query that loads the published and access states of an item
 * together with those of its category.
 *
 * Selected columns: a.id, the item state (aliased `state`), the category
 * published flag (`cat_state`), a.access and the category access
 * (`cat_access`). Categories are LEFT JOINed on a.catid.
 *
 * @return  QueryInterface  A database object.
 *
 * @since   5.0.0
 */
protected function getStateQuery()
{
    return $this->db->getQuery(true)
        // Item ID
        ->select('a.id')
        // Item and category published state
        ->select('a.' . $this->state_field . ' AS state, c.published AS cat_state')
        // Item and category access levels
        ->select('a.access, c.access AS cat_access')
        ->from($this->table . ' AS a')
        ->join('LEFT', '#__categories AS c ON c.id = a.catid');
}
/**
 * Build the WHERE clause that selects items modified at or after a given time.
 *
 * Only the condition on a.modified is set; no select or from clause is added here.
 *
 * @param   string  $time  The modified timestamp.
 *
 * @return  QueryInterface  A database object.
 *
 * @since   5.0.0
 */
protected function getUpdateQueryByTime($time)
{
    $quotedTime = $this->db->quote($time);

    return $this->db->getQuery(true)
        ->where('a.modified >= ' . $quotedTime);
}
/**
 * Method to get the query clause for getting items to update by id.
 *
 * Every id is cast to an integer before it is placed into the SQL string,
 * in line with the (int) casts used for ids elsewhere in this class. An
 * empty list produces a condition that matches no rows instead of the
 * syntactically invalid "IN()".
 *
 * @param   array  $ids  The ids to load.
 *
 * @return  QueryInterface  A database object.
 *
 * @since   5.0.0
 */
protected function getUpdateQueryByIds($ids)
{
    // Never let caller-supplied text reach the SQL string unescaped.
    $ids = array_map('intval', (array) $ids);

    $query = $this->db->getQuery(true);

    if ($ids === []) {
        // Nothing was asked for, so make sure nothing is selected.
        return $query->where('1 = 0');
    }

    return $query->where('a.id IN(' . implode(',', $ids) . ')');
}
/**
 * Look up the numeric type id of the adapter's content type.
 *
 * The id is read from #__finder_types by matching the title column against
 * $this->type_title.
 *
 * @return  integer  The numeric type id for the content.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function getTypeId()
{
    $db = $this->db;

    $typeQuery = $db->getQuery(true)
        ->select($db->quoteName('id'))
        ->from($db->quoteName('#__finder_types'))
        ->where($db->quoteName('title') . ' = ' . $db->quote($this->type_title));

    $db->setQuery($typeQuery);

    return (int) $db->loadResult();
}
/**
 * Build the non-SEF URL of an item. This URL is the key used to look the
 * link up in the Finder index.
 *
 * @param   integer  $id         The id of the item.
 * @param   string   $extension  The extension the category is in.
 * @param   string   $view       The view for the URL.
 *
 * @return  string  The URL of the item.
 *
 * @since   5.0.0
 */
protected function getUrl($id, $extension, $view)
{
    // The values are inserted verbatim; no URL encoding is applied.
    return sprintf('index.php?option=%s&view=%s&id=%s', $extension, $view, $id);
}
/**
 * Get the page title of a published menu item that links to the given URL
 * and is visible to the current user's view levels.
 *
 * The title is taken from the `page_title` entry of the menu item's JSON
 * params.
 *
 * @param   string  $url  The URL of the item.
 *
 * @return  mixed  The title on success, null if not found.
 *
 * @since   5.0.0
 * @throws  \Exception on database error.
 */
protected function getItemMenuTitle($url)
{
    // View levels of the current user, as a comma separated list for the IN clause.
    $identity   = $this->getApplication()->getIdentity();
    $viewLevels = implode(',', $identity->getAuthorisedViewLevels());

    $db = $this->db;

    $menuQuery = $db->getQuery(true)
        ->select($db->quoteName('params'))
        ->from($db->quoteName('#__menu'))
        ->where($db->quoteName('link') . ' = ' . $db->quote($url))
        ->where($db->quoteName('published') . ' = 1')
        ->where($db->quoteName('access') . ' IN (' . $viewLevels . ')');

    $db->setQuery($menuQuery);
    $rawParams = $db->loadResult();

    // No matching menu item, or one without params.
    if (empty($rawParams)) {
        return null;
    }

    $decoded = json_decode($rawParams);

    // Only a non-empty page title counts.
    return !empty($decoded->page_title) ? $decoded->page_title : null;
}
/**
 * Method to update index data on access level changes.
 *
 * The indexed access level becomes the larger of the item's new access
 * value and the access value of its category. When no category access is
 * available (the lookup returned no row, or the LEFT JOIN produced no
 * category) the item's own access value is used as is.
 *
 * @param   Table  $row  A Table object; its id and access properties are read.
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function itemAccessChange($row)
{
    $query = clone $this->getStateQuery();
    $query->where('a.id = ' . (int) $row->id);

    // Get the access level.
    $this->db->setQuery($query);
    $item = $this->db->loadObject();

    // isset() covers both "no row found" and "no category access" without
    // dereferencing a missing object.
    $temp = isset($item->cat_access)
        ? max($row->access, $item->cat_access)
        : $row->access;

    // Update the item.
    $this->change((int) $row->id, 'access', $temp);
}
/**
 * Method to update index data on published state changes.
 *
 * For each id the category's published state is looked up and combined with
 * the new item state through translateState(). When the lookup returns no
 * row or no category state, the category is left out of the translation.
 *
 * @param   array    $pks    A list of primary key ids of the content that has changed state.
 * @param   integer  $value  The value of the state that the content has been changed to.
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function itemStateChange($pks, $value)
{
    /*
     * The item's published state is tied to the category
     * published state so we need to look up all published states
     * before we change anything.
     */
    foreach ($pks as $pk) {
        $query = clone $this->getStateQuery();
        $query->where('a.id = ' . (int) $pk);

        // Get the published states.
        $this->db->setQuery($query);
        $item = $this->db->loadObject();

        // Avoid dereferencing a missing row; null tells translateState()
        // to ignore the category.
        $categoryState = $item->cat_state ?? null;

        // Translate the state.
        $temp = $this->translateState($value, $categoryState);

        // Update the item.
        $this->change($pk, 'state', $temp);
    }
}
/**
 * Remove this adapter's items from the index when its plugin is disabled.
 *
 * Several plugins can be disabled in one go, so each id is first checked
 * against this adapter's (lower-cased) context before anything is removed.
 *
 * @param   array  $pks  A list of primary key ids of the plugins that have changed state.
 *
 * @return  void
 *
 * @since   5.0.0
 */
protected function pluginDisable($pks)
{
    foreach ($pks as $pluginId) {
        // Skip plugins that are not the one behind this adapter.
        if ($this->getPluginType($pluginId) != strtolower($this->context)) {
            continue;
        }

        // The first column of the state query is the item id.
        $stateQuery = clone $this->getStateQuery();
        $this->db->setQuery($stateQuery);
        $itemIds = $this->db->loadColumn();

        foreach ($itemIds as $itemId) {
            $this->remove($itemId);
        }
    }
}
/**
 * Translate native content states into the two states the indexer uses.
 *
 * Item states 1 and 2 yield 1; every other state yields 0. When a category
 * state is given and equals 0, the result is 0 regardless of the item state.
 *
 * @param   integer  $item      The item state.
 * @param   integer  $category  The category state. [optional]
 *
 * @return  integer  The translated indexer state.
 *
 * @since   5.0.0
 */
protected function translateState($item, $category = null)
{
    // A category in state 0 overrides whatever state the item has.
    $categoryBlocks = $category !== null && $category == 0;
    $effective      = $categoryBlocks ? 0 : $item;

    // Published (1) and archived (2) count as published for the indexer.
    return \in_array($effective, [1, 2]) ? 1 : 0;
}
/**
 * Debug method to set the used indexer.
 *
 * Replaces the indexer instance held in $this->indexer with the one given.
 *
 * @param   Indexer  $indexer  Indexer object
 *
 * @return  void
 *
 * @since   5.0.0
 */
public function setIndexer(Indexer $indexer)
{
$this->indexer = $indexer;
}
/**
 * Debug method that runs this adapter for a single item: setup() is called,
 * the item is loaded through getItem() and then passed to index().
 *
 * @param   mixed  $id  ID to index
 *
 * @return  void
 *
 * @since   5.0.0
 */
public function debug($id)
{
    // The adapter has to be set up before an item can be loaded.
    $this->setup();

    // Load the item and hand it straight to the indexing routine.
    $this->index($this->getItem($id));
}
}

View File

@ -0,0 +1,44 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2022 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer;
/**
 * Debugging indexer class for the Finder indexer package.
 *
 * Its index() method only stores the Result it receives in the public static
 * $item property, where it can be read back afterwards.
 *
 * @since 5.0.0
 * @internal
 */
class DebugIndexer extends Indexer
{
/**
 * The result object from the last call to self::index()
 *
 * @var Result
 *
 * @since 5.0.0
 */
public static $item;
/**
 * Stub for index() in indexer class
 *
 * @param Result $item Result object to index
 * @param string $format Format to index (not used by this stub)
 *
 * @return void
 *
 * @since 5.0.0
 */
public function index($item, $format = 'html')
{
// Only remember the item; $format is not used here.
self::$item = $item;
}
}

View File

@ -0,0 +1,492 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer;
use Joomla\CMS\Component\ComponentHelper;
use Joomla\CMS\Event\Finder\PrepareContentEvent;
use Joomla\CMS\Factory;
use Joomla\CMS\Language\Multilanguage;
use Joomla\CMS\Plugin\PluginHelper;
use Joomla\CMS\Table\Table;
use Joomla\Component\Fields\Administrator\Helper\FieldsHelper;
use Joomla\Registry\Registry;
use Joomla\String\StringHelper;
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
* Helper class for the Finder indexer package.
*
* @since 2.5
*/
class Helper
{
// Values of a custom field's "searchindex" parameter, as evaluated by addCustomFields().
// 0: the field is neither indexed nor added as a taxonomy (also the fallback when the parameter is unset).
public const CUSTOMFIELDS_DONT_INDEX = 0;
// 1: the field value is added to the search index only.
public const CUSTOMFIELDS_ADD_TO_INDEX = 1;
// 2: the field value is added as a taxonomy only.
public const CUSTOMFIELDS_ADD_TO_TAXONOMY = 2;
// 3: the field value is added to the search index and as a taxonomy.
public const CUSTOMFIELDS_ADD_TO_BOTH = 3;
/**
 * Convert raw input into plain text using the parser registered for a format.
 *
 * @param   string  $input   The raw input.
 * @param   string  $format  The format of the input. [optional]
 *
 * @return  string  The parsed input.
 *
 * @since   2.5
 * @throws  \Exception on invalid parser.
 */
public static function parse($input, $format = 'html')
{
    // Resolve the parser for the format first, then let it do the work.
    $parser = Parser::getInstance($format);

    return $parser->parse($input);
}
/**
 * Method to tokenize a text string.
 *
 * The input is split into terms by the Language object for $lang (or by the
 * configured default language when multilanguage is off or $lang is '*').
 * In phrase mode with more than one term a single Token is built from all
 * terms. Otherwise one Token per term is built and, when the com_finder
 * "tuplecount" parameter is greater than 1, additional derived Tokens for
 * runs of 2 up to "tuplecount" consecutive terms are appended.
 *
 * Configuration and created tokens are kept in static variables, so they are
 * shared by all calls within the same request/process.
 *
 * @param string $input The input to tokenize.
 * @param string $lang The language of the input.
 * @param boolean $phrase Flag to indicate whether input could be a phrase. [optional]
 *
 * @return Token[] An array of Token objects.
 *
 * @since 2.5
 */
public static function tokenize($input, $lang, $phrase = false)
{
// $cache: tokens per language, keyed by term (or by '::'-joined terms for tuples).
static $cache = [], $tuplecount;
static $multilingual;
static $defaultLanguage;
// Read the tuple length once; it defaults to 1 (single-word tokens only).
if (!$tuplecount) {
$params = ComponentHelper::getParams('com_finder');
$tuplecount = $params->get('tuplecount', 1);
}
// First call only: determine the multilanguage flag and the default language object.
if (\is_null($multilingual)) {
$multilingual = Multilanguage::isEnabled();
$config = ComponentHelper::getParams('com_finder');
// language_default: '' => generic '*', '-1' => site default language, anything else => that language.
if ($config->get('language_default', '') == '') {
$defaultLang = '*';
} elseif ($config->get('language_default', '') == '-1') {
$defaultLang = self::getDefaultLanguage();
} else {
$defaultLang = $config->get('language_default');
}
/*
* The default language always has the language code '*'.
* In order to not overwrite the language code of the language
* object that we are using, we are cloning it here.
*/
$obj = Language::getInstance($defaultLang);
$defaultLanguage = clone $obj;
$defaultLanguage->language = '*';
}
// Pick the language handler that splits the input into terms.
if (!$multilingual || $lang == '*') {
$language = $defaultLanguage;
} else {
$language = Language::getInstance($lang);
}
if (!isset($cache[$lang])) {
$cache[$lang] = [];
}
$tokens = [];
$terms = $language->tokenise($input);
// @todo: array_filter removes any number 0's from the terms. Not sure this is entirely intended
$terms = array_filter($terms);
// Re-index so the terms can be addressed by position below.
$terms = array_values($terms);
/*
* If we have to handle the input as a phrase, that means we don't
* tokenize the individual terms and we do not create the two and three
* term combinations. The phrase must contain more than one word!
*/
if ($phrase === true && \count($terms) > 1) {
// Create tokens from the phrase.
$tokens[] = new Token($terms, $language->language, $language->spacer);
} else {
// Create tokens from the terms.
for ($i = 0, $n = \count($terms); $i < $n; $i++) {
if (isset($cache[$lang][$terms[$i]])) {
$tokens[] = $cache[$lang][$terms[$i]];
} else {
$token = new Token($terms[$i], $language->language);
$tokens[] = $token;
$cache[$lang][$terms[$i]] = $token;
}
}
// Create multi-word phrase tokens from the individual words.
if ($tuplecount > 1) {
// $n is fixed before the loop, so only the single-word tokens are used as starting points.
for ($i = 0, $n = \count($tokens); $i < $n; $i++) {
$temp = [$tokens[$i]->term];
// Create tokens for 2 to $tuplecount length phrases
for ($j = 1; $j < $tuplecount; $j++) {
// Stop when the tuple would run past the last single-word token.
if ($i + $j >= $n || !isset($tokens[$i + $j])) {
break;
}
$temp[] = $tokens[$i + $j]->term;
$key = implode('::', $temp);
if (isset($cache[$lang][$key])) {
$tokens[] = $cache[$lang][$key];
} else {
$token = new Token($temp, $language->language, $language->spacer);
// Mark the tuple as derived from other tokens.
$token->derived = true;
$tokens[] = $token;
$cache[$lang][$key] = $token;
}
}
}
}
}
// Prevent the cache to fill up the memory
while (\count($cache[$lang]) > 1024) {
/**
* We want to cache the most common words/tokens. At the same time
* we don't want to cache too much. The most common words will also
* be early in the text, so we are dropping all terms/tokens which
* have been cached later.
*/
array_pop($cache[$lang]);
}
return $tokens;
}
/**
 * Reduce a token to its base word using the stemmer of the given language.
 *
 * When multilanguage is disabled or the language is '*', the stemmer of the
 * language configured in com_finder ("language_default") is used instead.
 * That default is resolved once and kept in a static variable.
 *
 * @param   string  $token  The token to stem.
 * @param   string  $lang   The language of the token.
 *
 * @return  string  The root token.
 *
 * @since   2.5
 */
public static function stem($token, $lang)
{
    static $multilingual;
    static $defaultStemmer;

    if ($multilingual === null) {
        $multilingual = Multilanguage::isEnabled();
        $config       = ComponentHelper::getParams('com_finder');
        $configured   = $config->get('language_default', '');

        // '' => generic handler, '-1' => site default language, otherwise the configured one.
        if ($configured == '') {
            $defaultCode = '*';
        } elseif ($configured == '-1') {
            $defaultCode = self::getDefaultLanguage();
        } else {
            $defaultCode = $config->get('language_default');
        }

        $defaultStemmer = Language::getInstance($defaultCode);
    }

    $useDefault = !$multilingual || $lang == '*';
    $handler    = $useDefault ? $defaultStemmer : Language::getInstance($lang);

    return $handler->stem($token);
}
/**
 * Get the id of a content type, creating the type when it does not exist yet.
 *
 * The rows of #__finder_types are loaded once (keyed by title) and kept in a
 * static variable; newly inserted types are added to that cache as well.
 *
 * @param   string  $title  The type of content. For example: PDF
 * @param   string  $mime   The mime type of the content. For example: PDF [optional]
 *
 * @return  integer  The id of the content type.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
public static function addContentType($title, $mime = null)
{
    static $types;

    $db = Factory::getDbo();

    // Fill the cache on first use (or while it is still empty).
    if (empty($types)) {
        $selectQuery = $db->getQuery(true)
            ->select('*')
            ->from($db->quoteName('#__finder_types'));

        $db->setQuery($selectQuery);
        $types = $db->loadObjectList('title');
    }

    // Known type: nothing to insert.
    if (isset($types[$title])) {
        return (int) $types[$title]->id;
    }

    // A missing mime type is stored as an empty string.
    $mimeValue = $mime ?? '';

    $insertQuery = $db->getQuery(true)
        ->insert($db->quoteName('#__finder_types'))
        ->columns([$db->quoteName('title'), $db->quoteName('mime')])
        ->values($db->quote($title) . ', ' . $db->quote($mimeValue));

    $db->setQuery($insertQuery);
    $db->execute();

    // Remember the new row so the next lookup is served from the cache.
    $types[$title] = (object) [
        'title' => $title,
        'mime'  => $mimeValue,
        'id'    => (int) $db->insertid(),
    ];

    return $types[$title]->id;
}
/**
 * Tell whether a token is one of the common words of a language.
 *
 * When multilanguage is disabled or the language is '*', the language
 * configured in com_finder ("language_default") is used. Common word lists
 * are loaded once per language and kept in a static variable.
 *
 * @param   string  $token  The token to test.
 * @param   string  $lang   The language to reference.
 *
 * @return  boolean  True if common, false otherwise.
 *
 * @since   2.5
 */
public static function isCommon($token, $lang)
{
    static $data = [], $default, $multilingual;

    if ($multilingual === null) {
        $multilingual = Multilanguage::isEnabled();
        $config       = ComponentHelper::getParams('com_finder');
        $configured   = $config->get('language_default', '');

        // '' => generic '*', '-1' => site default language, otherwise the configured one.
        if ($configured == '') {
            $default = '*';
        } elseif ($configured == '-1') {
            $default = self::getPrimaryLanguage(self::getDefaultLanguage());
        } else {
            $default = self::getPrimaryLanguage($config->get('language_default'));
        }
    }

    $useDefault = !$multilingual || $lang == '*';

    if ($useDefault) {
        $lang = $default;
    }

    // Fetch the word list the first time a language is asked for.
    if (!isset($data[$lang])) {
        $data[$lang] = self::getCommonWords($lang);
    }

    return \in_array($token, $data[$lang], true);
}
/**
 * Load the common terms stored for a language.
 *
 * Reads the `term` column of #__finder_terms_common for rows whose
 * `language` column equals the given value.
 *
 * @param   string  $lang  The language to use.
 *
 * @return  array  Array of common terms.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
public static function getCommonWords($lang)
{
    $db = Factory::getDbo();

    $termQuery = $db->getQuery(true);
    $termQuery->select($db->quoteName('term'))
        ->from($db->quoteName('#__finder_terms_common'))
        ->where($db->quoteName('language') . ' = ' . $db->quote($lang));

    $db->setQuery($termQuery);

    return $db->loadColumn();
}
/**
 * Get the default language of the site.
 *
 * The value is the `site` parameter of com_languages, falling back to
 * 'en-GB', and is kept in a static variable after the first lookup.
 *
 * @return  string  The default language string.
 *
 * @since   2.5
 */
public static function getDefaultLanguage()
{
    static $lang;

    // Already resolved earlier in this request.
    if (!empty($lang)) {
        return $lang;
    }

    // com_languages holds the site default language; that is the best guess available.
    $lang = ComponentHelper::getParams('com_languages')->get('site', 'en-GB');

    return $lang;
}
/**
 * Reduce a language/locale key to its primary language part.
 *
 * Uses \Locale::getPrimaryLanguage() when it is callable and otherwise cuts
 * the key at the first '-'. Results are kept in a static variable per key.
 *
 * @param   string  $lang  The language/locale key. For example: en-GB
 *
 * @return  string  The simple language string. For example: en
 *
 * @since   2.5
 */
public static function getPrimaryLanguage($lang)
{
    static $data = [];

    // Served from the per-request cache when possible.
    if (isset($data[$lang])) {
        return $data[$lang];
    }

    if (\is_callable(['Locale', 'getPrimaryLanguage'])) {
        // Let the Locale class do the parsing.
        $primary = \Locale::getPrimaryLanguage($lang);
    } else {
        // Fallback: everything in front of the first dash.
        $primary = StringHelper::substr($lang, 0, StringHelper::strpos($lang, '-'));
    }

    $data[$lang] = $primary;

    return $data[$lang];
}
/**
 * Give finder plugins the chance to add extra data to an item before it is
 * indexed (comments, tags, labels, ...).
 *
 * The finder plugin group is imported and an onPrepareFinderContent event
 * carrying the item as subject is dispatched.
 *
 * @param   Result  $item  The item to index as a Result object.
 *
 * @return  boolean  Always true.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
public static function getContentExtras(Result $item)
{
    $dispatcher = Factory::getApplication()->getDispatcher();

    // Make sure the finder plugins are registered with this dispatcher.
    PluginHelper::importPlugin('finder', null, true, $dispatcher);

    $event = new PrepareContentEvent('onPrepareFinderContent', [
        'subject' => $item,
    ]);

    $dispatcher->dispatch('onPrepareFinderContent', $event);

    return true;
}
/**
 * Add the custom fields of an item to its Result object.
 *
 * Nothing happens when the component named by the part of $context before
 * the first dot has `custom_fields_enable` switched off. Otherwise each
 * field is handled according to its `searchindex` parameter: it is added as
 * a `jsfield_<name>` property with a META_CONTEXT instruction, added as a
 * taxonomy (only when it has a value), both, or neither.
 *
 * @param   Result  $item     Result object to add the custom fields to
 * @param   string  $context  Context of the item in the custom fields
 *
 * @return  void
 *
 * @since   5.0.0
 */
public static function addCustomFields(Result $item, $context)
{
    $component = strstr($context, '.', true);

    if (!ComponentHelper::getParams($component)->get('custom_fields_enable', 1)) {
        return;
    }

    // FieldsHelper only gets a minimal object carrying the item id.
    $subject     = new \stdClass();
    $subject->id = $item->id;

    foreach (FieldsHelper::getFields($context, $subject, true) as $field) {
        $mode   = $field->params->get('searchindex', 0);
        $toBoth = $mode == self::CUSTOMFIELDS_ADD_TO_BOTH;

        // Search index part.
        if ($mode == self::CUSTOMFIELDS_ADD_TO_INDEX || $toBoth) {
            $property        = 'jsfield_' . $field->name;
            $item->$property = $field->value;
            $item->addInstruction(Indexer::META_CONTEXT, $property);
        }

        // Taxonomy part; empty values are not turned into taxonomy entries.
        if (($mode == self::CUSTOMFIELDS_ADD_TO_TAXONOMY || $toBoth) && $field->value) {
            $item->addTaxonomy($field->title, $field->value, $field->state, $field->access, $field->language);
        }
    }
}
/**
 * Run a text through the content plugins via the onContentPrepare event.
 *
 * A Content table object is used as carrier: it receives the text and, when
 * an item is given, the item's properties, elements and context. The event
 * is triggered with the context 'com_finder.indexer'.
 *
 * @param   string    $text    The content to process.
 * @param   Registry  $params  The parameters object. [optional]
 * @param   ?Result   $item    The item which get prepared. [optional]
 *
 * @return  string  The processed content.
 *
 * @since   2.5
 */
public static function prepareContent($text, $params = null, ?Result $item = null)
{
    static $loaded;

    // The content plugin group only has to be imported once.
    if (empty($loaded)) {
        PluginHelper::importPlugin('content');
        $loaded = true;
    }

    // Anything that is not a Registry yet gets wrapped into one.
    if (!($params instanceof Registry)) {
        $params = new Registry($params);
    }

    // Carrier object that is handed to the plugins.
    $content       = Table::getInstance('Content');
    $content->text = $text;

    if ($item) {
        $content->bind((array) $item);
        $content->bind($item->getElements());

        if (!empty($item->context)) {
            $content->context = $item->context;
        }
    }

    Factory::getApplication()->triggerEvent('onContentPrepare', ['com_finder.indexer', &$content, &$params, 0]);

    return $content->text;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,182 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2018 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer;
use Joomla\String\StringHelper;
use Wamania\Snowball\NotFoundException;
use Wamania\Snowball\Stemmer\Stemmer;
use Wamania\Snowball\StemmerFactory;
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
* Language support class for the Finder indexer package.
*
* @since 4.0.0
*/
class Language
{
/**
 * Language support instances container, filled by getInstance() and keyed by
 * the language string that was requested.
 *
 * @var Language[]
 * @since 4.0.0
 */
protected static $instances = [];
/**
 * Language locale of the class. The constructor sets it to '*' when neither
 * a locale argument nor a class default is present.
 *
 * @var string
 * @since 4.0.0
 */
public $language;
/**
 * Spacer to use between terms
 *
 * @var string
 * @since 4.0.0
 */
public $spacer = ' ';
/**
 * The stemmer object. Stays null when the constructor does not find a
 * stemmer for the language; stem() then returns tokens unchanged.
 *
 * @var Stemmer
 * @since 4.0.0
 */
protected $stemmer = null;
/**
 * Method to construct the language object.
 *
 * The locale is taken from the argument, else from a value already present
 * on the object, else '*'. A stemmer is created when the locale is listed
 * in StemmerFactory::LANGS.
 *
 * @param   string  $locale  The locale to use. [optional]
 *
 * @since   4.0.0
 */
public function __construct($locale = null)
{
    // Argument first, then a class default, then the generic handler.
    $this->language = $locale ?? $this->language ?? '*';

    try {
        foreach (StemmerFactory::LANGS as $isoCodes) {
            if (!\in_array($this->language, $isoCodes)) {
                continue;
            }

            $this->stemmer = StemmerFactory::create($this->language);

            break;
        }
    } catch (NotFoundException $e) {
        // We don't have a stemmer for the language
    }
}
/**
 * Get the (shared) language support object for a language.
 *
 * For any language other than '*' a class named after the primary language
 * in the Indexer\Language namespace is preferred when it exists; otherwise a
 * generic Language object for the primary language (or '*') is created.
 * Instances are kept per requested language string.
 *
 * @param   string  $language  The language of the support object.
 *
 * @return  Language  A Language instance.
 *
 * @since   4.0.0
 */
public static function getInstance($language)
{
    if (isset(self::$instances[$language])) {
        return self::$instances[$language];
    }

    $locale   = '*';
    $instance = null;

    if ($language !== '*') {
        $locale = Helper::getPrimaryLanguage($language);
        $class  = '\\Joomla\\Component\\Finder\\Administrator\\Indexer\\Language\\' . ucfirst($locale);

        // A dedicated class for the language wins over the generic handler.
        if (class_exists($class)) {
            $instance = new $class();
        }
    }

    if ($instance === null) {
        $instance = new self($locale);
    }

    self::$instances[$language] = $instance;

    return self::$instances[$language];
}
/**
 * Method to tokenise a text string.
 *
 * The input is lower-cased, stripped of everything that is not part of a
 * term and split on single spaces.
 *
 * @param   string  $input  The input to tokenise.
 *
 * @return  array  An array of term strings.
 *
 * @since   4.0.0
 */
public function tokenise($input)
{
    $quotes = html_entity_decode('&#8216;&#8217;&#39;', ENT_QUOTES, 'UTF-8');

    /*
     * Parsing the string input into terms is a multi-step process.
     *
     * Regexes:
     *  1. Remove everything except letters, numbers, quotes, apostrophe, plus, dash, period, and comma.
     *  2. Remove plus, dash, and comma characters located before letter characters.
     *  3. Remove plus, dash, period, and comma characters located after other characters.
     *  4. Remove plus, period, and comma characters enclosed in alphabetical characters. Ungreedy.
     *  5. Remove orphaned apostrophe, plus, dash, period, and comma characters.
     *  6. Remove orphaned quote characters.
     *  7. Replace the assorted single quotation marks with the ASCII standard single quotation.
     *  8. Remove multiple space characters and replaces with a single space.
     *
     * Note on the character classes: "+-." is a range from U+002B to U+002E
     * and therefore covers plus, comma, dash and period in one go.
     */
    $input = StringHelper::strtolower($input);
    $input = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,]+#mui', ' ', $input);

    // Step 2: the dash is escaped so it is a literal (unescaped, "+-," is the
    // range plus..comma and never matches a dash). The replacement keeps the
    // captured word ($2); only the leading punctuation is dropped.
    $input = preg_replace('#(^|\s)[+\-,]+([\pL\pM]+)#mui', '$1$2', $input);
    $input = preg_replace('#([\pL\pM\pN]+)[+-.,]+(\s|$)#mui', '$1 ', $input);
    $input = preg_replace('#([\pL\pM]+)[+.,]+([\pL\pM]+)#muiU', '$1 $2', $input);
    $input = preg_replace('#(^|\s)[\'+-.,]+(\s|$)#mui', ' ', $input);
    $input = preg_replace('#(^|\s)[\p{Pi}\p{Pf}]+(\s|$)#mui', ' ', $input);
    $input = preg_replace('#[' . $quotes . ']+#mui', '\'', $input);
    $input = preg_replace('#\s+#mui', ' ', $input);
    $input = trim($input);

    // Explode the normalized string to get the terms.
    return explode(' ', $input);
}
/**
 * Method to stem a token.
 *
 * Without a stemmer for the language the token is returned unchanged.
 *
 * @param   string  $token  The token to stem.
 *
 * @return  string  The stemmed token.
 *
 * @since   4.0.0
 */
public function stem($token)
{
    // No stemmer available: the token is its own stem.
    if ($this->stemmer === null) {
        return $token;
    }

    return $this->stemmer->stem($token);
}
}

View File

@ -0,0 +1,934 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2018 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*
* The Greek stemmer was adapted for Joomla! 4 by Nicholas K. Dionysopoulos <nicholas@akeebabackup.com>. This is
* derivative work, based on the Greek stemmer for Drupal, see
* https://github.com/magaras/greek_stemmer/blob/master/mod_stemmer.php
*/
namespace Joomla\Component\Finder\Administrator\Indexer\Language;
use Joomla\Component\Finder\Administrator\Indexer\Language;
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
* Greek language support class for the Finder indexer package.
*
* @since 4.0.0
*/
class El extends Language
{
/**
* Language locale of the class
*
* @var string
* @since 4.0.0
*/
public $language = 'el';
/**
 * Method to construct the language object.
 *
 * Intentionally empty: the parent constructor is not called, so no stemmer
 * is created and $language keeps the 'el' value declared on this class.
 *
 * @param string $locale Not used by this class. [optional]
 *
 * @since 4.0.0
 */
public function __construct($locale = null)
{
// Override parent constructor since we don't need to load an external stemmer
}
/**
 * Method to tokenise a text string. It takes into account the odd punctuation commonly used in Greek text, mapping
 * it to ASCII punctuation before the generic tokeniser of the parent class runs.
 *
 * Reference: http://www.teicrete.gr/users/kutrulis/Glosika/Stixi.htm
 *
 * @param   string  $input  The input to tokenise.
 *
 * @return  array  An array of term strings.
 *
 * @since   4.0.0
 */
public function tokenise($input)
{
    // Replace Greek calligraphic double quotes (various styles) to dumb double quotes
    $input = str_replace(['“', '”', '„', '«', '»'], '"', $input);

    // Replace Greek calligraphic single quotes (various styles) to dumb single quotes.
    // Written as code point escapes (U+2018, U+2019, U+201A) so the search strings
    // cannot be lost to an encoding mishap; empty search strings replace nothing.
    // NOTE(review): the glyphs were missing in this copy of the file and were
    // restored from the comment's description - confirm against upstream.
    $input = str_replace(["\u{2018}", "\u{2019}", "\u{201A}"], "'", $input);

    // Replace the middle dot (ano teleia) with a comma, adequate for the purpose of stemming
    $input = str_replace('·', ',', $input);

    // Dot and dash (τελεία και παύλα), used to denote the end of a context at the end of a paragraph.
    // NOTE(review): the dash glyph was missing in this copy; an en dash (U+2013) is
    // assumed here - confirm against upstream.
    $input = str_replace(".\u{2013}", '.', $input);

    // Ellipsis, two styles (separate dots or single glyph)
    $input = str_replace(['...', '…'], '.', $input);

    // Cross. Marks the death date of a person. Removed.
    $input = str_replace('†', '', $input);

    // Star. Reference, supposition word (in philology), birth date of a person.
    $input = str_replace('*', '', $input);

    // Paragraph. Indicates change of subject.
    $input = str_replace('§', '.', $input);

    // Plus/minus. Shows approximation. Not relevant for the stemmer, hence its conversion to a space.
    $input = str_replace('±', ' ', $input);

    return parent::tokenise($input);
}
/**
 * Method to stem a token.
 *
 * The token is first converted with toUpperCase() (upper case, accents removed), then run through the suffix
 * rules below, and the result is converted back with toLowerCase() using the case map recorded in $wCase.
 *
 * - Stop words are returned without stemming.
 * - Steps S1 to S10 return as soon as one of their suffixes matches.
 * - Steps 1 to 7 are applied one after the other to the same token.
 *
 * @param   string  $token  The token to stem.
 *
 * @return  string  The stemmed token.
 *
 * @since   4.0.0
 */
public function stem($token)
{
    $token = $this->toUpperCase($token, $wCase);

    // Stop-word removal
    // NOTE(review): the entry "ΤΟΣ?Ν" contains a regex quantifier, so it matches ΤΟΝ and ΤΟΣΝ. It looks like a
    // mis-encoded ΤΟΣΗΝ/ΤΟΣΟΝ (both already listed) and is left untouched - confirm against the upstream list.
    $stop_words = '/^(ΕΚΟ|ΑΒΑ|ΑΓΑ|ΑΓΗ|ΑΓΩ|ΑΔΗ|ΑΔΩ|ΑΕ|ΑΕΙ|ΑΘΩ|ΑΙ|ΑΙΚ|ΑΚΗ|ΑΚΟΜΑ|ΑΚΟΜΗ|ΑΚΡΙΒΩΣ|ΑΛΑ|ΑΛΗΘΕΙΑ|ΑΛΗΘΙΝΑ|ΑΛΛΑΧΟΥ|ΑΛΛΙΩΣ|ΑΛΛΙΩΤΙΚΑ|'
        . 'ΑΛΛΟΙΩΣ|ΑΛΛΟΙΩΤΙΚΑ|ΑΛΛΟΤΕ|ΑΛΤ|ΑΛΩ|ΑΜΑ|ΑΜΕ|ΑΜΕΣΑ|ΑΜΕΣΩΣ|ΑΜΩ|ΑΝ|ΑΝΑ|ΑΝΑΜΕΣΑ|ΑΝΑΜΕΤΑΞΥ|ΑΝΕΥ|ΑΝΤΙ|ΑΝΤΙΠΕΡΑ|ΑΝΤΙΣ|ΑΝΩ|ΑΝΩΤΕΡΩ|ΑΞΑΦΝΑ|'
        . 'ΑΠ|ΑΠΕΝΑΝΤΙ|ΑΠΟ|ΑΠΟΨΕ|ΑΠΩ|ΑΡΑ|ΑΡΑΓΕ|ΑΡΕ|ΑΡΚ|ΑΡΚΕΤΑ|ΑΡΛ|ΑΡΜ|ΑΡΤ|ΑΡΥ|ΑΡΩ|ΑΣ|ΑΣΑ|ΑΣΟ|ΑΤΑ|ΑΤΕ|ΑΤΗ|ΑΤΙ|ΑΤΜ|ΑΤΟ|ΑΥΡΙΟ|ΑΦΗ|ΑΦΟΤΟΥ|ΑΦΟΥ|'
        . 'ΑΧ|ΑΧΕ|ΑΧΟ|ΑΨΑ|ΑΨΕ|ΑΨΗ|ΑΨΥ|ΑΩΕ|ΑΩΟ|ΒΑΝ|ΒΑΤ|ΒΑΧ|ΒΕΑ|ΒΕΒΑΙΟΤΑΤΑ|ΒΗΞ|ΒΙΑ|ΒΙΕ|ΒΙΗ|ΒΙΟ|ΒΟΗ|ΒΟΩ|ΒΡΕ|ΓΑ|ΓΑΒ|ΓΑΡ|ΓΕΝ|ΓΕΣ|ΓΗ|ΓΗΝ|ΓΙ|ΓΙΑ|'
        . 'ΓΙΕ|ΓΙΝ|ΓΙΟ|ΓΚΙ|ΓΙΑΤΙ|ΓΚΥ|ΓΟΗ|ΓΟΟ|ΓΡΗΓΟΡΑ|ΓΡΙ|ΓΡΥ|ΓΥΗ|ΓΥΡΩ|ΔΑ|ΔΕ|ΔΕΗ|ΔΕΙ|ΔΕΝ|ΔΕΣ|ΔΗ|ΔΗΘΕΝ|ΔΗΛΑΔΗ|ΔΗΩ|ΔΙ|ΔΙΑ|ΔΙΑΡΚΩΣ|ΔΙΟΛΟΥ|ΔΙΣ|'
        . 'ΔΙΧΩΣ|ΔΟΛ|ΔΟΝ|ΔΡΑ|ΔΡΥ|ΔΡΧ|ΔΥΕ|ΔΥΟ|ΔΩ|ΕΑΜ|ΕΑΝ|ΕΑΡ|ΕΘΗ|ΕΙ|ΕΙΔΕΜΗ|ΕΙΘΕ|ΕΙΜΑΙ|ΕΙΜΑΣΤΕ|ΕΙΝΑΙ|ΕΙΣ|ΕΙΣΑΙ|ΕΙΣΑΣΤΕ|ΕΙΣΤΕ|ΕΙΤΕ|ΕΙΧΑ|ΕΙΧΑΜΕ|'
        . 'ΕΙΧΑΝ|ΕΙΧΑΤΕ|ΕΙΧΕ|ΕΙΧΕΣ|ΕΚ|ΕΚΕΙ|ΕΛΑ|ΕΛΙ|ΕΜΠ|ΕΝ|ΕΝΤΕΛΩΣ|ΕΝΤΟΣ|ΕΝΤΩΜΕΤΑΞΥ|ΕΝΩ|ΕΞ|ΕΞΑΦΝΑ|ΕΞΙ|ΕΞΙΣΟΥ|ΕΞΩ|ΕΟΚ|ΕΠΑΝΩ|ΕΠΕΙΔΗ|ΕΠΕΙΤΑ|ΕΠΗ|'
        . 'ΕΠΙ|ΕΠΙΣΗΣ|ΕΠΟΜΕΝΩΣ|ΕΡΑ|ΕΣ|ΕΣΑΣ|ΕΣΕ|ΕΣΕΙΣ|ΕΣΕΝΑ|ΕΣΗ|ΕΣΤΩ|ΕΣΥ|ΕΣΩ|ΕΤΙ|ΕΤΣΙ|ΕΥ|ΕΥΑ|ΕΥΓΕ|ΕΥΘΥΣ|ΕΥΤΥΧΩΣ|ΕΦΕ|ΕΦΕΞΗΣ|ΕΦΤ|ΕΧΕ|ΕΧΕΙ|'
        . 'ΕΧΕΙΣ|ΕΧΕΤΕ|ΕΧΘΕΣ|ΕΧΟΜΕ|ΕΧΟΥΜΕ|ΕΧΟΥΝ|ΕΧΤΕΣ|ΕΧΩ|ΕΩΣ|ΖΕΑ|ΖΕΗ|ΖΕΙ|ΖΕΝ|ΖΗΝ|ΖΩ|Η|ΗΔΗ|ΗΔΥ|ΗΘΗ|ΗΛΟ|ΗΜΙ|ΗΠΑ|ΗΣΑΣΤΕ|ΗΣΟΥΝ|ΗΤΑ|ΗΤΑΝ|ΗΤΑΝΕ|'
        . 'ΗΤΟΙ|ΗΤΤΟΝ|ΗΩ|ΘΑ|ΘΥΕ|ΘΩΡ|Ι|ΙΑ|ΙΒΟ|ΙΔΗ|ΙΔΙΩΣ|ΙΕ|ΙΙ|ΙΙΙ|ΙΚΑ|ΙΛΟ|ΙΜΑ|ΙΝΑ|ΙΝΩ|ΙΞΕ|ΙΞΟ|ΙΟ|ΙΟΙ|ΙΣΑ|ΙΣΑΜΕ|ΙΣΕ|ΙΣΗ|ΙΣΙΑ|ΙΣΟ|ΙΣΩΣ|ΙΩΒ|ΙΩΝ|'
        . 'ΙΩΣ|ΙΑΝ|ΚΑΘ|ΚΑΘΕ|ΚΑΘΕΤΙ|ΚΑΘΟΛΟΥ|ΚΑΘΩΣ|ΚΑΙ|ΚΑΝ|ΚΑΠΟΤΕ|ΚΑΠΟΥ|ΚΑΠΩΣ|ΚΑΤ|ΚΑΤΑ|ΚΑΤΙ|ΚΑΤΙΤΙ|ΚΑΤΟΠΙΝ|ΚΑΤΩ|ΚΑΩ|ΚΒΟ|ΚΕΑ|ΚΕΙ|ΚΕΝ|ΚΙ|ΚΙΜ|'
        . 'ΚΙΟΛΑΣ|ΚΙΤ|ΚΙΧ|ΚΚΕ|ΚΛΙΣΕ|ΚΛΠ|ΚΟΚ|ΚΟΝΤΑ|ΚΟΧ|ΚΤΛ|ΚΥΡ|ΚΥΡΙΩΣ|ΚΩ|ΚΩΝ|ΛΑ|ΛΕΑ|ΛΕΝ|ΛΕΟ|ΛΙΑ|ΛΙΓΑΚΙ|ΛΙΓΟΥΛΑΚΙ|ΛΙΓΟ|ΛΙΓΩΤΕΡΟ|ΛΙΟ|ΛΙΡ|ΛΟΓΩ|'
        . 'ΛΟΙΠΑ|ΛΟΙΠΟΝ|ΛΟΣ|ΛΣ|ΛΥΩ|ΜΑ|ΜΑΖΙ|ΜΑΚΑΡΙ|ΜΑΛΙΣΤΑ|ΜΑΛΛΟΝ|ΜΑΝ|ΜΑΞ|ΜΑΣ|ΜΑΤ|ΜΕ|ΜΕΘΑΥΡΙΟ|ΜΕΙ|ΜΕΙΟΝ|ΜΕΛ|ΜΕΛΕΙ|ΜΕΛΛΕΤΑΙ|ΜΕΜΙΑΣ|ΜΕΝ|ΜΕΣ|'
        . 'ΜΕΣΑ|ΜΕΤ|ΜΕΤΑ|ΜΕΤΑΞΥ|ΜΕΧΡΙ|ΜΗ|ΜΗΔΕ|ΜΗΝ|ΜΗΠΩΣ|ΜΗΤΕ|ΜΙ|ΜΙΞ|ΜΙΣ|ΜΜΕ|ΜΝΑ|ΜΟΒ|ΜΟΛΙΣ|ΜΟΛΟΝΟΤΙ|ΜΟΝΑΧΑ|ΜΟΝΟΜΙΑΣ|ΜΙΑ|ΜΟΥ|ΜΠΑ|ΜΠΟΡΕΙ|'
        . 'ΜΠΟΡΟΥΝ|ΜΠΡΑΒΟ|ΜΠΡΟΣ|ΜΠΩ|ΜΥ|ΜΥΑ|ΜΥΝ|ΝΑ|ΝΑΕ|ΝΑΙ|ΝΑΟ|ΝΔ|ΝΕΐ|ΝΕΑ|ΝΕΕ|ΝΕΟ|ΝΙ|ΝΙΑ|ΝΙΚ|ΝΙΛ|ΝΙΝ|ΝΙΟ|ΝΤΑ|ΝΤΕ|ΝΤΙ|ΝΤΟ|ΝΥΝ|ΝΩΕ|ΝΩΡΙΣ|ΞΑΝΑ|'
        . 'ΞΑΦΝΙΚΑ|ΞΕΩ|ΞΙ|Ο|ΟΑ|ΟΑΠ|ΟΔΟ|ΟΕ|ΟΖΟ|ΟΗΕ|ΟΙ|ΟΙΑ|ΟΙΗ|ΟΚΑ|ΟΛΟΓΥΡΑ|ΟΛΟΝΕΝ|ΟΛΟΤΕΛΑ|ΟΛΩΣΔΙΟΛΟΥ|ΟΜΩΣ|ΟΝ|ΟΝΕ|ΟΝΟ|ΟΠΑ|ΟΠΕ|ΟΠΗ|ΟΠΟ|'
        . 'ΟΠΟΙΑΔΗΠΟΤΕ|ΟΠΟΙΑΝΔΗΠΟΤΕ|ΟΠΟΙΑΣΔΗΠΟΤΕ|ΟΠΟΙΔΗΠΟΤΕ|ΟΠΟΙΕΣΔΗΠΟΤΕ|ΟΠΟΙΟΔΗΠΟΤΕ|ΟΠΟΙΟΝΔΗΠΟΤΕ|ΟΠΟΙΟΣΔΗΠΟΤΕ|ΟΠΟΙΟΥΔΗΠΟΤΕ|ΟΠΟΙΟΥΣΔΗΠΟΤΕ|'
        . 'ΟΠΟΙΩΝΔΗΠΟΤΕ|ΟΠΟΤΕΔΗΠΟΤΕ|ΟΠΟΥ|ΟΠΟΥΔΗΠΟΤΕ|ΟΠΩΣ|ΟΡΑ|ΟΡΕ|ΟΡΗ|ΟΡΟ|ΟΡΦ|ΟΡΩ|ΟΣΑ|ΟΣΑΔΗΠΟΤΕ|ΟΣΕ|ΟΣΕΣΔΗΠΟΤΕ|ΟΣΗΔΗΠΟΤΕ|ΟΣΗΝΔΗΠΟΤΕ|'
        . 'ΟΣΗΣΔΗΠΟΤΕ|ΟΣΟΔΗΠΟΤΕ|ΟΣΟΙΔΗΠΟΤΕ|ΟΣΟΝΔΗΠΟΤΕ|ΟΣΟΣΔΗΠΟΤΕ|ΟΣΟΥΔΗΠΟΤΕ|ΟΣΟΥΣΔΗΠΟΤΕ|ΟΣΩΝΔΗΠΟΤΕ|ΟΤΑΝ|ΟΤΕ|ΟΤΙ|ΟΤΙΔΗΠΟΤΕ|ΟΥ|ΟΥΔΕ|ΟΥΚ|ΟΥΣ|'
        . 'ΟΥΤΕ|ΟΥΦ|ΟΧΙ|ΟΨΑ|ΟΨΕ|ΟΨΗ|ΟΨΙ|ΟΨΟ|ΠΑ|ΠΑΛΙ|ΠΑΝ|ΠΑΝΤΟΤΕ|ΠΑΝΤΟΥ|ΠΑΝΤΩΣ|ΠΑΠ|ΠΑΡ|ΠΑΡΑ|ΠΕΙ|ΠΕΡ|ΠΕΡΑ|ΠΕΡΙ|ΠΕΡΙΠΟΥ|ΠΕΡΣΙ|ΠΕΡΥΣΙ|ΠΕΣ|ΠΙ|'
        . 'ΠΙΑ|ΠΙΘΑΝΟΝ|ΠΙΚ|ΠΙΟ|ΠΙΣΩ|ΠΙΤ|ΠΙΩ|ΠΛΑΙ|ΠΛΕΟΝ|ΠΛΗΝ|ΠΛΩ|ΠΜ|ΠΟΑ|ΠΟΕ|ΠΟΛ|ΠΟΛΥ|ΠΟΠ|ΠΟΤΕ|ΠΟΥ|ΠΟΥΘΕ|ΠΟΥΘΕΝΑ|ΠΡΕΠΕΙ|ΠΡΙ|ΠΡΙΝ|ΠΡΟ|'
        . 'ΠΡΟΚΕΙΜΕΝΟΥ|ΠΡΟΚΕΙΤΑΙ|ΠΡΟΠΕΡΣΙ|ΠΡΟΣ|ΠΡΟΤΟΥ|ΠΡΟΧΘΕΣ|ΠΡΟΧΤΕΣ|ΠΡΩΤΥΤΕΡΑ|ΠΥΑ|ΠΥΞ|ΠΥΟ|ΠΥΡ|ΠΧ|ΠΩ|ΠΩΛ|ΠΩΣ|ΡΑ|ΡΑΙ|ΡΑΠ|ΡΑΣ|ΡΕ|ΡΕΑ|ΡΕΕ|ΡΕΙ|'
        . 'ΡΗΣ|ΡΘΩ|ΡΙΟ|ΡΟ|ΡΟΐ|ΡΟΕ|ΡΟΖ|ΡΟΗ|ΡΟΘ|ΡΟΙ|ΡΟΚ|ΡΟΛ|ΡΟΝ|ΡΟΣ|ΡΟΥ|ΣΑΙ|ΣΑΝ|ΣΑΟ|ΣΑΣ|ΣΕ|ΣΕΙΣ|ΣΕΚ|ΣΕΞ|ΣΕΡ|ΣΕΤ|ΣΕΦ|ΣΗΜΕΡΑ|ΣΙ|ΣΙΑ|ΣΙΓΑ|ΣΙΚ|'
        . 'ΣΙΧ|ΣΚΙ|ΣΟΙ|ΣΟΚ|ΣΟΛ|ΣΟΝ|ΣΟΣ|ΣΟΥ|ΣΡΙ|ΣΤΑ|ΣΤΗ|ΣΤΗΝ|ΣΤΗΣ|ΣΤΙΣ|ΣΤΟ|ΣΤΟΝ|ΣΤΟΥ|ΣΤΟΥΣ|ΣΤΩΝ|ΣΥ|ΣΥΓΧΡΟΝΩΣ|ΣΥΝ|ΣΥΝΑΜΑ|ΣΥΝΕΠΩΣ|ΣΥΝΗΘΩΣ|'
        . 'ΣΧΕΔΟΝ|ΣΩΣΤΑ|ΤΑ|ΤΑΔΕ|ΤΑΚ|ΤΑΝ|ΤΑΟ|ΤΑΥ|ΤΑΧΑ|ΤΑΧΑΤΕ|ΤΕ|ΤΕΙ|ΤΕΛ|ΤΕΛΙΚΑ|ΤΕΛΙΚΩΣ|ΤΕΣ|ΤΕΤ|ΤΖΟ|ΤΗ|ΤΗΛ|ΤΗΝ|ΤΗΣ|ΤΙ|ΤΙΚ|ΤΙΜ|ΤΙΠΟΤΑ|ΤΙΠΟΤΕ|'
        . 'ΤΙΣ|ΤΝΤ|ΤΟ|ΤΟΙ|ΤΟΚ|ΤΟΜ|ΤΟΝ|ΤΟΠ|ΤΟΣ|ΤΟΣ?Ν|ΤΟΣΑ|ΤΟΣΕΣ|ΤΟΣΗ|ΤΟΣΗΝ|ΤΟΣΗΣ|ΤΟΣΟ|ΤΟΣΟΙ|ΤΟΣΟΝ|ΤΟΣΟΣ|ΤΟΣΟΥ|ΤΟΣΟΥΣ|ΤΟΤΕ|ΤΟΥ|ΤΟΥΛΑΧΙΣΤΟ|'
        . 'ΤΟΥΛΑΧΙΣΤΟΝ|ΤΟΥΣ|ΤΣ|ΤΣΑ|ΤΣΕ|ΤΥΧΟΝ|ΤΩ|ΤΩΝ|ΤΩΡΑ|ΥΑΣ|ΥΒΑ|ΥΒΟ|ΥΙΕ|ΥΙΟ|ΥΛΑ|ΥΛΗ|ΥΝΙ|ΥΠ|ΥΠΕΡ|ΥΠΟ|ΥΠΟΨΗ|ΥΠΟΨΙΝ|ΥΣΤΕΡΑ|ΥΦΗ|ΥΨΗ|ΦΑ|ΦΑΐ|ΦΑΕ|'
        . 'ΦΑΝ|ΦΑΞ|ΦΑΣ|ΦΑΩ|ΦΕΖ|ΦΕΙ|ΦΕΤΟΣ|ΦΕΥ|ΦΙ|ΦΙΛ|ΦΙΣ|ΦΟΞ|ΦΠΑ|ΦΡΙ|ΧΑ|ΧΑΗ|ΧΑΛ|ΧΑΝ|ΧΑΦ|ΧΕ|ΧΕΙ|ΧΘΕΣ|ΧΙ|ΧΙΑ|ΧΙΛ|ΧΙΟ|ΧΛΜ|ΧΜ|ΧΟΗ|ΧΟΛ|ΧΡΩ|ΧΤΕΣ|'
        . 'ΧΩΡΙΣ|ΧΩΡΙΣΤΑ|ΨΕΣ|ΨΗΛΑ|ΨΙ|ΨΙΤ|Ω|ΩΑ|ΩΑΣ|ΩΔΕ|ΩΕΣ|ΩΘΩ|ΩΜΑ|ΩΜΕ|ΩΝ|ΩΟ|ΩΟΝ|ΩΟΥ|ΩΣ|ΩΣΑΝ|ΩΣΗ|ΩΣΟΤΟΥ|ΩΣΠΟΥ|ΩΣΤΕ|ΩΣΤΟΣΟ|ΩΤΑ|ΩΧ|ΩΩΝ)$/';

    if (preg_match($stop_words, $token)) {
        return $this->toLowerCase($token, $wCase);
    }

    // Vowels
    $v = '(Α|Ε|Η|Ι|Ο|Υ|Ω)';

    // Vowels without Y
    $v2 = '(Α|Ε|Η|Ι|Ο|Ω)';

    // Stays true only while none of steps 1, 2d, 3, 4 and 5a-5m matched; it gates the generic ending removal of step 6
    $test1 = true;

    // Step S1. 14 stems
    $re       = '/^(.+?)(ΙΖΑ|ΙΖΕΣ|ΙΖΕ|ΙΖΑΜΕ|ΙΖΑΤΕ|ΙΖΑΝ|ΙΖΑΝΕ|ΙΖΩ|ΙΖΕΙΣ|ΙΖΕΙ|ΙΖΟΥΜΕ|ΙΖΕΤΕ|ΙΖΟΥΝ|ΙΖΟΥΝΕ)$/';
    $exceptS1 = '/^(ΑΝΑΜΠΑ|ΕΜΠΑ|ΕΠΑ|ΞΑΝΑΠΑ|ΠΑ|ΠΕΡΙΠΑ|ΑΘΡΟ|ΣΥΝΑΘΡΟ|ΔΑΝΕ)$/';
    $exceptS2 = '/^(ΜΑΡΚ|ΚΟΡΝ|ΑΜΠΑΡ|ΑΡΡ|ΒΑΘΥΡΙ|ΒΑΡΚ|Β|ΒΟΛΒΟΡ|ΓΚΡ|ΓΛΥΚΟΡ|ΓΛΥΚΥΡ|ΙΜΠ|Λ|ΛΟΥ|ΜΑΡ|Μ|ΠΡ|ΜΠΡ|ΠΟΛΥΡ|Π|Ρ|ΠΙΠΕΡΟΡ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1];

        if (preg_match($exceptS1, $token)) {
            // Greek capital iota (U+0399), written as an escape so it cannot be mistaken for the Latin letter I
            $token .= "\u{0399}";
        }

        if (preg_match($exceptS2, $token)) {
            // Greek capital iota + zeta (U+0399 U+0396)
            $token .= "\u{0399}\u{0396}";
        }

        return $this->toLowerCase($token, $wCase);
    }

    // Step S2. 7 stems
    $re       = '/^(.+?)(ΩΘΗΚΑ|ΩΘΗΚΕΣ|ΩΘΗΚΕ|ΩΘΗΚΑΜΕ|ΩΘΗΚΑΤΕ|ΩΘΗΚΑΝ|ΩΘΗΚΑΝΕ)$/';
    $exceptS1 = '/^(ΑΛ|ΒΙ|ΕΝ|ΥΨ|ΛΙ|ΖΩ|Σ|Χ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1];

        if (preg_match($exceptS1, $token)) {
            $token .= 'ΩΝ';
        }

        return $this->toLowerCase($token, $wCase);
    }

    // Step S3. 7 stems
    $re       = '/^(.+?)(ΙΣΑ|ΙΣΕΣ|ΙΣΕ|ΙΣΑΜΕ|ΙΣΑΤΕ|ΙΣΑΝ|ΙΣΑΝΕ)$/';
    $exceptS1 = '/^(ΑΝΑΜΠΑ|ΑΘΡΟ|ΕΜΠΑ|ΕΣΕ|ΕΣΩΚΛΕ|ΕΠΑ|ΞΑΝΑΠΑ|ΕΠΕ|ΠΕΡΙΠΑ|ΑΘΡΟ|ΣΥΝΑΘΡΟ|ΔΑΝΕ|ΚΛΕ|ΧΑΡΤΟΠΑ|ΕΞΑΡΧΑ|ΜΕΤΕΠΕ|ΑΠΟΚΛΕ|ΑΠΕΚΛΕ|ΕΚΛΕ|ΠΕ|ΠΕΡΙΠΑ)$/';
    $exceptS2 = '/^(ΑΝ|ΑΦ|ΓΕ|ΓΙΓΑΝΤΟΑΦ|ΓΚΕ|ΔΗΜΟΚΡΑΤ|ΚΟΜ|ΓΚ|Μ|Π|ΠΟΥΚΑΜ|ΟΛΟ|ΛΑΡ)$/';

    // ΙΣΑ is also part of the stop-word list above, so this branch is not expected to fire; should it ever do,
    // the stem goes through toLowerCase() like every other return value of this method.
    if ($token === "ΙΣΑ") {
        return $this->toLowerCase("ΙΣ", $wCase);
    }

    if (preg_match($re, $token, $match)) {
        $token = $match[1];

        if (preg_match($exceptS1, $token)) {
            $token .= 'Ι';
        }

        if (preg_match($exceptS2, $token)) {
            $token .= 'ΙΣ';
        }

        return $this->toLowerCase($token, $wCase);
    }

    // Step S4. 7 stems
    $re       = '/^(.+?)(ΙΣΩ|ΙΣΕΙΣ|ΙΣΕΙ|ΙΣΟΥΜΕ|ΙΣΕΤΕ|ΙΣΟΥΝ|ΙΣΟΥΝΕ)$/';
    $exceptS1 = '/^(ΑΝΑΜΠΑ|ΕΜΠΑ|ΕΣΕ|ΕΣΩΚΛΕ|ΕΠΑ|ΞΑΝΑΠΑ|ΕΠΕ|ΠΕΡΙΠΑ|ΑΘΡΟ|ΣΥΝΑΘΡΟ|ΔΑΝΕ|ΚΛΕ|ΧΑΡΤΟΠΑ|ΕΞΑΡΧΑ|ΜΕΤΕΠΕ|ΑΠΟΚΛΕ|ΑΠΕΚΛΕ|ΕΚΛΕ|ΠΕ|ΠΕΡΙΠΑ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1];

        if (preg_match($exceptS1, $token)) {
            $token .= 'Ι';
        }

        return $this->toLowerCase($token, $wCase);
    }

    // Step S5. 11 stems
    $re       = '/^(.+?)(ΙΣΤΟΣ|ΙΣΤΟΥ|ΙΣΤΟ|ΙΣΤΕ|ΙΣΤΟΙ|ΙΣΤΩΝ|ΙΣΤΟΥΣ|ΙΣΤΗ|ΙΣΤΗΣ|ΙΣΤΑ|ΙΣΤΕΣ)$/';
    $exceptS1 = '/^(Μ|Π|ΑΠ|ΑΡ|ΗΔ|ΚΤ|ΣΚ|ΣΧ|ΥΨ|ΦΑ|ΧΡ|ΧΤ|ΑΚΤ|ΑΟΡ|ΑΣΧ|ΑΤΑ|ΑΧΝ|ΑΧΤ|ΓΕΜ|ΓΥΡ|ΕΜΠ|ΕΥΠ|ΕΧΘ|ΗΦΑ|ΚΑΘ|ΚΑΚ|ΚΥΛ|ΛΥΓ|ΜΑΚ|ΜΕΓ|ΤΑΧ|ΦΙΛ|ΧΩΡ)$/';
    $exceptS2 = '/^(ΔΑΝΕ|ΣΥΝΑΘΡΟ|ΚΛΕ|ΣΕ|ΕΣΩΚΛΕ|ΑΣΕ|ΠΛΕ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1];

        if (preg_match($exceptS1, $token)) {
            $token .= 'ΙΣΤ';
        }

        if (preg_match($exceptS2, $token)) {
            $token .= 'Ι';
        }

        return $this->toLowerCase($token, $wCase);
    }

    // Step S6. 6 stems
    $re       = '/^(.+?)(ΙΣΜΟ|ΙΣΜΟΙ|ΙΣΜΟΣ|ΙΣΜΟΥ|ΙΣΜΟΥΣ|ΙΣΜΩΝ)$/';
    $exceptS1 = '/^(ΑΓΝΩΣΤΙΚ|ΑΤΟΜΙΚ|ΓΝΩΣΤΙΚ|ΕΘΝΙΚ|ΕΚΛΕΚΤΙΚ|ΣΚΕΠΤΙΚ|ΤΟΠΙΚ)$/';
    $exceptS2 = '/^(ΣΕ|ΜΕΤΑΣΕ|ΜΙΚΡΟΣΕ|ΕΓΚΛΕ|ΑΠΟΚΛΕ)$/';
    $exceptS3 = '/^(ΔΑΝΕ|ΑΝΤΙΔΑΝΕ)$/';
    $exceptS4 = '/^(ΑΛΕΞΑΝΔΡΙΝ|ΒΥΖΑΝΤΙΝ|ΘΕΑΤΡΙΝ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1];

        if (preg_match($exceptS1, $token)) {
            $token = str_replace('ΙΚ', "", $token);
        }

        if (preg_match($exceptS2, $token)) {
            $token .= "ΙΣΜ";
        }

        if (preg_match($exceptS3, $token)) {
            $token .= "Ι";
        }

        if (preg_match($exceptS4, $token)) {
            $token = str_replace('ΙΝ', "", $token);
        }

        return $this->toLowerCase($token, $wCase);
    }

    // Step S7. 4 stems
    $re       = '/^(.+?)(ΑΡΑΚΙ|ΑΡΑΚΙΑ|ΟΥΔΑΚΙ|ΟΥΔΑΚΙΑ)$/';
    $exceptS1 = '/^(Σ|Χ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1];

        if (preg_match($exceptS1, $token)) {
            // Greek ΑΡΑΚ (U+0391 U+03A1 U+0391 U+039A); the leading alpha must not be the Latin letter A
            $token .= "\u{0391}\u{03A1}\u{0391}\u{039A}";
        }

        return $this->toLowerCase($token, $wCase);
    }

    // Step S8. 8 stems
    $re       = '/^(.+?)(ΑΚΙ|ΑΚΙΑ|ΙΤΣΑ|ΙΤΣΑΣ|ΙΤΣΕΣ|ΙΤΣΩΝ|ΑΡΑΚΙ|ΑΡΑΚΙΑ)$/';
    $exceptS1 = '/^(ΑΝΘΡ|ΒΑΜΒ|ΒΡ|ΚΑΙΜ|ΚΟΝ|ΚΟΡ|ΛΑΒΡ|ΛΟΥΛ|ΜΕΡ|ΜΟΥΣΤ|ΝΑΓΚΑΣ|ΠΛ|Ρ|ΡΥ|Σ|ΣΚ|ΣΟΚ|ΣΠΑΝ|ΤΖ|ΦΑΡΜ|Χ|'
        . 'ΚΑΠΑΚ|ΑΛΙΣΦ|ΑΜΒΡ|ΑΝΘΡ|Κ|ΦΥΛ|ΚΑΤΡΑΠ|ΚΛΙΜ|ΜΑΛ|ΣΛΟΒ|Φ|ΣΦ|ΤΣΕΧΟΣΛΟΒ)$/';
    $exceptS2 = '/^(Β|ΒΑΛ|ΓΙΑΝ|ΓΛ|Ζ|ΗΓΟΥΜΕΝ|ΚΑΡΔ|ΚΟΝ|ΜΑΚΡΥΝ|ΝΥΦ|ΠΑΤΕΡ|Π|ΣΚ|ΤΟΣ|ΤΡΙΠΟΛ)$/';

    // For words like ΠΛΟΥΣΙΟΚΟΡΙΤΣΑ, ΠΑΛΙΟΚΟΡΙΤΣΑ etc
    $exceptS3 = '/(ΚΟΡ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1];

        if (preg_match($exceptS1, $token)) {
            $token .= "ΑΚ";
        }

        if (preg_match($exceptS2, $token)) {
            $token .= "ΙΤΣ";
        }

        if (preg_match($exceptS3, $token)) {
            $token .= "ΙΤΣ";
        }

        return $this->toLowerCase($token, $wCase);
    }

    // Step S9. 3 stems
    $re       = '/^(.+?)(ΙΔΙΟ|ΙΔΙΑ|ΙΔΙΩΝ)$/';
    $exceptS1 = '/^(ΑΙΦΝ|ΙΡ|ΟΛΟ|ΨΑΛ)$/';
    $exceptS2 = '/(Ε|ΠΑΙΧΝ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1];

        if (preg_match($exceptS1, $token)) {
            $token .= "ΙΔ";
        }

        if (preg_match($exceptS2, $token)) {
            $token .= "ΙΔ";
        }

        return $this->toLowerCase($token, $wCase);
    }

    // Step S10. 4 stems
    $re       = '/^(.+?)(ΙΣΚΟΣ|ΙΣΚΟΥ|ΙΣΚΟ|ΙΣΚΕ)$/';
    $exceptS1 = '/^(Δ|ΙΒ|ΜΗΝ|Ρ|ΦΡΑΓΚ|ΛΥΚ|ΟΒΕΛ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1];

        if (preg_match($exceptS1, $token)) {
            $token .= "ΙΣΚ";
        }

        return $this->toLowerCase($token, $wCase);
    }

    // Step 1
    // step1list is used in Step 1. 41 stems
    $step1list               = [];
    $step1list["ΦΑΓΙΑ"]      = "ΦΑ";
    $step1list["ΦΑΓΙΟΥ"]     = "ΦΑ";
    $step1list["ΦΑΓΙΩΝ"]     = "ΦΑ";
    $step1list["ΣΚΑΓΙΑ"]     = "ΣΚΑ";
    $step1list["ΣΚΑΓΙΟΥ"]    = "ΣΚΑ";
    $step1list["ΣΚΑΓΙΩΝ"]    = "ΣΚΑ";
    $step1list["ΟΛΟΓΙΟΥ"]    = "ΟΛΟ";
    $step1list["ΟΛΟΓΙΑ"]     = "ΟΛΟ";
    $step1list["ΟΛΟΓΙΩΝ"]    = "ΟΛΟ";
    $step1list["ΣΟΓΙΟΥ"]     = "ΣΟ";
    $step1list["ΣΟΓΙΑ"]      = "ΣΟ";
    $step1list["ΣΟΓΙΩΝ"]     = "ΣΟ";
    $step1list["ΤΑΤΟΓΙΑ"]    = "ΤΑΤΟ";
    $step1list["ΤΑΤΟΓΙΟΥ"]   = "ΤΑΤΟ";
    $step1list["ΤΑΤΟΓΙΩΝ"]   = "ΤΑΤΟ";
    $step1list["ΚΡΕΑΣ"]      = "ΚΡΕ";
    $step1list["ΚΡΕΑΤΟΣ"]    = "ΚΡΕ";
    $step1list["ΚΡΕΑΤΑ"]     = "ΚΡΕ";
    $step1list["ΚΡΕΑΤΩΝ"]    = "ΚΡΕ";
    $step1list["ΠΕΡΑΣ"]      = "ΠΕΡ";
    $step1list["ΠΕΡΑΤΟΣ"]    = "ΠΕΡ";

    // Added by Spyros. Also at $re in step1
    $step1list["ΠΕΡΑΤΗ"]     = "ΠΕΡ";
    $step1list["ΠΕΡΑΤΑ"]     = "ΠΕΡ";
    $step1list["ΠΕΡΑΤΩΝ"]    = "ΠΕΡ";
    $step1list["ΤΕΡΑΣ"]      = "ΤΕΡ";
    $step1list["ΤΕΡΑΤΟΣ"]    = "ΤΕΡ";
    $step1list["ΤΕΡΑΤΑ"]     = "ΤΕΡ";
    $step1list["ΤΕΡΑΤΩΝ"]    = "ΤΕΡ";
    $step1list["ΦΩΣ"]        = "ΦΩ";
    $step1list["ΦΩΤΟΣ"]      = "ΦΩ";
    $step1list["ΦΩΤΑ"]       = "ΦΩ";
    $step1list["ΦΩΤΩΝ"]      = "ΦΩ";
    $step1list["ΚΑΘΕΣΤΩΣ"]   = "ΚΑΘΕΣΤ";
    $step1list["ΚΑΘΕΣΤΩΤΟΣ"] = "ΚΑΘΕΣΤ";
    $step1list["ΚΑΘΕΣΤΩΤΑ"]  = "ΚΑΘΕΣΤ";
    $step1list["ΚΑΘΕΣΤΩΤΩΝ"] = "ΚΑΘΕΣΤ";
    $step1list["ΓΕΓΟΝΟΣ"]    = "ΓΕΓΟΝ";
    $step1list["ΓΕΓΟΝΟΤΟΣ"]  = "ΓΕΓΟΝ";
    $step1list["ΓΕΓΟΝΟΤΑ"]   = "ΓΕΓΟΝ";
    $step1list["ΓΕΓΟΝΟΤΩΝ"]  = "ΓΕΓΟΝ";

    $re = '/(.*)(ΦΑΓΙΑ|ΦΑΓΙΟΥ|ΦΑΓΙΩΝ|ΣΚΑΓΙΑ|ΣΚΑΓΙΟΥ|ΣΚΑΓΙΩΝ|ΟΛΟΓΙΟΥ|ΟΛΟΓΙΑ|ΟΛΟΓΙΩΝ|ΣΟΓΙΟΥ|ΣΟΓΙΑ|ΣΟΓΙΩΝ|ΤΑΤΟΓΙΑ|ΤΑΤΟΓΙΟΥ|ΤΑΤΟΓΙΩΝ|ΚΡΕΑΣ|ΚΡΕΑΤΟΣ|'
        . 'ΚΡΕΑΤΑ|ΚΡΕΑΤΩΝ|ΠΕΡΑΣ|ΠΕΡΑΤΟΣ|ΠΕΡΑΤΗ|ΠΕΡΑΤΑ|ΠΕΡΑΤΩΝ|ΤΕΡΑΣ|ΤΕΡΑΤΟΣ|ΤΕΡΑΤΑ|ΤΕΡΑΤΩΝ|ΦΩΣ|ΦΩΤΟΣ|ΦΩΤΑ|ΦΩΤΩΝ|ΚΑΘΕΣΤΩΣ|ΚΑΘΕΣΤΩΤΟΣ|'
        . 'ΚΑΘΕΣΤΩΤΑ|ΚΑΘΕΣΤΩΤΩΝ|ΓΕΓΟΝΟΣ|ΓΕΓΟΝΟΤΟΣ|ΓΕΓΟΝΟΤΑ|ΓΕΓΟΝΟΤΩΝ)$/';

    if (preg_match($re, $token, $match)) {
        $stem   = $match[1];
        $suffix = $match[2];
        $token  = $stem . (\array_key_exists($suffix, $step1list) ? $step1list[$suffix] : '');
        $test1  = false;
    }

    // Step 2a. 2 stems
    $re = '/^(.+?)(ΑΔΕΣ|ΑΔΩΝ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1];
        $re    = '/(ΟΚ|ΜΑΜ|ΜΑΝ|ΜΠΑΜΠ|ΠΑΤΕΡ|ΓΙΑΓΙ|ΝΤΑΝΤ|ΚΥΡ|ΘΕΙ|ΠΕΘΕΡ)$/';

        if (!preg_match($re, $token)) {
            $token .= "ΑΔ";
        }
    }

    // Step 2b. 2 stems
    $re = '/^(.+?)(ΕΔΕΣ|ΕΔΩΝ)$/';

    if (preg_match($re, $token, $match)) {
        $token  = $match[1];
        $exept2 = '/(ΟΠ|ΙΠ|ΕΜΠ|ΥΠ|ΓΗΠ|ΔΑΠ|ΚΡΑΣΠ|ΜΙΛ)$/';

        if (preg_match($exept2, $token)) {
            $token .= 'ΕΔ';
        }
    }

    // Step 2c
    $re = '/^(.+?)(ΟΥΔΕΣ|ΟΥΔΩΝ)$/';

    if (preg_match($re, $token, $match)) {
        $token  = $match[1];
        $exept3 = '/(ΑΡΚ|ΚΑΛΙΑΚ|ΠΕΤΑΛ|ΛΙΧ|ΠΛΕΞ|ΣΚ|Σ|ΦΛ|ΦΡ|ΒΕΛ|ΛΟΥΛ|ΧΝ|ΣΠ|ΤΡΑΓ|ΦΕ)$/';

        if (preg_match($exept3, $token)) {
            $token .= 'ΟΥΔ';
        }
    }

    // Step 2d
    $re = '/^(.+?)(ΕΩΣ|ΕΩΝ)$/';

    if (preg_match($re, $token, $match)) {
        $token  = $match[1];
        $test1  = false;
        $exept4 = '/^(Θ|Δ|ΕΛ|ΓΑΛ|Ν|Π|ΙΔ|ΠΑΡ)$/';

        if (preg_match($exept4, $token)) {
            $token .= 'Ε';
        }
    }

    // Step 3
    $re = '/^(.+?)(ΙΑ|ΙΟΥ|ΙΩΝ)$/';

    if (preg_match($re, $token, $fp)) {
        $stem  = $fp[1];
        $token = $stem;
        $re    = '/' . $v . '$/';
        $test1 = false;

        if (preg_match($re, $token)) {
            $token = $stem . 'Ι';
        }
    }

    // Step 4
    $re = '/^(.+?)(ΙΚΑ|ΙΚΟ|ΙΚΟΥ|ΙΚΩΝ)$/';

    if (preg_match($re, $token, $match)) {
        $token  = $match[1];
        $test1  = false;
        $re     = '/' . $v . '$/';
        $exept5 = '/^(ΑΛ|ΑΔ|ΕΝΔ|ΑΜΑΝ|ΑΜΜΟΧΑΛ|ΗΘ|ΑΝΗΘ|ΑΝΤΙΔ|ΦΥΣ|ΒΡΩΜ|ΓΕΡ|ΕΞΩΔ|ΚΑΛΠ|ΚΑΛΛΙΝ|ΚΑΤΑΔ|ΜΟΥΛ|ΜΠΑΝ|ΜΠΑΓΙΑΤ|ΜΠΟΛ|ΜΠΟΣ|ΝΙΤ|ΞΙΚ|ΣΥΝΟΜΗΛ|ΠΕΤΣ|'
            . 'ΠΙΤΣ|ΠΙΚΑΝΤ|ΠΛΙΑΤΣ|ΠΟΣΤΕΛΝ|ΠΡΩΤΟΔ|ΣΕΡΤ|ΣΥΝΑΔ|ΤΣΑΜ|ΥΠΟΔ|ΦΙΛΟΝ|ΦΥΛΟΔ|ΧΑΣ)$/';

        if (preg_match($re, $token) || preg_match($exept5, $token)) {
            $token .= 'ΙΚ';
        }
    }

    // Step 5a
    $re  = '/^(.+?)(ΑΜΕ)$/';
    $re2 = '/^(.+?)(ΑΓΑΜΕ|ΗΣΑΜΕ|ΟΥΣΑΜΕ|ΗΚΑΜΕ|ΗΘΗΚΑΜΕ)$/';

    if ($token === "ΑΓΑΜΕ") {
        $token = "ΑΓΑΜ";
    }

    if (preg_match($re2, $token, $match)) {
        $token = $match[1];
        $test1 = false;
    }

    if (preg_match($re, $token, $match)) {
        $token  = $match[1];
        $test1  = false;
        $exept6 = '/^(ΑΝΑΠ|ΑΠΟΘ|ΑΠΟΚ|ΑΠΟΣΤ|ΒΟΥΒ|ΞΕΘ|ΟΥΛ|ΠΕΘ|ΠΙΚΡ|ΠΟΤ|ΣΙΧ|Χ)$/';

        if (preg_match($exept6, $token)) {
            $token .= "ΑΜ";
        }
    }

    // Step 5b
    $re2 = '/^(.+?)(ΑΝΕ)$/';
    $re3 = '/^(.+?)(ΑΓΑΝΕ|ΗΣΑΝΕ|ΟΥΣΑΝΕ|ΙΟΝΤΑΝΕ|ΙΟΤΑΝΕ|ΙΟΥΝΤΑΝΕ|ΟΝΤΑΝΕ|ΟΤΑΝΕ|ΟΥΝΤΑΝΕ|ΗΚΑΝΕ|ΗΘΗΚΑΝΕ)$/';

    if (preg_match($re3, $token, $match)) {
        $token = $match[1];
        $test1 = false;
        $re3   = '/^(ΤΡ|ΤΣ)$/';

        if (preg_match($re3, $token)) {
            $token .= "ΑΓΑΝ";
        }
    }

    if (preg_match($re2, $token, $match)) {
        $token  = $match[1];
        $test1  = false;
        $re2    = '/' . $v2 . '$/';
        $exept7 = '/^(ΒΕΤΕΡ|ΒΟΥΛΚ|ΒΡΑΧΜ|Γ|ΔΡΑΔΟΥΜ|Θ|ΚΑΛΠΟΥΖ|ΚΑΣΤΕΛ|ΚΟΡΜΟΡ|ΛΑΟΠΛ|ΜΩΑΜΕΘ|Μ|ΜΟΥΣΟΥΛΜ|Ν|ΟΥΛ|Π|ΠΕΛΕΚ|ΠΛ|ΠΟΛΙΣ|ΠΟΡΤΟΛ|ΣΑΡΑΚΑΤΣ|ΣΟΥΛΤ|'
            . 'ΤΣΑΡΛΑΤ|ΟΡΦ|ΤΣΙΓΓ|ΤΣΟΠ|ΦΩΤΟΣΤΕΦ|Χ|ΨΥΧΟΠΛ|ΑΓ|ΟΡΦ|ΓΑΛ|ΓΕΡ|ΔΕΚ|ΔΙΠΛ|ΑΜΕΡΙΚΑΝ|ΟΥΡ|ΠΙΘ|ΠΟΥΡΙΤ|Σ|ΖΩΝΤ|ΙΚ|ΚΑΣΤ|ΚΟΠ|ΛΙΧ|ΛΟΥΘΗΡ|ΜΑΙΝΤ|'
            . 'ΜΕΛ|ΣΙΓ|ΣΠ|ΣΤΕΓ|ΤΡΑΓ|ΤΣΑΓ|Φ|ΕΡ|ΑΔΑΠ|ΑΘΙΓΓ|ΑΜΗΧ|ΑΝΙΚ|ΑΝΟΡΓ|ΑΠΗΓ|ΑΠΙΘ|ΑΤΣΙΓΓ|ΒΑΣ|ΒΑΣΚ|ΒΑΘΥΓΑΛ|ΒΙΟΜΗΧ|ΒΡΑΧΥΚ|ΔΙΑΤ|ΔΙΑΦ|ΕΝΟΡΓ|'
            . 'ΘΥΣ|ΚΑΠΝΟΒΙΟΜΗΧ|ΚΑΤΑΓΑΛ|ΚΛΙΒ|ΚΟΙΛΑΡΦ|ΛΙΒ|ΜΕΓΛΟΒΙΟΜΗΧ|ΜΙΚΡΟΒΙΟΜΗΧ|ΝΤΑΒ|ΞΗΡΟΚΛΙΒ|ΟΛΙΓΟΔΑΜ|ΟΛΟΓΑΛ|ΠΕΝΤΑΡΦ|ΠΕΡΗΦ|ΠΕΡΙΤΡ|ΠΛΑΤ|'
            . 'ΠΟΛΥΔΑΠ|ΠΟΛΥΜΗΧ|ΣΤΕΦ|ΤΑΒ|ΤΕΤ|ΥΠΕΡΗΦ|ΥΠΟΚΟΠ|ΧΑΜΗΛΟΔΑΠ|ΨΗΛΟΤΑΒ)$/';

        if (preg_match($re2, $token) || preg_match($exept7, $token)) {
            $token .= "ΑΝ";
        }
    }

    // Step 5c
    $re3 = '/^(.+?)(ΕΤΕ)$/';
    $re4 = '/^(.+?)(ΗΣΕΤΕ)$/';

    if (preg_match($re4, $token, $match)) {
        $token = $match[1];
        $test1 = false;
    }

    if (preg_match($re3, $token, $match)) {
        $token  = $match[1];
        $test1  = false;
        $re3    = '/' . $v2 . '$/';
        $exept8 = '/(ΟΔ|ΑΙΡ|ΦΟΡ|ΤΑΘ|ΔΙΑΘ|ΣΧ|ΕΝΔ|ΕΥΡ|ΤΙΘ|ΥΠΕΡΘ|ΡΑΘ|ΕΝΘ|ΡΟΘ|ΣΘ|ΠΥΡ|ΑΙΝ|ΣΥΝΔ|ΣΥΝ|ΣΥΝΘ|ΧΩΡ|ΠΟΝ|ΒΡ|ΚΑΘ|ΕΥΘ|ΕΚΘ|ΝΕΤ|ΡΟΝ|ΑΡΚ|ΒΑΡ|ΒΟΛ|ΩΦΕΛ)$/';
        $exept9 = '/^(ΑΒΑΡ|ΒΕΝ|ΕΝΑΡ|ΑΒΡ|ΑΔ|ΑΘ|ΑΝ|ΑΠΛ|ΒΑΡΟΝ|ΝΤΡ|ΣΚ|ΚΟΠ|ΜΠΟΡ|ΝΙΦ|ΠΑΓ|ΠΑΡΑΚΑΛ|ΣΕΡΠ|ΣΚΕΛ|ΣΥΡΦ|ΤΟΚ|Υ|Δ|ΕΜ|ΘΑΡΡ|Θ)$/';

        if (preg_match($re3, $token) || preg_match($exept8, $token) || preg_match($exept9, $token)) {
            $token .= "ΕΤ";
        }
    }

    // Step 5d
    $re = '/^(.+?)(ΟΝΤΑΣ|ΩΝΤΑΣ)$/';

    if (preg_match($re, $token, $match)) {
        $token   = $match[1];
        $test1   = false;
        $exept10 = '/^(ΑΡΧ)$/';
        $exept11 = '/(ΚΡΕ)$/';

        if (preg_match($exept10, $token)) {
            $token .= "ΟΝΤ";
        }

        if (preg_match($exept11, $token)) {
            $token .= "ΩΝΤ";
        }
    }

    // Step 5e
    $re = '/^(.+?)(ΟΜΑΣΤΕ|ΙΟΜΑΣΤΕ)$/';

    if (preg_match($re, $token, $match)) {
        $token   = $match[1];
        $test1   = false;
        $exept11 = '/^(ΟΝ)$/';

        if (preg_match($exept11, $token)) {
            $token .= "ΟΜΑΣΤ";
        }
    }

    // Step 5f
    $re  = '/^(.+?)(ΕΣΤΕ)$/';
    $re2 = '/^(.+?)(ΙΕΣΤΕ)$/';

    if (preg_match($re2, $token, $match)) {
        $token = $match[1];
        $test1 = false;
        $re2   = '/^(Π|ΑΠ|ΣΥΜΠ|ΑΣΥΜΠ|ΑΚΑΤΑΠ|ΑΜΕΤΑΜΦ)$/';

        if (preg_match($re2, $token)) {
            $token .= "ΙΕΣΤ";
        }
    }

    if (preg_match($re, $token, $match)) {
        $token   = $match[1];
        $test1   = false;
        $exept12 = '/^(ΑΛ|ΑΡ|ΕΚΤΕΛ|Ζ|Μ|Ξ|ΠΑΡΑΚΑΛ|ΠΡΟ|ΝΙΣ)$/';

        if (preg_match($exept12, $token)) {
            $token .= "ΕΣΤ";
        }
    }

    // Step 5g
    $re  = '/^(.+?)(ΗΚΑ|ΗΚΕΣ|ΗΚΕ)$/';
    $re2 = '/^(.+?)(ΗΘΗΚΑ|ΗΘΗΚΕΣ|ΗΘΗΚΕ)$/';

    if (preg_match($re2, $token, $match)) {
        $token = $match[1];
        $test1 = false;
    }

    if (preg_match($re, $token, $match)) {
        $token   = $match[1];
        $test1   = false;
        $exept13 = '/(ΣΚΩΛ|ΣΚΟΥΛ|ΝΑΡΘ|ΣΦ|ΟΘ|ΠΙΘ)$/';
        $exept14 = '/^(ΔΙΑΘ|Θ|ΠΑΡΑΚΑΤΑΘ|ΠΡΟΣΘ|ΣΥΝΘ)$/';

        if (preg_match($exept13, $token) || preg_match($exept14, $token)) {
            $token .= "ΗΚ";
        }
    }

    // Step 5h
    $re = '/^(.+?)(ΟΥΣΑ|ΟΥΣΕΣ|ΟΥΣΕ)$/';

    if (preg_match($re, $token, $match)) {
        $token   = $match[1];
        $test1   = false;
        $exept15 = '/^(ΦΑΡΜΑΚ|ΧΑΔ|ΑΓΚ|ΑΝΑΡΡ|ΒΡΟΜ|ΕΚΛΙΠ|ΛΑΜΠΙΔ|ΛΕΧ|Μ|ΠΑΤ|Ρ|Λ|ΜΕΔ|ΜΕΣΑΖ|ΥΠΟΤΕΙΝ|ΑΜ|ΑΙΘ|ΑΝΗΚ|ΔΕΣΠΟΖ|ΕΝΔΙΑΦΕΡ|ΔΕ|ΔΕΥΤΕΡΕΥ|ΚΑΘΑΡΕΥ|ΠΛΕ|ΤΣΑ)$/';
        $exept16 = '/(ΠΟΔΑΡ|ΒΛΕΠ|ΠΑΝΤΑΧ|ΦΡΥΔ|ΜΑΝΤΙΛ|ΜΑΛΛ|ΚΥΜΑΤ|ΛΑΧ|ΛΗΓ|ΦΑΓ|ΟΜ|ΠΡΩΤ)$/';

        if (preg_match($exept15, $token) || preg_match($exept16, $token)) {
            $token .= "ΟΥΣ";
        }
    }

    // Step 5i
    $re = '/^(.+?)(ΑΓΑ|ΑΓΕΣ|ΑΓΕ)$/';

    if (preg_match($re, $token, $match)) {
        $token   = $match[1];
        $test1   = false;
        $exept17 = '/^(ΨΟΦ|ΝΑΥΛΟΧ)$/';
        $exept20 = '/(ΚΟΛΛ)$/';
        $exept18 = '/^(ΑΒΑΣΤ|ΠΟΛΥΦ|ΑΔΗΦ|ΠΑΜΦ|Ρ|ΑΣΠ|ΑΦ|ΑΜΑΛ|ΑΜΑΛΛΙ|ΑΝΥΣΤ|ΑΠΕΡ|ΑΣΠΑΡ|ΑΧΑΡ|ΔΕΡΒΕΝ|ΔΡΟΣΟΠ|ΞΕΦ|ΝΕΟΠ|ΝΟΜΟΤ|ΟΛΟΠ|ΟΜΟΤ|ΠΡΟΣΤ|ΠΡΟΣΩΠΟΠ|'
            . 'ΣΥΜΠ|ΣΥΝΤ|Τ|ΥΠΟΤ|ΧΑΡ|ΑΕΙΠ|ΑΙΜΟΣΤ|ΑΝΥΠ|ΑΠΟΤ|ΑΡΤΙΠ|ΔΙΑΤ|ΕΝ|ΕΠΙΤ|ΚΡΟΚΑΛΟΠ|ΣΙΔΗΡΟΠ|Λ|ΝΑΥ|ΟΥΛΑΜ|ΟΥΡ|Π|ΤΡ|Μ)$/';
        $exept19 = '/(ΟΦ|ΠΕΛ|ΧΟΡΤ|ΛΛ|ΣΦ|ΡΠ|ΦΡ|ΠΡ|ΛΟΧ|ΣΜΗΝ)$/';

        if (
            (preg_match($exept18, $token) || preg_match($exept19, $token))
            && !(preg_match($exept17, $token) || preg_match($exept20, $token))
        ) {
            $token .= "ΑΓ";
        }
    }

    // Step 5j
    $re = '/^(.+?)(ΗΣΕ|ΗΣΟΥ|ΗΣΑ)$/';

    if (preg_match($re, $token, $match)) {
        $token   = $match[1];
        $test1   = false;
        $exept21 = '/^(Ν|ΧΕΡΣΟΝ|ΔΩΔΕΚΑΝ|ΕΡΗΜΟΝ|ΜΕΓΑΛΟΝ|ΕΠΤΑΝ)$/';

        if (preg_match($exept21, $token)) {
            $token .= "ΗΣ";
        }
    }

    // Step 5k
    $re = '/^(.+?)(ΗΣΤΕ)$/';

    if (preg_match($re, $token, $match)) {
        $token   = $match[1];
        $test1   = false;
        $exept22 = '/^(ΑΣΒ|ΣΒ|ΑΧΡ|ΧΡ|ΑΠΛ|ΑΕΙΜΝ|ΔΥΣΧΡ|ΕΥΧΡ|ΚΟΙΝΟΧΡ|ΠΑΛΙΜΨ)$/';

        if (preg_match($exept22, $token)) {
            $token .= "ΗΣΤ";
        }
    }

    // Step 5l
    $re = '/^(.+?)(ΟΥΝΕ|ΗΣΟΥΝΕ|ΗΘΟΥΝΕ)$/';

    if (preg_match($re, $token, $match)) {
        $token   = $match[1];
        $test1   = false;
        $exept23 = '/^(Ν|Ρ|ΣΠΙ|ΣΤΡΑΒΟΜΟΥΤΣ|ΚΑΚΟΜΟΥΤΣ|ΕΞΩΝ)$/';

        if (preg_match($exept23, $token)) {
            $token .= "ΟΥΝ";
        }
    }

    // Step 5m
    $re = '/^(.+?)(ΟΥΜΕ|ΗΣΟΥΜΕ|ΗΘΟΥΜΕ)$/';

    if (preg_match($re, $token, $match)) {
        $token   = $match[1];
        $test1   = false;
        $exept24 = '/^(ΠΑΡΑΣΟΥΣ|Φ|Χ|ΩΡΙΟΠΛ|ΑΖ|ΑΛΛΟΣΟΥΣ|ΑΣΟΥΣ)$/';

        if (preg_match($exept24, $token)) {
            $token .= "ΟΥΜ";
        }
    }

    // Step 6
    $re  = '/^(.+?)(ΜΑΤΑ|ΜΑΤΩΝ|ΜΑΤΟΣ)$/';
    $re2 = '/^(.+?)(Α|ΑΓΑΤΕ|ΑΓΑΝ|ΑΕΙ|ΑΜΑΙ|ΑΝ|ΑΣ|ΑΣΑΙ|ΑΤΑΙ|ΑΩ|Ε|ΕΙ|ΕΙΣ|ΕΙΤΕ|ΕΣΑΙ|ΕΣ|ΕΤΑΙ|Ι|ΙΕΜΑΙ|ΙΕΜΑΣΤΕ|ΙΕΤΑΙ|ΙΕΣΑΙ|ΙΕΣΑΣΤΕ|ΙΟΜΑΣΤΑΝ|ΙΟΜΟΥΝ|'
        . 'ΙΟΜΟΥΝΑ|ΙΟΝΤΑΝ|ΙΟΝΤΟΥΣΑΝ|ΙΟΣΑΣΤΑΝ|ΙΟΣΑΣΤΕ|ΙΟΣΟΥΝ|ΙΟΣΟΥΝΑ|ΙΟΤΑΝ|ΙΟΥΜΑ|ΙΟΥΜΑΣΤΕ|ΙΟΥΝΤΑΙ|ΙΟΥΝΤΑΝ|Η|ΗΔΕΣ|ΗΔΩΝ|ΗΘΕΙ|ΗΘΕΙΣ|ΗΘΕΙΤΕ|'
        . 'ΗΘΗΚΑΤΕ|ΗΘΗΚΑΝ|ΗΘΟΥΝ|ΗΘΩ|ΗΚΑΤΕ|ΗΚΑΝ|ΗΣ|ΗΣΑΝ|ΗΣΑΤΕ|ΗΣΕΙ|ΗΣΕΣ|ΗΣΟΥΝ|ΗΣΩ|Ο|ΟΙ|ΟΜΑΙ|ΟΜΑΣΤΑΝ|ΟΜΟΥΝ|ΟΜΟΥΝΑ|ΟΝΤΑΙ|ΟΝΤΑΝ|ΟΝΤΟΥΣΑΝ|ΟΣ|'
        . 'ΟΣΑΣΤΑΝ|ΟΣΑΣΤΕ|ΟΣΟΥΝ|ΟΣΟΥΝΑ|ΟΤΑΝ|ΟΥ|ΟΥΜΑΙ|ΟΥΜΑΣΤΕ|ΟΥΝ|ΟΥΝΤΑΙ|ΟΥΝΤΑΝ|ΟΥΣ|ΟΥΣΑΝ|ΟΥΣΑΤΕ|Υ|ΥΣ|Ω|ΩΝ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1] . "ΜΑ";
    }

    // The generic endings are only removed when no earlier step already shortened the token
    if ($test1 && preg_match($re2, $token, $match)) {
        $token = $match[1];
    }

    // Step 7 (ΠΑΡΑΘΕΤΙΚΑ)
    $re = '/^(.+?)(ΕΣΤΕΡ|ΕΣΤΑΤ|ΟΤΕΡ|ΟΤΑΤ|ΥΤΕΡ|ΥΤΑΤ|ΩΤΕΡ|ΩΤΑΤ)$/';

    if (preg_match($re, $token, $match)) {
        $token = $match[1];
    }

    return $this->toLowerCase($token, $wCase);
}
/**
 * Converts the token to uppercase, suppressing accents and diaeresis. $wCase is overwritten with one entry per
 * character of the token, recording which rule was used so that toLowerCase() can restore the character:
 *
 *   0 = character left untouched (not one of the lowercase Greek letters handled here)
 *   1 = plain lowercase letter
 *   2 = vowel with accent (tonos), or final sigma
 *   3 = vowel with diaeresis (dialytika)
 *   4 = vowel with both diaeresis and accent
 *
 * @param   string  $token   Token to process
 * @param   array   &$wCase  Map of uppercase rules (output)
 *
 * @return  string
 *
 * @since   4.0.0
 */
protected function toUpperCase($token, &$wCase)
{
    // Lowercase glyph => [unaccented uppercase letter, rule number stored in $wCase]
    $caseConvert = [
        "α" => ['Α', 1],
        "β" => ['Β', 1],
        "γ" => ['Γ', 1],
        "δ" => ['Δ', 1],
        "ε" => ['Ε', 1],
        "ζ" => ['Ζ', 1],
        "η" => ['Η', 1],
        "θ" => ['Θ', 1],
        "ι" => ['Ι', 1],
        "κ" => ['Κ', 1],
        "λ" => ['Λ', 1],
        "μ" => ['Μ', 1],
        "ν" => ['Ν', 1],
        "ξ" => ['Ξ', 1],
        "ο" => ['Ο', 1],
        "π" => ['Π', 1],
        "ρ" => ['Ρ', 1],
        "σ" => ['Σ', 1],
        "τ" => ['Τ', 1],
        "υ" => ['Υ', 1],
        "φ" => ['Φ', 1],
        "χ" => ['Χ', 1],
        "ψ" => ['Ψ', 1],
        "ω" => ['Ω', 1],
        "ά" => ['Α', 2],
        "έ" => ['Ε', 2],
        "ή" => ['Η', 2],
        "ί" => ['Ι', 2],
        "ό" => ['Ο', 2],
        "ύ" => ['Υ', 2],
        "ώ" => ['Ω', 2],
        "ς" => ['Σ', 2],
        "ϊ" => ['Ι', 3],
        // Upsilon with dialytika belongs to upsilon (U+03A5); mapping it to iota turned it into "ϊ" on the way back
        "ϋ" => ["\u{03A5}", 3],
        "ΐ" => ['Ι', 4],
        "ΰ" => ['Υ', 4],
    ];

    // Use one explicit encoding for every mb_* call so positions in $wCase always line up with the characters
    $length   = mb_strlen($token, 'UTF-8');
    $wCase    = $length > 0 ? array_fill(0, $length, 0) : [];
    $newToken = '';

    for ($i = 0; $i < $length; $i++) {
        $char = mb_substr($token, $i, 1, 'UTF-8');

        // Anything that is not a known lowercase Greek letter is copied through and keeps rule 0
        if (!\array_key_exists($char, $caseConvert)) {
            $newToken .= $char;

            continue;
        }

        $newToken .= $caseConvert[$char][0];
        $wCase[$i] = $caseConvert[$char][1];
    }

    return $newToken;
}
/**
 * Converts the suppressed uppercase token back to lowercase, using the $wCase map to add back the accents,
 * diaeresis and handle the special case of final sigma (different lowercase glyph than the regular sigma, only
 * used at the end of words).
 *
 * Positions without an entry in $wCase, or with rule 0, are copied unchanged. When a position is flagged as
 * accented (rules 2 to 4) but the stemmer put a letter there that cannot carry that mark, the letter is simply
 * lower-cased instead of being dropped.
 *
 * @param   string  $token  Token to process
 * @param   array   $wCase  Map of lowercase rules
 *
 * @return  string
 *
 * @since   4.0.0
 */
protected function toLowerCase($token, $wCase)
{
    // Restore tables, keyed by the rule number found in $wCase
    $restore = [
        // Case 2: Vowel with accent (tonos); or the special case of final sigma
        2 => [
            'Α' => 'ά',
            'Ε' => 'έ',
            'Η' => 'ή',
            'Ι' => 'ί',
            'Ο' => 'ό',
            'Υ' => 'ύ',
            'Ω' => 'ώ',
            'Σ' => 'ς',
        ],
        // Case 3: vowels with diaeresis (dialytika)
        3 => [
            'Ι' => 'ϊ',
            'Υ' => 'ϋ',
        ],
        // Case 4: vowels with both diaeresis (dialytika) and accent (tonos)
        4 => [
            'Ι' => 'ΐ',
            'Υ' => 'ΰ',
        ],
    ];

    $newToken = '';
    $length   = mb_strlen($token, 'UTF-8');

    for ($i = 0; $i < $length; $i++) {
        $char = mb_substr($token, $i, 1, 'UTF-8');

        // Is $wCase not set at this position? We assume no case conversion ever took place.
        // Rule 0 means the character was not case-converted either.
        if (!isset($wCase[$i]) || $wCase[$i] == 0) {
            $newToken .= $char;

            continue;
        }

        // Case 1: Unaccented letter
        if ($wCase[$i] == 1) {
            $newToken .= mb_strtolower($char, 'UTF-8');

            continue;
        }

        // Cases 2-4: put the diacritic back, or fall back to the plain lowercase letter when it cannot carry one
        foreach ($restore as $rule => $charMap) {
            if ($wCase[$i] == $rule) {
                $newToken .= $charMap[$char] ?? mb_strtolower($char, 'UTF-8');

                continue 2;
            }
        }

        // This should never happen!
        $newToken .= $char;
    }

    return $newToken;
}
}

View File

@ -0,0 +1,71 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2018 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer\Language;
use Joomla\Component\Finder\Administrator\Indexer\Language;
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
 * Chinese (simplified) language support class for the Finder indexer package.
 *
 * Han characters are not separated by spaces, so every Han character is turned into a term of its own before the
 * parent tokeniser runs.
 *
 * @since  4.0.0
 */
class Zh extends Language
{
    /**
     * Language locale of the class
     *
     * @var    string
     * @since  4.0.0
     */
    public $language = 'zh';

    /**
     * Spacer between terms
     *
     * @var    string
     * @since  4.0.0
     */
    public $spacer = '';

    /**
     * Method to construct the language object.
     *
     * The parent constructor is deliberately not called.
     *
     * @param   string  $locale  Not used by this constructor.
     *
     * @since   4.0.0
     */
    public function __construct($locale = null)
    {
        // Override parent constructor since we don't need to load an external stemmer
    }

    /**
     * Method to tokenise a text string.
     *
     * @param   string  $input  The input to tokenise.
     *
     * @return  array  An array of term strings.
     *
     * @since   4.0.0
     */
    public function tokenise($input)
    {
        // We first add whitespace around each Chinese character, so that our later code can easily split on this.
        $spaced = preg_replace('#\p{Han}#mui', ' $0 ', $input);

        // With the "u" modifier preg_replace() returns null when the subject is not valid UTF-8.
        // Keep the untouched input in that case instead of tokenising nothing at all.
        if ($spaced === null) {
            $spaced = $input;
        }

        // Now we split up the input into individual terms
        return parent::tokenise($spaced);
    }
}

View File

@ -0,0 +1,125 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer;
use Joomla\CMS\Filter\InputFilter;
use Joomla\CMS\Language\Text;
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
 * Parser base class for the Finder indexer package.
 *
 * Concrete parsers implement process(); parse() feeds it the input in pieces of at most 2 KB.
 *
 * @since  2.5
 */
abstract class Parser
{
    /**
     * Parser support instances container.
     *
     * @var    Parser[]
     * @since  4.0.0
     */
    protected static $instances = [];

    /**
     * Method to get a parser, creating it if necessary.
     *
     * Only one instance is created per format; later calls return the cached instance.
     *
     * @param   string  $format  The type of parser to load.
     *
     * @return  Parser  A Parser instance.
     *
     * @since   2.5
     * @throws  \Exception on invalid parser.
     */
    public static function getInstance($format)
    {
        $format = InputFilter::getInstance()->clean($format, 'cmd');

        // Only create one parser for each format.
        if (isset(self::$instances[$format])) {
            return self::$instances[$format];
        }

        // Setup the adapter for the parser.
        $class = '\\Joomla\\Component\\Finder\\Administrator\\Indexer\\Parser\\' . ucfirst($format);

        // Check if a parser exists for the format.
        if (class_exists($class)) {
            self::$instances[$format] = new $class();

            return self::$instances[$format];
        }

        // Throw invalid format exception.
        throw new \Exception(Text::sprintf('COM_FINDER_INDEXER_INVALID_PARSER', $format));
    }

    /**
     * Method to parse input and extract the plain text. Because this method is
     * called from both inside and outside the indexer, it needs to be able to
     * batch out its parsing functionality to deal with the inefficiencies of
     * regular expressions. Input longer than 2KB is processed piece by piece,
     * each piece being at most 2KB long.
     *
     * A piece ends right after the last space it contains. When a piece holds no
     * space at all, the cut is moved back by up to three bytes so that it never
     * falls inside a multi-byte UTF-8 character.
     *
     * @param   string  $input  The input to parse.
     *
     * @return  string  The plain text input.
     *
     * @since   2.5
     */
    public function parse($input)
    {
        $chunk = 2048;
        $end   = \strlen($input);

        // If the input is less than 2KB we can parse it in one go.
        if ($end <= $chunk) {
            return $this->process($input);
        }

        // Input is longer than 2Kb so parse it in chunks of 2Kb or less.
        $start  = 0;
        $return = '';

        while ($start < $end) {
            // Setup the string.
            $string = substr($input, $start, $chunk);
            $length = \strlen($string);

            // Only look for a clean cut if we aren't at the end.
            if (($start + $chunk) < $end) {
                $ls = strrpos($string, ' ');

                if ($ls !== false) {
                    // Truncate to the last space character (but include it in the string).
                    $length = $ls + 1;
                } else {
                    // No space to cut at. If the byte following the cut is a UTF-8 continuation byte (10xxxxxx),
                    // the cut would split a character: back off until the next piece starts on a lead byte.
                    // A character has at most three continuation bytes, so three steps are always enough.
                    while ($length > $chunk - 3 && (\ord($input[$start + $length]) & 0xC0) === 0x80) {
                        $length--;
                    }
                }

                $string = substr($string, 0, $length);
            }

            // Adjust the start position for the next iteration.
            $start += $length;

            // Parse the chunk.
            $return .= $this->process($string);
        }

        return $return;
    }

    /**
     * Method to process input and extract the plain text.
     *
     * @param   string  $input  The input to process.
     *
     * @return  string  The plain text input.
     *
     * @since   2.5
     */
    abstract protected function process($input);
}

View File

@ -0,0 +1,158 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer\Parser;
use Joomla\Component\Finder\Administrator\Indexer\Parser;
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
 * HTML Parser class for the Finder indexer package.
 *
 * @since  2.5
 */
class Html extends Parser
{
    /**
     * Method to parse HTML input and extract the plain text.
     *
     * The markup is removed here in one pass over the whole input (head, script, style and noscript blocks first,
     * then all remaining tags); the resulting text is handed to the parent parse() method, which passes it to
     * process().
     *
     * @param   string  $input  The input to parse.
     *
     * @return  string  The plain text input.
     *
     * @since   2.5
     */
    public function parse($input)
    {
        // Strip invalid UTF-8 characters.
        $oldSetting = \ini_get('mbstring.substitute_character');
        ini_set('mbstring.substitute_character', 'none');
        $input = mb_convert_encoding($input, 'UTF-8', 'UTF-8');
        ini_set('mbstring.substitute_character', $oldSetting);

        // Remove anything between <head> and </head> tags.  Do this first
        // because there might be <script> or <style> tags nested inside.
        $input = $this->removeBlocks($input, '<head>', '</head>');

        // Convert <style> and <noscript> tags to <script> tags
        // so we can remove them efficiently.
        $search = [
            '<style', '</style',
            '<noscript', '</noscript',
        ];
        $replace = [
            '<script', '</script',
            '<script', '</script',
        ];

        // Tag names are case-insensitive in HTML and removeBlocks() searches case-insensitively too,
        // so <STYLE> or <NoScript> have to be converted as well.
        $input = str_ireplace($search, $replace, $input);

        // Strip all script blocks.
        $input = $this->removeBlocks($input, '<script', '</script>');

        // Decode HTML entities.
        $input = html_entity_decode($input, ENT_QUOTES, 'UTF-8');

        // Convert entities equivalent to spaces to actual spaces.
        $input = str_replace(['&nbsp;', '&#160;'], ' ', $input);

        // Add a space before both the OPEN and CLOSE tags of BLOCK and LINE BREAKING elements,
        // e.g. 'all<h1><em>m</em>obile List</h1>' will become 'all mobile List'
        $input = preg_replace('/(<|<\/)(' .
            'address|article|aside|blockquote|br|canvas|dd|div|dl|dt|' .
            'fieldset|figcaption|figure|footer|form|h1|h2|h3|h4|h5|h6|header|hgroup|hr|li|' .
            'main|nav|noscript|ol|output|p|pre|section|table|tfoot|ul|video' .
            ')\b/i', ' $1$2', $input);

        // Strip HTML tags.
        $input = strip_tags($input);

        return parent::parse($input);
    }

    /**
     * Method to process HTML input and extract the plain text.
     *
     * @param   string  $input  The input to process.
     *
     * @return  string  The plain text input.
     *
     * @since   2.5
     */
    protected function process($input)
    {
        // Replace any amount of white space with a single space.
        return preg_replace('#\s+#u', ' ', $input);
    }

    /**
     * Method to remove blocks of text between a start and an end tag.
     * Each block removed is effectively replaced by a single space.
     * Both tags are searched for case-insensitively.
     *
     * Note: The start tag and the end tag must be different.
     * Note: Blocks must not be nested.
     * Note: This method will function correctly with multi-byte strings.
     *
     * @param   string  $input     String to be processed.
     * @param   string  $startTag  String representing the start tag.
     * @param   string  $endTag    String representing the end tag.
     *
     * @return  string with blocks removed.
     *
     * @since   3.4
     */
    private function removeBlocks($input, $startTag, $endTag)
    {
        $return         = '';
        $offset         = 0;
        $startTagLength = \strlen($startTag);
        $endTagLength   = \strlen($endTag);

        // Find the first start tag.
        $start = stripos($input, $startTag);

        // If no start tags were found, return the string unchanged.
        if ($start === false) {
            return $input;
        }

        // Look for all blocks defined by the start and end tags.
        while ($start !== false) {
            // Accumulate the substring up to the start tag.
            $return .= substr($input, $offset, $start - $offset) . ' ';

            // Look for an end tag corresponding to the start tag.
            $end = stripos($input, $endTag, $start + $startTagLength);

            // If no corresponding end tag, leave the string alone.
            if ($end === false) {
                // Fix the offset so part of the string is not duplicated.
                $offset = $start;
                break;
            }

            // Advance the start position.
            $offset = $end + $endTagLength;

            // Look for the next start tag and loop.
            $start = stripos($input, $startTag, $offset);
        }

        // Add in the final substring after the last end tag.
        $return .= substr($input, $offset);

        return $return;
    }
}

View File

@ -0,0 +1,47 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer\Parser;
use Joomla\Component\Finder\Administrator\Indexer\Parser;
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
 * RTF Parser class for the Finder indexer package.
 *
 * @since  2.5
 */
class Rtf extends Parser
{
    /**
     * Method to process RTF input and extract the plain text.
     *
     * Picture groups are dropped, group braces become spaces, and everything that looks like an RTF control
     * sequence (a backslash followed by a word, or a backslash up to the next semicolon) is replaced by a space.
     *
     * @param   string  $input  The input to process.
     *
     * @return  string  The plain text input.
     *
     * @since   2.5
     */
    protected function process($input)
    {
        // Remove embedded pictures.
        $text = preg_replace('#{\\\pict[^}]*}#mi', '', $input);

        // Group delimiters turn into spaces, an escaped line break into a plain line break.
        $text = strtr(
            $text,
            [
                '{'     => ' ',
                '}'     => ' ',
                "\\\n"  => "\n",
            ]
        );

        // Remove control characters: first the semicolon-terminated sequences, then the remaining control words.
        $controlSequences = [
            '#\\\([^;]+?);#m',
            '#\\\[\'a-zA-Z0-9]+#mi',
        ];

        foreach ($controlSequences as $pattern) {
            $text = preg_replace($pattern, ' ', $text);
        }

        return $text;
    }
}

View File

@ -0,0 +1,39 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer\Parser;
use Joomla\Component\Finder\Administrator\Indexer\Parser;
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
* Text Parser class for the Finder indexer package.
*
* Plain text needs no conversion, so this parser hands the input back unchanged.
*
* @since 2.5
*/
class Txt extends Parser
{
/**
* Method to process Text input and extract the plain text.
*
* @param string $input The input to process.
*
* @return string The plain text input, identical to what was passed in.
*
* @since 2.5
*/
protected function process($input)
{
// Nothing to strip: the input is returned as-is.
return $input;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,582 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer;
use Joomla\CMS\Component\ComponentHelper;
use Joomla\CMS\Tree\ImmutableNodeInterface;
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
* Result class for the Finder indexer package.
*
* This class uses magic __get() and __set() methods to prevent properties
* being added that might confuse the system. All properties not explicitly
* declared will be pushed into the elements array and can be accessed
* explicitly using the getElement() method.
*
* @since 2.5
*/
class Result implements \Serializable
{
/**
* An array of extra result properties.
*
* @var array
* @since 2.5
*/
protected $elements = [];
/**
* This array tells the indexer which properties should be indexed and what
* weights to use for those properties.
*
* @var array
* @since 2.5
*/
protected $instructions = [
Indexer::TITLE_CONTEXT => ['title', 'subtitle', 'id'],
Indexer::TEXT_CONTEXT => ['summary', 'body'],
Indexer::META_CONTEXT => ['meta', 'list_price', 'sale_price'],
Indexer::PATH_CONTEXT => ['path', 'alias'],
Indexer::MISC_CONTEXT => ['comments'],
];
/**
* The indexer will use this data to create taxonomy mapping entries for
* the item so that it can be filtered by type, label, category,
* or whatever.
*
* @var array
* @since 2.5
*/
protected $taxonomy = [];
/**
* The content URL.
*
* @var string
* @since 2.5
*/
public $url;
/**
* The content route.
*
* @var string
* @since 2.5
*/
public $route;
/**
* The content title.
*
* @var string
* @since 2.5
*/
public $title;
/**
* The content description.
*
* @var string
* @since 2.5
*/
public $description;
/**
* The published state of the result.
*
* @var integer
* @since 2.5
*/
public $published;
/**
* The content published state.
*
* @var integer
* @since 2.5
*/
public $state;
/**
* The content access level.
*
* @var integer
* @since 2.5
*/
public $access;
/**
* The content language.
*
* @var string
* @since 2.5
*/
public $language = '*';
/**
* The publishing start date.
*
* @var string
* @since 2.5
*/
public $publish_start_date;
/**
* The publishing end date.
*
* @var string
* @since 2.5
*/
public $publish_end_date;
/**
* The generic start date.
*
* @var string
* @since 2.5
*/
public $start_date;
/**
* The generic end date.
*
* @var string
* @since 2.5
*/
public $end_date;
/**
* The item list price.
*
* @var mixed
* @since 2.5
*/
public $list_price;
/**
* The item sale price.
*
* @var mixed
* @since 2.5
*/
public $sale_price;
/**
* The content type id. This is set by the adapter.
*
* @var integer
* @since 2.5
*/
public $type_id;
/**
* The default language for content.
*
* @var string
* @since 3.0.2
*/
public $defaultLanguage;
/**
 * Constructor.
 *
 * Initialises $defaultLanguage from the 'site' parameter of the com_languages
 * component, using 'en-GB' when that parameter is not set.
 *
 * @since  3.0.3
 */
public function __construct()
{
    $languageParams        = ComponentHelper::getParams('com_languages');
    $this->defaultLanguage = $languageParams->get('site', 'en-GB');
}
/**
* The magic set method is used to push additional values into the elements
* array in order to preserve the cleanliness of the object. It delegates to
* setElement(), so the value ends up in $elements rather than in a new
* dynamic property.
*
* @param string $name The name of the element.
* @param mixed $value The value of the element.
*
* @return void
*
* @since 2.5
*/
public function __set($name, $value)
{
// Keep all element storage in one place.
$this->setElement($name, $value);
}
/**
* The magic get method is used to retrieve additional element values from the elements array.
* It delegates to getElement().
*
* @param string $name The name of the element.
*
* @return mixed The value of the element if set, null otherwise.
*
* @since 2.5
*/
public function __get($name)
{
// Unknown names yield null instead of raising an undefined-property notice.
return $this->getElement($name);
}
/**
* The magic isset method is used to check the state of additional element values in the elements array.
*
* @param string $name The name of the element.
*
* @return boolean True if the element exists and is not null, false otherwise.
*
* @since 2.5
*/
public function __isset($name)
{
// isset() reports false for an element stored with a null value, unlike the
// array_key_exists() test used by getElement().
return isset($this->elements[$name]);
}
/**
* The magic unset method is used to unset additional element values in the elements array.
*
* @param string $name The name of the element.
*
* @return void
*
* @since 2.5
*/
public function __unset($name)
{
// Removing a name that is not present is a no-op.
unset($this->elements[$name]);
}
/**
 * Method to retrieve additional element values from the elements array.
 *
 * @param   string  $name  The name of the element.
 *
 * @return  mixed  The stored value (which may itself be null) or null when no such element exists.
 *
 * @since   2.5
 */
public function getElement($name)
{
    // array_key_exists() is used so that elements stored with a null value are still found.
    return \array_key_exists($name, $this->elements) ? $this->elements[$name] : null;
}
/**
* Method to retrieve all elements.
*
* @return array The elements, keyed by element name.
*
* @since 3.8.3
*/
public function getElements()
{
// Arrays are returned by value, so callers cannot modify the internal storage through the result.
return $this->elements;
}
/**
* Method to set additional element values in the elements array.
* An existing element with the same name is overwritten.
*
* @param string $name The name of the element.
* @param mixed $value The value of the element.
*
* @return void
*
* @since 2.5
*/
public function setElement($name, $value)
{
$this->elements[$name] = $value;
}
/**
* Method to get all processing instructions.
*
* @return array An array of processing instructions: context => list of property names.
*
* @since 2.5
*/
public function getInstructions()
{
return $this->instructions;
}
/**
 * Method to add a processing instruction for an item property.
 *
 * Nothing happens when the group is unknown or when the property is already
 * registered in that group.
 *
 * @param   string  $group     The group to associate the property with.
 * @param   string  $property  The property to process.
 *
 * @return  void
 *
 * @since   2.5
 */
public function addInstruction($group, $property)
{
    // Instructions can only be added to groups that already exist.
    if (!\array_key_exists($group, $this->instructions)) {
        return;
    }

    // Avoid duplicate entries within a group.
    if (\in_array($property, $this->instructions[$group], true)) {
        return;
    }

    $this->instructions[$group][] = $property;
}
/**
 * Method to remove a processing instruction for an item property.
 *
 * Nothing happens when the group is unknown or the property is not registered
 * in that group. The remaining entries keep their existing array keys.
 *
 * @param   string  $group     The group to associate the property with.
 * @param   string  $property  The property to process.
 *
 * @return  void
 *
 * @since   2.5
 */
public function removeInstruction($group, $property)
{
    // Check if the group exists. We can't remove instructions for unknown groups.
    if (!\array_key_exists($group, $this->instructions)) {
        return;
    }

    // Strict search, matching the strict in_array() test in addInstruction(): a loose
    // comparison could match (and remove) an unrelated entry, e.g. when given true.
    $key = array_search($property, $this->instructions[$group], true);

    // If the property was found, remove it.
    if ($key !== false) {
        unset($this->instructions[$group][$key]);
    }
}
/**
 * Method to get the taxonomy maps for an item.
 *
 * @param   string  $branch  The taxonomy branch to get. [optional]
 *
 * @return  array  The nodes of the requested branch, or the complete taxonomy map
 *                 (branch => nodes) when no branch is given or the branch is not set.
 *
 * @since   2.5
 */
public function getTaxonomy($branch = null)
{
    // Fall back to the full map when no branch was requested or the branch is not set.
    if ($branch === null || !isset($this->taxonomy[$branch])) {
        return $this->taxonomy;
    }

    return $this->taxonomy[$branch];
}
/**
 * Method to add a taxonomy map for an item.
 *
 * Titles that are empty or consist only of whitespace are ignored. The branch
 * name is filtered before it is used as the key in the taxonomy map.
 *
 * @param   string   $branch    The title of the taxonomy branch to add the node to.
 * @param   string   $title     The title of the taxonomy node.
 * @param   integer  $state     The published state of the taxonomy node. [optional]
 * @param   integer  $access    The access level of the taxonomy node. [optional]
 * @param   string   $language  The language of the taxonomy. [optional]
 *
 * @return  void
 *
 * @since   2.5
 */
public function addTaxonomy($branch, $title, $state = 1, $access = 1, $language = '*')
{
    // We can't add taxonomies with empty titles. Compare against the empty string:
    // a plain truthiness test would also reject the valid title "0".
    if (trim((string) $title) === '') {
        return;
    }

    // Filter the input: every run of characters outside the allowed set becomes one space.
    // Inside the character class "+-." is a range that covers exactly "+", ",", "-" and ".".
    $branch = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,_]+#mui', ' ', $branch);

    // Create the taxonomy node.
    $node           = new \stdClass();
    $node->title    = $title;
    $node->state    = (int) $state;
    $node->access   = (int) $access;
    $node->language = $language;
    $node->nested   = false;

    // Add the node to the taxonomy branch.
    $this->taxonomy[$branch][] = $node;
}
/**
 * Method to add a nested taxonomy map for an item.
 *
 * Nodes whose title is empty or consists only of whitespace are ignored. The
 * source node is kept on the taxonomy entry in its "node" property.
 *
 * @param   string                  $branch       The title of the taxonomy branch to add the node to.
 * @param   ImmutableNodeInterface  $contentNode  The node object.
 * @param   integer                 $state        The published state of the taxonomy node. [optional]
 * @param   integer                 $access       The access level of the taxonomy node. [optional]
 * @param   string                  $language     The language of the taxonomy. [optional]
 *
 * @return  void
 *
 * @since   4.0.0
 */
public function addNestedTaxonomy($branch, ImmutableNodeInterface $contentNode, $state = 1, $access = 1, $language = '*')
{
    // We can't add taxonomies with empty titles. Compare against the empty string:
    // a plain truthiness test would also reject the valid title "0".
    if (trim((string) $contentNode->title) === '') {
        return;
    }

    // Filter the input: every run of characters outside the allowed set becomes one space.
    // Inside the character class "+-." is a range that covers exactly "+", ",", "-" and ".".
    $branch = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,_]+#mui', ' ', $branch);

    // Create the taxonomy node.
    $node           = new \stdClass();
    $node->title    = $contentNode->title;
    $node->state    = (int) $state;
    $node->access   = (int) $access;
    $node->language = $language;
    $node->nested   = true;
    $node->node     = $contentNode;

    // Add the node to the taxonomy branch.
    $this->taxonomy[$branch][] = $node;
}
/**
 * Method to set the item language.
 *
 * When the language compares equal to an empty string it is replaced with
 * $defaultLanguage; any other value is left untouched.
 *
 * @return  void
 *
 * @since   3.0
 */
public function setLanguage()
{
    // A language is already present: nothing to do.
    if ($this->language != '') {
        return;
    }

    $this->language = $this->defaultLanguage;
}
/**
* Helper function to serialise the data of a Result object.
*
* Part of the \Serializable interface this class implements; the data itself
* comes from __serialize().
*
* @return string The serialised data
*
* @since 4.0.0
*/
public function serialize()
{
// Reuse the array built by __serialize() so both serialisation paths share one format.
return serialize($this->__serialize());
}
/**
* Helper function to unserialise the data for this object.
*
* Part of the \Serializable interface this class implements; the decoded array
* is handed to __unserialize().
*
* @param string $serialized Serialised data to unserialise
*
* @return void
*
* @since 4.0.0
*/
public function unserialize($serialized): void
{
// NOTE(review): native unserialize() is applied to $serialized as-is; presumably the
// data only ever comes from serialize() above - confirm it is never user-controlled.
$this->__unserialize(unserialize($serialized));
}
/**
 * Magic method used for serializing.
 *
 * Returns the object state as a positional list. Nested taxonomy nodes are
 * cloned and stripped of their "node" reference before being included, so the
 * source node objects are not serialised.
 *
 * @return  array  The property values in the order expected by __unserialize().
 *
 * @since   4.1.3
 */
public function __serialize(): array
{
    $taxonomy = [];

    foreach ($this->taxonomy as $branch => $nodes) {
        $taxonomy[$branch] = [];

        foreach ($nodes as $node) {
            // Flat nodes can be stored as they are.
            if (!$node->nested) {
                $taxonomy[$branch][] = $node;

                continue;
            }

            // Work on a copy so the live node keeps its reference to the source node.
            $stripped = clone $node;
            unset($stripped->node);
            $taxonomy[$branch][] = $stripped;
        }
    }

    // This order must match EXACTLY the order of the $properties in the self::__unserialize method
    return [
        $this->access,
        $this->defaultLanguage,
        $this->description,
        $this->elements,
        $this->end_date,
        $this->instructions,
        $this->language,
        $this->list_price,
        $this->publish_end_date,
        $this->publish_start_date,
        $this->published,
        $this->route,
        $this->sale_price,
        $this->start_date,
        $this->state,
        $taxonomy,
        $this->title,
        $this->type_id,
        $this->url,
    ];
}
/**
 * Magic method used for unserializing.
 *
 * Restores the properties from the positional list produced by __serialize() and
 * then refreshes the state and access of every taxonomy node from the current
 * taxonomy data. Nodes without a usable id, or whose id is no longer present in
 * the taxonomy, keep their serialised state and access.
 *
 * @param   array  $serialized  The positional property list created by __serialize().
 *
 * @return  void
 *
 * @since   4.1.3
 */
public function __unserialize(array $serialized): void
{
    // This order must match EXACTLY the order of the array in the self::__serialize method
    $properties = [
        'access',
        'defaultLanguage',
        'description',
        'elements',
        'end_date',
        'instructions',
        'language',
        'list_price',
        'publish_end_date',
        'publish_start_date',
        'published',
        'route',
        'sale_price',
        'start_date',
        'state',
        'taxonomy',
        'title',
        'type_id',
        'url',
    ];

    foreach ($properties as $k => $v) {
        $this->$v = $serialized[$k];
    }

    foreach ($this->taxonomy as $nodes) {
        foreach ($nodes as $node) {
            // Taxonomy::getTaxonomy() returns the complete list for an id of 0, so a node
            // without a real id must be skipped instead of being looked up.
            if (empty($node->id)) {
                continue;
            }

            $curTaxonomy = Taxonomy::getTaxonomy($node->id);

            // The lookup yields false when the id is unknown (e.g. the node was removed).
            if (!\is_object($curTaxonomy)) {
                continue;
            }

            $node->state  = $curTaxonomy->state;
            $node->access = $curTaxonomy->access;
        }
    }
}
}

View File

@ -0,0 +1,514 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer;
use Joomla\CMS\Factory;
use Joomla\CMS\Tree\NodeInterface;
use Joomla\Component\Finder\Administrator\Table\MapTable;
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
* Taxonomy base class for the Finder indexer package.
*
* @since 2.5
*/
class Taxonomy
{
/**
* An internal cache of taxonomy data.
*
* @var object[]
* @since 4.0.0
*/
public static $taxonomies = [];
/**
* An internal cache of branch data.
*
* @var object[]
* @since 4.0.0
*/
public static $branches = [];
/**
* An internal cache of taxonomy node data for inserting it.
*
* @var object[]
* @since 2.5
*/
public static $nodes = [];
/**
 * Method to add a branch to the taxonomy tree.
 *
 * A branch is stored as a node with parent id 1 and the language '*'.
 *
 * @param   string   $title   The title of the branch.
 * @param   integer  $state   The published state of the branch. [optional] Not read by this method.
 * @param   integer  $access  The access state of the branch. [optional]
 *
 * @return  integer  The id of the branch.
 *
 * @since   2.5
 * @throws  \RuntimeException on database error.
 */
public static function addBranch($title, $state = 1, $access = 1)
{
    $branchNode = (object) [
        'title'     => $title,
        'access'    => $access,
        'parent_id' => 1,
        'language'  => '*',
    ];

    return self::storeNode($branchNode, 1);
}
/**
 * Method to add a node to the taxonomy tree.
 *
 * Only nodes with a state of 1 are stored; for any other state nothing is
 * written and 0 is returned.
 *
 * @param   string   $branch    The title of the branch to store the node in.
 * @param   string   $title     The title of the node.
 * @param   integer  $state     The published state of the node. [optional]
 * @param   integer  $access    The access state of the node. [optional]
 * @param   string   $language  The language of the node. [optional]
 *
 * @return  integer  The id of the node, or 0 when the state is not 1.
 *
 * @since   2.5
 * @throws  \RuntimeException on database error.
 */
public static function addNode($branch, $title, $state = 1, $access = 1, $language = '*')
{
    if ($state != 1) {
        return 0;
    }

    // Get the branch id, insert it if it does not exist.
    $branchId = static::addBranch($branch);

    $child = (object) [
        'title'     => $title,
        'access'    => $access,
        'parent_id' => $branchId,
        'language'  => $language,
    ];

    return self::storeNode($child, $branchId);
}
/**
 * Method to add a nested node to the taxonomy tree.
 *
 * Ancestors are stored first by recursing through getParent() until a missing
 * parent or a parent titled 'ROOT' is reached; that top-most node is attached
 * directly to the branch.
 *
 * @param   string         $branch    The title of the branch to store the node in.
 * @param   NodeInterface  $node      The source-node of the taxonomy node.
 * @param   integer        $state     The published state of the node. [optional]
 * @param   integer        $access    The access state of the node. [optional]
 * @param   string         $language  The language of the node. [optional]
 * @param   integer        $branchId  ID of a branch if known. [optional]
 *
 * @return  integer  The id of the node, or 0 when the state is not 1 or no parent id could be determined.
 *
 * @since   4.0.0
 */
public static function addNestedNode($branch, NodeInterface $node, $state = 1, $access = 1, $language = '*', $branchId = null)
{
    if ($state != 1) {
        return 0;
    }

    // Get the branch id, insert it if it does not exist.
    $branchId = $branchId ?: static::addBranch($branch);

    $parent = $node->getParent();

    // Values passed on when storing the parent.
    // NOTE(review): these are read from $node rather than from $parent - confirm that
    // the parent is meant to be stored with the child's state, access and language.
    $pstate    = $node->state ?? ($node->published ?? $state);
    $paccess   = $node->access ?? $access;
    $planguage = $node->language ?? $language;

    $hasRealParent = $parent && $parent->title != 'ROOT';

    $parentId = $hasRealParent
        ? self::addNestedNode($branch, $parent, $pstate, $paccess, $planguage, $branchId)
        : $branchId;

    // Without a parent id the node cannot be placed in the tree.
    if (!$parentId) {
        return 0;
    }

    $taxonomyNode = (object) [
        'title'     => $node->title,
        'access'    => $access,
        'parent_id' => $parentId,
        'language'  => $language,
    ];

    return self::storeNode($taxonomyNode, $parentId);
}
/**
* A helper method to store a node in the taxonomy.
*
* The node is looked up in the static cache first, then in the #__finder_taxonomy
* table (by parent id, title and language). When neither yields a row with matching
* access, the node is checked and stored through MapTable and then cached.
*
* @param object $node The node data to include; its title, access and language properties are read.
* @param integer $parentId The parent id of the node to add.
*
* @return integer The id of the cached, found or stored node.
*
* @since 4.0.0
* @throws \RuntimeException when MapTable::check() or MapTable::store() returns false.
*/
protected static function storeNode($node, $parentId)
{
// Check to see if the node is in the cache.
// NOTE(review): the cache key is "<parentId>:<title>" and does not include the language,
// while the database lookup below does filter on language - confirm this is intended.
if (isset(static::$nodes[$parentId . ':' . $node->title])) {
return static::$nodes[$parentId . ':' . $node->title]->id;
}
// Check to see if the node is in the table.
$db = Factory::getDbo();
$query = $db->getQuery(true)
->select('*')
->from($db->quoteName('#__finder_taxonomy'))
->where($db->quoteName('parent_id') . ' = ' . $db->quote($parentId))
->where($db->quoteName('title') . ' = ' . $db->quote($node->title))
->where($db->quoteName('language') . ' = ' . $db->quote($node->language));
$db->setQuery($query);
// Get the result.
$result = $db->loadObject();
// Check if the database matches the input data.
if ((bool) $result && $result->access == $node->access) {
// The data matches, add the item to the cache.
static::$nodes[$parentId . ':' . $node->title] = $result;
return static::$nodes[$parentId . ':' . $node->title]->id;
}
/*
* The database did not match the input. This could be because the
* state has changed or because the node does not exist. Let's figure
* out which case is true and deal with it.
* @todo: use factory?
*/
$nodeTable = new MapTable($db);
if (empty($result)) {
// No existing row: prepare a new node placed as the last child of the parent.
$nodeTable->title = $node->title;
$nodeTable->access = (int) $node->access;
$nodeTable->language = $node->language;
$nodeTable->setLocation((int) $parentId, 'last-child');
} else {
// Existing row with a different access value: prepare it for storing again.
// NOTE(review): access is copied from the existing row ($result), not from $node, so the
// differing $node->access is not written here - confirm whether that is intentional.
$nodeTable->id = (int) $result->id;
$nodeTable->title = $result->title;
$nodeTable->access = (int) $result->access;
$nodeTable->language = $node->language;
$nodeTable->setLocation($result->parent_id, 'last-child');
}
// Check the data.
if (!$nodeTable->check()) {
$error = $nodeTable->getError();
if ($error instanceof \Exception) {
// \Joomla\CMS\Table\NestedTable sets errors of exceptions, so in this case we can pass on more
// information
throw new \RuntimeException(
$error->getMessage(),
$error->getCode(),
$error
);
}
// Standard string returned. Probably from the \Joomla\CMS\Table\Table class
throw new \RuntimeException($error, 500);
}
// Store the data.
if (!$nodeTable->store()) {
$error = $nodeTable->getError();
if ($error instanceof \Exception) {
// \Joomla\CMS\Table\NestedTable sets errors of exceptions, so in this case we can pass on more
// information
throw new \RuntimeException(
$error->getMessage(),
$error->getCode(),
$error
);
}
// Standard string returned. Probably from the \Joomla\CMS\Table\Table class
throw new \RuntimeException($error, 500);
}
// Recompute the stored path for the node that was just written.
$nodeTable->rebuildPath($nodeTable->id);
// Add the node to the cache as a plain object built from the table's properties.
static::$nodes[$parentId . ':' . $nodeTable->title] = (object) $nodeTable->getProperties();
return static::$nodes[$parentId . ':' . $nodeTable->title]->id;
}
/**
 * Method to add a map entry between a link and a taxonomy node.
 *
 * The map row is only inserted when no row for this link/node pair exists yet.
 *
 * @param   integer  $linkId  The link to map to.
 * @param   integer  $nodeId  The node to map to.
 *
 * @return  boolean  True on success.
 *
 * @since   2.5
 * @throws  \RuntimeException on database error.
 */
public static function addMap($linkId, $nodeId)
{
    $db = Factory::getDbo();

    // Look for an existing map between the link and the node.
    $query = $db->getQuery(true)
        ->select($db->quoteName('link_id'))
        ->from($db->quoteName('#__finder_taxonomy_map'))
        ->where($db->quoteName('link_id') . ' = ' . (int) $linkId)
        ->where($db->quoteName('node_id') . ' = ' . (int) $nodeId);
    $db->setQuery($query);

    // loadResult() runs the query itself; a separate execute() call beforehand would
    // only send the same SELECT to the database a second time.
    $id = (int) $db->loadResult();

    // Insert the map when it does not exist yet.
    if (!$id) {
        $map          = new \stdClass();
        $map->link_id = (int) $linkId;
        $map->node_id = (int) $nodeId;
        $db->insertObject('#__finder_taxonomy_map', $map);
    }

    return true;
}
/**
 * Method to get the title of all taxonomy branches.
 *
 * Only branches (parent id 1) with state 1 whose access level is among the
 * current user's authorised view levels are returned.
 *
 * @return  array  An array of branch titles.
 *
 * @since   2.5
 * @throws  \RuntimeException on database error.
 */
public static function getBranchTitles()
{
    $db = Factory::getDbo();

    // The view levels the current user may see, as a comma separated list for the IN clause.
    $viewLevels = implode(',', Factory::getUser()->getAuthorisedViewLevels());

    // Create a query to get the taxonomy branch titles.
    $query = $db->getQuery(true);
    $query->select($db->quoteName('title'));
    $query->from($db->quoteName('#__finder_taxonomy'));
    $query->where($db->quoteName('parent_id') . ' = 1');
    $query->where($db->quoteName('state') . ' = 1');
    $query->where($db->quoteName('access') . ' IN (' . $viewLevels . ')');

    // Get the branch titles.
    $db->setQuery($query);

    return $db->loadColumn();
}
/**
 * Method to find a taxonomy node in a branch.
 *
 * The node title is matched as a prefix (LIKE 'title%'). Both the node and its
 * branch must have state 1 and an access level among the current user's
 * authorised view levels.
 *
 * @param   string  $branch  The branch to search.
 * @param   string  $title   The title of the node.
 *
 * @return  mixed  The first matching taxonomy row as an object, or null on no match.
 *
 * @since   2.5
 * @throws  \RuntimeException on database error.
 */
public static function getNodeByTitle($branch, $title)
{
    $db = Factory::getDbo();

    // Set user variables
    $groups = implode(',', Factory::getUser()->getAuthorisedViewLevels());

    // Escape the title once, including the LIKE wildcards "%" and "_", and tell quote()
    // not to escape again: escaping twice corrupts titles that contain quotes or
    // backslashes, and unescaped wildcards would match unintended titles.
    $titlePrefix = $db->quote($db->escape($title, true) . '%', false);

    // Create a query to get the node.
    $query = $db->getQuery(true)
        ->select('t1.*')
        ->from($db->quoteName('#__finder_taxonomy') . ' AS t1')
        ->join('INNER', $db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.id = t1.parent_id')
        ->where('t1.access IN (' . $groups . ')')
        ->where('t1.state = 1')
        ->where('t1.title LIKE ' . $titlePrefix)
        ->where('t2.access IN (' . $groups . ')')
        ->where('t2.state = 1')
        ->where('t2.title = ' . $db->quote($branch));

    // Get the node.
    $query->setLimit(1);
    $db->setQuery($query);

    return $db->loadObject();
}
/**
 * Method to remove map entries for a link.
 *
 * Deletes every row of #__finder_taxonomy_map that belongs to the given link.
 *
 * @param   integer  $linkId  The link to remove.
 *
 * @return  boolean  True on success.
 *
 * @since   2.5
 * @throws  \RuntimeException on database error.
 */
public static function removeMaps($linkId)
{
    $db = Factory::getDbo();

    // Build the delete statement for all maps of this link.
    $deleteQuery = $db->getQuery(true);
    $deleteQuery->delete($db->quoteName('#__finder_taxonomy_map'));
    $deleteQuery->where($db->quoteName('link_id') . ' = ' . (int) $linkId);

    $db->setQuery($deleteQuery);
    $db->execute();

    return true;
}
/**
 * Method to remove orphaned taxonomy maps.
 *
 * A map is orphaned when its link_id does not occur in #__finder_links.
 *
 * @return  integer  The number of deleted rows.
 *
 * @since   4.2.0
 * @throws  \RuntimeException on database error.
 */
public static function removeOrphanMaps()
{
    $db = Factory::getDbo();

    // Sub query listing every link id that still exists.
    $existingLinks = $db->getQuery(true)
        ->select($db->quoteName('link_id'))
        ->from($db->quoteName('#__finder_links'));

    // Delete all maps that point to a link outside that list.
    $deleteQuery = $db->getQuery(true)
        ->delete($db->quoteName('#__finder_taxonomy_map'))
        ->where($db->quoteName('link_id') . ' NOT IN (' . $existingLinks . ')');

    $db->setQuery($deleteQuery);
    $db->execute();

    return $db->getAffectedRows();
}
/**
 * Method to remove orphaned taxonomy nodes and branches.
 *
 * A node is orphaned when it is a leaf (lft + 1 = rgt) below a branch
 * (parent_id > 1) and no map row refers to it. Deleting leaves can turn their
 * parents into orphaned leaves, so the selection is repeated until a pass
 * finds nothing or deletes nothing.
 *
 * @return  integer  The number of deleted rows.
 *
 * @since   2.5
 * @throws  \RuntimeException on database error.
 */
public static function removeOrphanNodes()
{
    // Delete all orphaned nodes.
    $affectedRows = 0;
    $db           = Factory::getDbo();
    $nodeTable    = new MapTable($db);
    $query        = $db->getQuery(true);

    $query->select($db->quoteName('t.id'))
        ->from($db->quoteName('#__finder_taxonomy', 't'))
        ->join('LEFT', $db->quoteName('#__finder_taxonomy_map', 'm') . ' ON ' . $db->quoteName('m.node_id') . '=' . $db->quoteName('t.id'))
        ->where($db->quoteName('t.parent_id') . ' > 1 ')
        ->where('t.lft + 1 = t.rgt')
        ->where($db->quoteName('m.link_id') . ' IS NULL');

    do {
        $db->setQuery($query);
        $nodes         = $db->loadColumn();
        $deletedInPass = 0;

        foreach ($nodes as $node) {
            // Only count nodes that were really removed.
            if ($nodeTable->delete($node)) {
                $deletedInPass++;
            }
        }

        $affectedRows += $deletedInPass;

        // Stop when a pass removed nothing: otherwise nodes that cannot be deleted
        // would be selected again and again and the loop would never end.
    } while ($nodes && $deletedInPass > 0);

    return $affectedRows;
}
/**
 * Get a taxonomy based on its id or all taxonomies.
 *
 * The taxonomy rows are loaded from #__finder_taxonomy (ordered by lft, keyed
 * by id) while the static cache is empty and are served from the cache afterwards.
 *
 * @param   integer  $id  Id of the taxonomy
 *
 * @return  object|object[]|false  A taxonomy object, an array of all taxonomies when
 *                                 $id is 0, or false when the id is unknown.
 *
 * @since   4.0.0
 */
public static function getTaxonomy($id = 0)
{
    // Fill the cache on first use.
    if (\count(self::$taxonomies) === 0) {
        $db    = Factory::getDbo();
        $query = $db->getQuery(true);

        $query->select(['id','parent_id','lft','rgt','level','path','title','alias','state','access','language'])
            ->from($db->quoteName('#__finder_taxonomy'))
            ->order($db->quoteName('lft'));

        $db->setQuery($query);
        self::$taxonomies = $db->loadObjectList('id');
    }

    // No id requested: hand back the complete list.
    if ($id == 0) {
        return self::$taxonomies;
    }

    return self::$taxonomies[$id] ?? false;
}
/**
 * Get a taxonomy branch object based on its title or all branches.
 *
 * Branches are the taxonomy entries with level 1. They are collected from
 * getTaxonomy() while the static branch cache is empty and are keyed by title.
 *
 * @param   string  $title  Title of the branch
 *
 * @return  object|object[]|false  The object with the branch data, an array of all branches
 *                                 when $title is empty, or false when the title is unknown.
 *
 * @since   4.0.0
 */
public static function getBranch($title = '')
{
    // Fill the branch cache on first use.
    if (\count(self::$branches) === 0) {
        foreach (self::getTaxonomy() as $taxonomy) {
            if ($taxonomy->level != 1) {
                continue;
            }

            self::$branches[$taxonomy->title] = $taxonomy;
        }
    }

    // No title requested: hand back all branches.
    if ($title == '') {
        return self::$branches;
    }

    return self::$branches[$title] ?? false;
}
}

View File

@ -0,0 +1,186 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer;
use Joomla\String\StringHelper;
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
* Token class for the Finder indexer package.
*
* @since 2.5
*/
class Token
{
/**
* This is the term that will be referenced in the terms table and the
* mapping tables.
*
* @var string
* @since 2.5
*/
public $term;
/**
* The stem is used to match the root term and produce more potential
* matches when searching the index.
*
* @var string
* @since 2.5
*/
public $stem;
/**
* If the token is numeric, it is likely to be short and uncommon so the
* weight is adjusted to compensate for that situation.
*
* @var boolean
* @since 2.5
*/
public $numeric;
/**
* If the token is a common term, the weight is adjusted to compensate for
* the higher frequency of the term in relation to other terms.
*
* @var boolean
* @since 2.5
*/
public $common;
/**
* Flag for phrase tokens.
*
* @var boolean
* @since 2.5
*/
public $phrase;
/**
* The length is used to calculate the weight of the token.
*
* @var integer
* @since 2.5
*/
public $length;
/**
* The weight is calculated based on token size and whether the token is
* considered a common term.
*
* @var float
* @since 2.5
*/
public $weight;
/**
* The simple language identifier for the token.
*
* @var string
* @since 2.5
*/
public $language;
/**
* The container for matches.
*
* @var array
* @since 3.8.12
*/
public $matches = [];
/**
* Is derived token (from individual words)
*
* @var boolean
* @since 3.8.12
*/
public $derived;
/**
* The suggested term
*
* @var string
* @since 3.8.12
*/
public $suggestion;
/**
* The token required flag
*
* @var boolean
* @since 4.3.0
*/
public $required;
/**
 * Method to construct the token object.
 *
 * A string term produces a single-word token; an array of words produces a
 * phrase token whose term and stem are the words (respectively their stems)
 * joined with the spacer.
 *
 * @param   mixed   $term    The term as a string for words or an array for phrases.
 * @param   string  $lang    The simple language identifier.
 * @param   string  $spacer  The space separator for phrases. [optional]
 *
 * @since   2.5
 */
public function __construct($term, $lang, $spacer = ' ')
{
    // An empty language identifier means "all languages".
    $this->language = $lang ?: '*';

    // Tokens can be a single word or an array of words representing a phrase.
    if (!\is_array($term)) {
        // Populate the token instance for a single word.
        $this->term    = $term;
        $this->stem    = Helper::stem($this->term, $lang);
        $this->numeric = (is_numeric($this->term) || (bool) preg_match('#^[0-9,.\-\+]+$#', $this->term));
        $this->common  = $this->numeric ? false : Helper::isCommon($this->term, $lang);
        $this->phrase  = false;
        $this->length  = StringHelper::strlen($this->term);

        /*
         * Calculate the weight of the token.
         *
         * 1. Length of the token up to 15 and divide by 15.
         * 2. If common term, divide weight by 8.
         * 3. If numeric, multiply weight by 1.5.
         * 4. Round weight to 4 decimal points.
         */
        $weight = min($this->length, 15) / 15;

        if ($this->common === true) {
            $weight = $weight / 8;
        }

        if ($this->numeric === true) {
            $weight = $weight * 1.5;
        }

        $this->weight = round($weight, 4);

        return;
    }

    // Populate the token instance for a phrase; every word is stemmed with the same language.
    $langs         = array_fill(0, \count($term), $lang);
    $this->term    = implode($spacer, $term);
    $this->stem    = implode($spacer, array_map([Helper::class, 'stem'], $term, $langs));
    $this->numeric = false;
    $this->common  = false;
    $this->phrase  = true;
    $this->length  = StringHelper::strlen($this->term);

    /*
     * Calculate the weight of the token.
     *
     * 1. Length of the token up to 30 and divide by 30, add 1.
     * 2. Round weight to 4 decimal points.
     */
    $this->weight = round((min($this->length, 30) / 30) + 1, 4);
}
}