Files
conservatorio-tomadini/plugins/system/nrframework/NRFramework/URLHelper.php
2024-12-31 11:07:09 +01:00

226 lines
6.9 KiB
PHP

<?php
/**
* @author Tassos Marinos <info@tassos.gr>
* @link https://www.tassos.gr
* @copyright Copyright © 2024 Tassos All Rights Reserved
* @license GNU GPLv3 <http://www.gnu.org/licenses/gpl.html> or later
*/
namespace NRFramework;
use NRFramework\URL;
defined('_JEXEC') or die('Restricted access');
class URLHelper
{
/**
* Searches the given HTML for all external links and appends the affiliate paramter aff=id to every link based on an affiliate list.
*
* @param string $text The html to search for external links
* @param array $affiliates A key value array: domain name => affiliate parameter
*
* @return string
*/
public static function replaceAffiliateLinks($text, $affiliates, $factory = null)
{
if (!class_exists('DOMDocument') || empty($text))
{
return $text;
}
$factory = $factory ? $factory : new \NRFramework\Factory();
libxml_use_internal_errors(true);
$dom = new \DOMDocument;
$dom->encoding = 'UTF-8';
$dom->loadHTML($text);
$links = $dom->getElementsByTagName('a');
foreach ($links as $link)
{
$linkHref = $link->getAttribute('href');
if (empty($linkHref))
{
continue;
}
$url = new URL($linkHref, $factory);
if ($url->isInternal())
{
continue;
}
$domain = $url->getDomainName();
if (!array_key_exists($domain, $affiliates))
{
continue;
}
$urlInstance = $url->getInstance();
$urlQuery = $urlInstance->getQuery();
$affQuery = $affiliates[$domain];
// If both queries are the same, skip the link tag
if ($urlQuery === $affQuery)
{
continue;
}
if (empty($urlQuery))
{
$urlInstance->setQuery($affQuery);
} else
{
parse_str($urlQuery, $params);
parse_str($affQuery, $params_);
$params_new = array_merge($params, $params_);
$urlInstance->setQuery(http_build_query($params_new));
}
$newURL = $urlInstance->toString();
if ($newURL === $linkHref)
{
continue;
}
$link->setAttribute('href', $newURL);
}
return $dom->saveHtml();
}
/**
* Convert all <img> and <a> tags with relative paths to absolute URLs
*
* @param string $text The text/HTML to search for relative paths
* @param object $factory The framework's factory
* @param object $fix_links Should we parse links?
* @param object $fix_images Should we parse images?
*
* @return void The converted HTML string
*/
public static function relativePathsToAbsoluteURLs($text, $factory = null, $fix_links = true, $fix_images = true)
{
// Make sure DOMDocument is installed
if (!class_exists('DOMDocument'))
{
return $text;
}
// Quick check the given text has some links or images
$hasImages = $fix_images && strpos($text, '<img') !== false;
$hasLinks = $fix_links && strpos($text, '<a') !== false;
if (empty($text) || (!$hasImages && !$hasLinks))
{
return $text;
}
$factory = $factory ? $factory : new \NRFramework\Factory();
$replacements = 0;
try
{
libxml_use_internal_errors(true);
$dom = new \DOMDocument;
$dom->encoding = 'UTF-8';
// Handle non-latin characters to UTF8
$text_ = iconv('UTF-8', 'UTF-8', $text);
$text_ = mb_encode_numericentity($text_, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
// Load HTML without adding a doctype.
// Do not ever try to remove <html><body> tags with LIBXML_HTML_NOIMPLIED constant as it's rather unstable.
// https://stackoverflow.com/questions/4879946/how-to-savehtml-of-domdocument-without-html-wrapper/44866403#44866403
// LIBXML_HTML_NODEFDTD requires Libxml >= 2.7.8 - https://www.php.net/manual/en/libxml.constants.php
$dom->loadHTML($text_, LIBXML_HTML_NODEFDTD);
// Replace links
if ($fix_links)
{
$links = $dom->getElementsByTagName('a');
foreach ($links as $link)
{
$resource = $link->getAttribute('href');
if (empty($resource) || mb_substr($resource, 0, 1) == '#')
{
continue;
}
$url = new URL($resource, $factory);
if (!$url->isInternal())
{
continue;
}
$newURL = $url->toAbsolute();
$link->setAttribute('href', $newURL);
$replacements++;
}
}
// Replace images
if ($fix_images)
{
$images = $dom->getElementsByTagName('img');
foreach ($images as $image)
{
$resource = $image->getAttribute('src');
if (empty($resource))
{
continue;
}
$url = new URL($resource, $factory);
if (!$url->isInternal())
{
continue;
}
$newURL = $url->toAbsolute();
$image->setAttribute('src', $newURL);
$replacements++;
}
}
// If we don't have any replacements took place, proceed no further and return the original text.
if ($replacements == 0)
{
return $text;
}
$html = trim($dom->saveHTML($dom->documentElement));
// Make sure no <body> or <html> tags are added in the text
// In case the final string starts with <html><body>, we assume the elements are added by DOMDocument incorectly and we remove them.
// In case the final string starts with <html lang="en-gb" dir="ltr"><head>..., we assume the elements are included in the original text and we must leave them.
if (strpos($html, '<html><body>') !== false)
{
$html = str_replace(['<html><body>', '</body></html>'], '', $html);
}
return $html;
} catch (\Throwable $th)
{
return $text;
}
}
}