226 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			226 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php 
 | |
| 
 | |
| /**
 | |
|  * @author          Tassos Marinos <info@tassos.gr>
 | |
|  * @link            https://www.tassos.gr
 | |
|  * @copyright       Copyright © 2024 Tassos All Rights Reserved
 | |
|  * @license         GNU GPLv3 <http://www.gnu.org/licenses/gpl.html> or later
 | |
| */
 | |
| 
 | |
| namespace NRFramework;
 | |
| 
 | |
| use NRFramework\URL;
 | |
| 
 | |
| defined('_JEXEC') or die('Restricted access');
 | |
| 
 | |
| class URLHelper
 | |
| {
 | |
|     /**
 | |
|      * Searches the given HTML for all external links and appends the affiliate paramter aff=id to every link based on an affiliate list.
 | |
|      *
 | |
|      * @param   string  $text           The html to search for external links
 | |
|      * @param   array   $affiliates     A key value array: domain name => affiliate parameter
 | |
|      *
 | |
|      * @return  string
 | |
|      */
 | |
|     public static function replaceAffiliateLinks($text, $affiliates, $factory = null)
 | |
|     {
 | |
|         if (!class_exists('DOMDocument') || empty($text))
 | |
| 		{
 | |
|             return $text;
 | |
|         }
 | |
| 
 | |
|         $factory = $factory ? $factory : new \NRFramework\Factory();
 | |
| 
 | |
| 		libxml_use_internal_errors(true);
 | |
|         $dom = new \DOMDocument;
 | |
|         $dom->encoding = 'UTF-8';
 | |
|         $dom->loadHTML($text);
 | |
| 
 | |
|         $links = $dom->getElementsByTagName('a');
 | |
| 
 | |
|         foreach ($links as $link)
 | |
|         {
 | |
|             $linkHref = $link->getAttribute('href');
 | |
| 
 | |
|             if (empty($linkHref))
 | |
|             {
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             $url = new URL($linkHref, $factory);
 | |
| 
 | |
|             if ($url->isInternal())
 | |
|             {
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             $domain = $url->getDomainName();
 | |
| 
 | |
|             if (!array_key_exists($domain, $affiliates))
 | |
|             {
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             $urlInstance = $url->getInstance();
 | |
|             $urlQuery = $urlInstance->getQuery();
 | |
|             $affQuery = $affiliates[$domain];
 | |
| 
 | |
|             // If both queries are the same, skip the link tag
 | |
|             if ($urlQuery === $affQuery)
 | |
|             {
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             if (empty($urlQuery))
 | |
|             {
 | |
|                 $urlInstance->setQuery($affQuery);
 | |
|             } else 
 | |
|             {
 | |
|                 parse_str($urlQuery, $params);
 | |
|                 parse_str($affQuery, $params_);
 | |
|                 $params_new = array_merge($params, $params_);
 | |
|                 $urlInstance->setQuery(http_build_query($params_new));
 | |
|             }
 | |
| 
 | |
|             $newURL = $urlInstance->toString();
 | |
| 
 | |
|             if ($newURL === $linkHref)
 | |
|             {
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             $link->setAttribute('href', $newURL);
 | |
|         }
 | |
| 
 | |
|         return $dom->saveHtml();
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Convert all <img> and <a> tags with relative paths to absolute URLs
 | |
|      *
 | |
|      * @param  string   $text          The text/HTML to search for relative paths
 | |
|      * @param  object   $factory       The framework's factory
 | |
|      * @param  object   $fix_links     Should we parse links?
 | |
|      * @param  object   $fix_images    Should we parse images?
 | |
|      *
 | |
|      * @return void     The converted HTML string
 | |
|      */
 | |
|     public static function relativePathsToAbsoluteURLs($text, $factory = null, $fix_links = true, $fix_images = true)
 | |
|     {
 | |
|         // Make sure DOMDocument is installed
 | |
|         if (!class_exists('DOMDocument'))
 | |
| 		{
 | |
|             return $text;
 | |
|         }
 | |
| 
 | |
|         // Quick check the given text has some links or images
 | |
|         $hasImages = $fix_images && strpos($text, '<img') !== false;
 | |
|         $hasLinks  = $fix_links && strpos($text, '<a') !== false;
 | |
| 
 | |
|         if (empty($text) || (!$hasImages && !$hasLinks))
 | |
|         {
 | |
|             return $text;
 | |
|         }
 | |
| 
 | |
|         $factory = $factory ? $factory : new \NRFramework\Factory();
 | |
|         $replacements = 0;
 | |
| 
 | |
|         try
 | |
|         {
 | |
|             libxml_use_internal_errors(true);
 | |
|             $dom = new \DOMDocument;
 | |
|             $dom->encoding = 'UTF-8';
 | |
| 
 | |
|             // Handle non-latin characters to UTF8
 | |
|             $text_ = iconv('UTF-8', 'UTF-8', $text);
 | |
|             $text_ = mb_encode_numericentity($text_, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
 | |
| 
 | |
|             // Load HTML without adding a doctype.
 | |
|             // Do not ever try to remove <html><body> tags with LIBXML_HTML_NOIMPLIED constant as it's rather unstable.
 | |
|             // https://stackoverflow.com/questions/4879946/how-to-savehtml-of-domdocument-without-html-wrapper/44866403#44866403
 | |
|             // LIBXML_HTML_NODEFDTD requires Libxml >= 2.7.8 - https://www.php.net/manual/en/libxml.constants.php
 | |
|             $dom->loadHTML($text_, LIBXML_HTML_NODEFDTD);
 | |
|     
 | |
|             // Replace links
 | |
|             if ($fix_links)
 | |
|             {
 | |
|                 $links = $dom->getElementsByTagName('a');
 | |
|         
 | |
|                 foreach ($links as $link)
 | |
|                 {
 | |
|                     $resource = $link->getAttribute('href');
 | |
|         
 | |
|                     if (empty($resource) || mb_substr($resource, 0, 1) == '#')
 | |
|                     {
 | |
|                         continue;
 | |
|                     }
 | |
|         
 | |
|                     $url = new URL($resource, $factory);
 | |
|         
 | |
|                     if (!$url->isInternal())
 | |
|                     {
 | |
|                         continue;
 | |
|                     }
 | |
|         
 | |
|                     $newURL = $url->toAbsolute();
 | |
|         
 | |
|                     $link->setAttribute('href', $newURL);
 | |
| 
 | |
|                     $replacements++;
 | |
|                 }
 | |
|             }
 | |
|     
 | |
|             // Replace images
 | |
|             if ($fix_images)
 | |
|             {
 | |
|                 $images = $dom->getElementsByTagName('img');
 | |
| 
 | |
|                 foreach ($images as $image)
 | |
|                 {
 | |
|                     $resource = $image->getAttribute('src');
 | |
| 
 | |
|                     if (empty($resource))
 | |
|                     {
 | |
|                         continue;
 | |
|                     }
 | |
|         
 | |
|                     $url = new URL($resource, $factory);
 | |
|         
 | |
|                     if (!$url->isInternal())
 | |
|                     {
 | |
|                         continue;
 | |
|                     }
 | |
|         
 | |
|                     $newURL = $url->toAbsolute();
 | |
|         
 | |
|                     $image->setAttribute('src', $newURL);
 | |
| 
 | |
|                     $replacements++;
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             // If we don't have any replacements took place, proceed no further and return the original text.
 | |
|             if ($replacements == 0)
 | |
|             {
 | |
|                 return $text;
 | |
|             }
 | |
| 
 | |
|             $html = trim($dom->saveHTML($dom->documentElement));
 | |
| 
 | |
|             // Make sure no <body> or <html> tags are added in the text
 | |
|             // In case the final string starts with <html><body>, we assume the elements are added by DOMDocument incorectly and we remove them.
 | |
|             // In case the final string starts with <html lang="en-gb" dir="ltr"><head>..., we assume the elements are included in the original text and we must leave them.
 | |
|             if (strpos($html, '<html><body>') !== false)
 | |
|             {
 | |
|                 $html = str_replace(['<html><body>', '</body></html>'], '', $html);
 | |
|             }
 | |
| 
 | |
|             return $html;
 | |
| 
 | |
|         } catch (\Throwable $th)
 | |
|         {
 | |
|             return $text;
 | |
|         }
 | |
|     }
 | |
| } |