lib: Add API documentation

This commit is contained in:
logmanoriginal 2018-11-16 21:48:59 +01:00
parent b29ba5b973
commit c4550be812
15 changed files with 1212 additions and 91 deletions

View file

@ -1,6 +1,48 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* Authentication module for RSS-Bridge.
*
* This class implements an authentication module for RSS-Bridge, utilizing the
* HTTP authentication capabilities of PHP.
*
* _Notice_: Authentication via HTTP does not prevent users from accessing files
* on your server. If your server supports `.htaccess`, you should globally restrict
* access to files instead.
*
* @link https://php.net/manual/en/features.http-auth.php HTTP authentication with PHP
* @link https://httpd.apache.org/docs/2.4/howto/htaccess.html Apache HTTP Server
* Tutorial: .htaccess files
*
* @todo This class should respond with an error when creating an object from it.
* See {@see Bridge}, {@see Cache} or {@see Format} for reference.
* @todo Configuration parameters should be stored internally instead of accessing
* the configuration class directly.
* @todo Add functions to detect if a user is authenticated or not. This can be
* utilized for limiting access to authorized users only.
*/
class Authentication {
/**
* Requests the user for login credentials if necessary.
*
* Responds to an authentication request or returns the `WWW-Authenticate`
* header if authentication is enabled in the configuration of RSS-Bridge
* (`[authentication] enable = true`).
*
* @return void
*/
public static function showPromptIfNeeded() {
if(Configuration::getConfig('authentication', 'enable') === true) {
@ -13,6 +55,13 @@ class Authentication {
}
/**
* Verifies if an authentication request was received and compares the
* provided username and password to the configuration of RSS-Bridge
* (`[authentication] username` and `[authentication] password`).
*
* @return bool True if authentication succeeded.
*/
public static function verifyPrompt() {
if(isset($_SERVER['PHP_AUTH_USER']) && isset($_SERVER['PHP_AUTH_PW'])) {

View file

@ -1,32 +1,112 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* An abstract class for bridges
*
* This class implements {@see BridgeInterface} with most common functions in
* order to reduce code duplication. Bridges should inherit from this class
* instead of implementing the interface manually.
*
* @todo Move constants to the interface (this is supported by PHP)
* @todo Change visibility of constants to protected
* @todo Return `self` on more functions to allow chaining
* @todo Add specification for PARAMETERS ()
* @todo Add specification for $items
*/
abstract class BridgeAbstract implements BridgeInterface {
/**
* Name of the bridge
*
* Use {@see BridgeAbstract::getName()} to read this parameter
*/
const NAME = 'Unnamed bridge';
const URI = '';
const DESCRIPTION = 'No description provided';
const MAINTAINER = 'No maintainer';
const CACHE_TIMEOUT = 3600;
const PARAMETERS = array();
protected $items = array();
protected $inputs = array();
protected $queriedContext = '';
/**
* Return items stored in the bridge
* @return mixed
* URI to the site the bridge is intended to be used for.
*
* Use {@see BridgeAbstract::getURI()} to read this parameter
*/
const URI = '';
/**
* A brief description of what the bridge can do
*
* Use {@see BridgeAbstract::getDescription()} to read this parameter
*/
const DESCRIPTION = 'No description provided';
/**
* The name of the maintainer. Multiple maintainers can be separated by comma
*
* Use {@see BridgeAbstract::getMaintainer()} to read this parameter
*/
const MAINTAINER = 'No maintainer';
/**
* The default cache timeout for the bridge
*
* Use {@see BridgeAbstract::getCacheTimeout()} to read this parameter
*/
const CACHE_TIMEOUT = 3600;
/**
* Parameters for the bridge
*
* Use {@see BridgeAbstract::getParameters()} to read this parameter
*/
const PARAMETERS = array();
/**
* Holds the list of items collected by the bridge
*
* Items must be collected by {@see BridgeInterface::collectData()}
*
* Use {@see BridgeAbstract::getItems()} to access items.
*
* @var array
*/
protected $items = array();
/**
* Holds the list of input parameters used by the bridge
*
* Do not access this parameter directly!
* Use {@see BridgeAbstract::setInputs()} and {@see BridgeAbstract::getInput()} instead!
*
* @var array
*/
protected $inputs = array();
/**
* Holds the name of the queried context
*
* @var string
*/
protected $queriedContext = '';
/** {@inheritdoc} */
public function getItems(){
// Returns the current contents of $this->items without further processing.
return $this->items;
}
/**
* Sets the input values for a given context. Existing values are
* overwritten.
* Sets the input values for a given context.
*
* @param array $inputs Associative array of inputs
* @param string $context The context name
* @param string $queriedContext The context name
* @return void
*/
protected function setInputs(array $inputs, $queriedContext){
// Import and assign all inputs to their context
@ -103,8 +183,14 @@ abstract class BridgeAbstract implements BridgeInterface {
}
/**
* Defined datas with parameters depending choose bridge
* @param array array with expected bridge paramters
* Set inputs for the bridge
*
* Returns errors and aborts execution if the provided input parameters are
* invalid.
*
* @param array $inputs List of input parameters. Each element in this list must
* relate to an item in {@see BridgeAbstract::PARAMETERS}
* @return void
*/
public function setDatas(array $inputs){
@ -148,7 +234,7 @@ abstract class BridgeAbstract implements BridgeInterface {
* Returns the value for the provided input
*
* @param string $input The input name
* @return mixed Returns the input value or null if the input is not defined
* @return mixed|null The input value or null if the input is not defined
*/
protected function getInput($input){
if(!isset($this->inputs[$this->queriedContext][$input]['value'])) {
@ -157,30 +243,37 @@ abstract class BridgeAbstract implements BridgeInterface {
return $this->inputs[$this->queriedContext][$input]['value'];
}
/** {@inheritdoc} */
public function getDescription(){
// Late static binding (static::): a subclass that redefines DESCRIPTION
// changes the value returned here.
return static::DESCRIPTION;
}
/** {@inheritdoc} */
public function getMaintainer(){
// Late static binding (static::): a subclass that redefines MAINTAINER
// changes the value returned here.
return static::MAINTAINER;
}
/** {@inheritdoc} */
public function getName(){
// Late static binding (static::): a subclass that redefines NAME changes
// the value returned here.
return static::NAME;
}
/** {@inheritdoc} */
public function getIcon(){
// The base implementation provides no icon: it always returns an empty string.
return '';
}
/** {@inheritdoc} */
public function getParameters(){
// Late static binding (static::): a subclass that redefines PARAMETERS
// changes the value returned here.
return static::PARAMETERS;
}
/** {@inheritdoc} */
public function getURI(){
// Late static binding (static::): a subclass that redefines URI changes
// the value returned here.
return static::URI;
}
/** {@inheritdoc} */
public function getCacheTimeout(){
// Late static binding (static::): a subclass that redefines CACHE_TIMEOUT
// changes the value returned here.
return static::CACHE_TIMEOUT;
}

View file

@ -1,6 +1,33 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* A generator class for a single bridge card on the home page of RSS-Bridge.
*
* This class generates the HTML content for a single bridge card for the home
* page of RSS-Bridge.
*
* @todo Return error if a caller creates an object of this class.
* @todo Use self:: instead of BridgeCard:: in this class
*/
final class BridgeCard {
/**
* Build a HTML document string of buttons for each of the provided formats
*
* @param array $formats A list of format names
* @return string The document string
*/
private static function buildFormatButtons($formats) {
$buttons = '';
@ -16,6 +43,13 @@ final class BridgeCard {
return $buttons;
}
/**
* Get the form header for a bridge card
*
* @param string $bridgeName The bridge name
* @param bool $isHttps If disabled, adds a warning to the form
* @return string The form header
*/
private static function getFormHeader($bridgeName, $isHttps = false) {
$form = <<<EOD
<form method="GET" action="?">
@ -31,6 +65,17 @@ This bridge is not fetching its content through a secure connection</div>';
return $form;
}
/**
* Get the form body for a bridge
*
* @param string $bridgeName The bridge name
* @param array $formats A list of supported formats
* @param bool $isActive Indicates if a bridge is enabled or not
* @param bool $isHttps Indicates if a bridge uses HTTPS or not
* @param string $parameterName Sets the bridge context for the current form
* @param array $parameters The bridge parameters
* @return string The form body
*/
private static function getForm($bridgeName,
$formats,
$isActive = false,
@ -88,6 +133,12 @@ This bridge is not fetching its content through a secure connection</div>';
return $form . '</form>' . PHP_EOL;
}
/**
* Get input field attributes
*
* @param array $entry The current entry
* @return string The input field attributes
*/
private static function getInputAttributes($entry) {
$retVal = '';
@ -103,6 +154,14 @@ This bridge is not fetching its content through a secure connection</div>';
return $retVal;
}
/**
* Get text input
*
* @param array $entry The current entry
* @param string $id The field ID
* @param string $name The field name
* @return string The text input field
*/
private static function getTextInput($entry, $id, $name) {
return '<input '
. BridgeCard::getInputAttributes($entry)
@ -118,6 +177,14 @@ This bridge is not fetching its content through a secure connection</div>';
. PHP_EOL;
}
/**
* Get number input
*
* @param array $entry The current entry
* @param string $id The field ID
* @param string $name The field name
* @return string The number input field
*/
private static function getNumberInput($entry, $id, $name) {
return '<input '
. BridgeCard::getInputAttributes($entry)
@ -133,6 +200,14 @@ This bridge is not fetching its content through a secure connection</div>';
. PHP_EOL;
}
/**
* Get list input
*
* @param array $entry The current entry
* @param string $id The field ID
* @param string $name The field name
* @return string The list input field
*/
private static function getListInput($entry, $id, $name) {
$list = '<select '
. BridgeCard::getInputAttributes($entry)
@ -185,6 +260,14 @@ This bridge is not fetching its content through a secure connection</div>';
return $list;
}
/**
* Get checkbox input
*
* @param array $entry The current entry
* @param string $id The field ID
* @param string $name The field name
* @return string The checkbox input field
*/
private static function getCheckboxInput($entry, $id, $name) {
return '<input '
. BridgeCard::getInputAttributes($entry)
@ -198,6 +281,14 @@ This bridge is not fetching its content through a secure connection</div>';
. PHP_EOL;
}
/**
* Gets a single bridge card
*
* @param string $bridgeName The bridge name
* @param array $formats A list of formats
* @param bool $isActive Indicates if the bridge is active or not
* @return string The bridge card
*/
static function displayBridgeCard($bridgeName, $formats, $isActive = true){
$bridge = Bridge::create($bridgeName);

View file

@ -1,4 +1,57 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* The bridge interface
*
* A bridge is a class that is responsible for collecting and transforming data
* from one hosting provider into an internal representation of feed data, that
* can later be transformed into different feed formats (see {@see FormatInterface}).
*
* For this purpose, all bridges need to perform three common operations:
*
* 1. Collect data from a remote site.
* 2. Extract the required contents.
* 3. Add the contents to the internal data structure.
*
* Bridges can optionally specify parameters to customize bridge behavior based
* on user input. For example, a user could specify how many items to return in
* the feed and where to get them.
*
* In order to present a bridge on the home page, and for the purpose of bridge
* specific behaviour, additional information must be provided by the bridge:
*
* * **Name**
* The name of the bridge that can be displayed to users.
*
* * **Description**
* A brief description for the bridge that can be displayed to users.
*
* * **URI**
* A link to the hosting provider.
*
* * **Maintainer**
* The GitHub username of the bridge maintainer
*
* * **Parameters**
* A list of parameters for customization
*
* * **Icon**
* A link to the favicon of the hosting provider
*
* * **Cache timeout**
* The default cache timeout for the bridge.
*/
interface BridgeInterface {
/**

View file

@ -1,6 +1,31 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* A generator class for the home page of RSS-Bridge.
*
* This class generates the HTML content for displaying all bridges on the home
* page of RSS-Bridge.
*
* @todo Return error if a caller creates an object of this class.
*/
final class BridgeList {
/**
* Get the document head
*
* @return string The document head
*/
private static function getHead() {
return <<<EOD
<head>
@ -22,6 +47,15 @@ final class BridgeList {
EOD;
}
/**
* Get the document body for all bridge cards
*
* @param bool $showInactive Inactive bridges are visible on the home page if
* enabled.
* @param int $totalBridges (ref) Returns the total number of bridges.
* @param int $totalActiveBridges (ref) Returns the number of active bridges.
* @return string The document body for all bridge cards.
*/
private static function getBridges($showInactive, &$totalBridges, &$totalActiveBridges) {
$body = '';
@ -54,6 +88,11 @@ EOD;
return $body;
}
/**
* Get the document header
*
* @return string The document header
*/
private static function getHeader() {
$warning = '';
@ -80,6 +119,11 @@ EOD;
EOD;
}
/**
* Get the searchbar
*
* @return string The searchbar
*/
private static function getSearchbar() {
$query = filter_input(INPUT_GET, 'q');
@ -93,6 +137,16 @@ EOD;
EOD;
}
/**
* Get the document footer
*
* @param int $totalBridges The total number of bridges, shown in the footer
* @param int $totalActiveBridges The total number of active bridges, shown
* in the footer.
* @param bool $showInactive Sets the 'Show active'/'Show inactive' text in
* the footer.
* @return string The document footer
*/
private static function getFooter($totalBridges, $totalActiveBridges, $showInactive) {
$version = Configuration::getVersion();
@ -131,6 +185,13 @@ EOD;
EOD;
}
/**
* Create the entire home page
*
* @param bool $showInactive Inactive bridges are displayed on the home page,
* if enabled.
* @return string The home page
*/
static function create($showInactive = true) {
$totalBridges = 0;

View file

@ -1,7 +1,51 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* The cache interface
*
* @todo Add missing function to the interface
* @todo Explain parameters and return values in more detail
* @todo Return self more often (to allow call chaining)
*/
interface CacheInterface {
/**
* Loads data from cache
*
* @return mixed The cache data
*/
public function loadData();
/**
* Stores data to the cache
*
* @param mixed $datas The data to store
* @return self The cache object
*/
public function saveData($datas);
/**
* Returns the timestamp for the current cache file
*
* @return int Timestamp
*/
public function getTime();
/**
* Removes any data that is older than the specified duration from cache
*
* NOTE(review): no return value is documented for this method; confirm
* whether implementations are expected to return nothing or the cache object.
*
* @param int $duration The cache duration in seconds
*/
public function purgeCache($duration);
}

View file

@ -1,10 +1,77 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* Configuration module for RSS-Bridge.
*
* This class implements a configuration module for RSS-Bridge.
*
* @todo Throw an exception if the caller tries to create objects of this class.
* @todo Make this class final.
*/
class Configuration {
/**
* Holds the current release version of RSS-Bridge.
*
* Do not access this property directly!
* Use {@see Configuration::getVersion()} instead.
*
* @var string
*
* @todo Replace this property by a constant.
*/
public static $VERSION = 'dev.2018-11-10';
/**
* Holds the configuration data.
*
* Do not access this property directly!
* Use {@see Configuration::getConfig()} instead.
*
* @var array|null
*
* @todo Change the scope of this property to protected or private
*/
public static $config = null;
/**
* Verifies the current installation of RSS-Bridge and PHP.
*
* Returns an error message and aborts execution if the installation does
* not satisfy the requirements of RSS-Bridge.
*
* **Requirements**
* - PHP 5.6.0 or higher
* - `openssl` extension
* - `libxml` extension
* - `mbstring` extension
* - `simplexml` extension
* - `curl` extension
* - `json` extension
* - The cache folder specified by {@see PATH_CACHE} requires write permission
* - The whitelist file specified by {@see WHITELIST} requires write permission
*
* @link http://php.net/supported-versions.php PHP Supported Versions
* @link http://php.net/manual/en/book.openssl.php OpenSSL
* @link http://php.net/manual/en/book.libxml.php libxml
* @link http://php.net/manual/en/book.mbstring.php Multibyte String (mbstring)
* @link http://php.net/manual/en/book.simplexml.php SimpleXML
* @link http://php.net/manual/en/book.curl.php Client URL Library (curl)
* @link http://php.net/manual/en/book.json.php JavaScript Object Notation (json)
*
* @return void
*/
public static function verifyInstallation() {
// Check PHP version
@ -40,6 +107,33 @@ class Configuration {
}
/**
* Loads the configuration from disk and checks if the parameters are valid.
*
* Returns an error message and aborts execution if the configuration is invalid.
*
* The RSS-Bridge configuration is split into two files:
* - `config.default.ini.php`: The default configuration file that ships with
* every release of RSS-Bridge (do not modify this file!).
* - `config.ini.php`: The local configuration file that can be modified by
* server administrators.
*
* RSS-Bridge will first load `config.default.ini.php` into memory and then
* replace parameters with the contents of `config.ini.php`. That way new
* parameters are automatically initialized with default values and custom
* configurations can be reduced to the minimum set of parameters necessary
* (only the ones that changed).
*
* The configuration files must be placed in the root folder of RSS-Bridge
* (next to `index.php`).
*
* _Notice_: The configuration is stored in {@see Configuration::$config}.
*
* @return void
*
* @todo Use {@see PATH_ROOT} to locate configuration files.
* @todo Add documentation for constants defined by this function.
*/
public static function loadConfiguration() {
if(!file_exists('config.default.ini.php'))
@ -97,6 +191,15 @@ class Configuration {
}
/**
* Returns the value of a parameter identified by category and key.
*
* @param string $category The section name (category).
* @param string $key The property name (key).
* @return mixed|null The parameter value.
*
* @todo Rename $category to $section for clarity.
*/
public static function getConfig($category, $key) {
if(array_key_exists($category, self::$config) && array_key_exists($key, self::$config[$category])) {
@ -107,6 +210,15 @@ class Configuration {
}
/**
* Returns the current version string of RSS-Bridge.
*
* This function returns the contents of {@see Configuration::$VERSION} for
* regular installations and the git branch name and commit id for instances
* running in a git environment.
*
* @return string The version string.
*/
public static function getVersion() {
$headFile = '.git/HEAD';

View file

@ -1,17 +1,35 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* Implements an RSS-Bridge specific exception class
*
* The class body is empty: it adds no properties or methods to \Exception and
* only provides a distinct type name.
*
* @todo This class serves no purpose, remove it!
*/
class HttpException extends \Exception{}
/**
* Returns an URL that automatically populates a new issue on GitHub based
* on the information provided
*
* @param $title string Sets the title of the issue
* @param $body string Sets the body of the issue (GitHub markdown applies)
* @param $labels mixed (optional) Specifies labels to add to the issue
* @param $maintainer string (optional) Specifies the maintainer for the issue.
* @param string $title Sets the title of the issue
* @param string $body Sets the body of the issue (GitHub markdown applies)
* @param mixed $labels (optional) Specifies labels to add to the issue
* @param string $maintainer (optional) Specifies the maintainer for the issue.
* The maintainer only applies if part of the development team!
* @return string Returns a qualified URL to a new issue with populated conent.
* Returns null if title or body is null or empty
* @return string|null A qualified URL to a new issue with populated content or null.
*
* @todo This function belongs inside a class
*/
function buildGitHubIssueQuery($title, $body, $labels = null, $maintainer = null){
if(!isset($title) || !isset($body) || empty($title) || empty($body)) {
@ -49,10 +67,11 @@ function buildGitHubIssueQuery($title, $body, $labels = null, $maintainer = null
/**
* Returns the exception message as HTML string
*
* @param $e Exception The exception to show
* @param $bridge object The bridge object
* @return string Returns the exception as HTML string. Returns null if the
* provided parameter are invalid
* @param object $e The exception to show
* @param object $bridge The bridge object
* @return string|null Returns the exception as HTML string or null.
*
* @todo This function belongs inside a class
*/
function buildBridgeException($e, $bridge){
if(( !($e instanceof \Exception) && !($e instanceof \Error)) || !($bridge instanceof \BridgeInterface)) {
@ -87,10 +106,11 @@ EOD;
/**
* Returns the exception message as HTML string
*
* @param $e Exception The exception to show
* @param $bridge object The bridge object
* @return string Returns the exception as HTML string. Returns null if the
* provided parameter are invalid
* @param object $e The exception to show
* @param object $bridge The bridge object
* @return string|null Returns the exception as HTML string or null.
*
* @todo This function belongs inside a class
*/
function buildTransformException($e, $bridge){
if(( !($e instanceof \Exception) && !($e instanceof \Error)) || !($bridge instanceof \BridgeInterface)) {
@ -114,6 +134,15 @@ function buildTransformException($e, $bridge){
return buildPage($title, $header, $section);
}
/**
* Builds a new HTML header with data from an exception and a bridge
*
* @param object $e The exception object
* @param object $bridge The bridge object
* @return string The HTML header
*
* @todo This function belongs inside a class
*/
function buildHeader($e, $bridge){
return <<<EOD
<header>
@ -124,6 +153,17 @@ function buildHeader($e, $bridge){
EOD;
}
/**
* Builds a new HTML section
*
* @param object $e The exception object
* @param object $bridge The bridge object
* @param string $message The message to display
* @param string $link The link to include in the anchor
* @return string The HTML section
*
* @todo This function belongs inside a class
*/
function buildSection($e, $bridge, $message, $link){
return <<<EOD
<section>
@ -142,6 +182,16 @@ function buildSection($e, $bridge, $message, $link){
EOD;
}
/**
* Builds a new HTML page
*
* @param string $title The HTML title
* @param string $header The HTML header
* @param string $section The HTML section
* @return string The HTML page
*
* @todo This function belongs inside a class
*/
function buildPage($title, $header, $section){
return <<<EOD
<!DOCTYPE html>

View file

@ -1,11 +1,74 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* An abstract class for bridges that need to transform existing RSS or Atom
* feeds.
*
* This class extends {@see BridgeAbstract} with functions to extract contents
* from existing RSS or Atom feeds. Bridges that need to transform existing feeds
* should inherit from this class instead of {@see BridgeAbstract}.
*
* Bridges that extend this class don't need to concern themselves with getting
* contents from existing feeds, but can focus on adding additional contents
* (i.e. by downloading additional data), filtering or just transforming a feed
* into another format.
*
* @link http://www.rssboard.org/rss-0-9-1 RSS 0.91 Specification
* @link http://web.resource.org/rss/1.0/spec RDF Site Summary (RSS) 1.0
* @link http://www.rssboard.org/rss-specification RSS 2.0 Specification
* @link https://tools.ietf.org/html/rfc4287 The Atom Syndication Format
*
* @todo Return `self` on more functions to allow chaining
* @todo The parsing functions should all be private. This class is complicated
* enough without having to consider children overriding functions.
*/
abstract class FeedExpander extends BridgeAbstract {
/**
* Holds the title of the current feed
*
* @var string
* @todo Rename this parameter to $title for clarity
*/
private $name;
/**
* Holds the URI of the feed
*
* @var string
*/
private $uri;
/**
* Holds the feed type during internal operations.
*
* @var string
* @todo Define possible values as constant instead of static strings
*/
private $feedType;
/**
* Collects data from an existing feed.
*
* Children should call this function in {@see BridgeInterface::collectData()}
* to extract a feed.
*
* @param string $url URL to the feed.
* @param int $maxItems Maximum number of items to collect from the feed
* (`-1`: no limit).
* @return void
*/
public function collectExpandableDatas($url, $maxItems = -1){
if(empty($url)) {
returnServerError('There is no $url for this RSS expander');
@ -44,6 +107,19 @@ abstract class FeedExpander extends BridgeAbstract {
$this->{'collect_' . $this->feedType . '_data'}($rssContent, $maxItems);
}
/**
* Collect data from a RSS 1.0 compatible feed
*
* @link http://web.resource.org/rss/1.0/spec RDF Site Summary (RSS) 1.0
*
* @param object $rssContent The RSS content
* @param int $maxItems Maximum number of items to collect from the feed
* (`-1`: no limit).
* @return void
*
* @todo Instead of passing $maxItems to all functions, just add all items
* and remove excessive items later.
*/
protected function collect_RSS_1_0_data($rssContent, $maxItems){
$this->load_RSS_2_0_feed_data($rssContent->channel[0]);
foreach($rssContent->item as $item) {
@ -56,6 +132,19 @@ abstract class FeedExpander extends BridgeAbstract {
}
}
/**
* Collect data from a RSS 2.0 compatible feed
*
* @link http://www.rssboard.org/rss-specification RSS 2.0 Specification
*
* @param object $rssContent The RSS content
* @param int $maxItems Maximum number of items to collect from the feed
* (`-1`: no limit).
* @return void
*
* @todo Instead of passing $maxItems to all functions, just add all items
* and remove excessive items later.
*/
protected function collect_RSS_2_0_data($rssContent, $maxItems){
$rssContent = $rssContent->channel[0];
Debug::log('RSS content is ===========\n'
@ -73,6 +162,19 @@ abstract class FeedExpander extends BridgeAbstract {
}
}
/**
* Collect data from a Atom 1.0 compatible feed
*
* @link https://tools.ietf.org/html/rfc4287 The Atom Syndication Format
*
* @param object $content The Atom content
* @param int $maxItems Maximum number of items to collect from the feed
* (`-1`: no limit).
* @return void
*
* @todo Instead of passing $maxItems to all functions, just add all items
* and remove excessive items later.
*/
protected function collect_ATOM_1_0_data($content, $maxItems){
$this->load_ATOM_feed_data($content);
foreach($content->entry as $item) {
@ -85,16 +187,35 @@ abstract class FeedExpander extends BridgeAbstract {
}
}
/**
* Convert RSS 2.0 time to timestamp
*
* @param object $item A feed item
* @return int The timestamp
*/
protected function RSS_2_0_time_to_timestamp($item){
	// pubDate may be a non-string object (presumably a SimpleXMLElement —
	// confirm against callers); cast once so both parsers receive a string.
	$pubDate = trim((string)$item->pubDate);

	// Try the strict format first (same format string as before).
	$date = DateTime::createFromFormat('D, d M Y H:i:s e', $pubDate);
	if($date !== false) {
		return $date->getTimestamp();
	}

	// createFromFormat() returns false when the date deviates from the
	// format; calling getTimestamp() on false would be a fatal error.
	// Fall back to the lenient parser, and to 0 if the date is unparseable,
	// so a single malformed item cannot abort the whole feed.
	$timestamp = strtotime($pubDate);
	return $timestamp !== false ? $timestamp : 0;
}
// TODO set title, link, description, language, and so on
/**
* Load RSS 2.0 feed data into RSS-Bridge
*
* @param object $rssContent The RSS content
* @return void
*
* @todo set title, link, description, language, and so on
*/
protected function load_RSS_2_0_feed_data($rssContent){
	// Cast title and link to plain strings before trimming them
	$feedTitle = (string)$rssContent->title;
	$feedLink = (string)$rssContent->link;

	$this->name = trim($feedTitle);
	$this->uri = trim($feedLink);
}
/**
* Load Atom feed data into RSS-Bridge
*
* @param object $content The Atom content
* @return void
*/
protected function load_ATOM_feed_data($content){
$this->name = (string)$content->title;
@ -114,6 +235,16 @@ abstract class FeedExpander extends BridgeAbstract {
}
}
/**
* Parse the contents of a single Atom feed item into a RSS-Bridge item for
* further transformation.
*
* @param object $feedItem A single feed item
* @return array The RSS-Bridge item
*
* @todo To reduce confusion, the RSS-Bridge item should maybe have a class
* of its own?
*/
protected function parseATOMItem($feedItem){
// Some ATOM entries also contain RSS 2.0 fields
$item = $this->parseRSS_2_0_Item($feedItem);
@ -139,6 +270,16 @@ abstract class FeedExpander extends BridgeAbstract {
return $item;
}
/**
* Parse the contents of a single RSS 0.91 feed item into a RSS-Bridge item
* for further transformation.
*
* @param object $feedItem A single feed item
* @return array The RSS-Bridge item
*
* @todo To reduce confusion, the RSS-Bridge item should maybe have a class
* of its own?
*/
protected function parseRSS_0_9_1_Item($feedItem){
$item = array();
if(isset($feedItem->link)) $item['uri'] = (string)$feedItem->link;
@ -150,6 +291,16 @@ abstract class FeedExpander extends BridgeAbstract {
return $item;
}
/**
* Parse the contents of a single RSS 1.0 feed item into a RSS-Bridge item
* for further transformation.
*
* @param object $feedItem A single feed item
* @return array The RSS-Bridge item
*
* @todo To reduce confusion, the RSS-Bridge item should maybe have a class
* of its own?
*/
protected function parseRSS_1_0_Item($feedItem){
// 1.0 adds optional elements around the 0.91 standard
$item = $this->parseRSS_0_9_1_Item($feedItem);
@ -164,6 +315,16 @@ abstract class FeedExpander extends BridgeAbstract {
return $item;
}
/**
* Parse the contents of a single RSS 2.0 feed item into a RSS-Bridge item
* for further transformation.
*
* @param object $feedItem A single feed item
* @return object The RSS-Bridge item
*
* @todo To reduce confusion, the RSS-Bridge item should maybe have a class
* of its own?
*/
protected function parseRSS_2_0_Item($feedItem){
// Primary data is compatible to 0.91 with some additional data
$item = $this->parseRSS_0_9_1_Item($feedItem);
@ -211,9 +372,11 @@ abstract class FeedExpander extends BridgeAbstract {
}
/**
* Method should return, from a source RSS item given by lastRSS, one of our Items objects
* @param $item the input rss item
* @return a RSS-Bridge Item, with (hopefully) the whole content)
* Parse the contents of a single feed item, depending on the current feed
* type, into a RSS-Bridge item.
*
* @param object $item The current feed item
* @return object A RSS-Bridge item, with (hopefully) the whole content
*/
protected function parseItem($item){
switch($this->feedType) {
@ -230,14 +393,17 @@ abstract class FeedExpander extends BridgeAbstract {
}
}
/** {@inheritdoc} */
public function getURI()
{
    // No URI stored on this object (unset or empty): defer to the parent.
    if (empty($this->uri)) {
        return parent::getURI();
    }

    return $this->uri;
}
/** {@inheritdoc} */
public function getName()
{
    // No name stored on this object (unset or empty): defer to the parent.
    if (empty($this->name)) {
        return parent::getName();
    }

    return $this->name;
}
/** {@inheritdoc} */
public function getIcon()
{
    // No icon stored on this object (unset or empty): defer to the parent.
    if (empty($this->icon)) {
        return parent::getIcon();
    }

    return $this->icon;
}

View file

@ -1,41 +1,95 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license https://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* An abstract class for format implementations
*
* This class implements {@see FormatInterface}
*/
abstract class FormatAbstract implements FormatInterface {
/** The default charset (UTF-8) */
const DEFAULT_CHARSET = 'UTF-8';
protected
$contentType,
$charset,
$items,
$lastModified,
$extraInfos;
/** @var string $contentType The content type */
protected $contentType;
/** @var string $charset The charset */
protected $charset;
/** @var array $items The items */
protected $items;
/**
* @var int $lastModified A timestamp to indicate the last modified time of
* the output data.
*/
protected $lastModified;
/** @var array $extraInfos The extra infos */
protected $extraInfos;
/**
* {@inheritdoc}
*
* @param string $charset {@inheritdoc}
*/
public function setCharset($charset)
{
    $this->charset = $charset;

    // Hand back the format object so calls can be chained.
    return $this;
}
/** {@inheritdoc} */
public function getCharset()
{
    // No charset has been set: fall back to DEFAULT_CHARSET. The constant is
    // resolved via late static binding, so a subclass may override it.
    if ($this->charset === null) {
        return static::DEFAULT_CHARSET;
    }

    return $this->charset;
}
/**
* Set the content type
*
* @param string $contentType The content type
* @return self The format object
*/
protected function setContentType($contentType)
{
    $this->contentType = $contentType;

    // Hand back the format object so calls can be chained.
    return $this;
}
/**
* Set the last modified time
*
* @param int $lastModified The last modified time
* @return void
*/
public function setLastModified($lastModified)
{
    // Stored as-is; no validation or conversion is applied here.
    $this->lastModified = $lastModified;
}
/**
* Send header with the currently specified content type
*
* @return void
* @todo This should throw an error if no content type is set
*/
protected function callContentType()
{
    // NOTE: the header is sent even if no content type has been set.
    $contentTypeHeader = 'Content-Type: ' . $this->contentType;

    header($contentTypeHeader);
}
/** {@inheritdoc} */
public function display(){
if ($this->lastModified) {
header('Last-Modified: ' . gmdate('D, d M Y H:i:s ', $this->lastModified) . 'GMT');
@ -45,12 +99,18 @@ abstract class FormatAbstract implements FormatInterface {
return $this;
}
/**
* {@inheritdoc}
*
* @param array $items {@inheritdoc}
*/
public function setItems(array $items)
{
    // Run every item through array_trim(), keeping the original keys.
    $trimmedItems = array();
    foreach ($items as $key => $item) {
        $trimmedItems[$key] = $this->array_trim($item);
    }

    $this->items = $trimmedItems;

    // Hand back the format object so calls can be chained.
    return $this;
}
/** {@inheritdoc} */
public function getItems(){
if(!is_array($this->items))
throw new \LogicException('Feed the ' . get_class($this) . ' with "setItems" method before !');
@ -59,9 +119,9 @@ abstract class FormatAbstract implements FormatInterface {
}
/**
* Define common informations can be required by formats and set default value for unknown values
* @param array $extraInfos array with know informations (there isn't merge !!!)
* @return this
* {@inheritdoc}
*
* @param array $extraInfos {@inheritdoc}
*/
public function setExtraInfos(array $extraInfos = array()){
foreach(array('name', 'uri', 'icon') as $infoName) {
@ -75,10 +135,7 @@ abstract class FormatAbstract implements FormatInterface {
return $this;
}
/**
* Return extra infos
* @return array See "setExtraInfos" detail method to know what extra are disponibles
*/
/** {@inheritdoc} */
public function getExtraInfos(){
if(is_null($this->extraInfos)) { // No extra info ?
$this->setExtraInfos(); // Define with default value
@ -88,12 +145,17 @@ abstract class FormatAbstract implements FormatInterface {
}
/**
* Sanitized html while leaving it functionnal.
* The aim is to keep html as-is (with clickable hyperlinks)
* while reducing annoying and potentially dangerous things.
* Yes, I know sanitizing HTML 100% is an impossible task.
* Maybe we'll switch to http://htmlpurifier.org/
* or http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/index.php
* Sanitize HTML while leaving it functional.
*
* Keeps HTML as-is (with clickable hyperlinks) while reducing annoying and
* potentially dangerous things.
*
* @param string $html The HTML content
* @return string The sanitized HTML content
*
* @todo This belongs into `html.php`
* @todo Maybe switch to http://htmlpurifier.org/
* @todo Maybe switch to http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/index.php
*/
protected function sanitizeHtml($html)
{
@ -104,6 +166,17 @@ abstract class FormatAbstract implements FormatInterface {
return $html;
}
/**
* Trim each element of an array
*
* This function applies `trim()` to all elements in the array, if the element
* is a valid string.
*
* @param array $elements The array to trim
* @return array The trimmed array
*
* @todo This is a utility function that doesn't belong here, find a new home.
*/
protected function array_trim($elements){
foreach($elements as $key => $value) {
if(is_string($value))

View file

@ -1,11 +1,84 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* The format interface
*
* @todo Add missing function to the interface
* @todo Explain parameters and return values in more detail
* @todo Return self more often (to allow call chaining)
*/
interface FormatInterface {
    /**
     * Generate a string representation of the current data
     *
     * @return string The string representation
     */
    public function stringify();

    /**
     * Display the current data to the user
     *
     * @return self The format object
     */
    public function display();

    /**
     * Set items
     *
     * The parameter is named `$items` to match the implementing classes.
     *
     * @param array $items The items
     * @return self The format object
     */
    public function setItems(array $items);

    /**
     * Return items
     *
     * @throws \LogicException if the items are not set
     * @return array The items
     */
    public function getItems();

    /**
     * Set extra information
     *
     * @param array $infos Extra information
     * @return self The format object
     */
    public function setExtraInfos(array $infos);

    /**
     * Return extra information
     *
     * @return array Extra information
     */
    public function getExtraInfos();

    /**
     * Set charset
     *
     * @param string $charset The charset
     * @return self The format object
     */
    public function setCharset($charset);

    /**
     * Return current charset
     *
     * @return string The charset
     */
    public function getCharset();
}

View file

@ -1,10 +1,35 @@
<?php
/**
* Implements a validator for bridge parameters
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* Validator for bridge parameters
*/
class ParameterValidator {
/**
* Holds the list of invalid parameters
*
* @var array
*/
private $invalid = array();
/**
* Add item to list of invalid parameters
*
* @param string $name The name of the parameter
* @param string $reason The reason for that parameter being invalid
* @return void
*/
private function addInvalidParameter($name, $reason){
$this->invalid[] = array(
'name' => $name,
@ -13,13 +38,23 @@ class ParameterValidator {
}
/**
* Returns an array of invalid parameters, where each element is an
* array of 'name' and 'reason'.
* Return list of invalid parameters.
*
* Each element is an array of 'name' and 'reason'.
*
* @return array List of invalid parameters
*/
public function getInvalidParameters()
{
    $invalidParameters = $this->invalid;

    return $invalidParameters;
}
/**
* Validate value for a text input
*
* @param string $value The value of a text input
* @param string|null $pattern (optional) A regex pattern
* @return string|null The filtered value or null if the value is invalid
*/
private function validateTextValue($value, $pattern = null){
if(!is_null($pattern)) {
$filteredValue = filter_var($value,
@ -38,6 +73,12 @@ class ParameterValidator {
return $filteredValue;
}
/**
* Validate value for a number input
*
* @param int $value The value of a number input
* @return int|null The filtered value or null if the value is invalid
*/
private function validateNumberValue($value){
$filteredValue = filter_var($value, FILTER_VALIDATE_INT);
@ -47,10 +88,23 @@ class ParameterValidator {
return $filteredValue;
}
/**
 * Validate value for a checkbox
 *
 * The value is interpreted by `filter_var()` using `FILTER_VALIDATE_BOOLEAN`.
 * Because `FILTER_NULL_ON_FAILURE` is set, a value that is not recognized as
 * a boolean yields `null` instead of `false`.
 *
 * @param mixed $value The value of a checkbox
 * @return bool|null The filtered value or null if the value is not a
 * recognized boolean
 */
private function validateCheckboxValue($value)
{
    $flags = FILTER_NULL_ON_FAILURE;

    return filter_var($value, FILTER_VALIDATE_BOOLEAN, $flags);
}
/**
* Validate value for a list
*
* @param string $value The value of a list
* @param array $expectedValues A list of expected values
* @return string|null The filtered value or null if the value is invalid
*/
private function validateListValue($value, $expectedValues){
$filteredValue = filter_var($value);
@ -69,9 +123,11 @@ class ParameterValidator {
}
/**
* Checks if all required parameters are supplied by the user
* @param $data An array of parameters provided by the user
* @param $parameters An array of bridge parameters
* Check if all required parameters are satisfied
*
* @param array $data (ref) A list of input values
* @param array $parameters The bridge parameters
* @return bool True if all parameters are satisfied
*/
public function validateData(&$data, $parameters){
@ -122,11 +178,11 @@ class ParameterValidator {
}
/**
* Returns the name of the context matching the provided inputs
* Get the name of the context matching the provided inputs
*
* @param array $data Associative array of user data
* @param array $parameters Array of bridge parameters
* @return mixed Returns the context name or null if no match was found
* @return string|null Returns the context name or null if no match was found
*/
public function getQueriedContext($data, $parameters){
$queriedContexts = array();

View file

@ -1,4 +1,31 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* Gets contents from the Internet.
*
* @param string $url The URL.
* @param array $header (optional) A list of cURL header.
* For more information follow the links below.
* * https://php.net/manual/en/function.curl-setopt.php
* * https://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html
* @param array $opts (optional) A list of cURL options as associative array in
* the format `$opts[$option] = $value;`, where `$option` is any `CURLOPT_XXX`
* option and `$value` the corresponding value.
*
* For more information see http://php.net/manual/en/function.curl-setopt.php
* @return string The contents.
*/
function getContents($url, $header = array(), $opts = array()){
Debug::log('Reading contents from "' . $url . '"');
@ -74,6 +101,32 @@ EOD
return substr($data, $headerSize);
}
/**
* Gets contents from the Internet as simplehtmldom object.
*
* @param string $url The URL.
* @param array $header (optional) A list of cURL header.
* For more information follow the links below.
* * https://php.net/manual/en/function.curl-setopt.php
* * https://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html
* @param array $opts (optional) A list of cURL options as associative array in
* the format `$opts[$option] = $value;`, where `$option` is any `CURLOPT_XXX`
* option and `$value` the corresponding value.
*
* For more information see http://php.net/manual/en/function.curl-setopt.php
* @param bool $lowercase Force all selectors to lowercase.
* @param bool $forceTagsClosed Forcefully close tags in malformed HTML.
*
* _Remarks_: Forcefully closing tags is great for malformed HTML, but it can
* lead to parsing errors.
* @param string $target_charset Defines the target charset.
* @param bool $stripRN Replace all occurrences of `"\r"` and `"\n"` by `" "`.
* @param string $defaultBRText Specifies the replacement text for `<br>` tags
* when returning plaintext.
* @param string $defaultSpanText Specifies the replacement text for `<span />`
* tags when returning plaintext.
* @return object Contents as simplehtmldom object.
*/
function getSimpleHTMLDOM($url,
$header = array(),
$opts = array(),
@ -94,10 +147,34 @@ $defaultSpanText = DEFAULT_SPAN_TEXT){
}
/**
* Maintain locally cached versions of pages to avoid multiple downloads.
* @param url url to cache
* @param duration duration of the cache file in seconds (default: 24h/86400s)
* @return content of the file as string
* Gets contents from the Internet as simplehtmldom object. Contents are cached
* and re-used for subsequent calls until the cache duration elapsed.
*
* _Notice_: Cached contents are forcefully removed after 24 hours (86400 seconds).
*
* @param string $url The URL.
* @param int $duration Cache duration in seconds.
* @param array $header (optional) A list of cURL header.
* For more information follow the links below.
* * https://php.net/manual/en/function.curl-setopt.php
* * https://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html
* @param array $opts (optional) A list of cURL options as associative array in
* the format `$opts[$option] = $value;`, where `$option` is any `CURLOPT_XXX`
* option and `$value` the corresponding value.
*
* For more information see http://php.net/manual/en/function.curl-setopt.php
* @param bool $lowercase Force all selectors to lowercase.
* @param bool $forceTagsClosed Forcefully close tags in malformed HTML.
*
* _Remarks_: Forcefully closing tags is great for malformed HTML, but it can
* lead to parsing errors.
* @param string $target_charset Defines the target charset.
* @param bool $stripRN Replace all occurrences of `"\r"` and `"\n"` by `" "`.
* @param string $defaultBRText Specifies the replacement text for `<br>` tags
* when returning plaintext.
* @param string $defaultSpanText Specifies the replacement text for `<span />`
* tags when returning plaintext.
* @return object Contents as simplehtmldom object.
*/
function getSimpleHTMLDOMCached($url,
$duration = 86400,
@ -142,9 +219,12 @@ $defaultSpanText = DEFAULT_SPAN_TEXT){
}
/**
* Parses the provided response header into an associative array
* Parses the cURL response header into an associative array
*
* Based on https://stackoverflow.com/a/18682872
*
* @param string $header The cURL response header.
* @return array An associative array of response headers.
*/
function parseResponseHeader($header) {
@ -177,10 +257,18 @@ function parseResponseHeader($header) {
}
/**
* Determine MIME type from URL/Path file extension
* Remark: Built-in functions mime_content_type or fileinfo requires fetching remote content
* Remark: A bridge can hint for a MIME type by appending #.ext to a URL, e.g. #.image
* Determines the MIME type from a URL/Path file extension.
*
* _Remarks_:
*
* * The built-in functions `mime_content_type` and `fileinfo` require fetching
* remote contents.
* * A caller can hint for a MIME type by appending `#.ext` to the URL (e.g. `#.image`).
*
* Based on https://stackoverflow.com/a/1147952
*
* @param string $url The URL or path to the file.
* @return string The MIME type of the file.
*/
function getMimeType($url) {
static $mime = null;

View file

@ -1,12 +1,43 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* Throws an exception when called.
*
* @throws \HttpException when called
* @param string $message The error message
* @param int $code The HTTP error code
* @link https://en.wikipedia.org/wiki/List_of_HTTP_status_codes List of HTTP
* status codes
*/
function returnError($message, $code)
{
    // Build the exception first, then raise it; nothing is ever returned.
    $exception = new \HttpException($message, $code);

    throw $exception;
}
/**
* Returns HTTP Error 400 (Bad Request) when called.
*
* @param string $message The error message
*/
function returnClientError($message)
{
    // 400 = Bad Request
    $badRequest = 400;

    returnError($message, $badRequest);
}
/**
* Returns HTTP Error 500 (Internal Server Error) when called.
*
* @param string $message The error message
*/
function returnServerError($message)
{
    // 500 = Internal Server Error
    $internalServerError = 500;

    returnError($message, $internalServerError);
}

View file

@ -1,4 +1,34 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* Removes unwanted tags from a given HTML text.
*
* @param string $textToSanitize The HTML text to sanitize.
* @param array $removedTags A list of tags to remove from the DOM.
* @param array $keptAttributes A list of attributes to keep on tags (other
* attributes are removed).
* @param array $keptText A list of tags where the innertext replaces the tag
* (i.e. `<p>Hello World!</p>` becomes `Hello World!`).
* @return object A simplehtmldom object of the remaining contents.
*
* @todo Check if this implementation is still necessary, because simplehtmldom
* already removes some of the tags (search for `remove_noise` in simple_html_dom.php).
* @todo Rename parameters to make more sense. `$textToSanitize` must be HTML,
* `$removedTags`, `$keptAttributes` and `$keptText` are past tense.
* @todo Clarify the meaning of `*[!b38fd2b1fe7f4747d6b1c1254ccd055e]`, which
* looks like a SHA1 hash (does simplehtmldom not support `find('*')`?).
*/
function sanitize($textToSanitize,
$removedTags = array('script', 'iframe', 'input', 'form'),
$keptAttributes = array('title', 'href', 'src'),
@ -21,6 +51,35 @@ $keptText = array()){
return $htmlContent;
}
/**
* Replace background by image
*
* Replaces tags with styles of `background-image` by `<img />` tags.
*
* For example:
*
* ```HTML
* <html>
* <body style="background-image: url('bgimage.jpg');">
* <h1>Hello world!</h1>
* </body>
* </html>
* ```
*
* results in this output:
*
* ```HTML
* <html>
* <img style="display:block;" src="bgimage.jpg" />
* </html>
* ```
*
* @param string $htmlContent The HTML content
* @return string The HTML content with all occurrences replaced
*
* @todo Clarify the meaning of `*[!b38fd2b1fe7f4747d6b1c1254ccd055e]`, which
* looks like a SHA1 hash (does simplehtmldom not support `find('*')`?).
*/
function backgroundToImg($htmlContent) {
$regex = '/background-image[ ]{0,}:[ ]{0,}url\([\'"]{0,}(.*?)[\'"]{0,}\)/';
@ -42,9 +101,17 @@ function backgroundToImg($htmlContent) {
/**
* Convert relative links in HTML into absolute links
* @param $content HTML content to fix. Supports HTML objects or string objects
* @param $server full URL to the page containing relative links
* @return content with fixed URLs, as HTML object or string depending on input type
*
* This function is based on `php-urljoin`.
*
* @link https://github.com/plaidfluff/php-urljoin php-urljoin
*
* @param string|object $content The HTML content. Supports HTML objects or string objects
* @param string $server Fully qualified URL to the page containing relative links
* @return object Content with fixed URLs.
*
* @todo If the input type was a string, this function should return a string as
* well. This is currently done implicitly by how the simplehtmldom object works.
*/
function defaultLinkTo($content, $server){
$string_convert = false;
@ -70,10 +137,14 @@ function defaultLinkTo($content, $server){
/**
* Extract the first part of a string matching the specified start and end delimiters
* @param $string input string, e.g. '<div>Post author: John Doe</div>'
* @param $start start delimiter, e.g. 'author: '
* @param $end end delimiter, e.g. '<'
* @return extracted string, e.g. 'John Doe', or false if the delimiters were not found.
*
* @param string $string Input string, e.g. `<div>Post author: John Doe</div>`
* @param string $start Start delimiter, e.g. `author: `
* @param string $end End delimiter, e.g. `<`
* @return string|bool Extracted string, e.g. `John Doe`, or false if the
* delimiters were not found.
*
* @todo This function can possibly be simplified to use a single `substr` command.
*/
function extractFromDelimiters($string, $start, $end) {
if (strpos($string, $start) !== false) {
@ -85,10 +156,13 @@ function extractFromDelimiters($string, $start, $end) {
/**
* Remove one or more part(s) of a string using start and end delimiters
* @param $string input string, e.g. 'foo<script>superscript()</script>bar'
* @param $start start delimiter, e.g. '<script'
* @param $end end delimiter, e.g. '</script>'
* @return cleaned string, e.g. 'foobar'
*
* @param string $string Input string, e.g. `foo<script>superscript()</script>bar`
* @param string $start Start delimiter, e.g. `<script`
* @param string $end End delimiter, e.g. `</script>`
* @return string Cleaned string, e.g. `foobar`
*
* @todo This function can possibly be simplified to use a single `substr` command.
*/
function stripWithDelimiters($string, $start, $end) {
while(strpos($string, $start) !== false) {
@ -101,10 +175,13 @@ function stripWithDelimiters($string, $start, $end) {
/**
* Remove HTML sections containing one or more sections using the same HTML tag
* @param $string input string, e.g. 'foo<div class="ads"><div>ads</div>ads</div>bar'
* @param $tag_name name of the HTML tag, e.g. 'div'
* @param $tag_start start of the HTML tag to remove, e.g. '<div class="ads">'
* @return cleaned string, e.g. 'foobar'
*
* @param string $string Input string, e.g. `foo<div class="ads"><div>ads</div>ads</div>bar`
* @param string $tag_name Name of the HTML tag, e.g. `div`
* @param string $tag_start Start of the HTML tag to remove, e.g. `<div class="ads">`
* @return string Cleaned String, e.g. `foobar`
*
* @todo This function needs more documentation to make it maintainable.
*/
function stripRecursiveHTMLSection($string, $tag_name, $tag_start){
$open_tag = '<' . $tag_name;
@ -131,9 +208,13 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){
}
/**
* Convert Markdown tags into HTML tags. Only a subset of the Markdown syntax is implemented.
* @param $string input string in Markdown format
* @return output string in HTML format
* Convert Markdown into HTML. Only a subset of the Markdown syntax is implemented.
*
* @link https://daringfireball.net/projects/markdown/ Markdown
* @link https://github.github.com/gfm/ GitHub Flavored Markdown Spec
*
* @param string $string Input string in Markdown format
* @return string output string in HTML format
*/
function markdownToHtml($string) {