mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-25 19:06:23 +03:00
e027bd9274
* fix: improve FeedExpander Include the first libxml error in exception. Give better error message if trying to parse the empty string. Log all libxml errors if debug mode is enabled. * error handling and logging tweak
576 lines
16 KiB
PHP
576 lines
16 KiB
PHP
<?php
|
|
|
|
/**
|
|
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
|
* Atom feeds for websites that don't have one.
|
|
*
|
|
* For the full license information, please view the UNLICENSE file distributed
|
|
* with this source code.
|
|
*
|
|
* @package Core
|
|
* @license http://unlicense.org/ UNLICENSE
|
|
* @link https://github.com/rss-bridge/rss-bridge
|
|
*/
|
|
|
|
/**
|
|
* Represents a simple feed item for transformation into various feed formats.
|
|
*
|
|
* This class represents a feed item. A feed item is an entity that can be
|
|
* transformed into various feed formats. It holds a set of pre-defined
|
|
* properties:
|
|
*
|
|
* - **URI**: URI to the full article (i.e. "https://...")
|
|
* - **Title**: The title
|
|
* - **Timestamp**: A timestamp of when the item was first released
|
|
* - **Author**: Name of the author
|
|
* - **Content**: Body of the feed, as text or HTML
|
|
* - **Enclosures**: A list of links to media objects (images, videos, etc...)
|
|
* - **Categories**: A list of category names or tags to categorize the item
|
|
*
|
|
* _Note_: A feed item can have any number of additional parameters, all of which
|
|
* may or may not be transformed to the selected output format.
|
|
*
|
|
* _Remarks_: This class supports legacy items via {@see FeedItem::__construct()}
|
|
* (i.e. `$feedItem = \FeedItem($item);`). Support for legacy items may be removed
|
|
* in future versions of RSS-Bridge.
|
|
*/
|
|
class FeedItem
|
|
{
|
|
/** @var string|null URI to the full article */
|
|
protected $uri = null;
|
|
|
|
/** @var string|null Title of the item */
|
|
protected $title = null;
|
|
|
|
/** @var int|null Timestamp of when the item was first released */
|
|
protected $timestamp = null;
|
|
|
|
/** @var string|null Name of the author */
|
|
protected $author = null;
|
|
|
|
/** @var string|null Body of the feed */
|
|
protected $content = null;
|
|
|
|
/** @var array List of links to media objects */
|
|
protected $enclosures = [];
|
|
|
|
/** @var array List of category names or tags */
|
|
protected $categories = [];
|
|
|
|
/** @var string Unique ID for the current item */
|
|
protected $uid = null;
|
|
|
|
/** @var array Associative list of additional parameters */
|
|
protected $misc = []; // Custom parameters
|
|
|
|
/**
|
|
* Create object from legacy item.
|
|
*
|
|
* The provided array must be an associative array of key-value-pairs, where
|
|
* keys may correspond to any of the properties of this class.
|
|
*
|
|
* Example use:
|
|
*
|
|
* ```PHP
|
|
* <?php
|
|
* $item = array();
|
|
*
|
|
* $item['uri'] = 'https://www.github.com/rss-bridge/rss-bridge/';
|
|
* $item['title'] = 'Title';
|
|
* $item['timestamp'] = strtotime('now');
|
|
* $item['author'] = 'Unknown author';
|
|
* $item['content'] = 'Hello World!';
|
|
* $item['enclosures'] = array('https://github.com/favicon.ico');
|
|
* $item['categories'] = array('php', 'rss-bridge', 'awesome');
|
|
*
|
|
* $feedItem = new \FeedItem($item);
|
|
*
|
|
* ```
|
|
*
|
|
* The result of the code above is the same as the code below:
|
|
*
|
|
* ```PHP
|
|
* <?php
|
|
* $feedItem = \FeedItem();
|
|
*
|
|
* $feedItem->uri = 'https://www.github.com/rss-bridge/rss-bridge/';
|
|
* $feedItem->title = 'Title';
|
|
* $feedItem->timestamp = strtotime('now');
|
|
* $feedItem->autor = 'Unknown author';
|
|
* $feedItem->content = 'Hello World!';
|
|
* $feedItem->enclosures = array('https://github.com/favicon.ico');
|
|
* $feedItem->categories = array('php', 'rss-bridge', 'awesome');
|
|
* ```
|
|
*
|
|
* @param array $item (optional) A legacy item (empty: no legacy support).
|
|
* @return object A new object of this class
|
|
*/
|
|
public function __construct($item = [])
|
|
{
|
|
if (!is_array($item)) {
|
|
Debug::log('Item must be an array!');
|
|
}
|
|
|
|
foreach ($item as $key => $value) {
|
|
$this->__set($key, $value);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get current URI.
|
|
*
|
|
* Use {@see FeedItem::setURI()} to set the URI.
|
|
*
|
|
* @return string|null The URI or null if it hasn't been set.
|
|
*/
|
|
public function getURI()
|
|
{
|
|
return $this->uri;
|
|
}
|
|
|
|
/**
|
|
* Set URI to the full article.
|
|
*
|
|
* Use {@see FeedItem::getURI()} to get the URI.
|
|
*
|
|
* _Note_: Removes whitespace from the beginning and end of the URI.
|
|
*
|
|
* _Remarks_: Uses the attribute "href" or "src" if the provided URI is an
|
|
* object of simple_html_dom_node.
|
|
*
|
|
* @param object|string $uri URI to the full article.
|
|
* @return self
|
|
*/
|
|
public function setURI($uri)
|
|
{
|
|
$this->uri = null; // Clear previous data
|
|
|
|
if ($uri instanceof simple_html_dom_node) {
|
|
if ($uri->hasAttribute('href')) { // Anchor
|
|
$uri = $uri->href;
|
|
} elseif ($uri->hasAttribute('src')) { // Image
|
|
$uri = $uri->src;
|
|
} else {
|
|
Debug::log('The item provided as URI is unknown!');
|
|
}
|
|
}
|
|
if (!is_string($uri)) {
|
|
Debug::log(sprintf('Expected $uri to be string but got %s', gettype($uri)));
|
|
return $this;
|
|
}
|
|
$uri = trim($uri);
|
|
// Intentionally doing a weak url validation here because FILTER_VALIDATE_URL is too strict
|
|
if (!preg_match('#^https?://#i', $uri)) {
|
|
Debug::log(sprintf('Not a valid url: "%s"', $uri));
|
|
return $this;
|
|
}
|
|
$this->uri = $uri;
|
|
return $this;
|
|
}
|
|
|
|
/**
|
|
* Get current title.
|
|
*
|
|
* Use {@see FeedItem::setTitle()} to set the title.
|
|
*
|
|
* @return string|null The current title or null if it hasn't been set.
|
|
*/
|
|
public function getTitle()
|
|
{
|
|
return $this->title;
|
|
}
|
|
|
|
/**
|
|
* Set title.
|
|
*
|
|
* Use {@see FeedItem::getTitle()} to get the title.
|
|
*
|
|
* _Note_: Removes whitespace from beginning and end of the title.
|
|
*
|
|
* @param string $title The title
|
|
* @return self
|
|
*/
|
|
public function setTitle($title)
|
|
{
|
|
$this->title = null; // Clear previous data
|
|
|
|
if (!is_string($title)) {
|
|
Debug::log('Title must be a string!');
|
|
} else {
|
|
$this->title = truncate(trim($title));
|
|
}
|
|
|
|
return $this;
|
|
}
|
|
|
|
/**
|
|
* Get current timestamp.
|
|
*
|
|
* Use {@see FeedItem::setTimestamp()} to set the timestamp.
|
|
*
|
|
* @return int|null The current timestamp or null if it hasn't been set.
|
|
*/
|
|
public function getTimestamp()
|
|
{
|
|
return $this->timestamp;
|
|
}
|
|
|
|
/**
|
|
* Set timestamp of first release.
|
|
*
|
|
* _Note_: The timestamp should represent the number of seconds since
|
|
* January 1 1970 00:00:00 GMT (Unix time).
|
|
*
|
|
* _Remarks_: If the provided timestamp is a string (not numeric), this
|
|
* function automatically attempts to parse the string using
|
|
* [strtotime](http://php.net/manual/en/function.strtotime.php)
|
|
*
|
|
* @link http://php.net/manual/en/function.strtotime.php strtotime (PHP)
|
|
* @link https://en.wikipedia.org/wiki/Unix_time Unix time (Wikipedia)
|
|
*
|
|
* @param string|int $timestamp A timestamp of when the item was first released
|
|
* @return self
|
|
*/
|
|
public function setTimestamp($timestamp)
|
|
{
|
|
$this->timestamp = null; // Clear previous data
|
|
|
|
if (
|
|
!is_numeric($timestamp)
|
|
&& !$timestamp = strtotime($timestamp)
|
|
) {
|
|
Debug::log('Unable to parse timestamp!');
|
|
}
|
|
|
|
if ($timestamp <= 0) {
|
|
Debug::log('Timestamp must be greater than zero!');
|
|
} else {
|
|
$this->timestamp = $timestamp;
|
|
}
|
|
|
|
return $this;
|
|
}
|
|
|
|
/**
|
|
* Get the current author name.
|
|
*
|
|
* Use {@see FeedItem::setAuthor()} to set the author.
|
|
*
|
|
* @return string|null The author or null if it hasn't been set.
|
|
*/
|
|
public function getAuthor()
|
|
{
|
|
return $this->author;
|
|
}
|
|
|
|
/**
|
|
* Set the author name.
|
|
*
|
|
* Use {@see FeedItem::getAuthor()} to get the author.
|
|
*
|
|
* @param string $author The author name.
|
|
* @return self
|
|
*/
|
|
public function setAuthor($author)
|
|
{
|
|
$this->author = null; // Clear previous data
|
|
|
|
if (!is_string($author)) {
|
|
Debug::log('Author must be a string!');
|
|
} else {
|
|
$this->author = $author;
|
|
}
|
|
|
|
return $this;
|
|
}
|
|
|
|
/**
|
|
* Get item content.
|
|
*
|
|
* Use {@see FeedItem::setContent()} to set the item content.
|
|
*
|
|
* @return string|null The item content or null if it hasn't been set.
|
|
*/
|
|
public function getContent()
|
|
{
|
|
return $this->content;
|
|
}
|
|
|
|
/**
|
|
* Set item content.
|
|
*
|
|
* Note: This function casts objects of type simple_html_dom and
|
|
* simple_html_dom_node to string.
|
|
*
|
|
* Use {@see FeedItem::getContent()} to get the current item content.
|
|
*
|
|
* @param string|object $content The item content as text or simple_html_dom object.
|
|
* @return self
|
|
*/
|
|
public function setContent($content)
|
|
{
|
|
$this->content = null; // Clear previous data
|
|
|
|
if (
|
|
$content instanceof simple_html_dom
|
|
|| $content instanceof simple_html_dom_node
|
|
) {
|
|
$content = (string)$content;
|
|
}
|
|
|
|
if (is_string($content)) {
|
|
$this->content = $content;
|
|
} else {
|
|
Debug::log(sprintf('Feed content must be a string but got %s', gettype($content)));
|
|
}
|
|
|
|
return $this;
|
|
}
|
|
|
|
/**
|
|
* Get item enclosures.
|
|
*
|
|
* Use {@see FeedItem::setEnclosures()} to set feed enclosures.
|
|
*
|
|
* @return array Enclosures as array of enclosure URIs.
|
|
*/
|
|
public function getEnclosures()
|
|
{
|
|
return $this->enclosures;
|
|
}
|
|
|
|
/**
|
|
* Set item enclosures.
|
|
*
|
|
* Use {@see FeedItem::getEnclosures()} to get the current item enclosures.
|
|
*
|
|
* @param array $enclosures Array of enclosures, where each element links to
|
|
* one enclosure.
|
|
* @return self
|
|
*/
|
|
public function setEnclosures($enclosures)
|
|
{
|
|
$this->enclosures = [];
|
|
|
|
if (is_array($enclosures)) {
|
|
foreach ($enclosures as $enclosure) {
|
|
if (
|
|
!filter_var(
|
|
$enclosure,
|
|
FILTER_VALIDATE_URL,
|
|
FILTER_FLAG_PATH_REQUIRED
|
|
)
|
|
) {
|
|
Debug::log('Each enclosure must contain a scheme, host and path!');
|
|
} elseif (!in_array($enclosure, $this->enclosures)) {
|
|
$this->enclosures[] = $enclosure;
|
|
}
|
|
}
|
|
} else {
|
|
Debug::log('Enclosures must be an array!');
|
|
}
|
|
|
|
return $this;
|
|
}
|
|
|
|
/**
|
|
* Get item categories.
|
|
*
|
|
* Use {@see FeedItem::setCategories()} to set item categories.
|
|
*
|
|
* @param array The item categories.
|
|
*/
|
|
public function getCategories()
|
|
{
|
|
return $this->categories;
|
|
}
|
|
|
|
/**
|
|
* Set item categories.
|
|
*
|
|
* Use {@see FeedItem::getCategories()} to get the current item categories.
|
|
*
|
|
* @param array $categories Array of categories, where each element defines
|
|
* a single category name.
|
|
* @return self
|
|
*/
|
|
public function setCategories($categories)
|
|
{
|
|
$this->categories = [];
|
|
|
|
if (is_array($categories)) {
|
|
foreach ($categories as $category) {
|
|
if (!is_string($category)) {
|
|
Debug::log('Category must be a string!');
|
|
} else {
|
|
$this->categories[] = $category;
|
|
}
|
|
}
|
|
} else {
|
|
Debug::log('Categories must be an array!');
|
|
}
|
|
|
|
return $this;
|
|
}
|
|
|
|
/**
|
|
* Get unique id
|
|
*
|
|
* Use {@see FeedItem::setUid()} to set the unique id.
|
|
*
|
|
* @param string The unique id.
|
|
*/
|
|
public function getUid()
|
|
{
|
|
return $this->uid;
|
|
}
|
|
|
|
/**
|
|
* Set unique id.
|
|
*
|
|
* Use {@see FeedItem::getUid()} to get the unique id.
|
|
*
|
|
* @param string $uid A string that uniquely identifies the current item
|
|
* @return self
|
|
*/
|
|
public function setUid($uid)
|
|
{
|
|
$this->uid = null; // Clear previous data
|
|
|
|
if (!is_string($uid)) {
|
|
Debug::log('Unique id must be a string!');
|
|
} elseif (preg_match('/^[a-f0-9]{40}$/', $uid)) {
|
|
// keep id if it already is a SHA-1 hash
|
|
$this->uid = $uid;
|
|
} else {
|
|
$this->uid = sha1($uid);
|
|
}
|
|
|
|
return $this;
|
|
}
|
|
|
|
/**
|
|
* Add miscellaneous elements to the item.
|
|
*
|
|
* @param string $key Name of the element.
|
|
* @param mixed $value Value of the element.
|
|
* @return self
|
|
*/
|
|
public function addMisc($key, $value)
|
|
{
|
|
if (!is_string($key)) {
|
|
Debug::log('Key must be a string!');
|
|
} elseif (in_array($key, get_object_vars($this))) {
|
|
Debug::log('Key must be unique!');
|
|
} else {
|
|
$this->misc[$key] = $value;
|
|
}
|
|
|
|
return $this;
|
|
}
|
|
|
|
/**
|
|
* Transform current object to array
|
|
*
|
|
* @return array
|
|
*/
|
|
public function toArray()
|
|
{
|
|
return array_merge(
|
|
[
|
|
'uri' => $this->uri,
|
|
'title' => $this->title,
|
|
'timestamp' => $this->timestamp,
|
|
'author' => $this->author,
|
|
'content' => $this->content,
|
|
'enclosures' => $this->enclosures,
|
|
'categories' => $this->categories,
|
|
'uid' => $this->uid,
|
|
],
|
|
$this->misc
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Set item property
|
|
*
|
|
* Allows simple assignment to parameters. This method is slower, but easier
|
|
* to implement in some cases:
|
|
*
|
|
* ```PHP
|
|
* $item = new \FeedItem();
|
|
* $item->content = 'Hello World!';
|
|
* $item->my_id = 42;
|
|
* ```
|
|
*
|
|
* @param string $name Property name
|
|
* @param mixed $value Property value
|
|
*/
|
|
public function __set($name, $value)
|
|
{
|
|
switch ($name) {
|
|
case 'uri':
|
|
$this->setURI($value);
|
|
break;
|
|
case 'title':
|
|
$this->setTitle($value);
|
|
break;
|
|
case 'timestamp':
|
|
$this->setTimestamp($value);
|
|
break;
|
|
case 'author':
|
|
$this->setAuthor($value);
|
|
break;
|
|
case 'content':
|
|
$this->setContent($value);
|
|
break;
|
|
case 'enclosures':
|
|
$this->setEnclosures($value);
|
|
break;
|
|
case 'categories':
|
|
$this->setCategories($value);
|
|
break;
|
|
case 'uid':
|
|
$this->setUid($value);
|
|
break;
|
|
default:
|
|
$this->addMisc($name, $value);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get item property
|
|
*
|
|
* Allows simple assignment to parameters. This method is slower, but easier
|
|
* to implement in some cases.
|
|
*
|
|
* @param string $name Property name
|
|
* @return mixed Property value
|
|
*/
|
|
public function __get($name)
|
|
{
|
|
switch ($name) {
|
|
case 'uri':
|
|
return $this->getURI();
|
|
case 'title':
|
|
return $this->getTitle();
|
|
case 'timestamp':
|
|
return $this->getTimestamp();
|
|
case 'author':
|
|
return $this->getAuthor();
|
|
case 'content':
|
|
return $this->getContent();
|
|
case 'enclosures':
|
|
return $this->getEnclosures();
|
|
case 'categories':
|
|
return $this->getCategories();
|
|
case 'uid':
|
|
return $this->getUid();
|
|
default:
|
|
if (array_key_exists($name, $this->misc)) {
|
|
return $this->misc[$name];
|
|
}
|
|
return null;
|
|
}
|
|
}
|
|
}
|