rss-bridge/formats/MrssFormat.php
Jan Tojnar 1af6cbeb1e
[XML formats] Ensure elements are connected to DOM before further manipulation (#2806)
We are setting xmlns attributes at the root element but PHP would
still attach redundant ones to the DOM elements created with `createElementNS`.
That was because PHP reconciles namespace attributes when appending elements to DOM
but since we previously only attached the elements after all children were attached,
the reconciliation algorithm was not able to see the root element’s attributes.

To fix this, let’s attach each element to its parent immediately after it is created.
2022-06-09 18:33:23 +02:00

169 lines
6.3 KiB
PHP

<?php
/**
* MrssFormat - RSS 2.0 + Media RSS
* http://www.rssboard.org/rss-specification
* http://www.rssboard.org/media-rss
*
* Validators:
* https://validator.w3.org/feed/
* http://www.rssboard.org/rss-validator/
*
* Notes about the implementation:
*
* - The item author is not supported as it needs to be an e-mail address to be
* valid.
* - The RSS specification does not explicitly allow to have more than one
* enclosure as every item is meant to provide one "story", thus having
* multiple enclosures per item may lead to unexpected behavior.
* On top of that, it requires to have a length specified, which RSS-Bridge
* can't provide.
* - The Media RSS extension comes in handy, since it allows to have multiple
* enclosures, even though they recommend to have only one enclosure because
* of the one-story-per-item reason. It only requires to specify the URL,
* everything else is optional.
* - Since the Media RSS extension has its own namespace, the output is a valid
* RSS 2.0 feed that works with feed readers that don't support the extension.
*/
class MrssFormat extends FormatAbstract {
const MIME_TYPE = 'application/rss+xml';
protected const ATOM_NS = 'http://www.w3.org/2005/Atom';
protected const MRSS_NS = 'http://search.yahoo.com/mrss/';
const ALLOWED_IMAGE_EXT = array(
'.gif', '.jpg', '.png'
);
public function stringify(){
$urlPrefix = (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on') ? 'https://' : 'http://';
$urlHost = (isset($_SERVER['HTTP_HOST'])) ? $_SERVER['HTTP_HOST'] : '';
$urlPath = (isset($_SERVER['PATH_INFO'])) ? $_SERVER['PATH_INFO'] : '';
$urlRequest = (isset($_SERVER['REQUEST_URI'])) ? $_SERVER['REQUEST_URI'] : '';
$feedUrl = $urlPrefix . $urlHost . $urlRequest;
$extraInfos = $this->getExtraInfos();
$uri = !empty($extraInfos['uri']) ? $extraInfos['uri'] : REPOSITORY;
$document = new DomDocument('1.0', $this->getCharset());
$document->formatOutput = true;
$feed = $document->createElement('rss');
$document->appendChild($feed);
$feed->setAttribute('version', '2.0');
$feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:atom', self::ATOM_NS);
$feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:media', self::MRSS_NS);
$channel = $document->createElement('channel');
$feed->appendChild($channel);
$title = $extraInfos['name'];
$channelTitle = $document->createElement('title');
$channel->appendChild($channelTitle);
$channelTitle->appendChild($document->createTextNode($title));
$link = $document->createElement('link');
$channel->appendChild($link);
$link->appendChild($document->createTextNode($uri));
$description = $document->createElement('description');
$channel->appendChild($description);
$description->appendChild($document->createTextNode($extraInfos['name']));
$icon = $extraInfos['icon'];
if (!empty($icon) && in_array(substr($icon, -4), self::ALLOWED_IMAGE_EXT)) {
$feedImage = $document->createElement('image');
$channel->appendChild($feedImage);
$iconUrl = $document->createElement('url');
$iconUrl->appendChild($document->createTextNode($icon));
$feedImage->appendChild($iconUrl);
$iconTitle = $document->createElement('title');
$iconTitle->appendChild($document->createTextNode($title));
$feedImage->appendChild($iconTitle);
$iconLink = $document->createElement('link');
$iconLink->appendChild($document->createTextNode($uri));
$feedImage->appendChild($iconLink);
}
$linkAlternate = $document->createElementNS(self::ATOM_NS, 'link');
$channel->appendChild($linkAlternate);
$linkAlternate->setAttribute('rel', 'alternate');
$linkAlternate->setAttribute('type', 'text/html');
$linkAlternate->setAttribute('href', $uri);
$linkSelf = $document->createElementNS(self::ATOM_NS, 'link');
$channel->appendChild($linkSelf);
$linkSelf->setAttribute('rel', 'self');
$linkSelf->setAttribute('type', 'application/atom+xml');
$linkSelf->setAttribute('href', $feedUrl);
foreach($this->getItems() as $item) {
$itemTimestamp = $item->getTimestamp();
$itemTitle = $item->getTitle();
$itemUri = $item->getURI();
$itemContent = $this->sanitizeHtml($item->getContent());
$entryID = $item->getUid();
$isPermaLink = 'false';
if (empty($entryID) && !empty($itemUri)) { // Fallback to provided URI
$entryID = $itemUri;
$isPermaLink = 'true';
}
if (empty($entryID)) // Fallback to title and content
$entryID = hash('sha1', $itemTitle . $itemContent);
$entry = $document->createElement('item');
$channel->appendChild($entry);
if (!empty($itemTitle)) {
$entryTitle = $document->createElement('title');
$entry->appendChild($entryTitle);
$entryTitle->appendChild($document->createTextNode($itemTitle));
}
if (!empty($itemUri)) {
$entryLink = $document->createElement('link');
$entry->appendChild($entryLink);
$entryLink->appendChild($document->createTextNode($itemUri));
}
$entryGuid = $document->createElement('guid');
$entryGuid->setAttribute('isPermaLink', $isPermaLink);
$entry->appendChild($entryGuid);
$entryGuid->appendChild($document->createTextNode($entryID));
if (!empty($itemTimestamp)) {
$entryPublished = $document->createElement('pubDate');
$entry->appendChild($entryPublished);
$entryPublished->appendChild($document->createTextNode(gmdate(DATE_RFC2822, $itemTimestamp)));
}
if (!empty($itemContent)) {
$entryDescription = $document->createElement('description');
$entry->appendChild($entryDescription);
$entryDescription->appendChild($document->createTextNode($itemContent));
}
foreach($item->getEnclosures() as $enclosure) {
$entryEnclosure = $document->createElementNS(self::MRSS_NS, 'content');
$entry->appendChild($entryEnclosure);
$entryEnclosure->setAttribute('url', $enclosure);
$entryEnclosure->setAttribute('type', getMimeType($enclosure));
}
$entryCategories = '';
foreach($item->getCategories() as $category) {
$entryCategory = $document->createElement('category');
$entry->appendChild($entryCategory);
$entryCategory->appendChild($document->createTextNode($category));
}
}
$toReturn = $document->saveXML();
// Remove invalid non-UTF8 characters
ini_set('mbstring.substitute_character', 'none');
$toReturn = mb_convert_encoding($toReturn, $this->getCharset(), 'UTF-8');
return $toReturn;
}
}