[{Atom,Mrss}Format]: Generate using DomDocument (#2771)

* [AtomFormat]: Generate using DomDocument

DOMDocument will escape the HTML content for us as needed, so the manual xml_encode() calls are no longer required.

* [MrssFormat]: Generate using DomDocument

DOMDocument will escape the HTML content for us as needed, so the manual xml_encode() calls are no longer required.
This commit is contained in:
Jan Tojnar 2022-06-07 23:22:03 +02:00 committed by GitHub
parent fb501652d5
commit 90d22f0d80
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 198 additions and 162 deletions

View file

@ -9,6 +9,9 @@
class AtomFormat extends FormatAbstract{
const MIME_TYPE = 'application/atom+xml';
protected const ATOM_NS = 'http://www.w3.org/2005/Atom';
protected const MRSS_NS = 'http://search.yahoo.com/mrss/';
const LIMIT_TITLE = 140;
public function stringify(){
@ -17,26 +20,66 @@ class AtomFormat extends FormatAbstract{
$urlPath = (isset($_SERVER['PATH_INFO'])) ? $_SERVER['PATH_INFO'] : '';
$urlRequest = (isset($_SERVER['REQUEST_URI'])) ? $_SERVER['REQUEST_URI'] : '';
$feedUrl = $this->xml_encode($urlPrefix . $urlHost . $urlRequest);
$feedUrl = $urlPrefix . $urlHost . $urlRequest;
$extraInfos = $this->getExtraInfos();
$title = $this->xml_encode($extraInfos['name']);
$uri = !empty($extraInfos['uri']) ? $extraInfos['uri'] : REPOSITORY;
$document = new DomDocument('1.0', $this->getCharset());
$document->formatOutput = true;
$feed = $document->createElementNS(self::ATOM_NS, 'feed');
$feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:media', self::MRSS_NS);
$document->appendChild($feed);
$title = $document->createElement('title');
$title->setAttribute('type', 'text');
$title->appendChild($document->createTextNode($extraInfos['name']));
$feed->appendChild($title);
$id = $document->createElement('id');
$id->appendChild($document->createTextNode($feedUrl));
$feed->appendChild($id);
$uriparts = parse_url($uri);
if(!empty($extraInfos['icon'])) {
$iconUrl = $extraInfos['icon'];
} else {
$iconUrl = $uriparts['scheme'] . '://' . $uriparts['host'] . '/favicon.ico';
}
$icon = $document->createElement('icon');
$icon->appendChild($document->createTextNode($iconUrl));
$feed->appendChild($icon);
$logo = $document->createElement('logo');
$logo->appendChild($document->createTextNode($iconUrl));
$feed->appendChild($logo);
$feedTimestamp = gmdate(DATE_ATOM, $this->lastModified);
$updated = $document->createElement('updated');
$updated->appendChild($document->createTextNode($feedTimestamp));
$feed->appendChild($updated);
// since we can't guarantee that all items have an author,
// a global feed author is mandatory
$feedAuthor = 'RSS-Bridge';
$author = $document->createElement('author');
$authorName = $document->createElement('name');
$authorName->appendChild($document->createTextNode($feedAuthor));
$author->appendChild($authorName);
$feed->appendChild($author);
$uriparts = parse_url($uri);
if(!empty($extraInfos['icon'])) {
$icon = $extraInfos['icon'];
} else {
$icon = $this->xml_encode($uriparts['scheme'] . '://' . $uriparts['host'] . '/favicon.ico');
}
$linkAlternate = $document->createElement('link');
$linkAlternate->setAttribute('rel', 'alternate');
$linkAlternate->setAttribute('type', 'text/html');
$linkAlternate->setAttribute('href', $uri);
$feed->appendChild($linkAlternate);
$uri = $this->xml_encode($uri);
$linkSelf = $document->createElement('link');
$linkSelf->setAttribute('rel', 'self');
$linkSelf->setAttribute('type', 'application/atom+xml');
$linkSelf->setAttribute('href', $feedUrl);
$feed->appendChild($linkSelf);
$entries = '';
foreach($this->getItems() as $item) {
$entryTimestamp = $item->getTimestamp();
$entryTitle = $item->getTitle();
@ -48,7 +91,7 @@ class AtomFormat extends FormatAbstract{
$entryID = 'urn:sha1:' . $item->getUid();
if (empty($entryID)) // Fallback to provided URI
$entryID = $this->xml_encode($entryUri);
$entryID = $entryUri;
if (empty($entryID)) // Fallback to title and content
$entryID = 'urn:sha1:' . hash('sha1', $entryTitle . $entryContent);
@ -67,96 +110,75 @@ class AtomFormat extends FormatAbstract{
if (empty($entryContent))
$entryContent = ' ';
$entryAuthor = '';
if ($item->getAuthor()) {
$entryAuthor = $this->xml_encode($item->getAuthor());
}
$entry = $document->createElement('entry');
$entryTitle = $this->xml_encode($entryTitle);
$entryUri = $this->xml_encode($entryUri);
$entryTimestamp = $this->xml_encode(gmdate(DATE_ATOM, $entryTimestamp));
$entryContent = $this->xml_encode($this->sanitizeHtml($entryContent));
$title = $document->createElement('title');
$title->setAttribute('type', 'html');
$title->appendChild($document->createTextNode($entryTitle));
$entry->appendChild($title);
$entryEnclosures = '';
foreach($item->getEnclosures() as $enclosure) {
$entryEnclosures .= '<link rel="enclosure" href="'
. $this->xml_encode($enclosure)
. '" type="' . getMimeType($enclosure) . '" />'
. PHP_EOL;
}
$entryTimestamp = gmdate(DATE_ATOM, $entryTimestamp);
$published = $document->createElement('published');
$published->appendChild($document->createTextNode($entryTimestamp));
$entry->appendChild($published);
$entryCategories = '';
foreach($item->getCategories() as $category) {
$entryCategories .= '<category term="'
. $this->xml_encode($category)
. '"/>'
. PHP_EOL;
}
$updated = $document->createElement('updated');
$updated->appendChild($document->createTextNode($entryTimestamp));
$entry->appendChild($updated);
$entryThumbnail = $item->thumbnail;
if (!empty($entryThumbnail))
$entryThumbnail = '<media:thumbnail url="' . $this->xml_encode($entryThumbnail) . '"/>';
$id = $document->createElement('id');
$id->appendChild($document->createTextNode($entryID));
$entry->appendChild($id);
$entryLinkAlternate = '';
if (!empty($entryUri)) {
$entryLinkAlternate = '<link rel="alternate" type="text/html" href="'
. $entryUri
. '"/>';
$entryLinkAlternate = $document->createElement('link');
$entryLinkAlternate->setAttribute('rel', 'alternate');
$entryLinkAlternate->setAttribute('type', 'text/html');
$entryLinkAlternate->setAttribute('href', $entryUri);
$entry->appendChild($entryLinkAlternate);
}
if (!empty($entryAuthor)) {
$entryAuthor = '<author><name>'
. $entryAuthor
. '</name></author>';
if (!empty($item->getAuthor())) {
$author = $document->createElement('author');
$authorName = $document->createElement('name');
$authorName->appendChild($document->createTextNode($item->getAuthor()));
$author->appendChild($authorName);
$entry->appendChild($author);
}
$entries .= <<<EOD
$content = $document->createElement('content');
$content->setAttribute('type', 'html');
$content->appendChild($document->createTextNode($this->sanitizeHtml($entryContent)));
$entry->appendChild($content);
<entry>
<title type="html">{$entryTitle}</title>
<published>{$entryTimestamp}</published>
<updated>{$entryTimestamp}</updated>
<id>{$entryID}</id>
{$entryLinkAlternate}
{$entryAuthor}
<content type="html">{$entryContent}</content>
{$entryEnclosures}
{$entryCategories}
{$entryThumbnail}
</entry>
foreach($item->getEnclosures() as $enclosure) {
$entryEnclosure = $document->createElement('link');
$entryEnclosure->setAttribute('rel', 'enclosure');
$entryEnclosure->setAttribute('type', getMimeType($enclosure));
$entryEnclosure->setAttribute('href', $enclosure);
$entry->appendChild($entryEnclosure);
}
EOD;
foreach($item->getCategories() as $category) {
$entryCategory = $document->createElement('category');
$entryCategory->setAttribute('term', $category);
$entry->appendChild($entryCategory);
}
if (!empty($item->thumbnail)) {
$thumbnail = $document->createElementNS(self::MRSS_NS, 'media:thumbnail');
$thumbnail->setAttribute('url', $item->thumbnail);
$entry->appendChild($thumbnail);
}
$feed->appendChild($entry);
}
$feedTimestamp = gmdate(DATE_ATOM, $this->lastModified);
$charset = $this->getCharset();
/* Data are prepared, now let's begin the "MAGIE !!!" */
$toReturn = <<<EOD
<?xml version="1.0" encoding="{$charset}"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<title type="text">{$title}</title>
<id>{$feedUrl}</id>
<icon>{$icon}</icon>
<logo>{$icon}</logo>
<updated>{$feedTimestamp}</updated>
<author>
<name>{$feedAuthor}</name>
</author>
<link rel="alternate" type="text/html" href="{$uri}" />
<link rel="self" type="application/atom+xml" href="{$feedUrl}" />
{$entries}
</feed>
EOD;
$toReturn = $document->saveXML();
// Remove invalid characters
ini_set('mbstring.substitute_character', 'none');
$toReturn = mb_convert_encoding($toReturn, $this->getCharset(), 'UTF-8');
return $toReturn;
}
/**
 * Escape text so it can be embedded in XML element content or in a
 * quoted attribute value.
 *
 * ENT_QUOTES is combined with ENT_XML1 because ENT_XML1 by itself only
 * selects the XML 1 entity table and leaves both quote characters
 * untouched; an unescaped double quote would terminate a hand-built
 * attribute such as href="..." early and produce malformed XML.
 *
 * @param string $text Raw text to escape.
 * @return string Text with &, <, >, " and ' replaced by XML entities.
 */
private function xml_encode($text){
	return htmlspecialchars($text, ENT_XML1 | ENT_QUOTES);
}
}

View file

@ -27,6 +27,9 @@
class MrssFormat extends FormatAbstract {
const MIME_TYPE = 'application/rss+xml';
protected const ATOM_NS = 'http://www.w3.org/2005/Atom';
protected const MRSS_NS = 'http://search.yahoo.com/mrss/';
const ALLOWED_IMAGE_EXT = array(
'.gif', '.jpg', '.png'
);
@ -37,24 +40,67 @@ class MrssFormat extends FormatAbstract {
$urlPath = (isset($_SERVER['PATH_INFO'])) ? $_SERVER['PATH_INFO'] : '';
$urlRequest = (isset($_SERVER['REQUEST_URI'])) ? $_SERVER['REQUEST_URI'] : '';
$feedUrl = $this->xml_encode($urlPrefix . $urlHost . $urlRequest);
$feedUrl = $urlPrefix . $urlHost . $urlRequest;
$extraInfos = $this->getExtraInfos();
$title = $this->xml_encode($extraInfos['name']);
$icon = $extraInfos['icon'];
$uri = !empty($extraInfos['uri']) ? $extraInfos['uri'] : REPOSITORY;
if(!empty($extraInfos['uri'])) {
$uri = $this->xml_encode($extraInfos['uri']);
} else {
$uri = REPOSITORY;
$document = new DomDocument('1.0', $this->getCharset());
$document->formatOutput = true;
$feed = $document->createElement('rss');
$feed->setAttribute('version', '2.0');
$feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:atom', self::ATOM_NS);
$feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:media', self::MRSS_NS);
$document->appendChild($feed);
$channel = $document->createElement('channel');
$feed->appendChild($channel);
$title = $extraInfos['name'];
$channelTitle = $document->createElement('title');
$channelTitle->appendChild($document->createTextNode($title));
$channel->appendChild($channelTitle);
$link = $document->createElement('link');
$link->appendChild($document->createTextNode($uri));
$channel->appendChild($link);
$description = $document->createElement('description');
$description->appendChild($document->createTextNode($extraInfos['name']));
$channel->appendChild($description);
$icon = $extraInfos['icon'];
if (!empty($icon) && in_array(substr($icon, -4), self::ALLOWED_IMAGE_EXT)) {
$feedImage = $document->createElement('image');
$channel->appendChild($feedImage);
$iconUrl = $document->createElement('url');
$iconUrl->appendChild($document->createTextNode($icon));
$feedImage->appendChild($iconUrl);
$iconTitle = $document->createElement('title');
$iconTitle->appendChild($document->createTextNode($title));
$feedImage->appendChild($iconTitle);
$iconLink = $document->createElement('link');
$iconLink->appendChild($document->createTextNode($uri));
$feedImage->appendChild($iconLink);
}
$items = '';
$linkAlternate = $document->createElementNS(self::ATOM_NS, 'atom:link');
$linkAlternate->setAttribute('rel', 'alternate');
$linkAlternate->setAttribute('type', 'text/html');
$linkAlternate->setAttribute('href', $uri);
$channel->appendChild($linkAlternate);
$linkSelf = $document->createElementNS(self::ATOM_NS, 'atom:link');
$linkSelf->setAttribute('rel', 'self');
$linkSelf->setAttribute('type', 'application/atom+xml');
$linkSelf->setAttribute('href', $feedUrl);
$channel->appendChild($linkSelf);
foreach($this->getItems() as $item) {
$itemTimestamp = $item->getTimestamp();
$itemTitle = $this->xml_encode($item->getTitle());
$itemUri = $this->xml_encode($item->getURI());
$itemContent = $this->xml_encode($this->sanitizeHtml($item->getContent()));
$itemTitle = $item->getTitle();
$itemUri = $item->getURI();
$itemContent = $this->sanitizeHtml($item->getContent());
$entryID = $item->getUid();
$isPermaLink = 'false';
@ -66,91 +112,59 @@ class MrssFormat extends FormatAbstract {
if (empty($entryID)) // Fallback to title and content
$entryID = hash('sha1', $itemTitle . $itemContent);
$entryTitle = '';
if (!empty($itemTitle))
$entryTitle = '<title>' . $itemTitle . '</title>';
$entry = $document->createElement('item');
$entryLink = '';
if (!empty($itemUri))
$entryLink = '<link>' . $itemUri . '</link>';
$entryPublished = '';
if (!empty($itemTimestamp)) {
$entryPublished = '<pubDate>'
. $this->xml_encode(gmdate(DATE_RFC2822, $itemTimestamp))
. '</pubDate>';
if (!empty($itemTitle)) {
$entryTitle = $document->createElement('title');
$entryTitle->appendChild($document->createTextNode($itemTitle));
$entry->appendChild($entryTitle);
}
$entryDescription = '';
if (!empty($itemContent))
$entryDescription = '<description>' . $itemContent . '</description>';
if (!empty($itemUri)) {
$entryLink = $document->createElement('link');
$entryLink->appendChild($document->createTextNode($itemUri));
$entry->appendChild($entryLink);
}
$entryGuid = $document->createElement('guid');
$entryGuid->setAttribute('isPermaLink', $isPermaLink);
$entryGuid->appendChild($document->createTextNode($entryID));
$entry->appendChild($entryGuid);
if (!empty($itemTimestamp)) {
$entryPublished = $document->createElement('pubDate');
$entryPublished->appendChild($document->createTextNode(gmdate(DATE_RFC2822, $itemTimestamp)));
$entry->appendChild($entryPublished);
}
if (!empty($itemContent)) {
$entryDescription = $document->createElement('description');
$entryDescription->appendChild($document->createTextNode($itemContent));
$entry->appendChild($entryDescription);
}
$entryEnclosures = '';
foreach($item->getEnclosures() as $enclosure) {
$entryEnclosures .= '<media:content url="'
. $this->xml_encode($enclosure)
. '" type="' . getMimeType($enclosure) . '"/>'
. PHP_EOL;
$entryEnclosure = $document->createElementNS(self::MRSS_NS, 'media:content');
$entryEnclosure->setAttribute('url', $enclosure);
$entryEnclosure->setAttribute('type', getMimeType($enclosure));
$entry->appendChild($entryEnclosure);
}
$entryCategories = '';
foreach($item->getCategories() as $category) {
$entryCategories .= '<category>'
. $category . '</category>'
. PHP_EOL;
$entryCategory = $document->createElement('category');
$entryCategory->appendChild($document->createTextNode($category));
$entry->appendChild($entryCategory);
}
$items .= <<<EOD
<item>
{$entryTitle}
{$entryLink}
<guid isPermaLink="{$isPermaLink}">{$entryID}</guid>
{$entryPublished}
{$entryDescription}
{$entryEnclosures}
{$entryCategories}
</item>
EOD;
$channel->appendChild($entry);
}
$charset = $this->getCharset();
$feedImage = '';
if (!empty($icon) && in_array(substr($icon, -4), self::ALLOWED_IMAGE_EXT)) {
$feedImage .= <<<EOD
<image>
<url>{$icon}</url>
<title>{$title}</title>
<link>{$uri}</link>
</image>
EOD;
}
/* Data are prepared, now let's begin the "MAGIE !!!" */
$toReturn = <<<EOD
<?xml version="1.0" encoding="{$charset}"?>
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>{$title}</title>
<link>{$uri}</link>
<description>{$title}</description>
{$feedImage}
<atom:link rel="alternate" type="text/html" href="{$uri}"/>
<atom:link rel="self" href="{$feedUrl}" type="application/atom+xml"/>
{$items}
</channel>
</rss>
EOD;
$toReturn = $document->saveXML();
// Remove invalid non-UTF8 characters
ini_set('mbstring.substitute_character', 'none');
$toReturn = mb_convert_encoding($toReturn, $this->getCharset(), 'UTF-8');
return $toReturn;
}
/**
 * Escape text so it can be embedded in XML element content or in a
 * quoted attribute value.
 *
 * ENT_QUOTES is combined with ENT_XML1 because ENT_XML1 by itself only
 * selects the XML 1 entity table and leaves both quote characters
 * untouched; an unescaped double quote would terminate a hand-built
 * attribute such as url="..." early and produce malformed XML.
 *
 * @param string $text Raw text to escape.
 * @return string Text with &, <, >, " and ' replaced by XML entities.
 */
private function xml_encode($text){
	return htmlspecialchars($text, ENT_XML1 | ENT_QUOTES);
}
}