mirror of
synced 2025-03-14 20:21:14 +03:00
[XenForoBridge] Add new bridge
Adds a bridge for forums powered by XenForo (see https://xenforo.com). Support may vary between forums because XenForo versions change frequently and cannot be clearly distinguished. Timestamps in particular may not be parsed correctly depending on the forum language (they should currently work for en-US and de-DE). Tested on - https://xenforo.com/community/ - http://www.ign.com/boards/ Notice: XenForo provides RSS feeds for forums (but not for specific topics). For example: https://xenforo.com/community/forums/-/index.rss
This commit is contained in:
1 changed files with 464 additions and 0 deletions
Normal file
Normal file
@ -0,0 +1,464 @@
* This bridge generates feeds for threads from forums running XenForo (the version 1 and version 2 page layouts are detected)
* Examples:
* - https://xenforo.com/community/
* - http://www.ign.com/boards/
* Notice: XenForo itself provides RSS feeds for forums (but not for individual threads). For example:
* - https://xenforo.com/community/forums/-/index.rss
* For more information on XenForo, visit
* - https://xenforo.com/
* - https://en.wikipedia.org/wiki/XenForo
class XenForoBridge extends BridgeAbstract {
// Bridge specific constants
// Context name: used as the key into PARAMETERS and compared against
// $this->queriedContext in getName() and getURI()
const CONTEXT_THREAD = 'Thread';
// Markers for the detected page layout; collectData() stores one of them
// in $this->version
const XENFORO_VERSION_1 = '1.0';
const XENFORO_VERSION_2 = '2.0';
// RSS-Bridge constants
const NAME = 'XenForo Bridge';
const URI = 'https://xenforo.com/';
const DESCRIPTION = 'Generates feeds for threads in forums powered by XenForo';
const MAINTAINER = 'logmanoriginal';
const PARAMETERS = array(
// Inputs offered in the thread context
self::CONTEXT_THREAD => array(
'url' => array(
'name' => 'Thread URL',
'type' => 'text',
'required' => true,
'title' => 'Insert URL to the thread for which the feed should be generated',
'exampleValue' => 'https://xenforo.com/community/threads/guide-to-suggestions.2285/'
// Inputs listed under the 'global' key; 'limit' is read with
// getInput('limit') when collecting posts
'global' => array(
'limit' => array(
'name' => 'Limit',
'type' => 'number',
'required' => false,
'title' => 'Specify maximum number of elements to return in the feed',
'defaultValue' => 10
// NOTE(review): this value was annotated as "10 minutes", but 7200 would be
// 2 hours if the unit is seconds (10 minutes would be 600) - confirm which
// one is intended
const CACHE_TIMEOUT = 7200;
// Thread title; set by collectData() and used by getName()
private $title = '';
// Thread URL; set by collectData() and returned by getURI()
private $threadurl = '';
private $version; // Holds the XenForo version (one of the XENFORO_VERSION_* constants)
/**
 * Returns the name of the feed.
 *
 * In the thread context the name is the thread title followed by ' - '
 * and the NAME constant; in every other case the parent implementation
 * is used.
 *
 * @return string The composed name in the thread context; otherwise
 * whatever parent::getName() returns
 */
public function getName() {
switch($this->queriedContext) {
case self::CONTEXT_THREAD: return $this->title . ' - ' . static::NAME;
return parent::getName();
/**
 * Returns the URI of the feed.
 *
 * In the thread context this is the thread URL stored in
 * $this->threadurl; in every other case the parent implementation is
 * used.
 *
 * @return string The thread URL in the thread context; otherwise
 * whatever parent::getURI() returns
 */
public function getURI() {
switch($this->queriedContext) {
case self::CONTEXT_THREAD: return $this->threadurl;
return parent::getURI();
/**
 * Collects the posts of the requested thread into $this->items.
 *
 * Steps visible in this method:
 * - validate the thread URL (filter result, scheme, non-root path,
 *   presence of a numeric thread ID)
 * - strip a trailing "page-n" part so that loading starts at the first
 *   page of the thread
 * - load the page and detect which of the two known page layouts is used
 * - read the thread title and extract the posts with the matching
 *   extractThreadPostsV1()/extractThreadPostsV2() method
 *
 * Invalid input is reported with returnClientError(); loading and parsing
 * problems are reported with returnServerError().
 */
public function collectData() {
$this->threadurl = filter_var(
// filter_var() yields false when validation fails
if($this->threadurl === false) {
returnClientError('The URL you provided is invalid!');
// NOTE(review): $urlparts is not used anywhere in the visible code
$urlparts = parse_url($this->threadurl, PHP_URL_SCHEME);
// Scheme must be "http" or "https"
// NOTE(review): the pattern is not anchored, so any scheme that merely
// contains "http" passes this check - confirm whether that is acceptable
if(preg_match('/http[s]{0,1}/', parse_url($this->threadurl, PHP_URL_SCHEME)) == false) {
returnClientError('The URL you provided doesn\'t specify a valid scheme (http or https)!');
// Path cannot be root (../)
if(parse_url($this->threadurl, PHP_URL_PATH) === '/') {
returnClientError('The URL you provided doesn\'t link to a valid thread (root path)!');
// XenForo adds a thread ID to the URL, like "...-thread.454934283". It must be present
// NOTE(review): this pattern is not anchored either; ".<digits>" anywhere in
// the path (after at least one character) is accepted
if(preg_match('/.+\.\d+[\/]{0,1}/', parse_URL($this->threadurl, PHP_URL_PATH)) == false) {
returnClientError('The URL you provided doesn\'t link to a valid thread (ID missing)!');
// We want to start at the first page in the thread. XenForo uses "../page-n" syntax
// to identify pages (except for the first page).
// Notice: XenForo uses the concept of "sentinels" to find and replace parts in the
// URL. Technically forum hosts can change the syntax!
if(preg_match('/.+\/(page-\d+.*)$/', $this->threadurl, $matches) != false) {
// before: https://xenforo.com/community/threads/guide-to-suggestions.2285/page-5
// after : https://xenforo.com/community/threads/guide-to-suggestions.2285/
// $matches[1] holds everything from "page-n" to the end of the URL.
// NOTE(review): str_replace() removes every occurrence of that text, not
// only the trailing one
$this->threadurl = str_replace($matches[1], '', $this->threadurl);
// Load the first page of the thread; a falsy result is reported as server error
$html = getSimpleHTMLDOMCached($this->threadurl)
or returnServerError('Failed loading data from "' . $this->threadurl . '"!');
// presumably rewrites relative links in the document against the thread
// URL - confirm against the helper's implementation
$html = defaultLinkTo($html, $this->threadurl);
// Notice: The DOM structure changes depending on the XenForo version used
// "div.mainContent" marks the version 1 layout, a div whose class attribute
// is exactly "p-body" marks the version 2 layout
if($mainContent = $html->find('div.mainContent', 0)) {
$this->version = self::XENFORO_VERSION_1;
} elseif ($mainContent = $html->find('div[class="p-body"]', 0)) {
$this->version = self::XENFORO_VERSION_2;
} else {
returnServerError('This forum is currently not supported!');
// Title and posts are located differently in each layout
switch($this->version) {
case self::XENFORO_VERSION_1:
$titleBar = $mainContent->find('div.titleBar h1', 0)
or returnServerError('Error finding title bar!');
$this->title = $titleBar->plaintext;
// Store items from current page (we'll use $this->items as LIFO buffer)
$this->extractThreadPostsV1($html, $this->threadurl);
case self::XENFORO_VERSION_2:
$titleBar = $mainContent->find('div[class="p-title"] h1', 0)
or returnServerError('Error finding title bar!');
$this->title = $titleBar->plaintext;
$this->extractThreadPostsV2($html, $this->threadurl);
// Runs for as long as more items than the 'limit' input have been collected
while(count($this->items) > $this->getInput('limit')) {
* Extracts thread posts
* @param $html A simplehtmldom object
* @param $url The url from which $html was loaded
/**
 * Extracts the posts of one page in the version 1 layout and appends them
 * to $this->items.
 *
 * Every item receives the keys 'uri', 'content', 'title', 'author' and
 * 'id'; 'timestamp' is set when a DateTime element is found in the post.
 *
 * @param object $html Parsed page (a simplehtmldom object)
 * @param string $url The url from which $html was loaded; item URIs are
 * built as $url . '#' . <post id>
 */
private function extractThreadPostsV1($html, $url) {
// Language of the page, passed to fixDate() for the short timestamp form
$lang = $html->find('html', 0)->lang;
// Posts are contained in an "ol"
$messageList = $html->find('#messageList li')
or returnServerError('Error finding message list!');
foreach($messageList as $post) {
if(!isset($post->attr['id'])) { // Skip ads
$item = array();
// The id attribute of the post serves as URL fragment
$item['uri'] = $url . '#' . $post->getAttribute('id');
$content = $post->find('.messageContent article', 0);
// Add some style to quotes
foreach($content->find('.bbCodeQuote') as $quote) {
$quote->style = '
color: #495566;
background-color: rgb(248,251,253);
border: 1px solid rgb(111, 140, 180);
border-color: rgb(111, 140, 180);
font-style: italic;';
// Remove script tags
foreach($content->find('script') as $script) {
$script->outertext = '';
// The content is captured here, before the quotes are emptied below
$item['content'] = $content->innertext;
// Remove quotes (for the title)
foreach($content->find('.bbCodeQuote') as $quote) {
$quote->innertext = '';
$title = trim($content->plaintext);
// Titles longer than 70 bytes are cut at the first space found at or after
// offset 70 and get an ellipsis.
// NOTE(review): strpos() returns false when there is no such space, which
// makes substr() return an empty string, so the title becomes just '...'
if(strlen($title) > 70) {
$item['title'] = substr($title, 0, strpos($title, ' ', 70)) . '...';
} else {
$item['title'] = $title;
* Timestamps are presented in two forms:
* 1) short version (for older posts?)
* <span
* class="DateTime"
* title="22 Oct. 2018 at 23:47"
* >22 Oct. 2018</span>
* This form has to be interpreted depending on the current language.
* 2) long version (for newer posts?)
* <abbr
* class="DateTime"
* data-time="1541008785"
* data-diff="310694"
* data-datestring="31 Oct. 2018"
* data-timestring="18:59"
* title="31 Oct. 2018 at 18:59"
* >Wednesday at 18:59</abbr>
* This form has the timestamp embedded (data-time)
if($timestamp = $post->find('abbr.DateTime', 0)) { // long version (preferred)
$item['timestamp'] = $timestamp->{'data-time'};
} elseif($timestamp = $post->find('span.DateTime', 0)) { // short version
// The title attribute holds the localized date, which fixDate() converts
$item['timestamp'] = $this->fixDate($timestamp->title, $lang);
$item['author'] = $post->getAttribute('data-author');
// Bridge specific properties
$item['id'] = $post->getAttribute('id');
$this->items[] = $item;
/**
 * Extracts the posts of one page in the version 2 layout and appends them
 * to $this->items.
 *
 * Every item receives the keys 'uri', 'title', 'timestamp', 'author',
 * 'content' and 'id'.
 *
 * @param object $html Parsed page; must provide find()
 * @param string $url URL the page was loaded from; item URIs are built as
 * $url . '#' . <post id>
 */
private function extractThreadPostsV2($html, $url) {
// Language of the page, passed to fixDate() for timestamp conversion
$lang = $html->find('html', 0)->lang;
// Posts are the "article" elements below a div whose class attribute is
// exactly "block-body"
$messageList = $html->find('div[class="block-body"] article')
or returnServerError('Error finding message list!');
foreach($messageList as $post) {
if(!isset($post->attr['id'])) { // Skip ads
$item = array();
$item['uri'] = $url . '#' . $post->getAttribute('id');
$title = $post->find('div[class="message-content"] article', 0)->plaintext;
// The title is the post text up to the first space at or after offset 70.
// NOTE(review): unlike extractThreadPostsV1() there is no length check;
// when no space exists at or after offset 70 (including short texts)
// strpos() does not yield a usable position - confirm the resulting title
$end = strpos($title, ' ', 70);
$item['title'] = substr($title, 0, $end);
// The title attribute of the first "time" element is converted by fixDate()
$item['timestamp'] = $this->fixDate($post->find('time', 0)->title, $lang);
$item['author'] = $post->getAttribute('data-author');
// NOTE(review): this stores the element returned by find() itself, whereas
// extractThreadPostsV1() stores the element's innertext
$item['content'] = $post->find('div[class="message-content"] article', 0);
// Bridge specific properties
$item['id'] = $post->getAttribute('id');
$this->items[] = $item;
/**
 * Loads further pages of a thread in the version 1 layout and extracts
 * their posts with extractThreadPostsV1().
 *
 * The page count, the URL template and the placeholder are read from the
 * data-last, data-baseurl and data-sentinel attributes of "div.PageNav".
 * Loading continues while fewer items than the 'limit' input have been
 * collected and $page is not 1.
 *
 * @param object $html Parsed page that may contain the page navigation
 */
private function extractPagesV1($html) {
// A navigation bar becomes available if the number of posts grows too
// high. When this happens we need to load further pages (from last backwards)
// NOTE(review): confirm that find() returns false (and not null) when
// nothing matches; otherwise this condition is always true
if(($pageNav = $html->find('div.PageNav', 0)) !== false) {
$lastpage = $pageNav->{'data-last'};
$baseurl = $pageNav->{'data-baseurl'};
$sentinel = $pageNav->{'data-sentinel'};
// Scheme and host of the thread URL with a trailing slash; the page URL
// template is appended to it
$hosturl = parse_url($this->threadurl, PHP_URL_SCHEME)
. '://'
. parse_url($this->threadurl, PHP_URL_HOST)
. '/';
$page = $lastpage;
// Load at least the last page
do {
// NOTE(review): the placeholder is replaced with $lastpage, not $page, so
// the same URL is built in every iteration - confirm whether $page was intended
$pageurl = $hosturl . str_replace($sentinel, $lastpage, $baseurl);
// We can optimize performance by caching all but the last page
if($page != $lastpage) {
$html = getSimpleHTMLDOMCached($pageurl)
or returnServerError('Error loading contents from ' . $pageurl . '!');
} else {
$html = getSimpleHTMLDOM($pageurl)
or returnServerError('Error loading contents from ' . $pageurl . '!');
$html = defaultLinkTo($html, $hosturl);
$this->extractThreadPostsV1($html, $pageurl);
// Stop once enough items are collected or the first page is reached
} while (count($this->items) < $this->getInput('limit') && $page != 1);
/**
 * Loads further pages of a thread in the version 2 layout and extracts
 * their posts with extractThreadPostsV2().
 *
 * The last page number is taken from the entries of "div.pageNav"; the
 * URL template is derived from a navigation link by replacing its page
 * number with the placeholder '{{sentinel}}'. Loading continues while
 * fewer items than the 'limit' input have been collected and $page is
 * not 1.
 *
 * @param object $html Parsed page that may contain the page navigation
 */
private function extractPagesV2($html) {
// A navigation bar becomes available if the number of posts grows too
// high. When this happens we need to load further pages (from last backwards)
// NOTE(review): confirm that find() returns false (and not null) when
// nothing matches; otherwise this condition is always true
if(($pageNav = $html->find('div.pageNav', 0)) !== false) {
// Every entry overwrites $lastpage with its text
foreach($pageNav->find('li') as $nav) {
$lastpage = $nav->plaintext;
// Manually extract baseurl and inject sentinel
// presumably index -1 selects the last matching link - confirm against
// the DOM library's find() documentation
$baseurl = $pageNav->find('li a', -1)->href;
$baseurl = str_replace('page-' . $lastpage, 'page-{{sentinel}}', $baseurl);
$sentinel = '{{sentinel}}';
// Scheme and host of the thread URL (no trailing slash here)
$hosturl = parse_url($this->threadurl, PHP_URL_SCHEME)
. '://'
. parse_url($this->threadurl, PHP_URL_HOST);
$page = $lastpage;
// Load at least the last page
do {
// NOTE(review): the placeholder is replaced with $lastpage, not $page, so
// the same URL is built in every iteration - confirm whether $page was intended
$pageurl = $hosturl . str_replace($sentinel, $lastpage, $baseurl);
// We can optimize performance by caching all but the last page
if($page != $lastpage) {
$html = getSimpleHTMLDOMCached($pageurl)
or returnServerError('Error loading contents from ' . $pageurl . '!');
} else {
$html = getSimpleHTMLDOM($pageurl)
or returnServerError('Error loading contents from ' . $pageurl . '!');
// NOTE(review): $this->hosturl and $this->pageurl are read below, but no
// such properties are declared in the visible part of the class; the local
// variables $hosturl and $pageurl set above look like the intended values
// (extractPagesV1() passes the locals) - confirm
$html = defaultLinkTo($html, $this->hosturl);
$this->extractThreadPostsV2($html, $this->pageurl);
// Stop once enough items are collected or the first page is reached
} while (count($this->items) < $this->getInput('limit') && $page != 1);
* Fixes dates depending on the chosen language:
* de : dd.mm.yy
* en : dd.mm.yy
* it : dd/mm/yy
* Basically strtotime doesn't convert dates correctly due to formats
* being hard to interpret. So we use the DateTime object.
* We don't know the timezone, so just assume +00:00 (or whatever
* DateTime chooses)
/**
 * Converts a localized date string into a Unix timestamp.
 *
 * The visible cases handle 'en-US' and 'de-DE'. For 'de-DE' the German
 * month names are replaced with the entries of $mnamesen before parsing.
 *
 * @param string $date Date text as found on the page
 * @param string $lang Language code of the page; defaults to 'en-US'
 * @return string Result of date_format($df, 'U'), i.e. seconds since the
 * Unix epoch
 */
private function fixDate($date, $lang = 'en-US') {
// Replacement month names used when translating German dates
$mnamesen = [
switch($lang) {
case 'en-US': // example: Jun 9, 2018 at 11:46 PM
$df = date_create_from_format('M d, Y \a\t H:i A', $date);
case 'de-DE': // example: 19 Juli 2018 um 19:27 Uhr
// Long and short German month names, both mapped onto $mnamesen
$mnamesde = [
$mnamesdeshort = [
$date = str_ireplace($mnamesde, $mnamesen, $date);
$date = str_ireplace($mnamesdeshort, $mnamesen, $date);
$df = date_create_from_format('d M Y \u\m H:i \U\h\r', $date);
// debugMessage(date_format($df, 'U'));
// NOTE(review): date_create_from_format() returns false when the text does
// not match the format, and no handling for other languages is visible
// here - confirm that $df is always a valid date object at this point
return date_format($df, 'U');
Add table
Reference in a new issue