mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-29 14:48:52 +03:00
[Wikipedia] Improve title search algorithm
This commit is contained in:
parent
d4435e0b13
commit
0b923ab76a
1 changed files with 11 additions and 2 deletions
|
@ -107,9 +107,18 @@ class WikipediaBridge extends BridgeAbstract{
|
||||||
// Clean the bottom of the featured article
|
// Clean the bottom of the featured article
|
||||||
$element->find('div', -1)->outertext = '';
|
$element->find('div', -1)->outertext = '';
|
||||||
|
|
||||||
|
// The title and URI of the article are best defined in an anchor containing the string '...' ('full article ...')
|
||||||
|
$target = $element->find('p/a', 0); // We'll use the first anchor as fallback
|
||||||
|
foreach($element->find('//a') as $anchor){
|
||||||
|
if(strpos($anchor->innertext, '...') !== false){
|
||||||
|
$target = $anchor;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
$item->uri = $this->uri . $element->find('p', 0)->find('a', 0)->href;
|
$item->uri = $this->uri . $target->href;
|
||||||
$item->title = $element->find('p', 0)->find('a', 0)->title;
|
$item->title = $target->title;
|
||||||
|
|
||||||
if(!$fullArticle)
|
if(!$fullArticle)
|
||||||
$item->content = strip_tags(str_replace('href="/', 'href="' . $this->uri . '/', $element->innertext), '<a><p><br><img>');
|
$item->content = strip_tags(str_replace('href="/', 'href="' . $this->uri . '/', $element->innertext), '<a><p><br><img>');
|
||||||
|
|
Loading…
Reference in a new issue