rss-bridge/bridges/ScribdBridge.php

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

82 lines
2.4 KiB
PHP
Raw Normal View History

2020-02-04 16:26:34 +00:00
<?php
/**
 * Bridge that lists the documents uploaded by a Scribd user.
 *
 * The user is identified by the profile URL given in the 'profile' input;
 * the feed is built from that profile's "/uploads" page, and each listed
 * document's own page is loaded to pick up its Open Graph description and
 * image as well as its interest "pills" (used as categories).
 */
class ScribdBridge extends BridgeAbstract
{
    const NAME = 'Scribd Bridge';
    const URI = 'https://www.scribd.com';
    const DESCRIPTION = 'Returns documents uploaded by a user.';
    const MAINTAINER = 'VerifiedJoseph';
    const PARAMETERS = [[
        'profile' => [
            'name' => 'Profile URL',
            'type' => 'text',
            'required' => true,
            'title' => 'Profile URL. Example: https://www.scribd.com/user/164147088/Ars-Technica',
            'exampleValue' => 'https://www.scribd.com/user/164147088/Ars-Technica'
        ],
    ]];

    const CACHE_TIMEOUT = 3600;

    // Captures the "user/<numeric id>/<name>" part of a Scribd profile URL.
    private $profileUrlRegex = '/scribd\.com\/(user\/[0-9]+\/[\w-]+)\/?/';

    // Text of the uploads page header; filled in by collectData() and used by getName().
    private $feedName = '';

    /**
     * Builds the feed items from the user's uploads page.
     *
     * At most 15 items are collected. List entries without a link are
     * skipped, because neither a URI nor a detail page can be derived
     * from them.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM($this->getURI());
        if (!$html) {
            // NOTE(review): assumes returnServerError() aborts the request; it is defined elsewhere.
            returnServerError('Could not load the uploads page of the given profile.');
        }

        $header = $html->find('div.header', 0);
        if ($header !== null) {
            $this->feedName = $header->plaintext;
        }

        foreach ($html->find('ul.document_cells > li') as $li) {
            $link = $li->find('a', 0);
            if ($link === null) {
                continue;
            }

            $item = [
                'uri' => $link->href,
                'uid' => $link->href,
            ];

            $title = $li->find('div.under_title', 0);
            if ($title !== null) {
                $item['title'] = $title->plaintext;
            }

            $uploader = $li->find('span.uploader', 0);
            if ($uploader !== null) {
                $item['author'] = $uploader->plaintext;
            }

            $this->items[] = $this->addDocumentDetails($item);

            if (count($this->items) >= 15) {
                break;
            }
        }
    }

    /**
     * Feed name: "<uploads page header> - Scribd" once collectData() has
     * found a header, otherwise the parent's name.
     */
    public function getName()
    {
        if ($this->feedName) {
            return $this->feedName . ' - Scribd';
        }

        return parent::getName();
    }

    /**
     * URI of the uploads page for the configured profile, or the parent's
     * URI when no 'profile' input is set.
     */
    public function getURI()
    {
        $profile = $this->getInput('profile');
        if (is_null($profile)) {
            return parent::getURI();
        }

        // preg_match() yields 0 (no match) or false (error); both are reported.
        if (!preg_match($this->profileUrlRegex, $profile, $user)) {
            returnServerError('Could not extract user ID and name from given profile URL.');
        }

        return self::URI . '/' . $user[1] . '/uploads';
    }

    /**
     * Adds content, enclosure and categories taken from the document's own page.
     *
     * @param array $item Item with at least the 'uri' key set.
     * @return array The item, extended with 'content' and, when available,
     *               'enclosures' and 'categories'. Returned unchanged when
     *               the document page could not be loaded.
     */
    private function addDocumentDetails($item)
    {
        $pageHtml = getSimpleHTMLDOMCached($item['uri'], 3600);
        if (!$pageHtml) {
            return $item;
        }

        $imageMeta = $pageHtml->find('meta[property="og:image"]', 0);
        $descriptionMeta = $pageHtml->find('meta[property="og:description"]', 0);

        // A missing tag or attribute is normalised to an empty string.
        $image = $imageMeta !== null ? (string) $imageMeta->content : '';
        $description = $descriptionMeta !== null ? (string) $descriptionMeta->content : '';

        foreach ($pageHtml->find('ul.interest_pills li') as $pill) {
            $item['categories'][] = $pill->plaintext;
        }

        $content = '';
        if ($description !== '') {
            $content .= "<p>{$description}</p>";
        }
        if ($image !== '') {
            $content .= "<p><img src=\"{$image}\"></p>";
            $item['enclosures'][] = $image;
        }
        $item['content'] = $content;

        return $item;
    }
}