AO3Bridge: add options to fetch chapter contents and list titles (#3981)

* AO3Bridge: add options to fetch chapter contents and titles for list feeds

and add downloads for each fic to enclosures

* AO3Bridge: fix list default value

* AO3Bridge: fix erroneous dynamic property usage

* AO3Bridge: fix unit test failure for getURI
This commit is contained in:
July 2024-02-15 22:14:17 -05:00 committed by GitHub
parent 4d15ffd2cf
commit 7813f4564e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -12,8 +12,20 @@ class AO3Bridge extends BridgeAbstract
'url' => [
'name' => 'url',
'required' => true,
// Example: F/F tag, complete works only
'exampleValue' => 'https://archiveofourown.org/works?work_search[complete]=T&tag_id=F*s*F',
// Example: F/F tag
'exampleValue' => 'https://archiveofourown.org/tags/F*s*F/works',
],
'range' => [
'name' => 'Chapter Content',
'title' => 'Chapter(s) to include in each work\'s feed entry',
'defaultValue' => null,
'type' => 'list',
'values' => [
'None' => null,
'First' => 'first',
'Latest' => 'last',
'Entire work' => 'all',
],
],
],
'Bookmarks' => [
@ -39,18 +51,13 @@ class AO3Bridge extends BridgeAbstract
{
switch ($this->queriedContext) {
case 'Bookmarks':
$user = $this->getInput('user');
$this->title = $user;
$url = self::URI
. '/users/' . $user
. '/bookmarks?bookmark_search[sort_column]=bookmarkable_date';
$this->collectList($url);
$this->collectList($this->getURI());
break;
case 'List':
$this->collectList($this->getInput('url'));
$this->collectList($this->getURI());
break;
case 'Work':
$this->collectWork($this->getInput('id'));
$this->collectWork($this->getURI());
break;
}
}
@ -61,9 +68,21 @@ class AO3Bridge extends BridgeAbstract
*/
private function collectList($url)
{
$html = getSimpleHTMLDOM($url);
$httpClient = RssBridge::getHttpClient();
$version = 'v0.0.1';
$agent = ['useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"];
$response = $httpClient->request($url, $agent);
$html = \str_get_html($response->getBody());
$html = defaultLinkTo($html, self::URI);
// Get list title. Will include page range + count in some cases
$heading = ($html->find('#main > h2', 0));
if ($heading->find('a.tag')) {
$heading = $heading->find('a.tag', 0);
}
$this->title = $heading->plaintext;
foreach ($html->find('.index.group > li') as $element) {
$item = [];
@ -83,6 +102,36 @@ class AO3Bridge extends BridgeAbstract
$chapters = (isset($chapters) ? $chapters->plaintext : 0);
$item['uid'] = $item['uri'] . "/$strdate/$chapters";
// Fetch workskin of desired chapter(s) in list
if ($this->getInput('range')) {
$url = $item['uri'];
switch ($this->getInput('range')) {
case ('all'):
$url .= '?view_full_work=true';
break;
case ('first'):
break;
case ('last'):
// only way to get this is using the navigate page unfortunately
$url .= '/navigate';
$response = $httpClient->request($url, $agent);
$html = \str_get_html($response->getBody());
$html = defaultLinkTo($html, self::URI);
$url = $html->find('ol.index.group > li > a', -1)->href;
break;
}
$response = $httpClient->request($url, $agent);
$html = \str_get_html($response->getBody());
$html = defaultLinkTo($html, self::URI);
$item['content'] .= $html->find('#workskin', 0);
}
// Use predictability of download links to generate enclosures
$wid = explode('/', $item['uri'])[4];
foreach (['azw3', 'epub', 'mobi', 'pdf', 'html'] as $ext) {
$item['enclosures'][] = 'https://archiveofourown.org/downloads/' . $wid . '/work.' . $ext;
}
$this->items[] = $item;
}
}
@ -90,26 +139,29 @@ class AO3Bridge extends BridgeAbstract
/**
* Feed for recent chapters of a specific work.
*/
private function collectWork($id)
private function collectWork($url)
{
$url = self::URI . "/works/$id/navigate";
$httpClient = RssBridge::getHttpClient();
$version = 'v0.0.1';
$response = $httpClient->request($url, [
'useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)",
]);
$agent = ['useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"];
$response = $httpClient->request($url . '/navigate', $agent);
$html = \str_get_html($response->getBody());
$html = defaultLinkTo($html, self::URI);
$response = $httpClient->request($url . '?view_full_work=true', $agent);
$workhtml = \str_get_html($response->getBody());
$workhtml = defaultLinkTo($workhtml, self::URI);
$this->title = $html->find('h2 a', 0)->plaintext;
foreach ($html->find('ol.index.group > li') as $element) {
$nav = $html->find('ol.index.group > li');
for ($i = 0; $i < count($nav); $i++) {
$item = [];
$element = $nav[$i];
$item['title'] = $element->find('a', 0)->plaintext;
$item['content'] = $element;
$item['content'] = $workhtml->find('#chapter-' . ($i + 1), 0);
$item['uri'] = $element->find('a', 0)->href;
$strdate = $element->find('span.datetime', 0)->plaintext;
@ -138,4 +190,24 @@ class AO3Bridge extends BridgeAbstract
{
return self::URI . '/favicon.ico';
}
public function getURI()
{
$url = parent::getURI();
switch ($this->queriedContext) {
case 'Bookmarks':
$user = $this->getInput('user');
$url = self::URI
. '/users/' . $user
. '/bookmarks?bookmark_search[sort_column]=bookmarkable_date';
break;
case 'List':
$url = $this->getInput('url');
break;
case 'Work':
$url = self::URI . '/works/' . $this->getInput('id');
break;
}
return $url;
}
}