2022-06-09 10:56:52 -04:00
|
|
|
<?php
|
2022-07-01 15:10:30 +02:00
|
|
|
|
2022-06-09 10:56:52 -04:00
|
|
|
declare(strict_types=1);
|
|
|
|
|
|
|
|
/**
 * Bridge for digital publications from USENIX (usenix.org).
 *
 * Exposes a single context, "USENIX ;login:", which reads the ;login: online
 * listing page and then loads each linked article page to obtain its content
 * and tags.
 */
final class UsenixBridge extends BridgeAbstract
{
    const NAME = 'USENIX';
    const URI = 'https://www.usenix.org/publications';
    const DESCRIPTION = 'Digital publications from USENIX (usenix.org)';
    const MAINTAINER = 'dvikan';
    const PARAMETERS = [
        // Context without any user-supplied parameters.
        'USENIX ;login:' => [
        ],
    ];

    /**
     * Entry point: dispatches on the queried context.
     *
     * Any context other than "USENIX ;login:" is reported through
     * returnClientError().
     */
    public function collectData()
    {
        if ($this->queriedContext === 'USENIX ;login:') {
            $this->collectLoginOnlineItems();
            return;
        }

        returnClientError('Illegal Context');
    }

    /**
     * Appends one feed item per article row found on the ;login: online listing.
     *
     * Rows lacking a title or an article link are skipped, because no usable
     * item can be built from them. Author and publication date are optional and
     * are only set when the corresponding element is present in the row.
     */
    private function collectLoginOnlineItems(): void
    {
        $url = 'https://www.usenix.org/publications/loginonline';
        $dom = getSimpleHTMLDOMCached($url);

        foreach ($dom->find('div.view-content > div') as $row) {
            $title = $row->find('.views-field-title > span', 0);
            $link = $row->find('.views-field-nothing-1 > span > a', 0);

            // The selector above matches every direct child div, so guard against
            // rows that do not carry the expected article markup.
            if ($title === null || $link === null) {
                continue;
            }

            // The listing links are site-relative; prefix the host.
            $uri = sprintf('https://www.usenix.org%s', $link->href);

            $entry = [
                'title' => $title->innertext,
                'uri' => $uri,
            ];

            $author = $row->find('.views-field-pseudo-author-list > span.field-content', 0);
            if ($author !== null) {
                // Keep only the text before the first comma; strstr() returns
                // false when there is no comma, in which case use the full text.
                $entry['author'] = strstr($author->plaintext, ',', true) ?: $author->plaintext;
            }

            // Date text on the listing looks like: June 2, 2022
            $createdAt = $row->find('div.views-field-field-lv2-publication-date > div > span', 0);
            if ($createdAt !== null) {
                $entry['timestamp'] = $createdAt->innertext;
            }

            $this->items[] = array_merge($entry, $this->getItemContent($uri));
        }
    }

    /**
     * Loads an article page and extracts its content and tags.
     *
     * @param string $uri Absolute URL of the article page.
     *
     * @return array{content: string, categories: list<string>} Content is the
     *         inner HTML of the first ".paragraphs-items-full" element (empty
     *         string when absent), followed by the inner HTML of the first
     *         <fieldset> when one exists.
     */
    private function getItemContent(string $uri): array
    {
        $html = getSimpleHTMLDOMCached($uri);

        $body = $html->find('.paragraphs-items-full', 0);
        $content = $body === null ? '' : $body->innertext;

        $extra = $html->find('fieldset', 0);
        if ($extra !== null) {
            $content .= $extra->innertext;
        }

        $tags = [];
        foreach ($html->find('.field-name-field-lv2-tags div.field-item') as $tag) {
            $tags[] = $tag->plaintext;
        }

        return [
            'content' => $content,
            'categories' => $tags,
        ];
    }
}
|