[
'username' => [
'name' => 'Username',
'type' => 'text',
'required' => true,
'exampleValue' => '@verifiedjoseph',
],
'content' => [
'name' => 'Content',
'type' => 'list',
'values' => [
'Uploads' => 'uploads',
'Posts' => 'posts',
'Reviews' => 'reviews',
'Collections' => 'collections',
'Web Archives' => 'web-archive',
],
'defaultValue' => 'uploads',
],
'limit' => self::LIMIT,
]
];
const CACHE_TIMEOUT = 900; // 15 mins
const TEST_DETECT_PARAMETERS = [
'https://archive.org/details/@verifiedjoseph' => [
'context' => 'Account', 'username' => 'verifiedjoseph', 'content' => 'uploads'
],
'https://archive.org/details/@verifiedjoseph?tab=collections' => [
'context' => 'Account', 'username' => 'verifiedjoseph', 'content' => 'collections'
],
];
private $skipClasses = [
'item-ia mobile-header hidden-tiles',
'item-ia account-ia'
];
private $detectParamsRegex = '/https?:\/\/archive\.org\/details\/@([\w]+)(?:\?tab=([a-z-]+))?/';
public function detectParameters($url)
{
$params = [];
if (preg_match($this->detectParamsRegex, $url, $matches) > 0) {
$params['context'] = 'Account';
$params['username'] = $matches[1];
$params['content'] = 'uploads';
if (isset($matches[2])) {
$params['content'] = $matches[2];
}
return $params;
}
return null;
}
public function collectData()
{
$html = getSimpleHTMLDOM($this->getURI());
$html = defaultLinkTo($html, $this->getURI());
if ($this->getInput('content') !== 'posts') {
$detailsDivNumber = 0;
$results = $html->find('div.results > div[data-id]');
foreach ($results as $index => $result) {
$item = [];
if (in_array($result->class, $this->skipClasses)) {
continue;
}
switch ($result->class) {
case 'item-ia':
switch ($this->getInput('content')) {
case 'reviews':
$item = $this->processReview($result);
break;
case 'uploads':
$item = $this->processUpload($result);
break;
}
break;
case 'item-ia url-item':
$item = $this->processWebArchives($result);
break;
case 'item-ia collection-ia':
$item = $this->processCollection($result);
break;
}
if ($this->getInput('content') !== 'reviews') {
$hiddenDetails = $this->processHiddenDetails($html, $detailsDivNumber, $item);
$this->items[] = array_merge($item, $hiddenDetails);
} else {
$this->items[] = $item;
}
$detailsDivNumber++;
$limit = $this->getInput('limit') ?? 10;
if (count($this->items) >= $limit) {
break;
}
}
}
if ($this->getInput('content') === 'posts') {
$this->items = $this->processPosts($html);
}
}
public function getURI()
{
if (!is_null($this->getInput('username')) && !is_null($this->getInput('content'))) {
return self::URI . '/details/' . $this->processUsername() . '&tab=' . $this->getInput('content');
}
return parent::getURI();
}
public function getName()
{
if (!is_null($this->getInput('username')) && !is_null($this->getInput('content'))) {
return $this->getKey('content') . ' - '
. $this->processUsername() . ' - Internet Archive';
}
return parent::getName();
}
private function processUsername()
{
if (substr($this->getInput('username'), 0, 1) !== '@') {
return '@' . $this->getInput('username');
}
return $this->getInput('username');
}
private function processUpload($result)
{
$item = [];
$collection = $result->find('a.stealth', 0);
$collectionLink = $collection->href;
$collectionTitle = $collection->find('div.item-parent-ttl', 0)->plaintext;
$item['title'] = trim($result->find('div.ttl', 0)->innertext);
$item['timestamp'] = strtotime($result->find('div.hidden-tiles.pubdate.C.C3', 0)->children(0)->plaintext);
$item['uri'] = $result->find('div.item-ttl.C.C2 > a', 0)->href;
if ($result->find('div.by.C.C4', 0)->children(2)) {
$item['author'] = $result->find('div.by.C.C4', 0)->children(2)->plaintext;
}
$item['content'] = <<
Collection: {$collectionTitle}
{$result->find('div.hidden-lists.review', 0)->children(1)->plaintext}
EOD; $item['enclosures'][] = self::URI . $result->find('img.item-img', 0)->source; return $item; } private function processWebArchives($result) { $item = []; $item['title'] = trim($result->find('div.ttl', 0)->plaintext); $item['timestamp'] = strtotime($result->find('div.hidden-lists', 0)->children(0)->plaintext); $item['uri'] = $result->find('div.item-ttl.C.C2 > a', 0)->href; $item['content'] = <<' . $description . '
' . $item['content']; } return $item; } private function processPosts($html) { $items = []; foreach ($html->find('table.forumTable > tr') as $index => $tr) { $item = []; if ($index === 0) { continue; } $item['title'] = $tr->find('td', 0)->plaintext; $item['timestamp'] = strtotime($tr->find('td', 4)->children(0)->plaintext); $item['uri'] = $tr->find('td', 0)->children(0)->href; $formLink = <<