rss-bridge/bridges/HinduTamilBridge.php

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

98 lines
3.1 KiB
PHP
Raw Normal View History

2024-05-26 18:21:14 +03:00
<?php
/**
 * Bridge that expands hindutamil.in topic feeds into full articles.
 *
 * The feed URL is built from FEED_BASE_URL plus the selected topic value.
 * Each feed item is then enriched by fetching the article page and pulling
 * the body (#pgContentPrint), the published time and the Open Graph image
 * out of it. When the article page cannot be used, the item is returned as
 * the feed delivered it.
 */
class HinduTamilBridge extends FeedExpander
{
    const NAME = 'HinduTamil';
    const URI = 'https://www.hindutamil.in';
    const FEED_BASE_URL = 'https://feeds.feedburner.com/Hindu_Tamil_';
    const DESCRIPTION = 'Retrieve full articles from hindutamil.in feeds';
    const MAINTAINER = 'tillcash';
    const PARAMETERS = [
        [
            'topic' => [
                'name' => 'topic',
                'type' => 'list',
                'defaultValue' => 'crime',
                'values' => [
                    'Astrology' => 'astrology',
                    'Blogs' => 'blogs',
                    'Business' => 'business',
                    'Cartoon' => 'cartoon',
                    'Cinema' => 'cinema',
                    'Crime' => 'crime',
                    'Discussion' => 'discussion',
                    'Education' => 'education',
                    'Environment' => 'environment',
                    'India' => 'india',
                    'Lifestyle' => 'life-style',
                    'Literature' => 'literature',
                    'Opinion' => 'opinion',
                    'Reporters' => 'reporters-page',
                    'Socialmedia' => 'social-media',
                    'Spirituals' => 'spirituals',
                    'Sports' => 'sports',
                    'Supplements' => 'supplements',
                    'Tamilnadu' => 'tamilnadu',
                    'Technology' => 'technology',
                    'Tourism' => 'tourism',
                    'World' => 'world',
                ],
            ],
            'limit' => [
                'name' => 'limit (max 100)',
                'type' => 'number',
                'defaultValue' => 10,
            ],
        ],
    ];

    /** Upper bound on the number of articles expanded per request. */
    private const MAX_LIMIT = 100;

    /** Used when no limit was supplied; mirrors the 'limit' defaultValue above. */
    private const DEFAULT_LIMIT = 10;

    /**
     * Returns the bridge name, suffixed with the selected topic label if any.
     *
     * @return string
     */
    public function getName()
    {
        $topic = $this->getKey('topic');

        return self::NAME . ($topic ? ' - ' . $topic : '');
    }

    /**
     * Builds the topic feed URL and hands it to the feed expander.
     *
     * The requested limit is clamped to 1..MAX_LIMIT. Without a lower bound,
     * a zero, negative or missing limit would be passed straight through to
     * collectExpandableDatas().
     *
     * @return void
     */
    public function collectData()
    {
        $requested = $this->getInput('limit');
        $limit = $requested === null ? self::DEFAULT_LIMIT : (int) $requested;
        $limit = max(1, min(self::MAX_LIMIT, $limit));

        $url = self::FEED_BASE_URL . $this->getInput('topic');
        $this->collectExpandableDatas($url, $limit);
    }

    /**
     * Enriches one feed item with content scraped from the article page.
     *
     * @param array $item Feed item; this method reads 'uri' and may set
     *                    'timestamp' and 'content'.
     * @return array The enriched item, or the item unchanged when the
     *               article page or its body container is unavailable.
     */
    protected function parseItem($item)
    {
        // Without an article URL there is nothing to fetch.
        if (empty($item['uri'])) {
            return $item;
        }

        $dom = getSimpleHTMLDOMCached($item['uri']);
        // NOTE(review): the helper presumably yields a falsy value when the
        // page cannot be parsed; guard so ->find() is never called on a
        // non-object.
        if (!$dom) {
            return $item;
        }

        $content = $dom->find('#pgContentPrint', 0);
        if ($content === null) {
            return $item;
        }

        // Only override the feed timestamp when the page provides one.
        $timestamp = $this->getTimestamp($dom);
        if ($timestamp !== null) {
            $item['timestamp'] = $timestamp;
        }

        $item['content'] = $this->getImage($dom) . $this->cleanContent($content);

        return $item;
    }

    /**
     * Strips centered blocks, scripts and ad placeholders from the article
     * body and returns the remaining inner HTML.
     *
     * @param object $content DOM node of the article body.
     */
    private function cleanContent($content): string
    {
        foreach ($content->find('div[align="center"], script, .adsplacement') as $remove) {
            $remove->outertext = '';
        }

        return $content->innertext;
    }

    /**
     * Reads the article:published_time meta tag.
     *
     * @param object $dom Parsed article page.
     * @return string|null The raw content attribute, or null when the tag is
     *                     missing or its value is empty.
     */
    private function getTimestamp($dom): ?string
    {
        $date = $dom->find('meta[property="article:published_time"]', 0);
        if (!$date) {
            return null;
        }

        // Cast defensively: the attribute lookup may not return a string
        // when the attribute is absent.
        $value = trim((string) $date->getAttribute('content'));

        return $value !== '' ? $value : null;
    }

    /**
     * Builds an <img> paragraph from the og:image meta tag.
     *
     * @param object $dom Parsed article page.
     * @return string HTML snippet, or an empty string when no image URL is
     *                available.
     */
    private function getImage($dom): string
    {
        $image = $dom->find('meta[property="og:image"]', 0);
        if (!$image) {
            return '';
        }

        $url = trim((string) $image->getAttribute('content'));
        if ($url === '') {
            return '';
        }

        // The URL comes from a scraped page, so escape it for the attribute
        // context. double_encode is off so entities already present in the
        // source markup (e.g. &amp;) are not encoded twice.
        $escaped = htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);

        return sprintf('<p><img src="%s"></p>', $escaped);
    }
}