mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-02-16 07:09:54 +03:00
fix(FeedExpander): if parse fails, include offending url in exception message (#3938)
Also some refactors
This commit is contained in:
parent
b2c8475b2c
commit
d01c462ad5
5 changed files with 56 additions and 41 deletions
|
@ -4,14 +4,14 @@ class DetectAction implements ActionInterface
|
|||
{
|
||||
public function execute(Request $request)
|
||||
{
|
||||
$targetURL = $request->get('url');
|
||||
$url = $request->get('url');
|
||||
$format = $request->get('format');
|
||||
|
||||
if (!$targetURL) {
|
||||
throw new \Exception('You must specify a url!');
|
||||
if (!$url) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a url']));
|
||||
}
|
||||
if (!$format) {
|
||||
throw new \Exception('You must specify a format!');
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']));
|
||||
}
|
||||
|
||||
$bridgeFactory = new BridgeFactory();
|
||||
|
@ -23,19 +23,23 @@ class DetectAction implements ActionInterface
|
|||
|
||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
|
||||
$bridgeParams = $bridge->detectParameters($targetURL);
|
||||
$bridgeParams = $bridge->detectParameters($url);
|
||||
|
||||
if (is_null($bridgeParams)) {
|
||||
if (!$bridgeParams) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$bridgeParams['bridge'] = $bridgeClassName;
|
||||
$bridgeParams['format'] = $format;
|
||||
|
||||
$url = '?action=display&' . http_build_query($bridgeParams);
|
||||
return new Response('', 301, ['location' => $url]);
|
||||
$query = [
|
||||
'action' => 'display',
|
||||
'bridge' => $bridgeClassName,
|
||||
'format' => $format,
|
||||
];
|
||||
$query = array_merge($query, $bridgeParams);
|
||||
return new Response('', 301, ['location' => '?' . http_build_query($query)]);
|
||||
}
|
||||
|
||||
throw new \Exception('No bridge found for given URL: ' . $targetURL);
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', [
|
||||
'message' => 'No bridge found for given URL: ' . $url,
|
||||
]));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,10 +9,10 @@ class FindfeedAction implements ActionInterface
|
|||
{
|
||||
public function execute(Request $request)
|
||||
{
|
||||
$targetURL = $request->get('url');
|
||||
$url = $request->get('url');
|
||||
$format = $request->get('format');
|
||||
|
||||
if (!$targetURL) {
|
||||
if (!$url) {
|
||||
return new Response('You must specify a url', 400);
|
||||
}
|
||||
if (!$format) {
|
||||
|
@ -29,7 +29,7 @@ class FindfeedAction implements ActionInterface
|
|||
|
||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
|
||||
$bridgeParams = $bridge->detectParameters($targetURL);
|
||||
$bridgeParams = $bridge->detectParameters($url);
|
||||
|
||||
if ($bridgeParams === null) {
|
||||
continue;
|
||||
|
|
|
@ -102,12 +102,17 @@ class BMDSystemhausBlogBridge extends BridgeAbstract
|
|||
//-----------------------------------------------------
|
||||
public function detectParameters($url)
|
||||
{
|
||||
$parsed_url = parse_url($url);
|
||||
if ($parsed_url['host'] != 'www.bmd.com') {
|
||||
try {
|
||||
$parsedUrl = Url::fromString($url);
|
||||
} catch (UrlException $e) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$path = explode('/', $parsed_url['path']);
|
||||
if ($parsedUrl->getHost() != 'www.bmd.com') {
|
||||
return null;
|
||||
}
|
||||
|
||||
$path = explode('/', $parsedUrl->getPath());
|
||||
|
||||
if ($this->getURIbyCountry($path[1]) == '') {
|
||||
return null;
|
||||
|
|
|
@ -19,23 +19,6 @@ class FirefoxAddonsBridge extends BridgeAbstract
|
|||
const CACHE_TIMEOUT = 3600;
|
||||
|
||||
private $feedName = '';
|
||||
private $releaseDateRegex = '/Released ([\w, ]+) - ([\w. ]+)/';
|
||||
private $xpiFileRegex = '/([A-Za-z0-9_.-]+)\.xpi$/';
|
||||
private $outgoingRegex = '/https:\/\/prod.outgoing\.prod\.webservices\.mozgcp\.net\/v1\/(?:[A-z0-9]+)\//';
|
||||
|
||||
private $urlRegex = '/addons\.mozilla\.org\/(?:[\w-]+\/)?firefox\/addon\/([\w-]+)/';
|
||||
|
||||
public function detectParameters($url)
|
||||
{
|
||||
$params = [];
|
||||
|
||||
if (preg_match($this->urlRegex, $url, $matches)) {
|
||||
$params['id'] = $matches[1];
|
||||
return $params;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
|
@ -52,7 +35,8 @@ class FirefoxAddonsBridge extends BridgeAbstract
|
|||
$item['uri'] = $this->getURI();
|
||||
$item['author'] = $author;
|
||||
|
||||
if (preg_match($this->releaseDateRegex, $li->find('div.AddonVersionCard-fileInfo', 0)->plaintext, $match)) {
|
||||
$releaseDateRegex = '/Released ([\w, ]+) - ([\w. ]+)/';
|
||||
if (preg_match($releaseDateRegex, $li->find('div.AddonVersionCard-fileInfo', 0)->plaintext, $match)) {
|
||||
$item['timestamp'] = $match[1];
|
||||
$size = $match[2];
|
||||
}
|
||||
|
@ -68,7 +52,8 @@ class FirefoxAddonsBridge extends BridgeAbstract
|
|||
|
||||
$releaseNotes = $this->removeLinkRedirects($li->find('div.AddonVersionCard-releaseNotes', 0));
|
||||
|
||||
if (preg_match($this->xpiFileRegex, $downloadlink, $match)) {
|
||||
$xpiFileRegex = '/([A-Za-z0-9_.-]+)\.xpi$/';
|
||||
if (preg_match($xpiFileRegex, $downloadlink, $match)) {
|
||||
$xpiFilename = $match[0];
|
||||
}
|
||||
|
||||
|
@ -110,10 +95,25 @@ EOD;
|
|||
*/
|
||||
private function removeLinkRedirects($html)
|
||||
{
|
||||
$outgoingRegex = '/https:\/\/prod.outgoing\.prod\.webservices\.mozgcp\.net\/v1\/(?:[A-z0-9]+)\//';
|
||||
foreach ($html->find('a') as $a) {
|
||||
$a->href = urldecode(preg_replace($this->outgoingRegex, '', $a->href));
|
||||
$a->href = urldecode(preg_replace($outgoingRegex, '', $a->href));
|
||||
}
|
||||
|
||||
return $html->innertext;
|
||||
}
|
||||
|
||||
public function detectParameters($url)
|
||||
{
|
||||
$params = [];
|
||||
|
||||
// Example: https://addons.mozilla.org/en-US/firefox/addon/ublock-origin
|
||||
$pattern = '/addons\.mozilla\.org\/(?:[\w-]+\/)?firefox\/addon\/([\w-]+)/';
|
||||
if (preg_match($pattern, $url, $matches)) {
|
||||
$params['id'] = $matches[1];
|
||||
return $params;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,14 +23,20 @@ abstract class FeedExpander extends BridgeAbstract
|
|||
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
|
||||
}
|
||||
// prepare/massage the xml to make it more acceptable
|
||||
$badStrings = [
|
||||
$problematicStrings = [
|
||||
' ',
|
||||
'»',
|
||||
'’',
|
||||
];
|
||||
$xmlString = str_replace($badStrings, '', $xmlString);
|
||||
$xmlString = str_replace($problematicStrings, '', $xmlString);
|
||||
|
||||
$feedParser = new FeedParser();
|
||||
$this->feed = $feedParser->parseFeed($xmlString);
|
||||
try {
|
||||
$this->feed = $feedParser->parseFeed($xmlString);
|
||||
} catch (\Exception $e) {
|
||||
throw new \Exception(sprintf('Failed to parse xml from %s: %s', $url, create_sane_exception_message($e)));
|
||||
}
|
||||
|
||||
$items = array_slice($this->feed['items'], 0, $maxItems);
|
||||
// todo: extract parse logic out from FeedParser
|
||||
foreach ($items as $item) {
|
||||
|
|
Loading…
Add table
Reference in a new issue