[FilterBridge] Various improvements (#2148)

- Add option for case-insensitive regex
- Allow matching item content or author in addition to item title
- Optionally also try Latin1/UTF-8 re-encoded variants of each field (utf8_decode/utf8_encode) when applying matches
This commit is contained in:
ORelio 2021-06-07 20:11:12 +02:00 committed by GitHub
parent 973e49d93e
commit 75cc52a62c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2,11 +2,11 @@
class FilterBridge extends FeedExpander { class FilterBridge extends FeedExpander {
const MAINTAINER = 'Frenzie'; const MAINTAINER = 'Frenzie, ORelio';
const NAME = 'Filter'; const NAME = 'Filter';
const CACHE_TIMEOUT = 3600; // 1h const CACHE_TIMEOUT = 3600; // 1h
const DESCRIPTION = 'Filters a feed of your choice'; const DESCRIPTION = 'Filters a feed of your choice';
const URI = 'https://github.com/rss-bridge/rss-bridge'; const URI = 'https://github.com/RSS-Bridge/rss-bridge';
const PARAMETERS = array(array( const PARAMETERS = array(array(
'url' => array( 'url' => array(
@ -14,7 +14,7 @@ class FilterBridge extends FeedExpander {
'required' => true, 'required' => true,
), ),
'filter' => array( 'filter' => array(
'name' => 'Filter item title (regular expression)', 'name' => 'Filter (regular expression)',
'required' => false, 'required' => false,
), ),
'filter_type' => array( 'filter_type' => array(
@ -22,60 +22,101 @@ class FilterBridge extends FeedExpander {
'type' => 'list', 'type' => 'list',
'required' => false, 'required' => false,
'values' => array( 'values' => array(
'Permit' => 'permit', 'Keep matching items' => 'permit',
'Block' => 'block', 'Hide matching items' => 'block',
), ),
'defaultValue' => 'permit', 'defaultValue' => 'permit',
), ),
'title_from_content' => array( 'case_insensitive' => array(
'name' => 'Generate title from content', 'name' => 'Case-insensitive filter',
'type' => 'checkbox', 'type' => 'checkbox',
'required' => false, 'required' => false,
) ),
'fix_encoding' => array(
'name' => 'Attempt Latin1/UTF-8 fixes when evaluating filter',
'type' => 'checkbox',
'required' => false,
),
'target_title' => array(
'name' => 'Apply filter on title',
'type' => 'checkbox',
'required' => false,
'defaultValue' => 'checked'
),
'target_content' => array(
'name' => 'Apply filter on content',
'type' => 'checkbox',
'required' => false,
),
'target_author' => array(
'name' => 'Apply filter on author',
'type' => 'checkbox',
'required' => false,
),
'title_from_content' => array(
'name' => 'Generate title from content (overwrite existing title)',
'type' => 'checkbox',
'required' => false,
),
'length_limit' => array(
'name' => 'Max length analyzed by filter (-1: no limit)',
'type' => 'number',
'required' => false,
'defaultValue' => -1,
),
)); ));
protected function parseItem($newItem){ protected function parseItem($newItem){
$item = parent::parseItem($newItem); $item = parent::parseItem($newItem);
// Generate title from first 50 characters of content?
if($this->getInput('title_from_content') && array_key_exists('content', $item)) { if($this->getInput('title_from_content') && array_key_exists('content', $item)) {
$content = str_get_html($item['content']); $content = str_get_html($item['content']);
$pos = strpos($item['content'], ' ', 50); $pos = strpos($item['content'], ' ', 50);
$item['title'] = substr($content->plaintext, 0, $pos);
$item['title'] = substr(
$content->plaintext,
0,
$pos
);
if(strlen($content->plaintext) >= $pos) { if(strlen($content->plaintext) >= $pos) {
$item['title'] .= '...'; $item['title'] .= '...';
} }
} }
switch(true) { // Build regular expression
case $this->getFilterType() === 'permit': $regex = '/' . $this->getInput('filter') . '/';
if (preg_match($this->getFilter(), $item['title'])) { if($this->getInput('case_insensitive')) {
return $item; $regex .= 'i';
}
break;
case $this->getFilterType() === 'block':
if (!preg_match($this->getFilter(), $item['title'])) {
return $item;
}
break;
} }
return null;
}
protected function getFilter(){ // Retrieve fields to check
return '/' . $this->getInput('filter') . '/'; $filter_fields = array();
} if($this->getInput('target_title')) {
$filter_fields[] = $item['title'];
}
if($this->getInput('target_content')) {
$filter_fields[] = $item['content'];
}
if($this->getInput('target_author')) {
$filter_fields[] = $item['author'];
}
protected function getFilterType(){ // Apply filter on item
return $this->getInput('filter_type'); $keep_item = false;
$length_limit = intval($this->getInput('length_limit'));
foreach($filter_fields as $field) {
if($length_limit > 0) {
$field = substr($field, 0, $length_limit);
}
$keep_item |= boolval(preg_match($regex, $field));
if($this->getInput('fix_encoding')) {
$keep_item |= boolval(preg_match($regex, utf8_decode($field)));
$keep_item |= boolval(preg_match($regex, utf8_encode($field)));
}
}
// Reverse result? (keep everything but matching items)
if($this->getInput('filter_type') === 'block') {
$keep_item = !$keep_item;
}
return $keep_item ? $item : null;
} }
public function getURI(){ public function getURI(){
@ -84,18 +125,15 @@ class FilterBridge extends FeedExpander {
if(empty($url)) { if(empty($url)) {
$url = parent::getURI(); $url = parent::getURI();
} }
return $url; return $url;
} }
public function collectData(){ public function collectData(){
if($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') { if($this->getInput('url') && substr($this->getInput('url'), 0, 4) !== 'http') {
// just in case someone find a way to access local files by playing with the url // just in case someone finds a way to access local files by playing with the url
returnClientError('The url parameter must either refer to http or https protocol.'); returnClientError('The url parameter must either refer to http or https protocol.');
} }
try{ $this->collectExpandableDatas($this->getURI());
$this->collectExpandableDatas($this->getURI());
} catch (Exception $e) {
$this->collectExpandableDatas($this->getURI());
}
} }
} }