rss-bridge/bridges/WeLiveSecurityBridge.php

<?php
/**
 * Bridge for We Live Security (welivesecurity.com).
 *
 * Reads the site's feed and, for the first ITEM_LIMIT entries, downloads the
 * linked article page to build an item carrying the full article body.
 */
class WeLiveSecurityBridge extends BridgeAbstract {

    /** Maximum number of feed entries turned into items per collectData() call. */
    const ITEM_LIMIT = 5;

    /**
     * Sets the static metadata (maintainer, name, URI, description) of this bridge.
     */
    public function loadMetadatas() {
        $this->maintainer = 'ORelio';
        $this->name = 'We Live Security';
        $this->uri = 'http://www.welivesecurity.com/';
        $this->description = 'Returns the newest articles.';
    }

    /**
     * Fetches the feed and appends up to ITEM_LIMIT items to $this->items.
     *
     * Each item has the keys 'uri', 'title', 'author', 'timestamp' and
     * 'content'. Failures to fetch or parse the feed or an article are reported
     * through returnServerError() (defined by the parent class; it is expected
     * to abort the request). Missing optional parts (image, summary, author,
     * article body) degrade to empty values instead of crashing.
     */
    public function collectData(){

        $feed = $this->getURI().'feed/';
        $html = $this->getSimpleHTMLDOM($feed) or $this->returnServerError('Could not request '.$this->getName().': '.$feed);
        $limit = 0;

        foreach ($html->find('item') as $element) {
            // Stop as soon as enough items were collected instead of walking the rest of the feed.
            if ($limit >= self::ITEM_LIMIT) {
                break;
            }

            // The link and summary are cut out of the raw item markup rather than
            // queried through the DOM (presumably because the HTML parser does not
            // expose them as regular elements -- TODO confirm).
            $article_url = $this->extractFromDelimiters($element->innertext, '<link>', '</link>');
            $article_url = ($article_url === false) ? '' : trim($article_url);
            if ($article_url === '') {
                $this->returnServerError('Could not find an article link in '.$this->getName().' feed: '.$feed);
                continue; // only reached if returnServerError() returns instead of aborting
            }

            $article_summary = $this->extractFromDelimiters($element->innertext, '<description><![CDATA[<p>', '</p>');
            if ($article_summary === false) {
                $article_summary = '';
            }

            $article_html = $this->loadArticle($article_url);
            if (!is_object($article_html)) {
                continue; // only reached if returnServerError() returns instead of aborting
            }

            $content_node = $article_html->find('div.wlistingsingletext', 0);
            $article_content = is_object($content_node) ? $content_node->innertext : '';
            $article_content = $this->stripWithDelimiters($article_content, '<script', '</script>');

            // Only emit the <img> paragraph when the feed entry actually carries an image.
            $article_image = $this->plaintextOf($element, 'image');
            $image_html = ($article_image !== '') ? '<p><img src="'.$article_image.'" /></p>' : '';

            $article_content = $image_html
                .'<p><b>'.$article_summary.'</b></p>'
                .trim($article_content);

            $item = array();
            $item['uri'] = $article_url;
            $item['title'] = $this->plaintextOf($element, 'title');
            $item['author'] = $this->plaintextOf($article_html, 'a[rel=author]');
            $item['timestamp'] = strtotime($this->plaintextOf($element, 'pubDate'));
            $item['content'] = $article_content;
            $this->items[] = $item;
            $limit++;
        }
    }

    /**
     * Downloads an article page and parses it into a DOM object.
     *
     * @param string $article_url Absolute URL of the article.
     * @return object|false Parsed document as returned by str_get_html(); false is
     *                      only returned if returnServerError() does not abort.
     */
    private function loadArticle($article_url) {
        $raw = file_get_contents($article_url) or $this->returnServerError('Could not request '.$this->getName().': '.$article_url);

        // http://www.gzip.org/zlib/rfc-gzip.html#header-trailer -> GZip ID1/ID2 magic bytes.
        // The response may be gzipped even though no gzip encoding was requested, so decompress it.
        if (is_string($raw) && substr($raw, 0, 2) === "\x1f\x8b") {
            $raw = gzdecode($raw);
        }

        $dom = is_string($raw) ? str_get_html($raw) : false;
        if (!is_object($dom)) {
            $this->returnServerError('Could not parse '.$this->getName().': '.$article_url);
            return false;
        }
        return $dom;
    }

    /**
     * Returns the plain text of the first node matching $selector, or '' if none matches.
     *
     * @param object $node     DOM node or document offering find($selector, $index).
     * @param string $selector Selector passed through to find().
     * @return string
     */
    private function plaintextOf($node, $selector) {
        $match = $node->find($selector, 0);
        return is_object($match) ? $match->plaintext : '';
    }

    /**
     * Returns the text between the first $start and the next $end that follows it.
     *
     * Declared as a method (not as a function nested in collectData()) because a
     * nested named function is registered globally once the enclosing code runs,
     * which makes a second call or a same-named function elsewhere a fatal error.
     *
     * @param string $string Text to search.
     * @param string $start  Opening delimiter.
     * @param string $end    Closing delimiter.
     * @return string|false The enclosed text, or false if either delimiter is missing.
     */
    private function extractFromDelimiters($string, $start, $end) {
        $start_pos = strpos($string, $start);
        if ($start_pos === false) {
            return false;
        }

        $content_pos = $start_pos + strlen($start);
        $end_pos = strpos($string, $end, $content_pos);
        if ($end_pos === false) {
            return false;
        }

        return substr($string, $content_pos, $end_pos - $content_pos);
    }

    /**
     * Removes every section running from $start up to and including the next $end.
     *
     * A $start without a following $end is left untouched: the loop stops there
     * rather than deleting unrelated bytes or spinning forever.
     *
     * @param string $string Text to clean.
     * @param string $start  Opening delimiter of the sections to remove.
     * @param string $end    Closing delimiter of the sections to remove.
     * @return string The text without the delimited sections.
     */
    private function stripWithDelimiters($string, $start, $end) {
        if ($start === '') {
            return $string; // an empty opening delimiter cannot mark a section
        }

        while (($start_pos = strpos($string, $start)) !== false) {
            $end_pos = strpos($string, $end, $start_pos + strlen($start));
            if ($end_pos === false) {
                break;
            }

            $section_to_remove = substr($string, $start_pos, $end_pos + strlen($end) - $start_pos);
            $string = str_replace($section_to_remove, '', $string);
        }

        return $string;
    }
}
[WeLiveSecurity] New bridge, Security News. 2016-07-19 20:37:33 +03:00			`<?php`
			`class WeLiveSecurityBridge extends BridgeAbstract {`

			`public function loadMetadatas() {`
			`$this->maintainer = 'ORelio';`
bridges: Put name/uri directly in bridge metadata Some bridges used getName() and getURI() to put information into the metadatas. Instead the metadatas should be initialized with data and (not yet done) returned by default via getName() and getURI(). 2016-08-06 18:55:29 +03:00			`$this->name = 'We Live Security';`
			`$this->uri = 'http://www.welivesecurity.com/';`
[WeLiveSecurity] New bridge, Security News. 2016-07-19 20:37:33 +03:00			`$this->description = 'Returns the newest articles.';`
			`}`

[core] store parameters values in BridgeAbstract::parameters This way, any BridgeAbstract method can now have access to these values, not only collectData Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com> 2016-08-25 02:24:53 +03:00			`public function collectData(){`
[WeLiveSecurity] New bridge, Security News. 2016-07-19 20:37:33 +03:00
			`function ExtractFromDelimiters($string, $start, $end) {`
			`if (strpos($string, $start) !== false) {`
			`$section_retrieved = substr($string, strpos($string, $start) + strlen($start));`
			`$section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));`
			`return $section_retrieved;`
			`} return false;`
			`}`

			`function StripWithDelimiters($string, $start, $end) {`
			`while (strpos($string, $start) !== false) {`
			`$section_to_remove = substr($string, strpos($string, $start));`
			`$section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));`
			`$string = str_replace($section_to_remove, '', $string);`
			`} return $string;`
			`}`

			`$feed = $this->getURI().'feed/';`
bridges: rename file_get_html to getSimpleHTMLDOM Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com> 2016-08-09 15:57:42 +03:00			`$html = $this->getSimpleHTMLDOM($feed) or $this->returnServerError('Could not request '.$this->getName().': '.$feed);`
[WeLiveSecurity] New bridge, Security News. 2016-07-19 20:37:33 +03:00			`$limit = 0;`

			`foreach ($html->find('item') as $element) {`
			`if ($limit < 5) {`

			`$article_image = $element->find('image', 0)->plaintext;`
			`$article_url = ExtractFromDelimiters($element->innertext, '<link>', '</link>');`
			`$article_summary = ExtractFromDelimiters($element->innertext, '<description><![CDATA[<p>', '</p>');`
bridges: Replace returnError function with more specific Replacements depend on original error code: 400: returnClientError 404: returnServerError 500: returnServerError 501: returnServerError 2016-08-17 15:45:08 +03:00			`$article_html = file_get_contents($article_url) or $this->returnServerError('Could not request '.$this->getName().': '.$article_url);`
[WeLiveSecurity] New bridge, Security News. 2016-07-19 20:37:33 +03:00			`if (substr($article_html, 0, 2) == "\x1f\x8b") //http://www.gzip.org/zlib/rfc-gzip.html#header-trailer -> GZip ID1`
			`$article_html = gzdecode($article_html); //Response is GZipped even if we didn't accept GZip!? Let's decompress...`
			`$article_html = str_get_html($article_html); //Now we have our HTML data. But still, that's an important HTTP violation...`
			`$article_content = $article_html->find('div.wlistingsingletext', 0)->innertext;`
			`$article_content = StripWithDelimiters($article_content, '<script', '</script>');`
			`$article_content = '<p><img src="'.$article_image.'" /></p>'`
			`.'<p><b>'.$article_summary.'</b></p>'`
			`.trim($article_content);`

[bridges] Change all occurrences of the Item object to array 2016-08-22 19:55:59 +03:00			`$item = array();`
			`$item['uri'] = $article_url;`
			`$item['title'] = $element->find('title', 0)->plaintext;`
			`$item['author'] = $article_html->find('a[rel=author]', 0)->plaintext;`
			`$item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);`
			`$item['content'] = $article_content;`
[WeLiveSecurity] New bridge, Security News. 2016-07-19 20:37:33 +03:00			`$this->items[] = $item;`
			`$limit++;`

			`}`
			`}`
			`}`
bridges: rename file_get_html to getSimpleHTMLDOM Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com> 2016-08-09 15:57:42 +03:00			`}`