Merge pull request #4997 from ngosang/fix_python2_encoding

[Search engine] Python2 encoding and URL getting mechanism in BTDigg
2024-11-25 10:46:15 +03:00 · 2016-03-27 11:09:45 -05:00 · 2016-03-27 11:09:45 -05:00 · bdd31b8f82
commit bdd31b8f82
parent 1eb6b05bbb e557634feb
6 changed files with 59 additions and 65 deletions
--- a/src/searchengine/nova/engines/btdigg.py
+++ b/src/searchengine/nova/engines/btdigg.py
@@ -1,5 +1,6 @@
-#VERSION: 1.30
+#VERSION: 1.31
 #AUTHORS: BTDigg team (research@btdigg.org)
+# Contributors: Diego de las Heras (ngosang@hotmail.es)

 #                    GNU GENERAL PUBLIC LICENSE
 #                       Version 3, 29 June 2007
@@ -16,11 +17,10 @@
 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #    GNU General Public License for more details.

-import urllib
-import urllib2
-import sys
-
 from novaprinter import prettyPrinter
+from helpers import retrieve_url, download_file
+import urllib
+import sys

 class btdigg(object):
    url = 'https://btdigg.org'
@@ -37,31 +37,27 @@ class btdigg(object):
        i = 0
        results = 0
        while i < 3:
-            u = urllib2.urlopen('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.urlencode(dict(q = req, p = i)))
-            for line in u:
-                try:
-                    line = line.decode('utf8')
-                    if line.startswith('#'):
-                        continue
+            data = retrieve_url('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.urlencode(dict(q = req, p = i)))
+            for line in data.splitlines():
+                if line.startswith('#'):
+                    continue

-                    info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6]
-                    name = name.replace('|', '')
-                    # BTDigg returns unrelated results, we need to filter
-                    if not all(word in name.lower() for word in what_list):
-                        continue
+                info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6]
+                name = name.replace('|', '')
+                # BTDigg returns unrelated results, we need to filter
+                if not all(word in name.lower() for word in what_list):
+                    continue

-                    res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.quote(name.encode('utf8'))),
-                               name = name,
-                               size = size,
-                               seeds = int(dl),
-                               leech = int(dl),
-                               engine_url = self.url,
-                               desc_link = '%s/search?%s' % (self.url, urllib.urlencode(dict(info_hash = info_hash, q = req))))
+                res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.quote(name.encode('utf8'))),
+                           name = name,
+                           size = size,
+                           seeds = int(dl),
+                           leech = int(dl),
+                           engine_url = self.url,
+                           desc_link = '%s/search?%s' % (self.url, urllib.urlencode(dict(info_hash = info_hash, q = req))))

-                    prettyPrinter(res)
-                    results += 1
-                except:
-                    pass
+                prettyPrinter(res)
+                results += 1

            if results == 0:
                break
--- a/src/searchengine/nova/engines/torrentz.py
+++ b/src/searchengine/nova/engines/torrentz.py
@@ -1,4 +1,4 @@
-#VERSION: 2.17
+#VERSION: 2.18
 #AUTHORS: Diego de las Heras (ngosang@hotmail.es)

 # Redistribution and use in source and binary forms, with or without
@@ -92,8 +92,8 @@ class torrentz(object):
                # display item
                self.td_counter = None
                self.current_item['engine_url'] = self.url
-                if self.current_item['name'].find(' \xc2'):
-                    self.current_item['name'] = self.current_item['name'].split(' \xc2')[0]
+                if self.current_item['name'].find(u' \xc2'):
+                    self.current_item['name'] = self.current_item['name'].split(u' \xc2')[0]
                self.current_item['link'] += '&' + urlencode({'dn' : self.current_item['name']})
                self.current_item['name'] = self.current_item['name'].decode('utf8')

--- a/src/searchengine/nova/engines/versions.txt
+++ b/src/searchengine/nova/engines/versions.txt
@@ -1,4 +1,4 @@
-btdigg: 1.30
+btdigg: 1.31
 demonoid: 1.2
 extratorrent: 2.04
 kickasstorrents: 1.28
@@ -6,4 +6,4 @@ legittorrents: 2.00
 mininova: 2.01
 piratebay: 2.14
 torrentreactor: 1.41
-torrentz: 2.17
+torrentz: 2.18
--- a/src/searchengine/nova/helpers.py
+++ b/src/searchengine/nova/helpers.py
@@ -1,7 +1,9 @@
-#VERSION: 1.40
+#VERSION: 1.41

 # Author:
 #  Christophe DUMEZ (chris@qbittorrent.org)
+# Contributors:
+#  Diego de las Heras (ngosang@hotmail.es)

 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
@@ -86,7 +88,7 @@ def retrieve_url(url):
        pass
    dat = dat.decode(charset, 'replace')
    dat = htmlentitydecode(dat)
-    return dat.encode('utf-8', 'replace')
+    return dat

 def download_file(url, referer=None):
    """ Download file at url and write it to a file, return the path to the file and the url """
--- a/src/searchengine/nova3/engines/btdigg.py
+++ b/src/searchengine/nova3/engines/btdigg.py
@@ -1,5 +1,6 @@
-#VERSION: 1.30
+#VERSION: 1.31
 #AUTHORS: BTDigg team (research@btdigg.org)
+# Contributors: Diego de las Heras (ngosang@hotmail.es)

 #                    GNU GENERAL PUBLIC LICENSE
 #                       Version 3, 29 June 2007
@@ -16,52 +17,47 @@
 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #    GNU General Public License for more details.

-import urllib.request, urllib.parse, urllib.error
-import urllib.request, urllib.error, urllib.parse
-import sys
-
 from novaprinter import prettyPrinter
+from helpers import retrieve_url, download_file
+import urllib
+import sys

 class btdigg(object):
    url = 'https://btdigg.org'
-    name = 'BTDigg' 
+    name = 'BTDigg'

    supported_categories = {'all': ''}
-	
+
    def __init__(self):
        pass
-        
+
    def search(self, what, cat='all'):
        req = urllib.parse.unquote(what)
        what_list = req.split()
        i = 0
        results = 0
        while i < 3:
-            u = urllib.request.urlopen('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.parse.urlencode(dict(q = req, p = i)))
-            for line in u:
-                try:
-                    line = line.decode('utf8')
-                    if line.startswith('#'):
-                        continue
+            data = retrieve_url('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.parse.urlencode(dict(q = req, p = i)))
+            for line in data.splitlines():
+                if line.startswith('#'):
+                    continue

-                    info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6]
-                    name = name.replace('|', '')
-                    # BTDigg returns unrelated results, we need to filter
-                    if not all(word in name.lower() for word in what_list):
-                        continue
+                info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6]
+                name = name.replace('|', '')
+                # BTDigg returns unrelated results, we need to filter
+                if not all(word in name.lower() for word in what_list):
+                    continue

-                    res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.parse.quote(name)),
-                               name = name,
-                               size = size,
-                               seeds = int(dl),
-                               leech = int(dl),
-                               engine_url = self.url,
-                               desc_link = '%s/search?%s' % (self.url, urllib.parse.urlencode(dict(info_hash = info_hash, q = req))))
+                res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.parse.quote(name)),
+                           name = name,
+                           size = size,
+                           seeds = int(dl),
+                           leech = int(dl),
+                           engine_url = self.url,
+                           desc_link = '%s/search?%s' % (self.url, urllib.parse.urlencode(dict(info_hash = info_hash, q = req))))

-                    prettyPrinter(res)
-                    results += 1
-                except:
-                    pass
+                prettyPrinter(res)
+                results += 1

            if results == 0:
                break
--- a/src/searchengine/nova3/engines/versions.txt
+++ b/src/searchengine/nova3/engines/versions.txt
@@ -1,4 +1,4 @@
-btdigg: 1.30
+btdigg: 1.31
 demonoid: 1.2
 extratorrent: 2.04
 kickasstorrents: 1.28