From 376ff79f164d9d6a13a9c567cdf768c2b5d44202 Mon Sep 17 00:00:00 2001 From: ngosang Date: Wed, 23 Mar 2016 13:35:55 +0100 Subject: [PATCH 1/2] [Search engine] Fix Python 2 encoding problems --- src/searchengine/nova/engines/torrentz.py | 6 +++--- src/searchengine/nova/engines/versions.txt | 2 +- src/searchengine/nova/helpers.py | 6 ++++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/searchengine/nova/engines/torrentz.py b/src/searchengine/nova/engines/torrentz.py index f0e6767d9..591db2ed4 100644 --- a/src/searchengine/nova/engines/torrentz.py +++ b/src/searchengine/nova/engines/torrentz.py @@ -1,4 +1,4 @@ -#VERSION: 2.17 +#VERSION: 2.18 #AUTHORS: Diego de las Heras (ngosang@hotmail.es) # Redistribution and use in source and binary forms, with or without @@ -92,8 +92,8 @@ class torrentz(object): # display item self.td_counter = None self.current_item['engine_url'] = self.url - if self.current_item['name'].find(' \xc2'): - self.current_item['name'] = self.current_item['name'].split(' \xc2')[0] + if self.current_item['name'].find(u' \xc2'): + self.current_item['name'] = self.current_item['name'].split(u' \xc2')[0] self.current_item['link'] += '&' + urlencode({'dn' : self.current_item['name']}) self.current_item['name'] = self.current_item['name'].decode('utf8') diff --git a/src/searchengine/nova/engines/versions.txt b/src/searchengine/nova/engines/versions.txt index b32625719..2185a4c43 100644 --- a/src/searchengine/nova/engines/versions.txt +++ b/src/searchengine/nova/engines/versions.txt @@ -6,4 +6,4 @@ legittorrents: 2.00 mininova: 2.01 piratebay: 2.14 torrentreactor: 1.41 -torrentz: 2.17 +torrentz: 2.18 diff --git a/src/searchengine/nova/helpers.py b/src/searchengine/nova/helpers.py index c4c6ad865..7298d981c 100644 --- a/src/searchengine/nova/helpers.py +++ b/src/searchengine/nova/helpers.py @@ -1,7 +1,9 @@ -#VERSION: 1.40 +#VERSION: 1.41 # Author: # Christophe DUMEZ (chris@qbittorrent.org) +# Contributors: +# Diego de las Heras (ngosang@hotmail.es) # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -86,7 +88,7 @@ def retrieve_url(url): pass dat = dat.decode(charset, 'replace') dat = htmlentitydecode(dat) - return dat.encode('utf-8', 'replace') + return dat def download_file(url, referer=None): """ Download file at url and write it to a file, return the path to the file and the url """ From e557634feb975c2c7f2b45141b82b74c47017f63 Mon Sep 17 00:00:00 2001 From: ngosang Date: Wed, 23 Mar 2016 13:53:03 +0100 Subject: [PATCH 2/2] [Search engine] Change URL getting mechanism in BTDigg --- src/searchengine/nova/engines/btdigg.py | 50 +++++++++--------- src/searchengine/nova/engines/versions.txt | 2 +- src/searchengine/nova3/engines/btdigg.py | 56 ++++++++++----------- src/searchengine/nova3/engines/versions.txt | 2 +- 4 files changed, 51 insertions(+), 59 deletions(-) diff --git a/src/searchengine/nova/engines/btdigg.py b/src/searchengine/nova/engines/btdigg.py index 79d924b57..5cf0d82bb 100644 --- a/src/searchengine/nova/engines/btdigg.py +++ b/src/searchengine/nova/engines/btdigg.py @@ -1,5 +1,6 @@ -#VERSION: 1.30 +#VERSION: 1.31 #AUTHORS: BTDigg team (research@btdigg.org) +# Contributors: Diego de las Heras (ngosang@hotmail.es) # GNU GENERAL PUBLIC LICENSE # Version 3, 29 June 2007 @@ -16,11 +17,10 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -import urllib -import urllib2 -import sys - from novaprinter import prettyPrinter +from helpers import retrieve_url, download_file +import urllib +import sys class btdigg(object): url = 'https://btdigg.org' @@ -37,31 +37,27 @@ class btdigg(object): i = 0 results = 0 while i < 3: - u = urllib2.urlopen('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.urlencode(dict(q = req, p = i))) - for line in u: - try: - line = line.decode('utf8') - if line.startswith('#'): - continue + data = retrieve_url('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.urlencode(dict(q = req, p = i))) + for line in data.splitlines(): + if line.startswith('#'): + continue - info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6] - name = name.replace('|', '') - # BTDigg returns unrelated results, we need to filter - if not all(word in name.lower() for word in what_list): - continue + info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6] + name = name.replace('|', '') + # BTDigg returns unrelated results, we need to filter + if not all(word in name.lower() for word in what_list): + continue - res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.quote(name.encode('utf8'))), - name = name, - size = size, - seeds = int(dl), - leech = int(dl), - engine_url = self.url, - desc_link = '%s/search?%s' % (self.url, urllib.urlencode(dict(info_hash = info_hash, q = req)))) + res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.quote(name.encode('utf8'))), + name = name, + size = size, + seeds = int(dl), + leech = int(dl), + engine_url = self.url, + desc_link = '%s/search?%s' % (self.url, urllib.urlencode(dict(info_hash = info_hash, q = req)))) - prettyPrinter(res) - results += 1 - except: - pass + prettyPrinter(res) + results += 1 if results == 0: break diff --git a/src/searchengine/nova/engines/versions.txt b/src/searchengine/nova/engines/versions.txt index 2185a4c43..c87c0e05b 100644 --- a/src/searchengine/nova/engines/versions.txt +++ b/src/searchengine/nova/engines/versions.txt @@ -1,4 +1,4 @@ -btdigg: 1.30 +btdigg: 1.31 demonoid: 1.2 extratorrent: 2.04 kickasstorrents: 1.28 diff --git a/src/searchengine/nova3/engines/btdigg.py b/src/searchengine/nova3/engines/btdigg.py index 38db5be2e..16d8ede93 100644 --- a/src/searchengine/nova3/engines/btdigg.py +++ b/src/searchengine/nova3/engines/btdigg.py @@ -1,5 +1,6 @@ -#VERSION: 1.30 +#VERSION: 1.31 #AUTHORS: BTDigg team (research@btdigg.org) +# Contributors: Diego de las Heras (ngosang@hotmail.es) # GNU GENERAL PUBLIC LICENSE # Version 3, 29 June 2007 @@ -16,52 +17,47 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -import urllib.request, urllib.parse, urllib.error -import urllib.request, urllib.error, urllib.parse -import sys - from novaprinter import prettyPrinter +from helpers import retrieve_url, download_file +import urllib +import sys class btdigg(object): url = 'https://btdigg.org' - name = 'BTDigg' + name = 'BTDigg' supported_categories = {'all': ''} - + def __init__(self): pass - + def search(self, what, cat='all'): req = urllib.parse.unquote(what) what_list = req.split() i = 0 results = 0 while i < 3: - u = urllib.request.urlopen('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.parse.urlencode(dict(q = req, p = i))) - for line in u: - try: - line = line.decode('utf8') - if line.startswith('#'): - continue + data = retrieve_url('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.parse.urlencode(dict(q = req, p = i))) + for line in data.splitlines(): + if line.startswith('#'): + continue - info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6] - name = name.replace('|', '') - # BTDigg returns unrelated results, we need to filter - if not all(word in name.lower() for word in what_list): - continue + info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6] + name = name.replace('|', '') + # BTDigg returns unrelated results, we need to filter + if not all(word in name.lower() for word in what_list): + continue - res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.parse.quote(name)), - name = name, - size = size, - seeds = int(dl), - leech = int(dl), - engine_url = self.url, - desc_link = '%s/search?%s' % (self.url, urllib.parse.urlencode(dict(info_hash = info_hash, q = req)))) + res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.parse.quote(name)), + name = name, + size = size, + seeds = int(dl), + leech = int(dl), + engine_url = self.url, + desc_link = '%s/search?%s' % (self.url, urllib.parse.urlencode(dict(info_hash = info_hash, q = req)))) - prettyPrinter(res) - results += 1 - except: - pass + prettyPrinter(res) + results += 1 if results == 0: break diff --git a/src/searchengine/nova3/engines/versions.txt b/src/searchengine/nova3/engines/versions.txt index b32625719..00a901976 100644 --- a/src/searchengine/nova3/engines/versions.txt +++ b/src/searchengine/nova3/engines/versions.txt @@ -1,4 +1,4 @@ -btdigg: 1.30 +btdigg: 1.31 demonoid: 1.2 extratorrent: 2.04 kickasstorrents: 1.28