Merge pull request #4997 from ngosang/fix_python2_encoding

[Search engine] Python2 encoding and URL getting mechanism in BTDigg
This commit is contained in:
sledgehammer999 2016-03-27 11:09:45 -05:00
commit bdd31b8f82
6 changed files with 59 additions and 65 deletions

View file

@ -1,5 +1,6 @@
#VERSION: 1.30 #VERSION: 1.31
#AUTHORS: BTDigg team (research@btdigg.org) #AUTHORS: BTDigg team (research@btdigg.org)
# Contributors: Diego de las Heras (ngosang@hotmail.es)
# GNU GENERAL PUBLIC LICENSE # GNU GENERAL PUBLIC LICENSE
# Version 3, 29 June 2007 # Version 3, 29 June 2007
@ -16,11 +17,10 @@
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details. # GNU General Public License for more details.
import urllib
import urllib2
import sys
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
import urllib
import sys
class btdigg(object): class btdigg(object):
url = 'https://btdigg.org' url = 'https://btdigg.org'
@ -37,31 +37,27 @@ class btdigg(object):
i = 0 i = 0
results = 0 results = 0
while i < 3: while i < 3:
u = urllib2.urlopen('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.urlencode(dict(q = req, p = i))) data = retrieve_url('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.urlencode(dict(q = req, p = i)))
for line in u: for line in data.splitlines():
try: if line.startswith('#'):
line = line.decode('utf8') continue
if line.startswith('#'):
continue
info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6] info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6]
name = name.replace('|', '') name = name.replace('|', '')
# BTDigg returns unrelated results, we need to filter # BTDigg returns unrelated results, we need to filter
if not all(word in name.lower() for word in what_list): if not all(word in name.lower() for word in what_list):
continue continue
res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.quote(name.encode('utf8'))), res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.quote(name.encode('utf8'))),
name = name, name = name,
size = size, size = size,
seeds = int(dl), seeds = int(dl),
leech = int(dl), leech = int(dl),
engine_url = self.url, engine_url = self.url,
desc_link = '%s/search?%s' % (self.url, urllib.urlencode(dict(info_hash = info_hash, q = req)))) desc_link = '%s/search?%s' % (self.url, urllib.urlencode(dict(info_hash = info_hash, q = req))))
prettyPrinter(res) prettyPrinter(res)
results += 1 results += 1
except:
pass
if results == 0: if results == 0:
break break

View file

@ -1,4 +1,4 @@
#VERSION: 2.17 #VERSION: 2.18
#AUTHORS: Diego de las Heras (ngosang@hotmail.es) #AUTHORS: Diego de las Heras (ngosang@hotmail.es)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -92,8 +92,8 @@ class torrentz(object):
# display item # display item
self.td_counter = None self.td_counter = None
self.current_item['engine_url'] = self.url self.current_item['engine_url'] = self.url
if self.current_item['name'].find(' \xc2'): if self.current_item['name'].find(u' \xc2'):
self.current_item['name'] = self.current_item['name'].split(' \xc2')[0] self.current_item['name'] = self.current_item['name'].split(u' \xc2')[0]
self.current_item['link'] += '&' + urlencode({'dn' : self.current_item['name']}) self.current_item['link'] += '&' + urlencode({'dn' : self.current_item['name']})
self.current_item['name'] = self.current_item['name'].decode('utf8') self.current_item['name'] = self.current_item['name'].decode('utf8')

View file

@ -1,4 +1,4 @@
btdigg: 1.30 btdigg: 1.31
demonoid: 1.2 demonoid: 1.2
extratorrent: 2.04 extratorrent: 2.04
kickasstorrents: 1.28 kickasstorrents: 1.28
@ -6,4 +6,4 @@ legittorrents: 2.00
mininova: 2.01 mininova: 2.01
piratebay: 2.14 piratebay: 2.14
torrentreactor: 1.41 torrentreactor: 1.41
torrentz: 2.17 torrentz: 2.18

View file

@ -1,7 +1,9 @@
#VERSION: 1.40 #VERSION: 1.41
# Author: # Author:
# Christophe DUMEZ (chris@qbittorrent.org) # Christophe DUMEZ (chris@qbittorrent.org)
# Contributors:
# Diego de las Heras (ngosang@hotmail.es)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met: # modification, are permitted provided that the following conditions are met:
@ -86,7 +88,7 @@ def retrieve_url(url):
pass pass
dat = dat.decode(charset, 'replace') dat = dat.decode(charset, 'replace')
dat = htmlentitydecode(dat) dat = htmlentitydecode(dat)
return dat.encode('utf-8', 'replace') return dat
def download_file(url, referer=None): def download_file(url, referer=None):
""" Download file at url and write it to a file, return the path to the file and the url """ """ Download file at url and write it to a file, return the path to the file and the url """

View file

@ -1,5 +1,6 @@
#VERSION: 1.30 #VERSION: 1.31
#AUTHORS: BTDigg team (research@btdigg.org) #AUTHORS: BTDigg team (research@btdigg.org)
# Contributors: Diego de las Heras (ngosang@hotmail.es)
# GNU GENERAL PUBLIC LICENSE # GNU GENERAL PUBLIC LICENSE
# Version 3, 29 June 2007 # Version 3, 29 June 2007
@ -16,52 +17,47 @@
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details. # GNU General Public License for more details.
import urllib.request, urllib.parse, urllib.error
import urllib.request, urllib.error, urllib.parse
import sys
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
import urllib
import sys
class btdigg(object): class btdigg(object):
url = 'https://btdigg.org' url = 'https://btdigg.org'
name = 'BTDigg' name = 'BTDigg'
supported_categories = {'all': ''} supported_categories = {'all': ''}
def __init__(self): def __init__(self):
pass pass
def search(self, what, cat='all'): def search(self, what, cat='all'):
req = urllib.parse.unquote(what) req = urllib.parse.unquote(what)
what_list = req.split() what_list = req.split()
i = 0 i = 0
results = 0 results = 0
while i < 3: while i < 3:
u = urllib.request.urlopen('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.parse.urlencode(dict(q = req, p = i))) data = retrieve_url('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.parse.urlencode(dict(q = req, p = i)))
for line in u: for line in data.splitlines():
try: if line.startswith('#'):
line = line.decode('utf8') continue
if line.startswith('#'):
continue
info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6] info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6]
name = name.replace('|', '') name = name.replace('|', '')
# BTDigg returns unrelated results, we need to filter # BTDigg returns unrelated results, we need to filter
if not all(word in name.lower() for word in what_list): if not all(word in name.lower() for word in what_list):
continue continue
res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.parse.quote(name)), res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.parse.quote(name)),
name = name, name = name,
size = size, size = size,
seeds = int(dl), seeds = int(dl),
leech = int(dl), leech = int(dl),
engine_url = self.url, engine_url = self.url,
desc_link = '%s/search?%s' % (self.url, urllib.parse.urlencode(dict(info_hash = info_hash, q = req)))) desc_link = '%s/search?%s' % (self.url, urllib.parse.urlencode(dict(info_hash = info_hash, q = req))))
prettyPrinter(res) prettyPrinter(res)
results += 1 results += 1
except:
pass
if results == 0: if results == 0:
break break

View file

@ -1,4 +1,4 @@
btdigg: 1.30 btdigg: 1.31
demonoid: 1.2 demonoid: 1.2
extratorrent: 2.04 extratorrent: 2.04
kickasstorrents: 1.28 kickasstorrents: 1.28