[Search engine] Added TorLock search engine

This commit is contained in:
ngosang 2016-03-26 16:34:17 +01:00
parent 1eb6b05bbb
commit b86f92212f
6 changed files with 196 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 561 B

View file

@ -0,0 +1,97 @@
#VERSION: 2.0
#AUTHORS: Douman (custparasite@gmx.se)
#CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
from re import compile as re_compile
from HTMLParser import HTMLParser
class torlock(object):
url = "https://www.torlock.com"
name = "TorLock"
supported_categories = {'all' : 'all',
'anime' : 'anime',
'software' : 'software',
'games' : 'game',
'movies' : 'movie',
'music' : 'music',
'tv' : 'television',
'books' : 'ebooks'}
def download_torrent(self, info):
print(download_file(info))
class MyHtmlParser(HTMLParser):
""" Sub-class for parsing results """
def __init__(self, url):
HTMLParser.__init__(self)
self.url = url
self.article_found = False #true when <article> with results is found
self.item_found = False
self.item_bad = False #set to True for malicious links
self.current_item = None #dict for found item
self.item_name = None #key's name in current_item dict
self.parser_class = {"ts" : "size",
"tul" : "seeds",
"tdl" : "leech"}
def handle_starttag(self, tag, attrs):
params = dict(attrs)
if self.item_found:
if tag == "td":
if "class" in params:
self.item_name = self.parser_class.get(params["class"], None)
if self.item_name:
self.current_item[self.item_name] = ""
elif self.article_found and tag == "a":
if "href" in params:
link = params["href"]
if link.startswith("/torrent"):
self.current_item["desc_link"] = "".join((self.url, link))
self.current_item["link"] = "".join((self.url, "/tor/", link.split('/')[2], ".torrent"))
self.current_item["engine_url"] = self.url
self.item_found = True
self.item_name = "name"
self.current_item["name"] = ""
self.item_bad = "rel" in params and params["rel"] == "nofollow"
elif tag == "article":
self.article_found = True
self.current_item = {}
def handle_data(self, data):
if self.item_name:
self.current_item[self.item_name] += data
def handle_endtag(self, tag):
if tag == "article":
self.article_found = False
elif self.item_name and (tag == "a" or tag == "td"):
self.item_name = None
elif self.item_found and tag == "tr":
self.item_found = False
if not self.item_bad:
prettyPrinter(self.current_item)
self.current_item = {}
def search(self, query, cat='all'):
""" Performs search """
query = query.replace("%20", "-")
parser = self.MyHtmlParser(self.url)
page = "".join((self.url, "/", self.supported_categories[cat], "/torrents/", query, ".html?sort=seeds&page=1"))
html = retrieve_url(page)
parser.feed(html)
counter = 1
additional_pages = re_compile("/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+".format(self.supported_categories[cat], query))
list_searches = additional_pages.findall(html)[:-1] #last link is next(i.e. second)
for page in map(lambda link: "".join((self.url, link)), list_searches):
html = retrieve_url(page)
parser.feed(html)
counter += 1
if counter > 3:
break
parser.close()

View file

@ -5,5 +5,6 @@ kickasstorrents: 1.28
legittorrents: 2.00
mininova: 2.01
piratebay: 2.14
torlock: 2.0
torrentreactor: 1.41
torrentz: 2.17

Binary file not shown.

After

Width:  |  Height:  |  Size: 561 B

View file

@ -0,0 +1,97 @@
#VERSION: 2.0
#AUTHORS: Douman (custparasite@gmx.se)
#CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
from re import compile as re_compile
from html.parser import HTMLParser
class torlock(object):
url = "https://www.torlock.com"
name = "TorLock"
supported_categories = {'all' : 'all',
'anime' : 'anime',
'software' : 'software',
'games' : 'game',
'movies' : 'movie',
'music' : 'music',
'tv' : 'television',
'books' : 'ebooks'}
def download_torrent(self, info):
print(download_file(info))
class MyHtmlParser(HTMLParser):
""" Sub-class for parsing results """
def __init__(self, url):
HTMLParser.__init__(self)
self.url = url
self.article_found = False #true when <article> with results is found
self.item_found = False
self.item_bad = False #set to True for malicious links
self.current_item = None #dict for found item
self.item_name = None #key's name in current_item dict
self.parser_class = {"ts" : "size",
"tul" : "seeds",
"tdl" : "leech"}
def handle_starttag(self, tag, attrs):
params = dict(attrs)
if self.item_found:
if tag == "td":
if "class" in params:
self.item_name = self.parser_class.get(params["class"], None)
if self.item_name:
self.current_item[self.item_name] = ""
elif self.article_found and tag == "a":
if "href" in params:
link = params["href"]
if link.startswith("/torrent"):
self.current_item["desc_link"] = "".join((self.url, link))
self.current_item["link"] = "".join((self.url, "/tor/", link.split('/')[2], ".torrent"))
self.current_item["engine_url"] = self.url
self.item_found = True
self.item_name = "name"
self.current_item["name"] = ""
self.item_bad = "rel" in params and params["rel"] == "nofollow"
elif tag == "article":
self.article_found = True
self.current_item = {}
def handle_data(self, data):
if self.item_name:
self.current_item[self.item_name] += data
def handle_endtag(self, tag):
if tag == "article":
self.article_found = False
elif self.item_name and (tag == "a" or tag == "td"):
self.item_name = None
elif self.item_found and tag == "tr":
self.item_found = False
if not self.item_bad:
prettyPrinter(self.current_item)
self.current_item = {}
def search(self, query, cat='all'):
""" Performs search """
query = query.replace("%20", "-")
parser = self.MyHtmlParser(self.url)
page = "".join((self.url, "/", self.supported_categories[cat], "/torrents/", query, ".html?sort=seeds&page=1"))
html = retrieve_url(page)
parser.feed(html)
counter = 1
additional_pages = re_compile("/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+".format(self.supported_categories[cat], query))
list_searches = additional_pages.findall(html)[:-1] #last link is next(i.e. second)
for page in map(lambda link: "".join((self.url, link)), list_searches):
html = retrieve_url(page)
parser.feed(html)
counter += 1
if counter > 3:
break
parser.close()

View file

@ -5,5 +5,6 @@ kickasstorrents: 1.28
legittorrents: 2.00
mininova: 2.01
piratebay: 2.14
torlock: 2.0
torrentreactor: 1.41
torrentz: 2.17