# VERSION: 1.3
# AUTHORS: BurningMop (burning.mop@yandex.com)
# LICENSING INFORMATION
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import re
from html.parser import HTMLParser
import time
import threading
from helpers import download_file, retrieve_url
from novaprinter import prettyPrinter
class xxxclubto(object):
url = 'https://xxxclub.to'
headers = {
'Referer': url
}
name = 'XXXClub'
supported_categories = {
'all': 'All',
'pictures': '5',
}
container_regex = r'
(?s:.)*?<\/div>'
pagination_regex = r'(?s:.)*?<\/div>'
pagination_next_regex = r'(?s:.)*?<\/a>'
pagination_last_page = r'.*?'
items_regex = r'(?s:.)*?<\/li>'
has_results = True
has_next_page = True
last_page = 100
class MyHtmlParser(HTMLParser):
def error(self, message):
pass
UL, LI, SPAN, A = ('ul', 'li', 'span', 'a')
def __init__(self, url, headers):
HTMLParser.__init__(self)
self.url = url
self.headers = headers
self.headers['Referer'] = url
self.row = {}
self.column = 0
self.foundResults = False
self.foundTable = False
self.insideRow = False
self.insideCell = False
self.insideNameLink = False
self.foundTableHeading = False
self.foundRowCatlabe = False
self.magnet_regex = r'href="magnet:.*"'
def handle_starttag(self, tag, attrs):
params = dict(attrs)
if 'browsetableinside' in params.get('class', ''):
self.foundResults = True
return
if self.foundResults and tag == self.UL:
self.foundTable = True
return
if self.foundTable and tag == self.LI:
self.insideRow = True
return
if self.insideRow and self.foundTableHeading and tag == self.SPAN:
classList = params.get('class', None)
if self.foundRowCatlabe:
self.insideCell = True
self.column += 1
if 'catlabe' == classList:
self.foundRowCatlabe = True
return
if self.insideRow and self.foundTableHeading and self.column == 1 and tag == self.A:
self.insideNameLink = True
href = params.get('href')
link = f'{self.url}{href}'
self.row['desc_link'] = link
self.row['link'] = link
torrent_page = retrieve_url(link, self.headers)
matches = re.finditer(self.magnet_regex, torrent_page, re.MULTILINE)
magnet_urls = [x.group() for x in matches]
self.row['link'] = magnet_urls[0].split('"')[1]
return
def handle_data(self, data):
if self.insideCell and self.foundRowCatlabe:
if self.column == 1 and self.insideNameLink:
self.row['name'] = data
if self.column == 3:
size = data.replace(',', '')
self.row['size'] = size
if self.column == 4:
self.row['seeds'] = data
if self.column == 5:
self.row['leech'] = data
return
def handle_endtag(self, tag):
if self.insideCell and self.insideNameLink and tag == self.A:
self.insideNameLink = False
if self.insideCell and tag == self.SPAN:
self.insideCell = False
if self.insideRow and tag == self.LI:
if not self.foundTableHeading:
self.foundTableHeading = True
else:
self.row['engine_url'] = self.url
prettyPrinter(self.row)
self.insideRow = False
self.foundRowCatlabe = False
self.column = 0
self.row = {}
return
def download_torrent(self, info):
print(download_file(info))
def get_page_url(self, what, category, page):
return f'{self.url}/torrents/search/{category}/{what}?page={page}&sort=seeders&order=asc'
def get_results(self, html):
container_matches = re.finditer(self.container_regex, html, re.MULTILINE)
container = [x.group() for x in container_matches]
if len(container) > 0:
container_html = container[0]
items_matches = re.finditer(self.items_regex, container_html, re.MULTILINE)
items = [x.group() for x in items_matches]
self.has_results = len(items) > 1
else:
self.has_results = False
def get_next_page(self, html):
next_page_matches = re.finditer(self.pagination_next_regex, html, re.MULTILINE)
next_page = [x.group() for x in next_page_matches]
if len(next_page) == 0:
self.has_next_page = False
self.get_last_page(html)
def get_last_page(self, html):
last_page_matches = re.finditer(self.pagination_last_page, html, re.MULTILINE)
last_page = [x.group() for x in last_page_matches]
if len(last_page) == 0:
self.last_page = 1
else:
self.last_page = int(re.sub(r'', '',re.sub(r'', '', last_page[0])))
def threaded_search(self, page, what, cat):
page_url = self.get_page_url(what, cat, page)
self.headers['Referer'] = page_url
retrieved_html = retrieve_url(page_url, self.headers)
self.get_results(retrieved_html)
self.get_next_page(retrieved_html)
parser = self.MyHtmlParser(self.url, self.headers)
if self.has_results and page <= self.last_page:
parser.feed(retrieved_html)
parser.close()
def search(self, what, cat='all'):
category = self.supported_categories[cat]
page = 1
threads = []
while self.has_results and self.has_next_page:
t = threading.Thread(args=(page, what, category), target=self.threaded_search)
t.start()
time.sleep(0.5)
threads.append(t)
page += 1
for t in threads:
t.join()