# VERSION: 1.1
#
# LICENSING INFORMATION
# This is free and unencumbered software released into the public domain.
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
# For more information, please refer to <https://unlicense.org>

import gzip
import math
import re
import time
import urllib.parse
import urllib.request
from io import BytesIO

from novaprinter import prettyPrinter


class btdig(object):
    """qBittorrent search-engine plugin for the btdig.com DHT search site.

    The plugin fetches btdig's HTML search results page by page and hands
    each page to ``parse_page`` for extraction (results are printed via
    ``prettyPrinter`` from the qBittorrent nova framework).
    """

    url = 'https://www.btdig.com'
    name = 'btdig'
    # btdig has no category filtering; only 'all' is supported.
    supported_categories = {'all': '0'}

    def search(self, what, cat='all'):
        """Run a search for *what* and parse every result page.

        ``what`` is the query string as supplied by qBittorrent; ``cat`` is
        accepted for API compatibility but ignored (btdig has no categories).
        """
        # Browser-like headers; btdig rejects obviously non-browser clients.
        # NOTE(review): 'br' and 'zstd' are advertised but get_response can
        # only decode gzip (or identity) — confirm the site never picks them.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
            'Accept-Language': 'en-GB,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br, zstd',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-User': '?1',
            'DNT': '1',
            'Sec-GPC': '1',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'TE': 'trailers'
        }
        url = f"{self.url}/search?q={what.replace(' ', '+')}&order=0"
        response = self.get_response(urllib.request.Request(url, headers=headers))
        # The results page reports "<N> results found"; btdig paginates at
        # 10 results per page, so derive the page count from that total.
        results_match = re.search(r'(\d+) results found', response)
        if results_match:
            total_results = int(results_match.group(1))
            total_pages = math.ceil(total_results / 10)
        else:
            total_pages = 1  # assuming single page
        self.parse_page(response)
        # Page 0 was fetched above; fetch the remaining pages via the p= param.
        for page in range(1, total_pages):
            time.sleep(1)  # Sleep for 1 second between requests
            url = f"{self.url}/search?q={what.replace(' ', '+')}&p={page}&order=0"
            response = self.get_response(urllib.request.Request(url, headers=headers))
            self.parse_page(response)

    def get_response(self, req):
        """Fetch *req* and return the response body decoded as UTF-8.

        Transparently inflates gzip-encoded responses. Returns an empty
        string on any failure so a network error degrades to "no results"
        instead of crashing the plugin.
        """
        try:
            with urllib.request.urlopen(req) as response:
                if response.info().get('Content-Encoding') == 'gzip':
                    gzip_file = gzip.GzipFile(fileobj=BytesIO(response.read()))
                    return gzip_file.read().decode('utf-8', errors='ignore')
                return response.read().decode('utf-8', errors='ignore')
        except Exception:
            # Best-effort: swallow network/HTTP errors and yield an empty page.
            return ""

    # NOTE(review): parse_page is truncated at this chunk boundary; the
    # original fragment is preserved below and must be completed from the
    # full source before it can be restored as code:
    # def parse_page(self, html_content): result_blocks = re.finditer(r'