import argparse
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import subprocess

output_file = "filtered_links.txt"
post_processing_script = "post_processing_script.py"
extreme_downloader_script = "start-extreme-downloader.awk"

file_types = [".zip", ".xml", ".pdf", ".txt", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
              ".csv", ".json", ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".mp3", ".wav", ".mp4",
              ".avi", ".mov", ".flv", ".swf", ".rar", ".7z", ".tar", ".gz", ".bz2", ".exe",
              ".dll", ".jar", ".py", ".java", ".cpp", ".h", ".html", ".css", ".js", ".php",
              ".asp", ".jsp", ".md", ".sql", ".svg", ".ico", ".iso", ".dat", ".cfg", ".log",
              ".ini", ".bak"]

def filter_links(links):
    # Keep https links whose path contains one of the wanted file extensions.
    filtered_links = []
    for link in links:
        href = link.get("href")
        if href and href.startswith("https"):
            for file_type in file_types:
                if file_type in href:
                    filtered_link = href.split('?')[0]  # drop the query string
                    filtered_links.append(filtered_link)
                    break
    return filtered_links

def process_page(page):
    # The URL is expected to contain a "{}" placeholder for the page number,
    # e.g. https://t.me/s/MotoUpdatesTracker?before={}
    url = base_url.format(page)
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, "html.parser")
    links = soup.find_all("a")
    return filter_links(links)

parser = argparse.ArgumentParser()
parser.add_argument("-u", "--url", help="URL to fetch links from")
parser.add_argument("--pwn", action="store_true", help="Invoke the extreme downloader without asking")
parser.add_argument("-p", "--parallel", type=int, help="Number of pages processed in parallel")
args = parser.parse_args()

if not args.url:
    print("A URL must be given with -u or --url.")
    exit()
if not args.url.startswith("https://t.me/s/"):
    print("The URL must be a public Telegram channel.")
    exit()

base_url = args.url
start_page = 1
end_page = 1790
concurrent_pages = args.parallel or 30

all_filtered_links = []
with ThreadPoolExecutor(max_workers=concurrent_pages) as executor, \
        tqdm(total=end_page - start_page + 1, desc="Processing pages") as pbar:
    future_to_page = {executor.submit(process_page, page): page
                      for page in range(start_page, end_page + 1)}
    for future in as_completed(future_to_page):
        page = future_to_page[future]
        filtered_links = future.result()
        all_filtered_links.extend(filtered_links)
        pbar.update(1)

with open(output_file, "w") as f:
    for link in all_filtered_links:
        f.write(link + "\n")

print("Filtered links saved to:", output_file)
print("Filtered links:")
for link in all_filtered_links:
    print(link)

# Pass the file name to awk; passing the file contents as an argument would
# make awk treat the contents as a file name.
if args.pwn or input("Start start-extreme-downloader.awk? (y/n): ").lower() == "y":
    subprocess.run(["awk", "-f", extreme_downloader_script, output_file])
python3 python3.py -u "https://t.me/s/MotoUpdatesTracker?before={}" --parallel 25
https://rsddownload-secure.lenovo.com/AUSTIN_G_T1SA33.73-40_subsidy-CCAWS_regulatory-DEFAULT_cid50_R7_CFC.xml.zip
https://rsddownload-secure.lenovo.com/GNEVAN_G_T1THS33.75-12-6-1_subsidy-CRICKET_RSU_regulatory-DEFAULT_cid51_CFC.xml.zip
.......
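The scraper defines post_processing_script = "post_processing_script.py" but never calls it, and since consecutive pages overlap, filtered_links.txt ends up with duplicates. A minimal sketch of what such a post-processing step could do, assuming its only job is to deduplicate and sort the saved links (the behaviour is an assumption; only the file name comes from the script above):

# Hypothetical post_processing_script.py: deduplicate and sort filtered_links.txt in place.
input_file = "filtered_links.txt"

with open(input_file) as f:
    links = {line.strip() for line in f if line.strip()}

with open(input_file, "w") as f:
    for link in sorted(links):
        f.write(link + "\n")

print(f"{len(links)} unique links kept in {input_file}")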
let crawledUrls = [];
let pendingUrls = ['https://t.me/s/MotoUpdatesTracker?before=1'];
let totalPages = 50;

const crawl = async () => {
    while (pendingUrls.length && crawledUrls.length < totalPages) {
        let urlsToCrawl = pendingUrls.splice(0, 50); // number of concurrent requests: 50
        let crawlPromises = urlsToCrawl.map(async (url) => {
            if (crawledUrls.includes(url)) return;
            console.log(`Crawling ${url}`);
            try {
                let response = await fetch(url);
                let text = await response.text();
                let parser = new DOMParser();
                let doc = parser.parseFromString(text, 'text/html');
                let anchors = doc.getElementsByTagName('a');
                for (let i = 0; i < anchors.length; i++) {
                    let href = anchors[i].href;
                    if (!crawledUrls.includes(href) && !pendingUrls.includes(href)) {
                        pendingUrls.push(href);
                    }
                }
            } catch (err) {
                console.error(`Failed to crawl "${url}": ${err}`);
            }
            crawledUrls.push(url);
        });
        await Promise.all(crawlPromises);
    }

    console.log('Finished crawling');
    console.log(crawledUrls);
    console.log(`Total URLs crawled: ${crawledUrls.length}`);
    console.log(`URLs still left in the queue: ${pendingUrls.length}`);

    // Dump the crawled links in a new tab
    let linksList = "<ul>";
    for (let i = 0; i < crawledUrls.length; i++) {
        linksList += `<li><a href="${crawledUrls[i]}" target="_blank">${crawledUrls[i]}</a></li>`;
    }
    linksList += "</ul>";
    let newTab = window.open("about:blank", "_blank");
    newTab.document.write("<h1>Scraped links:</h1>");
    newTab.document.write(linksList);
};

crawl();
[ -e "exempel.txt" ] && echo "Filen existerar." || echo "Filen existerar inte, ladda ner den"
function scrollBottomTop() {
    const scrollHeight = document.documentElement.scrollHeight;
    const viewportHeight = window.innerHeight;
    let scrollTop = 0;
    let scrollIncrement = viewportHeight;
    let scrollDirection = 1;
    let iteration = 0;

    function scroll() {
        scrollTop += scrollIncrement * scrollDirection;

        // Reverse direction when we hit the bottom or the top of the page.
        if (scrollTop >= scrollHeight || scrollTop <= 0) {
            scrollDirection *= -1;
        }
        window.scrollTo(0, scrollTop);

        // Stop after 10 steps; otherwise schedule the next frame.
        iteration++;
        if (iteration < 10) {
            requestAnimationFrame(scroll);
        }
    }

    scroll();
}
scrollBottomTop();