# Spaces: Runtime error
import asyncio
import concurrent.futures
import os
import threading
import time
from datetime import datetime

from llama_cpp import Llama
from openai import OpenAI
from selenium import webdriver
from selenium.common import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager

from GenerateAIPodcast import generateMp3
from btts import generateAudioFile
# Disabled legacy backend, kept for reference only: summaries were requested
# from an OpenAI-compatible endpoint on localhost instead of the in-process
# model loaded below.
#
# client = OpenAI(base_url="http://localhost:8080/v1", api_key="lm-studio")
#
# def make_request(link):
#     print("-----------------------------------------------------------------------------------------")
#     print("Make Request is called")
#     try:
#         completion = client.chat.completions.create(
#             model="model-identifier",
#             messages=[
#                 {"role": "system",
#                  "content": "Always answer short and most detailled and dont use * in your answers. It should be good to hear as a Podcast"},
#                 {"role": "user", "content": f"Please summarize this website: {link}."}
#             ],
#             temperature=0.7,
#         )
#         # print(f"Thread: {completion.choices[0].message}")
#         # print("TEST:", completion.choices[0].message)
#         message = completion.choices[0].message.content
#         return message
#     except Exception as e:
#         print(f"Thread encountered an error: {e}^")

# Coordinates of the GGUF model file handed to ``Llama.from_pretrained``.
_MODEL_REPO_ID = "hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF"
_MODEL_FILENAME = "llama-3.2-1b-instruct-q8_0.gguf"

# Single model instance shared by the whole module; it is created as a side
# effect of importing this file.
llm = Llama.from_pretrained(repo_id=_MODEL_REPO_ID, filename=_MODEL_FILENAME)
# The module-level ``llm`` is shared by every caller and is not documented as
# safe for concurrent use, so completions are serialised behind this lock.
_LLM_LOCK = threading.Lock()


def generate(link: str):
    """Ask the local Llama model for a short, podcast-style summary of *link*.

    Only the URL text is placed in the prompt; this function does not
    download the page itself.

    Args:
        link: URL of the website to summarise.

    Returns:
        The ``content`` field of the first choice's message in the chat
        completion returned by the model.
    """
    messages = [
        {"role": "system",
         "content": "Always answer short and most detailled and dont use * in your answers. It should be good to hear as a Podcast"},
        {"role": "user", "content": f"Please summarize this website: {link}."},
    ]
    # run_tldr_crawler submits this function to a thread pool, so several
    # workers can arrive here at once; only one may drive the model at a time.
    with _LLM_LOCK:
        output = llm.create_chat_completion(messages=messages)
    # Return only the generated text, not the full response structure.
    return output['choices'][0]['message']['content']
def _is_article_link(href):
    """Return True when *href* is set and is not a TLDR, jobs or advertising link."""
    return (
        href is not None
        and not href.startswith(("https://tldr.tech", "https://jobs"))
        and "advertise" not in href
    )


def _collect_article_links(driver, url):
    """Load *url* in *driver* and return the hrefs that pass _is_article_link."""
    driver.get(url)
    # Fixed pause to give the page time to load before the anchors are read.
    time.sleep(2)
    anchors = driver.find_elements(By.TAG_NAME, 'a')
    # Read each href exactly once: every get_attribute call is a separate
    # request to the browser.
    hrefs = (anchor.get_attribute('href') for anchor in anchors)
    return [href for href in hrefs if _is_article_link(href)]


def run_tldr_crawler(date: "str | None" = None):
    """Crawl one TLDR tech issue, summarise its links and create audio files.

    The issue page ``https://tldr.tech/tech/<date>`` is opened in Chrome via
    Selenium and its outgoing links are collected. Each link is summarised
    with ``generate`` on a small thread pool, and every summary is passed to
    ``generateAudioFile`` together with its 1-based position.

    Args:
        date: Issue date formatted as ``YYYY-MM-DD``. Defaults to today's
            date. Pass an explicit value (for example ``'2025-03-07'``) to
            crawl a past issue; the original code pinned that date for
            weekend runs, presumably because no issue is published then.

    Returns:
        None. A ``WebDriverException`` raised while loading the page is
        printed and ends the run early; other exceptions propagate.
    """
    if date is None:
        date = datetime.today().strftime('%Y-%m-%d')
    print(date)

    # Set up the Selenium WebDriver. Headless mode is not enabled here.
    options = webdriver.ChromeOptions()
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)

    try:
        extracted_links = _collect_article_links(
            driver, f"https://tldr.tech/tech/{date}"
        )
    except WebDriverException as e:
        print(f"Fehler beim Laden der Seite: {e}")
        return
    finally:
        # The browser is only needed for scraping; close it before the
        # summarisation and audio work starts.
        driver.quit()

    # Output the extracted links.
    print("Extracted Links:")
    print(len(extracted_links))
    for idx, link in enumerate(extracted_links, start=1):
        print(f"{idx}. {link}")

    # Maximum number of summary jobs handed to the pool at the same time.
    max_threads = 4
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
        # Submit everything first so the jobs are queued while the results
        # are consumed below in link order.
        futures = [
            (idx, executor.submit(generate, link))
            for idx, link in enumerate(extracted_links, start=1)
        ]
        for idx, future in futures:
            result = future.result()
            asyncio.run(generateAudioFile(result, idx))