import os

import replicate
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from supabase import create_client, Client

# Supabase credentials come from the environment; create_client fails fast at
# startup if either variable is missing.
url: str = os.environ.get("DB_URL")
key: str = os.environ.get("DB_KEY")
supabase: Client = create_client(url, key)


class Item(BaseModel):
    url: str
    max_tokens: int


app = FastAPI()


def extract_article_content(url):
    """Fetch a page and join the text of its <h1> and <p> tags into one string."""
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        results = soup.find_all(['h1', 'p'])
        text = [result.text for result in results]
        return ' '.join(text)
    except Exception:
        # On any fetch or parse failure, treat the article as empty.
        return ""


@app.get("/")
async def root():
    return {"status": "OK"}


@app.post("/summarize-v1")
async def summarize_v1(item: Item):
    try:
        article = extract_article_content(item.url)
        if len(article) == 0:
            return {"summary": ""}

        # Collect the streamed tokens into a single string and return it
        # once the stream has finished.
        event_list = []
        for event in replicate.stream(
            "snowflake/snowflake-arctic-instruct",
            input={
                "prompt": "summarize the following news article:" + article,
                "temperature": 0.2,
            },
        ):
            event_list.append(str(event))
        return {"summary": "".join(event_list)}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}


@app.post("/summarize-v2")
async def summarize_v2(item: Item):
    try:
        article = extract_article_content(item.url)
        if len(article) == 0:
            return {"summary": ""}

        def event_generator():
            # Yield each token as it arrives so the client can render
            # the summary incrementally.
            for event in replicate.stream(
                "snowflake/snowflake-arctic-instruct",
                input={
                    "prompt": f"summarize this news article in {item.max_tokens} lines:" + article,
                    "temperature": 0.2,
                    "max_new_tokens": 1000,
                },
            ):
                yield str(event)

        # StreamingResponse forwards the generator's output as server-sent events.
        return StreamingResponse(event_generator(), media_type='text/event-stream')
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}


@app.post("/extract-content")
async def extract_content(item: Item):
    try:
        article = extract_article_content(item.url)
        if len(article) == 0:
            return {"error": "could not extract article content"}
        return {"content": article}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}


def insert_image(url, id):
    # The default supabase-py client is synchronous, so this call must not be awaited.
    supabase.table('news').update({'image_url': url}).eq('id', id).execute()


@app.get("/extract-images")
async def extract_images(site: str = 'abcnews.go'):
    try:
        image_urls = []
        # Pull the 15 most recent rows for this source that have no image yet.
        response = (
            supabase.table('news')
            .select("*")
            .eq('source', f'www.{site}.com')
            .is_('image_url', 'null')
            .order('published_date', desc=True)
            .limit(15)
            .execute()
        )
        for row in response.data:
            try:
                res = requests.get(row['article_url'])
                soup = BeautifulSoup(res.text, 'html.parser')
                images = soup.find_all('img')
                # Take the second <img> on the page; the first is usually the site logo.
                image_url = images[1]['src']
                image_urls.append(image_url)
                insert_image(image_url, row['id'])
            except Exception as e:
                # Skip articles whose pages lack a usable image.
                print(e)
        return {"RESULTS": image_urls}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
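
# --- Usage sketch ---
# A minimal way to run and exercise the API, assuming this file is saved as
# main.py (the filename is an assumption, not part of the source) and that
# DB_URL, DB_KEY, and REPLICATE_API_TOKEN are set in the environment:
#
#   uvicorn main:app --reload
#
# Stream a summary from the v2 endpoint (-N disables curl's buffering so
# tokens appear as they arrive):
#
#   curl -N -X POST http://localhost:8000/summarize-v2 \
#        -H "Content-Type: application/json" \
#        -d '{"url": "https://example.com/article", "max_tokens": 5}'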