Divyansh Kushwaha committed on
Commit · 0021684 · 1 Parent(s): 53dd0ab
- api.py +21 -411
- main.py → app.py +0 -0
- utils.py +8 -39
api.py CHANGED
@@ -1,250 +1,18 @@
-# from fastapi import FastAPI, Query
-# from fastapi.responses import JSONResponse, FileResponse
-# import json
-# import os
-# from bs4 import BeautifulSoup
-# from dotenv import load_dotenv
-# import requests
-# from transformers import pipeline
-# from elevenlabs import ElevenLabs
-# from langchain_groq import ChatGroq
-# from langchain.schema import HumanMessage
-
-# app = FastAPI(title="Company Sentiment API", description="Get company news summaries with sentiment analysis")
-
-# load_dotenv()
-# GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-# ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
-
-# llm = ChatGroq(api_key=GROQ_API_KEY, model="llama-3.1-8b-instant")
-
-# JSON_FILE_PATH = "final_summary.json"
-# AUDIO_FILE_PATH = "hindi_summary.mp3"
-
-# def extract_titles_and_summaries(company_name, num_articles=10):
-
-#     url = f"https://economictimes.indiatimes.com/topic/{company_name}/news"
-#     try:
-#         response = requests.get(url)
-#         if response.status_code != 200:
-#             print(f"Failed to fetch the webpage. Status code: {response.status_code}")
-#             return []
-
-#         soup = BeautifulSoup(response.content, "html.parser")
-#         articles = soup.find_all('div', class_='clr flt topicstry story_list', limit=num_articles)
-#         extracted_articles = []
-
-#         for article in articles:
-#             title_tag = article.find('h2').find('a')
-#             title = title_tag.get_text(strip=True) if title_tag else "No Title Found"
-
-#             summary_tag = article.find('p')
-#             summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"
-
-#             extracted_articles.append({
-#                 "Title": title,
-#                 "Summary": summary
-#             })
-
-#         return {
-#             "Company": company_name,
-#             "Articles": extracted_articles
-#         }
-#     except Exception as e:
-#         print(f"An error occurred: {e}")
-#         return []
-
-# def perform_sentiment_analysis(news_data):
-#     articles = news_data.get("Articles", [])
-#     pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
-#     sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
-
-#     for article in articles:
-#         content = f"{article['Title']} {article['Summary']}"
-#         sentiment_result = pipe(content)[0]
-
-#         sentiment_map = {
-#             "positive": "Positive",
-#             "negative": "Negative",
-#             "neutral": "Neutral",
-#             "very positive":"Positive",
-#             "very negative":"Negative"
-#         }
-
-#         sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")
-#         score = float(sentiment_result["score"])
-
-#         article["Sentiment"] = sentiment
-#         article["Score"] = score
-
-#         if sentiment in sentiment_counts:
-#             sentiment_counts[sentiment] += 1
-
-#     return news_data, sentiment_counts
-
-# def extract_topics_with_hf(news_data):
-#     structured_data = {
-#         "Company": news_data.get("Company", "Unknown"),
-#         "Articles": []
-#     }
-#     topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification")
-#     articles = news_data.get("Articles", [])
-#     for article in articles:
-#         content = f"{article['Title']} {article['Summary']}"
-#         topics_result = topic_pipe(content, top_k=3)
-#         topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
-
-#         structured_data["Articles"].append({
-#             "Title": article["Title"],
-#             "Summary": article["Summary"],
-#             "Sentiment": article.get("Sentiment", "Unknown"),
-#             "Score": article.get("Score", 0.0),
-#             "Topics": topics
-#         })
-#     return structured_data
-
-# def generate_final_sentiment(news_data, sentiment_counts):
-#     company_name = news_data["Company"]
-#     total_articles = sum(sentiment_counts.values())
-#     combined_summaries = " ".join([article["Summary"] for article in news_data["Articles"]])
-#     prompt = f"""
-#     Based on the analysis of {total_articles} articles about the company "{company_name}":
-#     - Positive articles: {sentiment_counts['Positive']}
-#     - Negative articles: {sentiment_counts['Negative']}
-#     - Neutral articles: {sentiment_counts['Neutral']}
-
-#     The following are the summarized key points from the articles: "{combined_summaries}".
-#     Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.
-#     Respond **ONLY** with a well-structured very concised and very short paragraph in plain text, focus on overall sentiment.
-#     """
-#     response = llm.invoke([HumanMessage(content=prompt)],max_tokens=200)
-#     final_sentiment = response if response else "Sentiment analysis summary not available."
-#     return final_sentiment.content
-
-# def extract_json(response):
-#     try:
-#         return json.loads(response)
-#     except json.JSONDecodeError:
-#         return {}
-
-# def compare_articles(news_data, sentiment_counts):
-#     articles = news_data.get("Articles", [])
-#     all_topics = [set(article["Topics"]) for article in articles]
-
-#     common_topics = set.intersection(*all_topics) if all_topics else set()
-
-#     topics_prompt = f"""
-#     Analyze the following article topics and identify **only three** key themes that are common across multiple articles,
-#     even if they are phrased differently. The topics from each article are:
-#     {all_topics}
-
-#     Respond **ONLY** with a JSON format:
-#     {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
-#     """
-
-#     response = llm.invoke([HumanMessage(content=topics_prompt)]).content
-#     contextual_common_topics = extract_json(response).get("CommonTopics", list(common_topics))[:3]  # Limit to 3 topics
-
-#     total_articles = sum(sentiment_counts.values())
-#     comparison_prompt = f"""
-#     Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}":
-#     - Sentiment distribution: {sentiment_counts}
-#     - Commonly discussed topics across articles: {contextual_common_topics}
-
-#     Consider the following:
-#     1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
-#     2. Overall implications for the company's reputation, stock potential, and public perception.
-#     3. How sentiment varies across articles and its impact.
-
-#     Respond **ONLY** with a concise and insightful summary in this JSON format:
-#     {{
-#         "Coverage Differences": [
-#             {{"Comparison": "Brief contrast between Articles 1 & 2", "Impact": "Concise impact statement"}},
-#             {{"Comparison": "Brief contrast between Articles 3 & 4", "Impact": "Concise impact statement"}},
-#             ...
-#         ]
-#     }}
-#     """
-
-#     response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
-#     coverage_differences = extract_json(response).get("Coverage Differences", [])
-
-#     final_sentiment = generate_final_sentiment(news_data, sentiment_counts)
-
-#     return {
-#         "Company": news_data["Company"],
-#         "Articles": articles,
-#         "Comparative Sentiment Score": {
-#             "Sentiment Distribution": sentiment_counts,
-#             "Coverage Differences": coverage_differences,
-#             "Topic Overlap": {
-#                 "Common Topics": contextual_common_topics,
-#                 "Unique Topics": {
-#                     f"Article {i+1}": list(topics - set(contextual_common_topics))
-#                     for i, topics in enumerate(all_topics)
-#                 }
-#             }
-#         },
-#         "Final Sentiment Analysis": final_sentiment
-#     }
-
-
-# def generate_summary(company_name):
-#     news_articles = extract_titles_and_summaries(company_name)
-#     news_articles, sentiment_counts = perform_sentiment_analysis(news_articles)
-#     news_articles = extract_topics_with_hf(news_articles)
-#     final_summary = compare_articles(news_articles, sentiment_counts)
-
-#     hindi_prompt = f"Translate this text into Hindi: {final_summary['Final Sentiment Analysis']}"
-#     hindi_summary = llm.invoke([HumanMessage(content=hindi_prompt)]).content
-
-#     client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
-#     audio = client.text_to_speech.convert(
-#         voice_id="9BWtsMINqrJLrRacOk9x",
-#         output_format="mp3_44100_128",
-#         text=hindi_summary,
-#         model_id="eleven_multilingual_v2",
-#     )
-#     with open(AUDIO_FILE_PATH, "wb") as f:
-#         f.write(b"".join(audio))
-
-#     return final_summary["Final Sentiment Analysis"]
-
-# @app.get("/")
-# def home():
-#     return {"message": "Welcome to the Company Sentiment API"}
-
-# @app.get("/generateSummary")
-# def get_summary(company_name: str = Query(..., description="Enter company name")):
-#     summary = generate_summary(company_name)
-#     return {"final_summary": summary}
-
-# @app.get("/downloadJson")
-# def download_json():
-#     return FileResponse(JSON_FILE_PATH, media_type="application/json", filename="final_summary.json")
-
-# @app.get("/downloadHindiAudio")
-# def download_audio():
-#     return FileResponse(AUDIO_FILE_PATH, media_type="audio/mp3", filename="hindi_summary.mp3")
-
-# if __name__ == "__main__":
-#     import uvicorn
-#     uvicorn.run(app, host="0.0.0.0", port=8000)
-
-
-
-
-
 from fastapi import FastAPI, Query,HTTPException
 from fastapi.responses import JSONResponse, FileResponse
 from elevenlabs import ElevenLabs
-from bs4 import BeautifulSoup
-import requests
 from langchain.schema import HumanMessage
 from langchain_groq import ChatGroq
 import json
 from dotenv import load_dotenv
 import os
+from utils import (
+    extract_titles_and_summaries,
+    perform_sentiment_analysis,
+    extract_topics_with_hf,
+    compare_articles
+)
+
 load_dotenv()
 GROQ_API_KEY = os.getenv('GROQ_API_KEY')
 ELEVEN_LABS_API_KEY=os.getenv('ELEVEN_LABS_API_KEY')
@@ -257,185 +25,24 @@ llm=ChatGroq(api_key=GROQ_API_KEY, model="llama-3.1-8b-instant")
 JSON_FILE_PATH = "final_summary.json"
 AUDIO_FILE_PATH = "hindi_summary.mp3"
 
-def extract_titles_and_summaries(company_name, num_articles=10):
-    url = f"https://economictimes.indiatimes.com/topic/{company_name}/news"
-    try:
-        response = requests.get(url)
-        if response.status_code != 200:
-            print(f"Failed to fetch the webpage. Status code: {response.status_code}")
-            return []
-
-        soup = BeautifulSoup(response.content, "html.parser")
-        articles = soup.find_all('div', class_='clr flt topicstry story_list', limit=num_articles)
-        extracted_articles = []
-
-        for article in articles:
-            title_tag = article.find('h2')
-            if title_tag:
-                link_tag = title_tag.find('a')
-                title = link_tag.get_text(strip=True) if link_tag else "No Title Found"
-            else:
-                title = "No Title Found"
-
-            summary_tag = article.find('p')
-            summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"
-
-            extracted_articles.append({
-                "Title": title,
-                "Summary": summary
-            })
-
-        return {
-            "Company": company_name,
-            "Articles": extracted_articles
-        }
-    except Exception as e:
-        print(f"An error occurred: {e}")
-        return []
-
-def perform_sentiment_analysis(news_data):
-    from transformers import pipeline
-    articles = news_data.get("Articles", [])
-    pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis",device=1)
-    sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
-
-    for article in articles:
-        content = f"{article['Title']} {article['Summary']}"
-        sentiment_result = pipe(content)[0]
-
-        sentiment_map = {
-            "positive": "Positive",
-            "negative": "Negative",
-            "neutral": "Neutral",
-            "very positive": "Positive",
-            "very negative": "Negative"
-        }
-
-        sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")
-        score = float(sentiment_result["score"])
-
-        article["Sentiment"] = sentiment
-        article["Score"] = score
-
-        if sentiment in sentiment_counts:
-            sentiment_counts[sentiment] += 1
-
-    return news_data, sentiment_counts
-
-def extract_topics_with_hf(news_data):
-    from transformers import pipeline
-    structured_data = {
-        "Company": news_data.get("Company", "Unknown"),
-        "Articles": []
-    }
-    topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification",device=1)
-    articles = news_data.get("Articles", [])
-    for article in articles:
-        content = f"{article['Title']} {article['Summary']}"
-        topics_result = topic_pipe(content, top_k=3)
-        topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
-
-        structured_data["Articles"].append({
-            "Title": article["Title"],
-            "Summary": article["Summary"],
-            "Sentiment": article.get("Sentiment", "Unknown"),
-            "Score": article.get("Score", 0.0),
-            "Topics": topics
-        })
-    return structured_data
-
-def generate_final_sentiment(news_data, sentiment_counts):
-    company_name = news_data["Company"]
-    total_articles = sum(sentiment_counts.values())
-    combined_summaries = " ".join([article["Summary"] for article in news_data["Articles"]])
-    prompt = f"""
-    Based on the analysis of {total_articles} articles about the company "{company_name}":
-    - Positive articles: {sentiment_counts['Positive']}
-    - Negative articles: {sentiment_counts['Negative']}
-    - Neutral articles: {sentiment_counts['Neutral']}
-    The following are the summarized key points from the articles: "{combined_summaries}".
-    Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.
-    Respond **ONLY** with a well-structured very concised and very short paragraph in plain text, focus on overall sentiment.
-    """
-    response = llm.invoke([HumanMessage(content=prompt)],max_tokens=200)
-    final_sentiment = response if response else "Sentiment analysis summary not available."
-    return final_sentiment.content  # it's a string
-
-def extract_json(response):
-    try:
-        return json.loads(response)
-    except json.JSONDecodeError:
-        return {}
-
-def compare_articles(news_data, sentiment_counts):
-    articles = news_data.get("Articles", [])
-    all_topics = [set(article["Topics"]) for article in articles]
-    common_topics = set.intersection(*all_topics) if all_topics else set()
-    topics_prompt = f"""
-    Analyze the following article topics and identify **only three** key themes that are common across multiple articles,
-    even if they are phrased differently. The topics from each article are:
-    {all_topics}
-
-    Respond **ONLY** with a JSON format:
-    {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
-    """
-    response = llm.invoke([HumanMessage(content=topics_prompt)]).content
-    contextual_common_topics = extract_json(response).get("CommonTopics", list(common_topics))[:3]  # Limit to 3 topics
-
-    total_articles = sum(sentiment_counts.values())
-    comparison_prompt = f"""
-    Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}":
-    - Sentiment distribution: {sentiment_counts}
-    - Commonly discussed topics across articles: {contextual_common_topics}
-
-    Consider the following:
-    1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
-    2. Overall implications for the company's reputation, stock potential, and public perception.
-    3. How sentiment varies across articles and its impact.
-
-    Respond **ONLY** with a concise and insightful summary in this JSON format:
-    {{
-        "Coverage Differences": [
-            {{"Comparison": "Brief contrast between Articles 1 & 2", "Impact": "Concise impact statement"}},
-            {{"Comparison": "Brief contrast between Articles 3 & 4", "Impact": "Concise impact statement"}}
-        ]
-    }}
-    """
-    response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
-    coverage_differences = extract_json(response).get("Coverage Differences", [])
-    final_sentiment = generate_final_sentiment(news_data, sentiment_counts,llm)
-    return {
-        "Company": news_data["Company"],
-        "Articles": articles,
-        "Comparative Sentiment Score": {
-            "Sentiment Distribution": sentiment_counts,
-            "Coverage Differences": coverage_differences,
-            "Topic Overlap": {
-                "Common Topics": contextual_common_topics,
-                "Unique Topics": {
-                    f"Article {i+1}": list(topics - set(contextual_common_topics))
-                    for i, topics in enumerate(all_topics)
-                }
-            }
-        },
-        "Final Sentiment Analysis": final_sentiment
-    }
-
 def generate_summary(company_name):
     news_articles = extract_titles_and_summaries(company_name)
     news_articles, sentiment_counts = perform_sentiment_analysis(news_articles)
     news_articles = extract_topics_with_hf(news_articles)
     final_summary = compare_articles(news_articles, sentiment_counts)
     hindi_text = ""
-
+    hindi_summary=None
     if ELEVEN_LABS_API_KEY and VOICE_ID:
-        client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
-
         hindi_prompt = f"Just Translate this text into Hindi: {final_summary['Final Sentiment Analysis']}"
         hindi_response = llm.invoke([HumanMessage(content=hindi_prompt)]).content
         hindi_text = hindi_response.strip() if hindi_response else "Translation not available."
+        if hindi_text:
+            print(f"Generated Hindi Text: {hindi_text}")
+        else:
+            print("Hindi Text not generated")
 
         try:
+            client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
             audio = client.text_to_speech.convert(
                 voice_id=VOICE_ID,
                 output_format="mp3_44100_128",
@@ -443,13 +50,16 @@ def generate_summary(company_name):
                 model_id="eleven_multilingual_v2",
             )
 
-
+            audio_bytes = b"".join(audio)  # Store the audio content as binary data
             with open(AUDIO_FILE_PATH, "wb") as f:
-                f.write(
+                f.write(audio_bytes)
 
         except Exception as e:
            print(f"Error generating audio: {e}")
-
+            audio_bytes = None
+    if not os.path.exists(AUDIO_FILE_PATH):
+        print(f"Audio file could not be found at {AUDIO_FILE_PATH}.")
+    final_summary["Hindi Summary"] = audio_bytes
 
     with open(JSON_FILE_PATH,"w") as f:
         json.dump(final_summary,f,indent=4)
@@ -475,7 +85,7 @@ def generate_summary(company_name):
             }
         },
         'Final Sentiment Analysis': final_summary["Final Sentiment Analysis"],
-        'Hindi Summary':
+        'Hindi Summary': hindi_text
     }
 
 @app.get("/")
@@ -497,4 +107,4 @@ def download_audio():
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
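For reference, a minimal client-side sketch (not part of the commit) of how the slimmed-down api.py is exercised once the server is running. It assumes the default host/port from the uvicorn.run call above, and the {"final_summary": ...} response shape that the /generateSummary route returned in the previous version; "Tesla" is just a placeholder query:

    import requests

    BASE = "http://localhost:8000"

    # Kick off scraping, sentiment analysis, topic extraction and comparison.
    resp = requests.get(f"{BASE}/generateSummary", params={"company_name": "Tesla"})
    print(resp.json()["final_summary"])

    # Fetch the artifacts that generate_summary() wrote to disk.
    with open("final_summary.json", "wb") as f:
        f.write(requests.get(f"{BASE}/downloadJson").content)
    with open("hindi_summary.mp3", "wb") as f:
        f.write(requests.get(f"{BASE}/downloadHindiAudio").content)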
main.py → app.py RENAMED
File without changes
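Since the entry module is now app.py, anything that launches the service by import string needs the new module name. A hypothetical launcher equivalent, assuming the FastAPI instance inside app.py is still named app:

    import uvicorn

    # "app:app" = module app.py, attribute app (the FastAPI instance).
    uvicorn.run("app:app", host="0.0.0.0", port=8000)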
utils.py CHANGED
@@ -1,13 +1,15 @@
-import json
-import requests
 from bs4 import BeautifulSoup
+import requests
 from langchain.schema import HumanMessage
 from langchain_groq import ChatGroq
+import json
 from dotenv import load_dotenv
 import os
+from transformers import pipeline
 
 load_dotenv()
 GROQ_API_KEY = os.getenv('GROQ_API_KEY')
+
 llm=ChatGroq(api_key=GROQ_API_KEY, model="llama-3.1-8b-instant")
 
 def extract_titles_and_summaries(company_name, num_articles=10):
@@ -75,57 +77,24 @@ def perform_sentiment_analysis(news_data):
 
     return news_data, sentiment_counts
 
-# def extract_topics_with_hf(news_data):
-#     structured_data = {
-#         "Company": news_data.get("Company", "Unknown"),
-#         "Articles": []
-#     }
-#     topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification",device=1)
-#     articles = news_data.get("Articles", [])
-#     for article in articles:
-#         content = f"{article['Title']} {article['Summary']}"
-#         topics_result = topic_pipe(content, top_k=3)
-#         topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
-
-#         structured_data["Articles"].append({
-#             "Title": article["Title"],
-#             "Summary": article["Summary"],
-#             "Sentiment": article.get("Sentiment", "Unknown"),
-#             "Score": article.get("Score", 0.0),
-#             "Topics": topics
-#         })
-#     return structured_data
-
 def extract_topics_with_hf(news_data):
     structured_data = {
         "Company": news_data.get("Company", "Unknown"),
         "Articles": []
     }
+    topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification",device=1)
     articles = news_data.get("Articles", [])
     for article in articles:
         content = f"{article['Title']} {article['Summary']}"
-        prompt = f"""
-
-    Analyze the following content: "{content}"
-    Extract and return **exactly three key topics** most relevant to this content.
-    The topics should be of one word after analyzing the content.
-    Respond in a JSON format like this:
-    {{"Topics": ["topic1", "topic2", "topic3"]}}
-    """
-        try:
-            # Use Groq AI to invoke the model
-            response = llm.invoke([HumanMessage(content=prompt)]).content
-            topics_result = json.loads(response).get("Topics", ["Unknown"])  # Parse JSON response
-        except Exception as e:
-            print(f"Error while extracting topics: {e}")
-            topics_result = ["Unknown"]
+        topics_result = topic_pipe(content, top_k=3)
+        topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
 
         structured_data["Articles"].append({
             "Title": article["Title"],
             "Summary": article["Summary"],
             "Sentiment": article.get("Sentiment", "Unknown"),
             "Score": article.get("Score", 0.0),
-            "Topics":
+            "Topics": topics
        })
     return structured_data
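A quick standalone sketch (not part of the commit) of driving the restored pipeline-based extractor end to end. Note that device=1 in utils.py assumes a second CUDA device is visible; on a CPU-only machine the pipeline would need device=-1. "Tesla" is again a placeholder company:

    from utils import (
        extract_titles_and_summaries,
        perform_sentiment_analysis,
        extract_topics_with_hf,
    )

    news = extract_titles_and_summaries("Tesla", num_articles=3)  # returns [] on fetch failure
    news, counts = perform_sentiment_analysis(news)   # annotates each article with Sentiment/Score
    news = extract_topics_with_hf(news)               # annotates each article with a 3-topic list

    for art in news["Articles"]:
        print(art["Title"], "->", art["Sentiment"], art["Topics"])
    print("Sentiment distribution:", counts)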