Divyansh Kushwaha committed on
Commit 0021684 · 1 Parent(s): 53dd0ab
Files changed (3)
  1. api.py +21 -411
  2. main.py → app.py +0 -0
  3. utils.py +8 -39
api.py CHANGED
@@ -1,250 +1,18 @@
- # from fastapi import FastAPI, Query
- # from fastapi.responses import JSONResponse, FileResponse
- # import json
- # import os
- # from bs4 import BeautifulSoup
- # from dotenv import load_dotenv
- # import requests
- # from transformers import pipeline
- # from elevenlabs import ElevenLabs
- # from langchain_groq import ChatGroq
- # from langchain.schema import HumanMessage
-
- # app = FastAPI(title="Company Sentiment API", description="Get company news summaries with sentiment analysis")
-
- # load_dotenv()
- # GROQ_API_KEY = os.getenv("GROQ_API_KEY")
- # ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
-
- # llm = ChatGroq(api_key=GROQ_API_KEY, model="llama-3.1-8b-instant")
-
- # JSON_FILE_PATH = "final_summary.json"
- # AUDIO_FILE_PATH = "hindi_summary.mp3"
-
- # def extract_titles_and_summaries(company_name, num_articles=10):
-
- #     url = f"https://economictimes.indiatimes.com/topic/{company_name}/news"
- #     try:
- #         response = requests.get(url)
- #         if response.status_code != 200:
- #             print(f"Failed to fetch the webpage. Status code: {response.status_code}")
- #             return []
-
- #         soup = BeautifulSoup(response.content, "html.parser")
- #         articles = soup.find_all('div', class_='clr flt topicstry story_list', limit=num_articles)
- #         extracted_articles = []
-
- #         for article in articles:
- #             title_tag = article.find('h2').find('a')
- #             title = title_tag.get_text(strip=True) if title_tag else "No Title Found"
-
- #             summary_tag = article.find('p')
- #             summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"
-
- #             extracted_articles.append({
- #                 "Title": title,
- #                 "Summary": summary
- #             })
-
- #         return {
- #             "Company": company_name,
- #             "Articles": extracted_articles
- #         }
- #     except Exception as e:
- #         print(f"An error occurred: {e}")
- #         return []
-
- # def perform_sentiment_analysis(news_data):
- #     articles = news_data.get("Articles", [])
- #     pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
- #     sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
-
- #     for article in articles:
- #         content = f"{article['Title']} {article['Summary']}"
- #         sentiment_result = pipe(content)[0]
-
- #         sentiment_map = {
- #             "positive": "Positive",
- #             "negative": "Negative",
- #             "neutral": "Neutral",
- #             "very positive":"Positive",
- #             "very negative":"Negative"
- #         }
-
- #         sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")
- #         score = float(sentiment_result["score"])
-
- #         article["Sentiment"] = sentiment
- #         article["Score"] = score
-
- #         if sentiment in sentiment_counts:
- #             sentiment_counts[sentiment] += 1
-
- #     return news_data, sentiment_counts
-
- # def extract_topics_with_hf(news_data):
- #     structured_data = {
- #         "Company": news_data.get("Company", "Unknown"),
- #         "Articles": []
- #     }
- #     topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification")
- #     articles = news_data.get("Articles", [])
- #     for article in articles:
- #         content = f"{article['Title']} {article['Summary']}"
- #         topics_result = topic_pipe(content, top_k=3)
- #         topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
-
- #         structured_data["Articles"].append({
- #             "Title": article["Title"],
- #             "Summary": article["Summary"],
- #             "Sentiment": article.get("Sentiment", "Unknown"),
- #             "Score": article.get("Score", 0.0),
- #             "Topics": topics
- #         })
- #     return structured_data
-
- # def generate_final_sentiment(news_data, sentiment_counts):
- #     company_name = news_data["Company"]
- #     total_articles = sum(sentiment_counts.values())
- #     combined_summaries = " ".join([article["Summary"] for article in news_data["Articles"]])
- #     prompt = f"""
- #     Based on the analysis of {total_articles} articles about the company "{company_name}":
- #     - Positive articles: {sentiment_counts['Positive']}
- #     - Negative articles: {sentiment_counts['Negative']}
- #     - Neutral articles: {sentiment_counts['Neutral']}
-
- #     The following are the summarized key points from the articles: "{combined_summaries}".
- #     Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.
- #     Respond **ONLY** with a well-structured very concised and very short paragraph in plain text, focus on overall sentiment.
- #     """
- #     response = llm.invoke([HumanMessage(content=prompt)],max_tokens=200)
- #     final_sentiment = response if response else "Sentiment analysis summary not available."
- #     return final_sentiment.content
-
- # def extract_json(response):
- #     try:
- #         return json.loads(response)
- #     except json.JSONDecodeError:
- #         return {}
-
- # def compare_articles(news_data, sentiment_counts):
- #     articles = news_data.get("Articles", [])
- #     all_topics = [set(article["Topics"]) for article in articles]
-
- #     common_topics = set.intersection(*all_topics) if all_topics else set()
-
- #     topics_prompt = f"""
- #     Analyze the following article topics and identify **only three** key themes that are common across multiple articles,
- #     even if they are phrased differently. The topics from each article are:
- #     {all_topics}
-
- #     Respond **ONLY** with a JSON format:
- #     {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
- #     """
-
- #     response = llm.invoke([HumanMessage(content=topics_prompt)]).content
- #     contextual_common_topics = extract_json(response).get("CommonTopics", list(common_topics))[:3] # Limit to 3 topics
-
- #     total_articles = sum(sentiment_counts.values())
- #     comparison_prompt = f"""
- #     Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}":
- #     - Sentiment distribution: {sentiment_counts}
- #     - Commonly discussed topics across articles: {contextual_common_topics}
-
- #     Consider the following:
- #     1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
- #     2. Overall implications for the company's reputation, stock potential, and public perception.
- #     3. How sentiment varies across articles and its impact.
-
- #     Respond **ONLY** with a concise and insightful summary in this JSON format:
- #     {{
- #         "Coverage Differences": [
- #             {{"Comparison": "Brief contrast between Articles 1 & 2", "Impact": "Concise impact statement"}},
- #             {{"Comparison": "Brief contrast between Articles 3 & 4", "Impact": "Concise impact statement"}},
- #             ...
- #         ]
- #     }}
- #     """
-
- #     response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
- #     coverage_differences = extract_json(response).get("Coverage Differences", [])
-
- #     final_sentiment = generate_final_sentiment(news_data, sentiment_counts)
-
- #     return {
- #         "Company": news_data["Company"],
- #         "Articles": articles,
- #         "Comparative Sentiment Score": {
- #             "Sentiment Distribution": sentiment_counts,
- #             "Coverage Differences": coverage_differences,
- #             "Topic Overlap": {
- #                 "Common Topics": contextual_common_topics,
- #                 "Unique Topics": {
- #                     f"Article {i+1}": list(topics - set(contextual_common_topics))
- #                     for i, topics in enumerate(all_topics)
- #                 }
- #             }
- #         },
- #         "Final Sentiment Analysis": final_sentiment
- #     }
-
-
- # def generate_summary(company_name):
- #     news_articles = extract_titles_and_summaries(company_name)
- #     news_articles, sentiment_counts = perform_sentiment_analysis(news_articles)
- #     news_articles = extract_topics_with_hf(news_articles)
- #     final_summary = compare_articles(news_articles, sentiment_counts)
-
- #     hindi_prompt = f"Translate this text into Hindi: {final_summary['Final Sentiment Analysis']}"
- #     hindi_summary = llm.invoke([HumanMessage(content=hindi_prompt)]).content
-
- #     client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
- #     audio = client.text_to_speech.convert(
- #         voice_id="9BWtsMINqrJLrRacOk9x",
- #         output_format="mp3_44100_128",
- #         text=hindi_summary,
- #         model_id="eleven_multilingual_v2",
- #     )
- #     with open(AUDIO_FILE_PATH, "wb") as f:
- #         f.write(b"".join(audio))
-
- #     return final_summary["Final Sentiment Analysis"]
-
- # @app.get("/")
- # def home():
- #     return {"message": "Welcome to the Company Sentiment API"}
-
- # @app.get("/generateSummary")
- # def get_summary(company_name: str = Query(..., description="Enter company name")):
- #     summary = generate_summary(company_name)
- #     return {"final_summary": summary}
-
- # @app.get("/downloadJson")
- # def download_json():
- #     return FileResponse(JSON_FILE_PATH, media_type="application/json", filename="final_summary.json")
-
- # @app.get("/downloadHindiAudio")
- # def download_audio():
- #     return FileResponse(AUDIO_FILE_PATH, media_type="audio/mp3", filename="hindi_summary.mp3")
-
- # if __name__ == "__main__":
- #     import uvicorn
- #     uvicorn.run(app, host="0.0.0.0", port=8000)
-
-
-
-
-
  from fastapi import FastAPI, Query,HTTPException
  from fastapi.responses import JSONResponse, FileResponse
  from elevenlabs import ElevenLabs
- from bs4 import BeautifulSoup
- import requests
  from langchain.schema import HumanMessage
  from langchain_groq import ChatGroq
  import json
  from dotenv import load_dotenv
  import os
+ from utils import (
+     extract_titles_and_summaries,
+     perform_sentiment_analysis,
+     extract_topics_with_hf,
+     compare_articles
+ )
+
  load_dotenv()
  GROQ_API_KEY = os.getenv('GROQ_API_KEY')
  ELEVEN_LABS_API_KEY=os.getenv('ELEVEN_LABS_API_KEY')
@@ -257,185 +25,24 @@ llm=ChatGroq(api_key=GROQ_API_KEY, model="llama-3.1-8b-instant")
  JSON_FILE_PATH = "final_summary.json"
  AUDIO_FILE_PATH = "hindi_summary.mp3"

- def extract_titles_and_summaries(company_name, num_articles=10):
-     url = f"https://economictimes.indiatimes.com/topic/{company_name}/news"
-     try:
-         response = requests.get(url)
-         if response.status_code != 200:
-             print(f"Failed to fetch the webpage. Status code: {response.status_code}")
-             return []
-
-         soup = BeautifulSoup(response.content, "html.parser")
-         articles = soup.find_all('div', class_='clr flt topicstry story_list', limit=num_articles)
-         extracted_articles = []
-
-         for article in articles:
-             title_tag = article.find('h2')
-             if title_tag:
-                 link_tag = title_tag.find('a')
-                 title = link_tag.get_text(strip=True) if link_tag else "No Title Found"
-             else:
-                 title = "No Title Found"
-
-             summary_tag = article.find('p')
-             summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"
-
-             extracted_articles.append({
-                 "Title": title,
-                 "Summary": summary
-             })
-
-         return {
-             "Company": company_name,
-             "Articles": extracted_articles
-         }
-     except Exception as e:
-         print(f"An error occurred: {e}")
-         return []
-
- def perform_sentiment_analysis(news_data):
-     from transformers import pipeline
-     articles = news_data.get("Articles", [])
-     pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis",device=1)
-     sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
-
-     for article in articles:
-         content = f"{article['Title']} {article['Summary']}"
-         sentiment_result = pipe(content)[0]
-
-         sentiment_map = {
-             "positive": "Positive",
-             "negative": "Negative",
-             "neutral": "Neutral",
-             "very positive": "Positive",
-             "very negative": "Negative"
-         }
-
-         sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")
-         score = float(sentiment_result["score"])
-
-         article["Sentiment"] = sentiment
-         article["Score"] = score
-
-         if sentiment in sentiment_counts:
-             sentiment_counts[sentiment] += 1
-
-     return news_data, sentiment_counts
-
- def extract_topics_with_hf(news_data):
-     from transformers import pipeline
-     structured_data = {
-         "Company": news_data.get("Company", "Unknown"),
-         "Articles": []
-     }
-     topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification",device=1)
-     articles = news_data.get("Articles", [])
-     for article in articles:
-         content = f"{article['Title']} {article['Summary']}"
-         topics_result = topic_pipe(content, top_k=3)
-         topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
-
-         structured_data["Articles"].append({
-             "Title": article["Title"],
-             "Summary": article["Summary"],
-             "Sentiment": article.get("Sentiment", "Unknown"),
-             "Score": article.get("Score", 0.0),
-             "Topics": topics
-         })
-     return structured_data
-
- def generate_final_sentiment(news_data, sentiment_counts):
-     company_name = news_data["Company"]
-     total_articles = sum(sentiment_counts.values())
-     combined_summaries = " ".join([article["Summary"] for article in news_data["Articles"]])
-     prompt = f"""
-     Based on the analysis of {total_articles} articles about the company "{company_name}":
-     - Positive articles: {sentiment_counts['Positive']}
-     - Negative articles: {sentiment_counts['Negative']}
-     - Neutral articles: {sentiment_counts['Neutral']}
-     The following are the summarized key points from the articles: "{combined_summaries}".
-     Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.
-     Respond **ONLY** with a well-structured very concised and very short paragraph in plain text, focus on overall sentiment.
-     """
-     response = llm.invoke([HumanMessage(content=prompt)],max_tokens=200)
-     final_sentiment = response if response else "Sentiment analysis summary not available."
-     return final_sentiment.content # it's a string
-
- def extract_json(response):
-     try:
-         return json.loads(response)
-     except json.JSONDecodeError:
-         return {}
-
- def compare_articles(news_data, sentiment_counts):
-     articles = news_data.get("Articles", [])
-     all_topics = [set(article["Topics"]) for article in articles]
-     common_topics = set.intersection(*all_topics) if all_topics else set()
-     topics_prompt = f"""
-     Analyze the following article topics and identify **only three** key themes that are common across multiple articles,
-     even if they are phrased differently. The topics from each article are:
-     {all_topics}
-
-     Respond **ONLY** with a JSON format:
-     {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
-     """
-     response = llm.invoke([HumanMessage(content=topics_prompt)]).content
-     contextual_common_topics = extract_json(response).get("CommonTopics", list(common_topics))[:3] # Limit to 3 topics
-
-     total_articles = sum(sentiment_counts.values())
-     comparison_prompt = f"""
-     Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}":
-     - Sentiment distribution: {sentiment_counts}
-     - Commonly discussed topics across articles: {contextual_common_topics}
-
-     Consider the following:
-     1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
-     2. Overall implications for the company's reputation, stock potential, and public perception.
-     3. How sentiment varies across articles and its impact.
-
-     Respond **ONLY** with a concise and insightful summary in this JSON format:
-     {{
-         "Coverage Differences": [
-             {{"Comparison": "Brief contrast between Articles 1 & 2", "Impact": "Concise impact statement"}},
-             {{"Comparison": "Brief contrast between Articles 3 & 4", "Impact": "Concise impact statement"}}
-         ]
-     }}
-     """
-     response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
-     coverage_differences = extract_json(response).get("Coverage Differences", [])
-     final_sentiment = generate_final_sentiment(news_data, sentiment_counts,llm)
-     return {
-         "Company": news_data["Company"],
-         "Articles": articles,
-         "Comparative Sentiment Score": {
-             "Sentiment Distribution": sentiment_counts,
-             "Coverage Differences": coverage_differences,
-             "Topic Overlap": {
-                 "Common Topics": contextual_common_topics,
-                 "Unique Topics": {
-                     f"Article {i+1}": list(topics - set(contextual_common_topics))
-                     for i, topics in enumerate(all_topics)
-                 }
-             }
-         },
-         "Final Sentiment Analysis": final_sentiment
-     }
-
  def generate_summary(company_name):
      news_articles = extract_titles_and_summaries(company_name)
      news_articles, sentiment_counts = perform_sentiment_analysis(news_articles)
      news_articles = extract_topics_with_hf(news_articles)
      final_summary = compare_articles(news_articles, sentiment_counts)
      hindi_text = ""
-
+     hindi_summary=None
      if ELEVEN_LABS_API_KEY and VOICE_ID:
-         client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
-
          hindi_prompt = f"Just Translate this text into Hindi: {final_summary['Final Sentiment Analysis']}"
          hindi_response = llm.invoke([HumanMessage(content=hindi_prompt)]).content
          hindi_text = hindi_response.strip() if hindi_response else "Translation not available."
+         if hindi_text:
+             print(f"Generated Hindi Text: {hindi_text}")
+         else:
+             print("Hindi Text not generated")

          try:
+             client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
              audio = client.text_to_speech.convert(
                  voice_id=VOICE_ID,
                  output_format="mp3_44100_128",
@@ -443,13 +50,16 @@ def generate_summary(company_name):
                  model_id="eleven_multilingual_v2",
              )

-             hindi_summary = b"".join(audio) # Store the audio content as binary data
+             audio_bytes = b"".join(audio) # Store the audio content as binary data
              with open(AUDIO_FILE_PATH, "wb") as f:
-                 f.write(b"".join(audio))
+                 f.write(audio_bytes)

          except Exception as e:
              print(f"Error generating audio: {e}")
-             hindi_summary = None
+             audio_bytes = None
+         if not os.path.exists(AUDIO_FILE_PATH):
+             print(f"Audio file could not be found at {AUDIO_FILE_PATH}.")
+         final_summary["Hindi Summary"] = audio_bytes

      with open(JSON_FILE_PATH,"w") as f:
          json.dump(final_summary,f,indent=4)
@@ -475,7 +85,7 @@ def generate_summary(company_name):
              }
          },
          'Final Sentiment Analysis': final_summary["Final Sentiment Analysis"],
-         'Hindi Summary': hindi_summary
+         'Hindi Summary': hindi_text
      }

  @app.get("/")
@@ -497,4 +107,4 @@ def download_audio():

  if __name__ == "__main__":
      import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=8000)
+     uvicorn.run(app, host="0.0.0.0", port=8000)
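Why the hunk above rewrites the audio handling: client.text_to_speech.convert(...) in the elevenlabs SDK returns a generator of mp3 chunks, and the old code joined it twice (once into hindi_summary, then again inside f.write(b"".join(audio))), so the second join saw an exhausted generator and wrote an empty file. A minimal sketch of the failure mode and the fix, with a stand-in generator in place of the real client:

# chunks() stands in for the generator returned by client.text_to_speech.convert(...);
# the real call streams mp3 byte chunks.
def chunks():
    yield b"ID3"       # pretend mp3 header
    yield b"\x00" * 4  # pretend audio payload

audio = chunks()
first = b"".join(audio)   # consumes the generator: b'ID3\x00\x00\x00\x00'
second = b"".join(audio)  # generator already exhausted: b''
assert first and not second

# The commit's fix: join once, then reuse the bytes.
audio = chunks()
audio_bytes = b"".join(audio)
with open("hindi_summary.mp3", "wb") as f:
    f.write(audio_bytes)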
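The other api.py change is structural: the scraping, sentiment, topic, and comparison helpers now live in utils.py (diff below), and api.py only imports them. A minimal usage sketch under that layout; the company name is illustrative:

# Assumes utils.py from this commit sits next to this file.
from utils import extract_titles_and_summaries, perform_sentiment_analysis

news = extract_titles_and_summaries("Tesla", num_articles=5)  # scrapes the Economic Times topic page
news, counts = perform_sentiment_analysis(news)               # adds Sentiment/Score to each article
print(counts)  # e.g. {'Positive': 2, 'Negative': 1, 'Neutral': 2}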
main.py → app.py RENAMED
File without changes
utils.py CHANGED
@@ -1,13 +1,15 @@
- import json
- import requests
  from bs4 import BeautifulSoup
+ import requests
  from langchain.schema import HumanMessage
  from langchain_groq import ChatGroq
+ import json
  from dotenv import load_dotenv
  import os
+ from transformers import pipeline

  load_dotenv()
  GROQ_API_KEY = os.getenv('GROQ_API_KEY')
+
  llm=ChatGroq(api_key=GROQ_API_KEY, model="llama-3.1-8b-instant")

  def extract_titles_and_summaries(company_name, num_articles=10):
@@ -75,57 +77,24 @@ def perform_sentiment_analysis(news_data):

      return news_data, sentiment_counts

- # def extract_topics_with_hf(news_data):
- #     structured_data = {
- #         "Company": news_data.get("Company", "Unknown"),
- #         "Articles": []
- #     }
- #     topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification",device=1)
- #     articles = news_data.get("Articles", [])
- #     for article in articles:
- #         content = f"{article['Title']} {article['Summary']}"
- #         topics_result = topic_pipe(content, top_k=3)
- #         topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
-
- #         structured_data["Articles"].append({
- #             "Title": article["Title"],
- #             "Summary": article["Summary"],
- #             "Sentiment": article.get("Sentiment", "Unknown"),
- #             "Score": article.get("Score", 0.0),
- #             "Topics": topics
- #         })
- #     return structured_data
-
  def extract_topics_with_hf(news_data):
      structured_data = {
          "Company": news_data.get("Company", "Unknown"),
          "Articles": []
      }
+     topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification",device=1)
      articles = news_data.get("Articles", [])
      for article in articles:
          content = f"{article['Title']} {article['Summary']}"
-         # Define the prompt for Groq AI
-         prompt = f"""
-         Analyze the following content: "{content}"
-         Extract and return **exactly three key topics** most relevant to this content.
-         The topics should be of one word after analyzing the content.
-         Respond in a JSON format like this:
-         {{"Topics": ["topic1", "topic2", "topic3"]}}
-         """
-         try:
-             # Use Groq AI to invoke the model
-             response = llm.invoke([HumanMessage(content=prompt)]).content
-             topics_result = json.loads(response).get("Topics", ["Unknown"]) # Parse JSON response
-         except Exception as e:
-             print(f"Error while extracting topics: {e}")
-             topics_result = ["Unknown"]
+         topics_result = topic_pipe(content, top_k=3)
+         topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]

          structured_data["Articles"].append({
              "Title": article["Title"],
              "Summary": article["Summary"],
              "Sentiment": article.get("Sentiment", "Unknown"),
              "Score": article.get("Score", 0.0),
-             "Topics": topics_result
+             "Topics": topics
          })
      return structured_data
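The restored extract_topics_with_hf goes back to a Hugging Face text-classification pipeline instead of prompting the Groq model for JSON topics. A minimal sketch of that call pattern with the same model as the diff; the sample headline is illustrative, and device=-1 (CPU) is substituted for the commit's device=1, which assumes a second GPU:

from transformers import pipeline

topic_pipe = pipeline(
    "text-classification",
    model="valurank/distilroberta-topic-classification",
    device=-1,  # CPU here; the commit pins device=1
)

content = "Tesla shares rally after record quarterly deliveries"
topics_result = topic_pipe(content, top_k=3)  # top 3 labels, e.g. [{'label': 'business', 'score': 0.93}, ...]
topics = [t["label"] for t in topics_result] if topics_result else ["Unknown"]
print(topics)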