Krish Patel committed on
Commit
207a2e4
·
1 Parent(s): 32fef5a

Debugging 2

Browse files
Files changed (2) hide show
  1. app.py +3 -2
  2. final.py +19 -88
app.py CHANGED
@@ -42,8 +42,6 @@
42
  # st.write(f"Prediction: {prediction} (Confidence: {confidence*100:.2f}%)")
43
 
44
  import streamlit as st
45
- from final import *
46
- import pandas as pd
47
 
48
  # Page configuration
49
  st.set_page_config(
@@ -52,6 +50,9 @@ st.set_page_config(
52
  layout="wide"
53
  )
54
 
 
 
 
55
  # Cache model loading
56
  @st.cache_resource
57
  def initialize_models():
 
42
  # st.write(f"Prediction: {prediction} (Confidence: {confidence*100:.2f}%)")
43
 
44
  import streamlit as st
 
 
45
 
46
  # Page configuration
47
  st.set_page_config(
 
50
  layout="wide"
51
  )
52
 
53
+ import pandas as pd
54
+ from final import *
55
+
56
  # Cache model loading
57
  @st.cache_resource
58
  def initialize_models():
final.py CHANGED
@@ -269,13 +269,6 @@
269
  # if __name__ == "__main__":
270
  # main()
271
 
272
- import streamlit as st
273
- # Page config
274
- st.set_page_config(
275
- page_title="Nexus NLP News Classifier",
276
- page_icon="📰",
277
- layout="wide"
278
- )
279
  import torch
280
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, DebertaV2Tokenizer
281
  import networkx as nx
@@ -286,26 +279,21 @@ import json
286
  import os
287
  import dotenv
288
 
289
-
290
  # Load environment variables
291
  dotenv.load_dotenv()
292
 
293
- # Load models and resources
294
- @st.cache_resource
295
- def load_nlp():
296
- return spacy.load("en_core_web_sm")
297
-
298
- @st.cache_resource
299
- def load_model():
300
  model_path = "./results/checkpoint-753"
301
  tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-small')
302
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
303
  model.eval()
304
- return tokenizer, model
305
 
306
- @st.cache_resource
307
  def load_knowledge_graph():
308
- graph_path = "./knowledge_graph_final.pkl"
 
309
  with open(graph_path, 'rb') as f:
310
  graph_data = pickle.load(f)
311
  knowledge_graph = nx.DiGraph()
@@ -316,16 +304,13 @@ def load_knowledge_graph():
316
  return knowledge_graph
317
 
318
  def setup_gemini():
 
319
  genai.configure(api_key=os.getenv("GEMINI_API"))
320
  model = genai.GenerativeModel('gemini-pro')
321
  return model
322
 
323
- # Initialize resources
324
- nlp = load_nlp()
325
- tokenizer, model = load_model()
326
- knowledge_graph = load_knowledge_graph()
327
-
328
- def predict_with_model(text):
329
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
330
  with torch.no_grad():
331
  outputs = model(**inputs)
@@ -334,13 +319,15 @@ def predict_with_model(text):
334
  confidence = probabilities[0][predicted_label].item() * 100
335
  return "FAKE" if predicted_label == 1 else "REAL", confidence
336
 
337
- def extract_entities(text):
 
338
  doc = nlp(text)
339
  entities = [(ent.text, ent.label_) for ent in doc.ents]
340
  return entities
341
 
342
- def update_knowledge_graph(text, is_real):
343
- entities = extract_entities(text)
 
344
  for entity, entity_type in entities:
345
  if not knowledge_graph.has_node(entity):
346
  knowledge_graph.add_node(
@@ -367,8 +354,9 @@ def update_knowledge_graph(text, is_real):
367
  else:
368
  knowledge_graph[entity1][entity2]['weight'] += 1
369
 
370
- def predict_with_knowledge_graph(text):
371
- entities = extract_entities(text)
 
372
  real_score = 0
373
  fake_score = 0
374
 
@@ -393,6 +381,7 @@ def predict_with_knowledge_graph(text):
393
  return "FAKE", confidence
394
 
395
  def analyze_content_gemini(model, text):
 
396
  prompt = f"""Analyze this news text and return a JSON object with the following structure:
397
  {{
398
  "gemini_analysis": {{
@@ -451,62 +440,4 @@ def analyze_content_gemini(model, text):
451
  "reasoning": ["Analysis failed to generate valid JSON"]
452
  }
453
  }
454
-
455
- def main():
456
- st.title("📰 Nexus NLP News Classifier")
457
- st.write("Enter news text below to analyze its authenticity")
458
-
459
- # Query parameters for API functionality
460
- query_params = st.query_params
461
- if "text" in query_params:
462
- text_input = query_params["text"][0]
463
- ml_prediction, ml_confidence = predict_with_model(text_input)
464
- st.json({"prediction": ml_prediction, "confidence": ml_confidence})
465
- return
466
-
467
- # Regular UI
468
- news_text = st.text_area("News Text", height=200)
469
-
470
- if st.button("Analyze"):
471
- if news_text:
472
- with st.spinner("Analyzing..."):
473
- # Get all predictions
474
- ml_prediction, ml_confidence = predict_with_model(news_text)
475
- kg_prediction, kg_confidence = predict_with_knowledge_graph(news_text)
476
- update_knowledge_graph(news_text, ml_prediction == "REAL")
477
-
478
- gemini_model = setup_gemini()
479
- gemini_result = analyze_content_gemini(gemini_model, news_text)
480
-
481
- # Display results
482
- col1, col2, col3 = st.columns(3)
483
-
484
- with col1:
485
- st.subheader("ML Model Analysis")
486
- st.metric("Prediction", ml_prediction)
487
- st.metric("Confidence", f"{ml_confidence:.2f}%")
488
-
489
- with col2:
490
- st.subheader("Knowledge Graph Analysis")
491
- st.metric("Prediction", kg_prediction)
492
- st.metric("Confidence", f"{kg_confidence:.2f}%")
493
-
494
- with col3:
495
- st.subheader("Gemini Analysis")
496
- gemini_pred = gemini_result["gemini_analysis"]["predicted_classification"]
497
- gemini_conf = gemini_result["gemini_analysis"]["confidence_score"]
498
- st.metric("Prediction", gemini_pred)
499
- st.metric("Confidence", f"{gemini_conf}%")
500
-
501
- with st.expander("View Detailed Analysis"):
502
- st.json(gemini_result)
503
-
504
- with st.expander("Named Entities"):
505
- entities = extract_entities(news_text)
506
- st.write(entities)
507
-
508
- else:
509
- st.warning("Please enter some text to analyze")
510
-
511
- if __name__ == "__main__":
512
- main()
 
269
  # if __name__ == "__main__":
270
  # main()
271
 
 
 
 
 
 
 
 
272
  import torch
273
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, DebertaV2Tokenizer
274
  import networkx as nx
 
279
  import os
280
  import dotenv
281
 
 
282
  # Load environment variables
283
  dotenv.load_dotenv()
284
 
285
+ def load_models():
286
+ """Load all required ML models"""
287
+ nlp = spacy.load("en_core_web_sm")
 
 
 
 
288
  model_path = "./results/checkpoint-753"
289
  tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-small')
290
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
291
  model.eval()
292
+ return nlp, tokenizer, model
293
 
 
294
  def load_knowledge_graph():
295
+ """Load and initialize knowledge graph"""
296
+ graph_path = "./models/knowledge_graph.pkl"
297
  with open(graph_path, 'rb') as f:
298
  graph_data = pickle.load(f)
299
  knowledge_graph = nx.DiGraph()
 
304
  return knowledge_graph
305
 
306
  def setup_gemini():
307
+ """Initialize Gemini model"""
308
  genai.configure(api_key=os.getenv("GEMINI_API"))
309
  model = genai.GenerativeModel('gemini-pro')
310
  return model
311
 
312
+ def predict_with_model(text, tokenizer, model):
313
+ """Make predictions using the ML model"""
 
 
 
 
314
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
315
  with torch.no_grad():
316
  outputs = model(**inputs)
 
319
  confidence = probabilities[0][predicted_label].item() * 100
320
  return "FAKE" if predicted_label == 1 else "REAL", confidence
321
 
322
+ def extract_entities(text, nlp):
323
+ """Extract named entities from text"""
324
  doc = nlp(text)
325
  entities = [(ent.text, ent.label_) for ent in doc.ents]
326
  return entities
327
 
328
+ def update_knowledge_graph(text, is_real, knowledge_graph, nlp):
329
+ """Update knowledge graph with new information"""
330
+ entities = extract_entities(text, nlp)
331
  for entity, entity_type in entities:
332
  if not knowledge_graph.has_node(entity):
333
  knowledge_graph.add_node(
 
354
  else:
355
  knowledge_graph[entity1][entity2]['weight'] += 1
356
 
357
+ def predict_with_knowledge_graph(text, knowledge_graph, nlp):
358
+ """Make predictions using the knowledge graph"""
359
+ entities = extract_entities(text, nlp)
360
  real_score = 0
361
  fake_score = 0
362
 
 
381
  return "FAKE", confidence
382
 
383
  def analyze_content_gemini(model, text):
384
+ """Analyze content using Gemini model"""
385
  prompt = f"""Analyze this news text and return a JSON object with the following structure:
386
  {{
387
  "gemini_analysis": {{
 
440
  "reasoning": ["Analysis failed to generate valid JSON"]
441
  }
442
  }
443
+