Spaces:
Running
Running
Krish Patel
committed on
Commit
·
207a2e4
1
Parent(s):
32fef5a
Debugging 2
Browse files
app.py
CHANGED
@@ -42,8 +42,6 @@
|
|
42 |
# st.write(f"Prediction: {prediction} (Confidence: {confidence*100:.2f}%)")
|
43 |
|
44 |
import streamlit as st
|
45 |
-
from final import *
|
46 |
-
import pandas as pd
|
47 |
|
48 |
# Page configuration
|
49 |
st.set_page_config(
|
@@ -52,6 +50,9 @@ st.set_page_config(
|
|
52 |
layout="wide"
|
53 |
)
|
54 |
|
|
|
|
|
|
|
55 |
# Cache model loading
|
56 |
@st.cache_resource
|
57 |
def initialize_models():
|
|
|
42 |
# st.write(f"Prediction: {prediction} (Confidence: {confidence*100:.2f}%)")
|
43 |
|
44 |
import streamlit as st
|
|
|
|
|
45 |
|
46 |
# Page configuration
|
47 |
st.set_page_config(
|
|
|
50 |
layout="wide"
|
51 |
)
|
52 |
|
53 |
+
import pandas as pd
|
54 |
+
from final import *
|
55 |
+
|
56 |
# Cache model loading
|
57 |
@st.cache_resource
|
58 |
def initialize_models():
|
final.py
CHANGED
@@ -269,13 +269,6 @@
|
|
269 |
# if __name__ == "__main__":
|
270 |
# main()
|
271 |
|
272 |
-
import streamlit as st
|
273 |
-
# Page config
|
274 |
-
st.set_page_config(
|
275 |
-
page_title="Nexus NLP News Classifier",
|
276 |
-
page_icon="📰",
|
277 |
-
layout="wide"
|
278 |
-
)
|
279 |
import torch
|
280 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DebertaV2Tokenizer
|
281 |
import networkx as nx
|
@@ -286,26 +279,21 @@ import json
|
|
286 |
import os
|
287 |
import dotenv
|
288 |
|
289 |
-
|
290 |
# Load environment variables
|
291 |
dotenv.load_dotenv()
|
292 |
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
return spacy.load("en_core_web_sm")
|
297 |
-
|
298 |
-
@st.cache_resource
|
299 |
-
def load_model():
|
300 |
model_path = "./results/checkpoint-753"
|
301 |
tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-small')
|
302 |
model = AutoModelForSequenceClassification.from_pretrained(model_path)
|
303 |
model.eval()
|
304 |
-
return tokenizer, model
|
305 |
|
306 |
-
@st.cache_resource
|
307 |
def load_knowledge_graph():
|
308 |
-
|
|
|
309 |
with open(graph_path, 'rb') as f:
|
310 |
graph_data = pickle.load(f)
|
311 |
knowledge_graph = nx.DiGraph()
|
@@ -316,16 +304,13 @@ def load_knowledge_graph():
|
|
316 |
return knowledge_graph
|
317 |
|
318 |
def setup_gemini():
|
|
|
319 |
genai.configure(api_key=os.getenv("GEMINI_API"))
|
320 |
model = genai.GenerativeModel('gemini-pro')
|
321 |
return model
|
322 |
|
323 |
-
|
324 |
-
|
325 |
-
tokenizer, model = load_model()
|
326 |
-
knowledge_graph = load_knowledge_graph()
|
327 |
-
|
328 |
-
def predict_with_model(text):
|
329 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
|
330 |
with torch.no_grad():
|
331 |
outputs = model(**inputs)
|
@@ -334,13 +319,15 @@ def predict_with_model(text):
|
|
334 |
confidence = probabilities[0][predicted_label].item() * 100
|
335 |
return "FAKE" if predicted_label == 1 else "REAL", confidence
|
336 |
|
337 |
-
def extract_entities(text):
|
|
|
338 |
doc = nlp(text)
|
339 |
entities = [(ent.text, ent.label_) for ent in doc.ents]
|
340 |
return entities
|
341 |
|
342 |
-
def update_knowledge_graph(text, is_real):
|
343 |
-
|
|
|
344 |
for entity, entity_type in entities:
|
345 |
if not knowledge_graph.has_node(entity):
|
346 |
knowledge_graph.add_node(
|
@@ -367,8 +354,9 @@ def update_knowledge_graph(text, is_real):
|
|
367 |
else:
|
368 |
knowledge_graph[entity1][entity2]['weight'] += 1
|
369 |
|
370 |
-
def predict_with_knowledge_graph(text):
|
371 |
-
|
|
|
372 |
real_score = 0
|
373 |
fake_score = 0
|
374 |
|
@@ -393,6 +381,7 @@ def predict_with_knowledge_graph(text):
|
|
393 |
return "FAKE", confidence
|
394 |
|
395 |
def analyze_content_gemini(model, text):
|
|
|
396 |
prompt = f"""Analyze this news text and return a JSON object with the following structure:
|
397 |
{{
|
398 |
"gemini_analysis": {{
|
@@ -451,62 +440,4 @@ def analyze_content_gemini(model, text):
|
|
451 |
"reasoning": ["Analysis failed to generate valid JSON"]
|
452 |
}
|
453 |
}
|
454 |
-
|
455 |
-
def main():
|
456 |
-
st.title("📰 Nexus NLP News Classifier")
|
457 |
-
st.write("Enter news text below to analyze its authenticity")
|
458 |
-
|
459 |
-
# Query parameters for API functionality
|
460 |
-
query_params = st.query_params
|
461 |
-
if "text" in query_params:
|
462 |
-
text_input = query_params["text"][0]
|
463 |
-
ml_prediction, ml_confidence = predict_with_model(text_input)
|
464 |
-
st.json({"prediction": ml_prediction, "confidence": ml_confidence})
|
465 |
-
return
|
466 |
-
|
467 |
-
# Regular UI
|
468 |
-
news_text = st.text_area("News Text", height=200)
|
469 |
-
|
470 |
-
if st.button("Analyze"):
|
471 |
-
if news_text:
|
472 |
-
with st.spinner("Analyzing..."):
|
473 |
-
# Get all predictions
|
474 |
-
ml_prediction, ml_confidence = predict_with_model(news_text)
|
475 |
-
kg_prediction, kg_confidence = predict_with_knowledge_graph(news_text)
|
476 |
-
update_knowledge_graph(news_text, ml_prediction == "REAL")
|
477 |
-
|
478 |
-
gemini_model = setup_gemini()
|
479 |
-
gemini_result = analyze_content_gemini(gemini_model, news_text)
|
480 |
-
|
481 |
-
# Display results
|
482 |
-
col1, col2, col3 = st.columns(3)
|
483 |
-
|
484 |
-
with col1:
|
485 |
-
st.subheader("ML Model Analysis")
|
486 |
-
st.metric("Prediction", ml_prediction)
|
487 |
-
st.metric("Confidence", f"{ml_confidence:.2f}%")
|
488 |
-
|
489 |
-
with col2:
|
490 |
-
st.subheader("Knowledge Graph Analysis")
|
491 |
-
st.metric("Prediction", kg_prediction)
|
492 |
-
st.metric("Confidence", f"{kg_confidence:.2f}%")
|
493 |
-
|
494 |
-
with col3:
|
495 |
-
st.subheader("Gemini Analysis")
|
496 |
-
gemini_pred = gemini_result["gemini_analysis"]["predicted_classification"]
|
497 |
-
gemini_conf = gemini_result["gemini_analysis"]["confidence_score"]
|
498 |
-
st.metric("Prediction", gemini_pred)
|
499 |
-
st.metric("Confidence", f"{gemini_conf}%")
|
500 |
-
|
501 |
-
with st.expander("View Detailed Analysis"):
|
502 |
-
st.json(gemini_result)
|
503 |
-
|
504 |
-
with st.expander("Named Entities"):
|
505 |
-
entities = extract_entities(news_text)
|
506 |
-
st.write(entities)
|
507 |
-
|
508 |
-
else:
|
509 |
-
st.warning("Please enter some text to analyze")
|
510 |
-
|
511 |
-
if __name__ == "__main__":
|
512 |
-
main()
|
|
|
269 |
# if __name__ == "__main__":
|
270 |
# main()
|
271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
import torch
|
273 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DebertaV2Tokenizer
|
274 |
import networkx as nx
|
|
|
279 |
import os
|
280 |
import dotenv
|
281 |
|
|
|
282 |
# Load environment variables
|
283 |
dotenv.load_dotenv()
|
284 |
|
285 |
+
def load_models():
|
286 |
+
"""Load all required ML models"""
|
287 |
+
nlp = spacy.load("en_core_web_sm")
|
|
|
|
|
|
|
|
|
288 |
model_path = "./results/checkpoint-753"
|
289 |
tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-small')
|
290 |
model = AutoModelForSequenceClassification.from_pretrained(model_path)
|
291 |
model.eval()
|
292 |
+
return nlp, tokenizer, model
|
293 |
|
|
|
294 |
def load_knowledge_graph():
|
295 |
+
"""Load and initialize knowledge graph"""
|
296 |
+
graph_path = "./models/knowledge_graph.pkl"
|
297 |
with open(graph_path, 'rb') as f:
|
298 |
graph_data = pickle.load(f)
|
299 |
knowledge_graph = nx.DiGraph()
|
|
|
304 |
return knowledge_graph
|
305 |
|
306 |
def setup_gemini():
|
307 |
+
"""Initialize Gemini model"""
|
308 |
genai.configure(api_key=os.getenv("GEMINI_API"))
|
309 |
model = genai.GenerativeModel('gemini-pro')
|
310 |
return model
|
311 |
|
312 |
+
def predict_with_model(text, tokenizer, model):
|
313 |
+
"""Make predictions using the ML model"""
|
|
|
|
|
|
|
|
|
314 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
|
315 |
with torch.no_grad():
|
316 |
outputs = model(**inputs)
|
|
|
319 |
confidence = probabilities[0][predicted_label].item() * 100
|
320 |
return "FAKE" if predicted_label == 1 else "REAL", confidence
|
321 |
|
322 |
+
def extract_entities(text, nlp):
|
323 |
+
"""Extract named entities from text"""
|
324 |
doc = nlp(text)
|
325 |
entities = [(ent.text, ent.label_) for ent in doc.ents]
|
326 |
return entities
|
327 |
|
328 |
+
def update_knowledge_graph(text, is_real, knowledge_graph, nlp):
|
329 |
+
"""Update knowledge graph with new information"""
|
330 |
+
entities = extract_entities(text, nlp)
|
331 |
for entity, entity_type in entities:
|
332 |
if not knowledge_graph.has_node(entity):
|
333 |
knowledge_graph.add_node(
|
|
|
354 |
else:
|
355 |
knowledge_graph[entity1][entity2]['weight'] += 1
|
356 |
|
357 |
+
def predict_with_knowledge_graph(text, knowledge_graph, nlp):
|
358 |
+
"""Make predictions using the knowledge graph"""
|
359 |
+
entities = extract_entities(text, nlp)
|
360 |
real_score = 0
|
361 |
fake_score = 0
|
362 |
|
|
|
381 |
return "FAKE", confidence
|
382 |
|
383 |
def analyze_content_gemini(model, text):
|
384 |
+
"""Analyze content using Gemini model"""
|
385 |
prompt = f"""Analyze this news text and return a JSON object with the following structure:
|
386 |
{{
|
387 |
"gemini_analysis": {{
|
|
|
440 |
"reasoning": ["Analysis failed to generate valid JSON"]
|
441 |
}
|
442 |
}
|
443 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|