Spaces:

HeheBoi0769
/

Nexus_NLP_model

Running

App Files Community

Krish Patel committed on Feb 12

Commit

207a2e4

1 Parent(s): 32fef5a

Debugging 2

Browse files

Files changed (2) hide show

app.py +3 -2
final.py +19 -88

app.py CHANGED Viewed

@@ -42,8 +42,6 @@
 #         st.write(f"Prediction: {prediction} (Confidence: {confidence*100:.2f}%)")
 import streamlit as st
-from final import *
-import pandas as pd
 # Page configuration
 st.set_page_config(
@@ -52,6 +50,9 @@ st.set_page_config(
     layout="wide"
 )
 # Cache model loading
 @st.cache_resource
 def initialize_models():

 #         st.write(f"Prediction: {prediction} (Confidence: {confidence*100:.2f}%)")
 import streamlit as st
 # Page configuration
 st.set_page_config(
     layout="wide"
 )
+import pandas as pd
+from final import *
 # Cache model loading
 @st.cache_resource
 def initialize_models():

final.py CHANGED Viewed

@@ -269,13 +269,6 @@
 # if __name__ == "__main__":
 #     main()
-import streamlit as st
-# Page config
-st.set_page_config(
-    page_title="Nexus NLP News Classifier",
-    page_icon="📰",
-    layout="wide"
-)
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, DebertaV2Tokenizer
 import networkx as nx
@@ -286,26 +279,21 @@ import json
 import os
 import dotenv
 # Load environment variables
 dotenv.load_dotenv()
-# Load models and resources
-@st.cache_resource
-def load_nlp():
-    return spacy.load("en_core_web_sm")
-@st.cache_resource
-def load_model():
     model_path = "./results/checkpoint-753"
     tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-small')
     model = AutoModelForSequenceClassification.from_pretrained(model_path)
     model.eval()
-    return tokenizer, model
-@st.cache_resource
 def load_knowledge_graph():
-    graph_path = "./knowledge_graph_final.pkl"
     with open(graph_path, 'rb') as f:
         graph_data = pickle.load(f)
     knowledge_graph = nx.DiGraph()
@@ -316,16 +304,13 @@ def load_knowledge_graph():
     return knowledge_graph
 def setup_gemini():
     genai.configure(api_key=os.getenv("GEMINI_API"))
     model = genai.GenerativeModel('gemini-pro')
     return model
-# Initialize resources
-nlp = load_nlp()
-tokenizer, model = load_model()
-knowledge_graph = load_knowledge_graph()
-def predict_with_model(text):
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
     with torch.no_grad():
         outputs = model(**inputs)
@@ -334,13 +319,15 @@ def predict_with_model(text):
     confidence = probabilities[0][predicted_label].item() * 100
     return "FAKE" if predicted_label == 1 else "REAL", confidence
-def extract_entities(text):
     doc = nlp(text)
     entities = [(ent.text, ent.label_) for ent in doc.ents]
     return entities
-def update_knowledge_graph(text, is_real):
-    entities = extract_entities(text)
     for entity, entity_type in entities:
         if not knowledge_graph.has_node(entity):
             knowledge_graph.add_node(
@@ -367,8 +354,9 @@ def update_knowledge_graph(text, is_real):
             else:
                 knowledge_graph[entity1][entity2]['weight'] += 1
-def predict_with_knowledge_graph(text):
-    entities = extract_entities(text)
     real_score = 0
     fake_score = 0
@@ -393,6 +381,7 @@ def predict_with_knowledge_graph(text):
         return "FAKE", confidence
 def analyze_content_gemini(model, text):
     prompt = f"""Analyze this news text and return a JSON object with the following structure:
     {{
         "gemini_analysis": {{
@@ -451,62 +440,4 @@ def analyze_content_gemini(model, text):
                 "reasoning": ["Analysis failed to generate valid JSON"]
             }
         }
-def main():
-    st.title("📰 Nexus NLP News Classifier")
-    st.write("Enter news text below to analyze its authenticity")
-    # Query parameters for API functionality
-    query_params = st.query_params
-    if "text" in query_params:
-        text_input = query_params["text"][0]
-        ml_prediction, ml_confidence = predict_with_model(text_input)
-        st.json({"prediction": ml_prediction, "confidence": ml_confidence})
-        return
-    # Regular UI
-    news_text = st.text_area("News Text", height=200)
-    if st.button("Analyze"):
-        if news_text:
-            with st.spinner("Analyzing..."):
-                # Get all predictions
-                ml_prediction, ml_confidence = predict_with_model(news_text)
-                kg_prediction, kg_confidence = predict_with_knowledge_graph(news_text)
-                update_knowledge_graph(news_text, ml_prediction == "REAL")
-                gemini_model = setup_gemini()
-                gemini_result = analyze_content_gemini(gemini_model, news_text)
-                # Display results
-                col1, col2, col3 = st.columns(3)
-                with col1:
-                    st.subheader("ML Model Analysis")
-                    st.metric("Prediction", ml_prediction)
-                    st.metric("Confidence", f"{ml_confidence:.2f}%")
-                with col2:
-                    st.subheader("Knowledge Graph Analysis")
-                    st.metric("Prediction", kg_prediction)
-                    st.metric("Confidence", f"{kg_confidence:.2f}%")
-                with col3:
-                    st.subheader("Gemini Analysis")
-                    gemini_pred = gemini_result["gemini_analysis"]["predicted_classification"]
-                    gemini_conf = gemini_result["gemini_analysis"]["confidence_score"]
-                    st.metric("Prediction", gemini_pred)
-                    st.metric("Confidence", f"{gemini_conf}%")
-                with st.expander("View Detailed Analysis"):
-                    st.json(gemini_result)
-                with st.expander("Named Entities"):
-                    entities = extract_entities(news_text)
-                    st.write(entities)
-        else:
-            st.warning("Please enter some text to analyze")
-if __name__ == "__main__":
-    main()

 # if __name__ == "__main__":
 #     main()
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, DebertaV2Tokenizer
 import networkx as nx
 import os
 import dotenv
 # Load environment variables
 dotenv.load_dotenv()
+def load_models():
+    """Load all required ML models"""
+    nlp = spacy.load("en_core_web_sm")
     model_path = "./results/checkpoint-753"
     tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-small')
     model = AutoModelForSequenceClassification.from_pretrained(model_path)
     model.eval()
+    return nlp, tokenizer, model
 def load_knowledge_graph():
+    """Load and initialize knowledge graph"""
+    graph_path = "./models/knowledge_graph.pkl"
     with open(graph_path, 'rb') as f:
         graph_data = pickle.load(f)
     knowledge_graph = nx.DiGraph()
     return knowledge_graph
 def setup_gemini():
+    """Initialize Gemini model"""
     genai.configure(api_key=os.getenv("GEMINI_API"))
     model = genai.GenerativeModel('gemini-pro')
     return model
+def predict_with_model(text, tokenizer, model):
+    """Make predictions using the ML model"""
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
     with torch.no_grad():
         outputs = model(**inputs)
     confidence = probabilities[0][predicted_label].item() * 100
     return "FAKE" if predicted_label == 1 else "REAL", confidence
+def extract_entities(text, nlp):
+    """Extract named entities from text"""
     doc = nlp(text)
     entities = [(ent.text, ent.label_) for ent in doc.ents]
     return entities
+def update_knowledge_graph(text, is_real, knowledge_graph, nlp):
+    """Update knowledge graph with new information"""
+    entities = extract_entities(text, nlp)
     for entity, entity_type in entities:
         if not knowledge_graph.has_node(entity):
             knowledge_graph.add_node(
             else:
                 knowledge_graph[entity1][entity2]['weight'] += 1
+def predict_with_knowledge_graph(text, knowledge_graph, nlp):
+    """Make predictions using the knowledge graph"""
+    entities = extract_entities(text, nlp)
     real_score = 0
     fake_score = 0
         return "FAKE", confidence
 def analyze_content_gemini(model, text):
+    """Analyze content using Gemini model"""
     prompt = f"""Analyze this news text and return a JSON object with the following structure:
     {{
         "gemini_analysis": {{
                 "reasoning": ["Analysis failed to generate valid JSON"]
             }
         }