File size: 9,310 Bytes
30c7f0c
9963876
30c7f0c
94a65e4
 
a355e59
94a65e4
 
207a2e4
 
5e58061
 
207a2e4
1a2b17f
30c7f0c
94a65e4
1a2b17f
 
 
 
 
 
 
 
 
 
 
94a65e4
 
 
5e58061
 
 
 
 
 
 
 
 
 
 
 
 
9963876
 
 
df7ee24
9963876
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e58061
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9963876
5e58061
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30c7f0c
94a65e4
5ac49d2
94a65e4
 
85e3b89
 
 
94a65e4
 
 
 
 
 
 
 
c422cb8
94a65e4
 
 
 
 
df7ee24
e9db226
df7ee24
 
94a65e4
df7ee24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21bc2b4
 
709d77e
15e4be7
94a65e4
c37cf7b
94a65e4
 
 
c37cf7b
 
 
 
94a65e4
df7ee24
 
 
 
 
 
94a65e4
21bc2b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116a946
21bc2b4
 
 
94a65e4
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import streamlit as st
import random

# Page configuration
# NOTE(review): this call sits between the imports, ahead of
# `from final import *` -- presumably because Streamlit expects
# set_page_config to be the first Streamlit command executed. Confirm
# before moving it below the remaining imports.
st.set_page_config(
    page_title="Nexus NLP News Classifier"
)

import pandas as pd
from final import *
from pydantic import BaseModel
import plotly.graph_objects as go

# Heavy resources are loaded once and shared across Streamlit reruns.
@st.cache_resource
def initialize_models():
    """Load the spaCy pipeline, tokenizer, classifier and knowledge graph.

    The result is cached by ``st.cache_resource``, so the body only runs
    the first time it is called.

    Returns:
        A 4-tuple ``(nlp, tokenizer, model, knowledge_graph)``.
    """
    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        # spaCy signals "model package not installed" with OSError; download
        # it once and retry. Anything else (including KeyboardInterrupt) is
        # allowed to propagate instead of being swallowed by a bare except.
        spacy.cli.download("en_core_web_sm")
        nlp = spacy.load("en_core_web_sm")

    # The fine-tuned weights come from a local checkpoint, while the tokenizer
    # is loaded from the 'microsoft/deberta-v3-small' identifier.
    model_path = "./results/checkpoint-753"
    tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-small')
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    model.eval()  # inference only

    knowledge_graph = load_knowledge_graph()
    return nlp, tokenizer, model, knowledge_graph


class NewsInput(BaseModel):
    """Pydantic model holding a single piece of news text.

    NOTE(review): nothing in the visible part of this file references this
    model -- confirm whether it is still needed.
    """

    # Raw news article text.
    text: str

def generate_knowledge_graph_viz(text, nlp, tokenizer, model, edge_fraction=0.3):
    """Build a Plotly figure of the knowledge graph derived from ``text``.

    A fresh ``KnowledgeGraphBuilder`` is populated from ``text``; every node
    is drawn, but only a random sample of the edges, to keep the plot
    readable. Edges are red when the classifier predicts "FAKE" and green
    otherwise.

    Args:
        text: News text to visualise.
        nlp: spaCy pipeline passed through to the graph builder.
        tokenizer: Tokenizer passed to ``predict_with_model``.
        model: Classifier passed to ``predict_with_model``.
        edge_fraction: Fraction of the graph's edges to draw (default 0.3,
            the value that was previously hard-coded). The resulting sample
            size is clamped to ``[0, number of edges]``.

    Returns:
        A ``plotly.graph_objects.Figure`` with one edge trace and one node
        trace.
    """
    kg_builder = KnowledgeGraphBuilder()

    # Classify the text; the label decides the edge colour and is forwarded
    # (negated) to the graph builder.
    prediction, _ = predict_with_model(text, tokenizer, model)
    is_fake = prediction == "FAKE"

    kg_builder.update_knowledge_graph(text, not is_fake, nlp)
    graph = kg_builder.knowledge_graph

    # Randomly keep a subset of the edges.
    all_edges = list(graph.edges())
    sample_size = max(0, min(len(all_edges), int(len(all_edges) * edge_fraction)))
    # dict.fromkeys drops duplicate (source, target) pairs while keeping
    # order, so each connection is drawn at most once.
    drawn_edges = list(dict.fromkeys(random.sample(all_edges, k=sample_size)))

    # A single layout over the full graph positions every node, including
    # those whose edges were not sampled.
    pos = nx.spring_layout(graph)

    # Plotly draws one polyline per trace; a None entry breaks the line
    # between consecutive edges.
    edge_x, edge_y = [], []
    for edge in drawn_edges:
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]

    node_x, node_y, node_text = [], [], []
    for node in graph.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_text.append(node)

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(
            width=2,
            color='rgba(255,0,0,0.7)' if is_fake else 'rgba(0,255,0,0.7)'
        ),
        hoverinfo='none',
        mode='lines'
    )

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        hoverinfo='text',
        textposition='top center',
        marker=dict(
            size=15,
            color='white',
            line=dict(width=2, color='black')
        ),
        text=node_text
    )

    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            showlegend=False,
            hovermode='closest',
            margin=dict(b=0, l=0, r=0, t=0),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            plot_bgcolor='rgba(0,0,0,0)',
            paper_bgcolor='rgba(0,0,0,0)'
        )
    )

    return fig

# Streamlit UI
def _join_or_na(values):
    """Format a list-like Gemini field as comma-separated text, or 'N/A'."""
    if not values:
        return "N/A"
    if isinstance(values, str):
        # A bare string must not be split into characters by str.join.
        return values
    return ", ".join(str(item) for item in values)


def _run_gemini_analysis(news_text, max_retries=10):
    """Query Gemini up to ``max_retries`` times and return a result dict."""
    import logging

    logger = logging.getLogger(__name__)
    gemini_result = None

    for attempt in range(1, max_retries + 1):
        try:
            gemini_model = setup_gemini()
            candidate = analyze_content_gemini(gemini_model, news_text)
        except Exception as exc:
            # Best-effort call to an external service: keep retrying, but
            # leave a trace instead of silently discarding the failure.
            logger.warning(
                "Gemini analysis attempt %d/%d failed: %s",
                attempt, max_retries, exc,
            )
            continue

        if isinstance(candidate, dict) and candidate:
            gemini_result = candidate
            # A result with the 'gemini_analysis' section is complete.
            if candidate.get('gemini_analysis'):
                break

    # Use default values if every attempt failed or returned nothing usable.
    if not gemini_result:
        gemini_result = {
            "gemini_analysis": {
                "predicted_classification": "UNCERTAIN",
                "confidence_score": "50",
                "reasoning": ["Analysis temporarily unavailable"]
            }
        }
    return gemini_result


def _render_detailed_analysis(news_text, gemini_result, nlp, tokenizer, model):
    """Render the "Detailed Analysis" expander for one analysed text."""
    import logging

    with st.expander("Detailed Analysis"):
        try:
            # Text Classification
            st.subheader("📝 Text Classification")
            text_class = gemini_result.get('text_classification') or {}
            st.write(f"Category: {text_class.get('category', 'N/A')}")
            st.write(f"Writing Style: {text_class.get('writing_style', 'N/A')}")
            st.write(f"Target Audience: {text_class.get('target_audience', 'N/A')}")
            st.write(f"Content Type: {text_class.get('content_type', 'N/A')}")

            # Sentiment Analysis
            st.subheader("🎭 Sentiment Analysis")
            sentiment = gemini_result.get('sentiment_analysis') or {}
            st.write(f"Primary Emotion: {sentiment.get('primary_emotion', 'N/A')}")
            st.write(f"Emotional Intensity: {sentiment.get('emotional_intensity', 'N/A')}/10")
            st.write(f"Sensationalism Level: {sentiment.get('sensationalism_level', 'N/A')}")
            st.write("Bias Indicators:", _join_or_na(sentiment.get('bias_indicators')))

            # Entity Recognition (as reported by Gemini)
            st.subheader("🔍 Entity Recognition")
            gemini_entities = gemini_result.get('entity_recognition') or {}
            st.write(f"Source Credibility: {gemini_entities.get('source_credibility', 'N/A')}")
            st.write("People:", _join_or_na(gemini_entities.get('people')))
            st.write("Organizations:", _join_or_na(gemini_entities.get('organizations')))

            # Named Entities from spaCy
            st.subheader("🏷️ Named Entities")
            spacy_entities = extract_entities(news_text, nlp)
            df = pd.DataFrame(spacy_entities, columns=["Entity", "Type"])
            st.dataframe(df)

            # Knowledge Graph Visualization
            st.subheader("🕸️ Knowledge Graph")
            fig = generate_knowledge_graph_viz(news_text, nlp, tokenizer, model)
            st.plotly_chart(fig, use_container_width=True)

            # Analysis Reasoning
            st.subheader("💭 Analysis Reasoning")
            reasoning = (gemini_result.get('gemini_analysis') or {}).get('reasoning') or ['N/A']
            if isinstance(reasoning, str):
                # Iterating a bare string would print one character per line.
                reasoning = [reasoning]
            for point in reasoning:
                st.write(f"‒ {point}")

        except Exception:
            # Keep the page alive with a generic message for the user, but
            # record the traceback so the failure can be diagnosed.
            logging.getLogger(__name__).exception(
                "Failed to render detailed analysis"
            )
            st.error("Error processing analysis results")


def main():
    """Render the classifier page and analyse the submitted news text.

    Shows a text area and an "Analyze" button. On click, the text is
    classified by the ML model, the shared knowledge graph is updated, a
    Gemini analysis is requested (with retries and a default fallback), and
    the headline metrics plus a detailed expander are rendered.
    """
    st.title("Nexus NLP News Classifier")
    st.write("Enter news text below to analyze its authenticity")

    # Initialize models
    nlp, tokenizer, model, knowledge_graph = initialize_models()

    # Text input area
    news_text = st.text_area("News Text", height=200)

    if not st.button("Analyze"):
        return

    # Whitespace-only input is treated the same as no input.
    if not news_text or not news_text.strip():
        st.warning("Please enter some text to analyze")
        return

    with st.spinner("Analyzing..."):
        ml_prediction, ml_confidence = predict_with_model(news_text, tokenizer, model)

        # NOTE(review): the knowledge-graph prediction is not displayed
        # anywhere at the moment; the call is kept in case it has side
        # effects -- confirm and drop it if it does not.
        predict_with_knowledge_graph(news_text, knowledge_graph, nlp)

        # Update knowledge graph
        update_knowledge_graph(news_text, ml_prediction == "REAL", knowledge_graph, nlp)

        # Get Gemini analysis with retries
        gemini_result = _run_gemini_analysis(news_text)

        # Headline metrics. Only the ML model's prediction is shown; the
        # knowledge-graph and Gemini headline metrics are currently disabled.
        col1 = st.columns(1)[0]

        with col1:
            st.subheader("ML Model and Knowledge Graph Analysis")
            st.metric("Prediction", ml_prediction)
            st.metric("Confidence", f"{ml_confidence:.2f}%")

        # Single expander for all analysis details
        _render_detailed_analysis(news_text, gemini_result, nlp, tokenizer, model)

# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()