# Paperlens / TrendAnalysis.py
from typing import Optional

import pandas as pd
import numpy as np
import re
import random
import threading
import multiprocessing
import webbrowser

import torch
import umap
import hdbscan
import optuna
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import dash
from dash import dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
from bertopic import BERTopic
from bertopic.vectorizers import ClassTfidfTransformer
from bertopic.representation import KeyBERTInspired
from sklearn.feature_extraction.text import CountVectorizer, ENGLISH_STOP_WORDS
from sentence_transformers import SentenceTransformer
from fastapi import HTTPException, APIRouter, Request
from fastapi.responses import RedirectResponse
from pydantic import BaseModel
# Set seed for reproducibility
def set_seed(seed=42):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
if __name__ == "__main__":
set_seed(42)
multiprocessing.freeze_support()
global TitleName
TitleName = "Dashboard"
router = APIRouter()
class TrendAnalysisRequest(BaseModel):
    userId: str
    topic: str
    year: Optional[str] = None
    page: int = 0
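# A hypothetical request body for this model (illustrative values only):
# {"userId": "u123", "topic": "graph neural networks", "year": "2021", "page": 0}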
async def fetch_papers_with_pagination(request: Request, userId: str, topic: str, year: Optional[str] = None, page: int = 0):
# Build the query filter
query_filter = {"userId": userId, "topic": topic}
if year:
query_filter["year"] = year
# Count total matching documents
count_pipeline = [
{"$match": query_filter},
{"$unwind": "$papers"},
{"$count": "total_papers"}
]
collection = request.app.state.collection
count_result = await collection.aggregate(count_pipeline).to_list(length=1)
total_papers = count_result[0]['total_papers'] if count_result else 0
print(f"Total papers matching criteria: {total_papers}")
# If no papers found, return empty result
if total_papers == 0:
return pd.DataFrame(), 0, 0, 0, 0
# Define pagination constants
papers_per_page = 200
min_papers_last_page = 50
# Calculate basic pagination
if total_papers <= papers_per_page:
# Simple case: all papers fit in one page
total_pages = 1
else:
# Multiple pages case
full_pages = total_papers // papers_per_page
remaining = total_papers % papers_per_page
if remaining >= min_papers_last_page:
# If remaining papers meet minimum threshold, create a separate page
total_pages = full_pages + 1
else:
# Otherwise, we'll have exactly 'full_pages' pages
# The remaining papers will be added to the last page
total_pages = full_pages
# Ensure page is within valid range
if page >= total_pages:
return pd.DataFrame(), 0, total_pages, 0, total_papers
# Calculate skip and limit based on page number
if total_pages == 1:
# Only one page - return all papers
skip = 0
limit = total_papers
elif page < total_pages - 1:
# Regular full page
skip = page * papers_per_page
limit = papers_per_page
else:
# Last page - might include remaining papers
remaining = total_papers % papers_per_page
if remaining >= min_papers_last_page or remaining == 0:
# Last page with either enough remaining papers or perfectly divided
skip = page * papers_per_page
limit = remaining if remaining > 0 else papers_per_page
else:
# Last page with remaining papers that don't meet minimum threshold
# We distribute by adding them to the last page
skip = (total_pages - 1) * papers_per_page
limit = papers_per_page + remaining
print(f"Pagination: Page {page + 1} of {total_pages}, Skip {skip}, Limit {limit}")
# MongoDB aggregation pipeline
pipeline = [
{"$match": query_filter},
{"$unwind": "$papers"},
{"$replaceRoot": {"newRoot": "$papers"}},
{"$project": {
"_id": 0,
"paperId": 1,
"url": 1,
"title": 1,
"abstract": 1,
"citationCount": 1,
"influentialCitationCount": 1,
"embedding": 1,
"publicationDate": 1,
"authors": 1
}},
{"$sort": {"publicationDate": 1}},
{"$skip": skip},
{"$limit": limit}
]
# Execute the aggregation pipeline
cursor = collection.aggregate(pipeline)
papers = await cursor.to_list(None)
papers_count = len(papers)
print(f"Papers Retrieved: {papers_count}")
# Convert to DataFrame
df = pd.DataFrame(papers)
df = df.sort_values(by="publicationDate")
print(df[["paperId", "publicationDate"]].head(10))
return df, page, total_pages, papers_count, total_papers
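# The function returns (df, page, total_pages, papers_count, total_papers); e.g. the
# second page of the 430-paper example above would come back as (df, 1, 2, 230, 430).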
# Preprocessing function
def clean_text(text):
text = str(text).lower()
text = re.sub(r"[^a-zA-Z0-9\s]", "", text)
return ' '.join([word for word in text.split() if word not in ENGLISH_STOP_WORDS])
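# For example: clean_text("Deep Learning: A Survey (2020)!") -> "deep learning survey 2020"
# (lowercased, punctuation stripped, and the stop word "a" removed).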
# Adaptive clustering and topic modeling
def perform_trend_analysis(df):
    # Convert stored embeddings ({"vector": [...]}) to numpy arrays; anything else maps to None
    def convert_embedding(embedding):
        if isinstance(embedding, dict) and "vector" in embedding:
            return np.array(embedding["vector"], dtype=np.float64)
        return None
df["embedding"] = df["embedding"].apply(convert_embedding)
df = df.dropna(subset=["embedding"])
if df.empty:
return df, {}
df["clean_text"] = (df["abstract"].fillna("")).apply(clean_text)
def objective(trial):
umap_n_components = trial.suggest_int("umap_n_components", 1, 12)
umap_min_dist = trial.suggest_float("umap_min_dist", 0.1, 0.8)
umap_n_neighbors = trial.suggest_int("umap_n_neighbors", 2, 12)
hdbscan_min_cluster_size = trial.suggest_int("hdbscan_min_cluster_size", 2, 10)
hdbscan_min_samples = trial.suggest_int("hdbscan_min_samples", 1, 10)
hdbscan_cluster_selection_epsilon = trial.suggest_float("hdbscan_cluster_selection_epsilon", 0.2, 0.8)
hdbscan_cluster_selection_method = trial.suggest_categorical("hdbscan_cluster_selection_method",
["eom", "leaf"])
reducer_high_dim = umap.UMAP(
n_components=umap_n_components,
random_state=42,
min_dist=umap_min_dist,
n_neighbors=umap_n_neighbors,
metric="cosine"
)
        reduced_embeddings_high_dim = reducer_high_dim.fit_transform(
            np.vstack(df["embedding"].values)
        ).astype(np.float64)
clusterer = hdbscan.HDBSCAN(
min_cluster_size=hdbscan_min_cluster_size,
min_samples=hdbscan_min_samples,
cluster_selection_epsilon=hdbscan_cluster_selection_epsilon,
cluster_selection_method=hdbscan_cluster_selection_method,
prediction_data=True,
core_dist_n_jobs=1
)
labels = clusterer.fit_predict(reduced_embeddings_high_dim)
        # DBCV needs at least two non-noise clusters and can fail numerically on
        # degenerate partitions, so guard the score computation.
        if len(set(labels) - {-1}) > 1:
            try:
                dbcv_score = hdbscan.validity.validity_index(reduced_embeddings_high_dim, labels)
            except ValueError:
                dbcv_score = -np.inf
        else:
            dbcv_score = -np.inf
        return dbcv_score
study = optuna.create_study(
direction="maximize",
sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)
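    # Note: every trial refits UMAP and HDBSCAN from scratch, so 100 trials can be
    # slow on large pages; study.best_value and study.trials are handy for tuning.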
best_params = study.best_params
umap_model = umap.UMAP(
n_components=best_params["umap_n_components"],
random_state=42,
min_dist=best_params["umap_min_dist"],
n_neighbors=best_params["umap_n_neighbors"],
metric="cosine"
)
hdbscan_model = hdbscan.HDBSCAN(
min_cluster_size=best_params["hdbscan_min_cluster_size"],
min_samples=best_params["hdbscan_min_samples"],
cluster_selection_epsilon=best_params["hdbscan_cluster_selection_epsilon"],
cluster_selection_method=best_params["hdbscan_cluster_selection_method"],
prediction_data=True,
core_dist_n_jobs=1
)
vectorizer = CountVectorizer(
stop_words=list(ENGLISH_STOP_WORDS),
ngram_range=(2, 3)
)
representation_model = KeyBERTInspired()
embedding_model = SentenceTransformer("allenai/specter")
topic_model = BERTopic(
vectorizer_model=vectorizer,
umap_model=umap_model,
hdbscan_model=hdbscan_model,
embedding_model=embedding_model,
nr_topics='auto',
top_n_words=8,
representation_model=representation_model,
ctfidf_model=ClassTfidfTransformer(reduce_frequent_words=False, bm25_weighting=True)
)
    topics, _ = topic_model.fit_transform(df["clean_text"].tolist(), np.vstack(df["embedding"].values))
df["topic"] = topics
topic_labels = {t: " | ".join([word for word, _ in topic_model.get_topic(t)][:8]) for t in set(topics)}
    reduced_embeddings_2d = umap.UMAP(n_components=2, random_state=42).fit_transform(
        np.vstack(df["embedding"].values)
    ).astype(np.float64)
df["x"] = reduced_embeddings_2d[:, 0]
df["y"] = reduced_embeddings_2d[:, 1]
df["topic_label"] = df["topic"].map(topic_labels)
return df, topic_labels
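# A minimal usage sketch (assumes df carries "embedding" and "abstract" columns):
#     df, topic_labels = perform_trend_analysis(df)
#     # topic_labels -> {topic_id: "bigram | bigram | ...", ...}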
def build_dashboard(df, titleNm, topic_year, existing_app=None):
    TitleName = titleNm + "_" + topic_year
color_palette = px.colors.qualitative.Vivid
unique_topics = sorted(df["topic"].unique())
color_map = {topic: color_palette[i % len(color_palette)] for i, topic in enumerate(unique_topics)}
# Map colors to topics
df["color"] = df["topic"].map(color_map)
# Calculate the number of papers in each cluster
cluster_sizes = df.groupby("topic").size().reset_index(name="paper_count")
df = df.merge(cluster_sizes, on="topic", how="left")
app = existing_app if existing_app else dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY])
    # Improved marker scaling with a better range; guard against the degenerate
    # case where every cluster has the same paper count (max == min).
    min_size = 50
    max_size = 140
    size_range = df["paper_count"].max() - df["paper_count"].min()
    if size_range > 0:
        df["marker_size"] = ((df["paper_count"] - df["paper_count"].min()) / size_range) * (max_size - min_size) + min_size
    else:
        df["marker_size"] = (min_size + max_size) / 2
# Add log-transformed citation and influence columns
df["log_citation"] = np.log1p(df["citationCount"])
df["log_influence"] = np.log1p(df["influentialCitationCount"])
# Bayesian shrinkage for citations and influence
global_median_citation = df["log_citation"].median()
global_median_influence = df["log_influence"].median()
C = 10 # Shrinkage constant
def bayesian_shrinkage(group, global_median, C):
return (group.sum() + C * global_median) / (len(group) + C)
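    # Illustrative numbers: with C=10 and a global median of 2.0, a 3-paper cluster
    # whose log-citations sum to 9 is scored (9 + 10*2.0) / (3 + 10) ≈ 2.23, i.e.
    # small clusters are pulled toward the global median rather than trusted outright.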
adjusted_citations = df.groupby("topic")["log_citation"].apply(
lambda x: bayesian_shrinkage(x, global_median_citation, C))
adjusted_influence = df.groupby("topic")["log_influence"].apply(
lambda x: bayesian_shrinkage(x, global_median_influence, C))
# Merge adjusted metrics back into the dataframe
df = df.merge(adjusted_citations.rename("adjusted_citation"), on="topic")
df = df.merge(adjusted_influence.rename("adjusted_influence"), on="topic")
# Calculate global percentiles for thresholds
citation_25th = df["adjusted_citation"].quantile(0.25)
citation_75th = df["adjusted_citation"].quantile(0.75)
influence_25th = df["adjusted_influence"].quantile(0.25)
influence_75th = df["adjusted_influence"].quantile(0.75)
# Enhanced theme classification with more distinct emojis
def classify_theme(row):
if row["adjusted_citation"] >= citation_75th and row["adjusted_influence"] >= influence_75th:
return "🔥 Hot Topic"
elif row["adjusted_citation"] <= citation_25th and row["adjusted_influence"] >= influence_75th:
return "💎 Gap Opportunity"
elif row["adjusted_citation"] >= citation_75th and row["adjusted_influence"] <= influence_25th:
return "⚠️ Risky Theme"
else:
return "🔄 Neutral"
df["theme"] = df.apply(classify_theme, axis=1)
# Create a more visually appealing figure
fig = go.Figure()
# Add subtle grid lines for reference
fig.update_xaxes(
showgrid=True,
gridwidth=0.1,
gridcolor='rgba(255, 255, 255, 0.05)',
zeroline=False
)
fig.update_yaxes(
showgrid=True,
gridwidth=0.1,
gridcolor='rgba(255, 255, 255, 0.05)',
zeroline=False
)
for topic in unique_topics:
topic_data = df[df["topic"] == topic]
# Get cluster center
center_x = topic_data["x"].mean()
center_y = topic_data["y"].mean()
# Get label
        full_topic_formatted = (
            topic_data["topic_label"].iloc[0]
            if "topic_label" in topic_data.columns
            else f"Cluster {topic}"
        )
# Add a subtle glow effect with a larger outer circle
fig.add_trace(
go.Scatter(
x=[center_x],
y=[center_y],
mode="markers",
marker=dict(
color=color_map[topic],
size=topic_data["marker_size"].iloc[0] * 1.2, # Slightly larger for glow effect
opacity=0.3,
line=dict(width=0),
symbol="circle",
),
showlegend=False,
hoverinfo="none",
)
)
# Add main cluster circle with enhanced styling
fig.add_trace(
go.Scatter(
x=[center_x],
y=[center_y],
mode="markers+text",
marker=dict(
color=color_map[topic],
size=topic_data["marker_size"].iloc[0],
opacity=0.85,
line=dict(width=2, color="white"),
symbol="circle",
),
text=[f"{topic}"],
textposition="middle center",
textfont=dict(
family="Arial Black",
size=16,
color="white"
),
name=f"{topic}",
hovertemplate=(
"<b>Cluster ID:</b> %{text}<br>" +
"<b>Name:</b><br>" + full_topic_formatted + "<br>" +
"<b>Papers:</b> " + str(topic_data["paper_count"].iloc[0]) + "<br>" +
"<b>Popularity:</b> " + (
"🔼 High" if topic_data["adjusted_citation"].iloc[0] >= citation_75th else "🔽 Low") +
f" (Adjusted Citation: {topic_data['adjusted_citation'].iloc[0]:.2f})<br>" +
"<b>Impactfulness:</b> " + (
"🔼 High" if topic_data["adjusted_influence"].iloc[0] >= influence_75th else "🔽 Low") +
f" (Adjusted Influence: {topic_data['adjusted_influence'].iloc[0]:.2f})<br>" +
"<b>Theme:</b> " + topic_data["theme"].iloc[0] +
"<extra></extra>"
),
customdata=[[topic]],
)
)
# Add an aesthetic background with gradient
fig.update_layout(
shapes=[
# Improved gradient background
dict(
type="rect",
xref="paper",
yref="paper",
x0=0,
y0=0,
x1=1,
y1=1,
fillcolor="rgba(0, 0, 40, 0.95)",
line_width=0,
layer="below"
),
# Add a subtle radial gradient effect
dict(
type="circle",
xref="paper",
yref="paper",
x0=0.3,
y0=0.3,
x1=0.7,
y1=0.7,
fillcolor="rgba(50, 50, 120, 0.2)",
line_width=0,
layer="below"
)
],
template="plotly_dark",
title={
'text': f"<b>{TitleName.title()}</b>",
'y': 0.97,
'x': 0.5,
'xanchor': 'center',
'yanchor': 'top',
'font': dict(
family="Arial Black",
size=28,
color="white",
),
'xref': 'paper',
'yref': 'paper',
},
margin=dict(l=40, r=40, b=150, t=100),
hovermode="closest",
xaxis=dict(showticklabels=False),
yaxis=dict(showticklabels=False),
paper_bgcolor="rgba(0,0,0,0)",
plot_bgcolor="rgba(0,0,0,0)",
dragmode="pan",
legend=dict(
orientation="h",
yanchor="bottom",
y=-0.15,
xanchor="center",
x=0.5,
bgcolor="rgba(30,30,60,0.5)",
bordercolor="rgba(255,255,255,0.2)",
borderwidth=1
),
)
# Add subtle animation options
fig.update_layout(
updatemenus=[
dict(
type="buttons",
showactive=False,
buttons=[
dict(
label="Reset View",
method="relayout",
args=[{"xaxis.range": None, "yaxis.range": None}]
),
],
x=0.05,
y=0.05,
xanchor="left",
yanchor="bottom",
bgcolor="rgba(50,50,80,0.7)",
bordercolor="rgba(255,255,255,0.2)",
)
]
)
# Enhanced app layout with modern design elements
app.layout = dbc.Container(
fluid=True,
style={
"backgroundColor": "#111122",
"minHeight": "100vh",
"height": "100%",
"width": "100%",
"backgroundImage": "linear-gradient(135deg, #111122 0%, #15162c 100%)",
"padding": "20px"
},
children=[
dbc.Row([
dbc.Col(html.H1(
"Trend Analysis Dashboard ",
style={
"textAlign": "center",
"color": "white",
"marginBottom": "5px",
"fontFamily": "Arial Black",
"textShadow": "2px 2px 8px rgba(0,0,0,0.7)",
"letterSpacing": "2px",
"fontSize": "42px",
"background": "linear-gradient(135deg, #790091 0%, #565cd5 100%)",
"WebkitBackgroundClip": "text",
"WebkitTextFillColor": "transparent",
"paddingTop": "10px"
}
), width=10),
dbc.Col([
html.Button(
[
html.I(className="fas fa-download mr-2"),
" Save Dashboard"
],
id="download-button",
className="btn btn-outline-light",
style={
"marginTop": "10px",
"backgroundColor": "rgba(80, 80, 150, 0.4)",
"border": "1px solid rgba(100, 100, 200, 0.5)",
"borderRadius": "8px",
"padding": "8px 15px",
"boxShadow": "0px 4px 8px rgba(0, 0, 0, 0.3)",
"transition": "all 0.3s ease",
"fontSize": "14px",
"fontWeight": "bold"
}
),
# Add the download component
dcc.Download(id="download-dashboard")
], width=2),
dbc.Col(html.P(
"Interactive visualization of research topics and their relationships",
style={
"textAlign": "center",
"color": "#aaddff",
"marginBottom": "15px",
"fontStyle": "italic",
"fontSize": "16px",
"fontWeight": "300",
"letterSpacing": "0.5px",
"textShadow": "1px 1px 3px rgba(0,0,0,0.5)",
}
), width=12),
]),
dbc.Row([
dbc.Col(
dbc.Card(
dbc.CardBody([
dcc.Graph(
id="cluster-graph",
figure=fig,
config={
"scrollZoom": True,
"displayModeBar": True,
"modeBarButtonsToRemove": ["select2d", "lasso2d"]
}, style={"height": "80vh", "min-height": "800px"}
)
], style={"height": "80vh", "min-height": "800px"}),
style={
"backgroundColor": "rgba(20, 20, 40, 0.7)",
"borderRadius": "15px",
"boxShadow": "0px 10px 30px rgba(0, 0, 0, 0.5)",
"border": "1px solid rgba(100, 100, 200, 0.3)",
"height": "80vh",
"min-height": "800px" # Ensure minimum height
}
),
width=9
),
dbc.Col(
dbc.Card(
dbc.CardBody([
html.H3("Paper List", style={
"textAlign": "center",
"marginBottom": "15px",
"color": "#ffffff",
"fontFamily": "Arial",
"fontWeight": "bold",
"textShadow": "1px 1px 3px rgba(0,0,0,0.3)"
}),
html.Hr(style={"borderColor": "rgba(100, 100, 200, 0.3)", "margin": "10px 0 20px 0"}),
html.Div(
id="paper-list",
style={
"overflowY": "auto",
"height": "700px",
"padding": "5px"
},
children=html.Div([
html.Div(
html.I(className="fas fa-mouse-pointer", style={"marginRight": "10px"}),
style={"textAlign": "center", "fontSize": "24px", "marginBottom": "10px",
"color": "#7f8fa6"}
),
html.P("Click on a cluster to view its papers",
style={"textAlign": "center", "color": "#7f8fa6"})
])
),
],
style={
"backgroundColor": "rgba(30, 30, 50, 0.8)",
"borderRadius": "15px",
"padding": "20px",
"height": "100%"
}),
style={
"height": "800px",
"boxShadow": "0px 10px 30px rgba(0, 0, 0, 0.5)",
"border": "1px solid rgba(100, 100, 200, 0.3)",
"borderRadius": "15px"
}
),
width=3
),
], style={"marginTop": "20px"}),
# Add a footer with theme legend
dbc.Row([
dbc.Col(
dbc.Card(
dbc.CardBody([
html.H5("Theme Legend", style={"textAlign": "center", "marginBottom": "15px"}),
dbc.Row([
dbc.Col(html.Div([
html.Span("🔥", style={"fontSize": "20px", "marginRight": "10px"}),
"Hot Topic: High citations & high influence"
]), width=3),
dbc.Col(html.Div([
html.Span("💎", style={"fontSize": "20px", "marginRight": "10px"}),
"Gap Opportunity: Low citations but high influence"
]), width=3),
dbc.Col(html.Div([
html.Span("⚠️", style={"fontSize": "20px", "marginRight": "10px"}),
"Risky Theme: High citations but low influence"
]), width=3),
dbc.Col(html.Div([
html.Span("🔄", style={"fontSize": "20px", "marginRight": "10px"}),
"Neutral: Average citations and influence"
]), width=3),
])
]),
style={
"backgroundColor": "rgba(30, 30, 50, 0.8)",
"borderRadius": "15px",
"marginTop": "20px",
"boxShadow": "0px 5px 15px rgba(0, 0, 0, 0.3)",
"border": "1px solid rgba(100, 100, 200, 0.3)"
}
),
width=12
),
]),
dcc.Store(id="stored-figure", data=fig)
]
)
@app.callback(
Output("download-dashboard", "data"),
Input("download-button", "n_clicks"),
State("cluster-graph", "figure"),
prevent_initial_call=True
)
def download_dashboard(n_clicks, figure):
if n_clicks is None:
return None
# Save the figure as HTML with full plotly.js included
dashboard_html = pio.to_html(
figure,
full_html=True,
include_plotlyjs='cdn',
config={'responsive': True}
)
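        # Equivalent helper (dash's own API): dcc.send_string(dashboard_html,
        # "research_dashboard.html") produces this same content/filename dict.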
# Return the dashboard as an HTML file
return dict(
content=dashboard_html,
filename="research_dashboard.html",
type="text/html",
)
    # Enhanced callback to update paper list with better styling
@app.callback(
Output("paper-list", "children"),
[Input("cluster-graph", "clickData")]
)
def update_paper_list(clickData):
if clickData is None:
return html.Div([
html.Div(
html.I(className="fas fa-mouse-pointer", style={"marginRight": "10px"}),
style={"textAlign": "center", "fontSize": "24px", "marginBottom": "10px", "color": "#7f8fa6"}
),
html.P("Click on a cluster to view its papers",
style={"textAlign": "center", "color": "#7f8fa6"})
])
# Extract the clicked cluster ID
try:
clicked_topic = clickData["points"][0]["customdata"][0]
# Get the color for this topic for styling consistency
topic_color = color_map[clicked_topic]
# Get the theme for this topic
topic_theme = df[df["topic"] == clicked_topic]["theme"].iloc[0]
except (KeyError, IndexError):
return html.Div("Error retrieving cluster data.", style={"textAlign": "center", "marginTop": "20px"})
# Filter papers in the clicked cluster - UPDATED to include titles AND urls
papers_in_cluster = df[df["topic"] == clicked_topic][["title", "url", "paperId"]]
if papers_in_cluster.empty:
return html.Div(f"No papers found for Cluster {clicked_topic}.",
style={"textAlign": "center", "marginTop": "20px"})
        # Get topic label
        topic_label = (
            df[df["topic"] == clicked_topic]["topic_label"].iloc[0]
            if "topic_label" in df.columns
            else f"Cluster {clicked_topic}"
        )
# Create an enhanced styled list of paper titles - UPDATED to make clickable
paper_list = []
for i, (_, paper) in enumerate(papers_in_cluster.iterrows()):
paper_url = paper["url"]
paper_title = paper["title"]
paper_list.append(
dbc.Card(
dbc.CardBody([
html.A(
html.H6(
f"{i + 1}. {paper_title}",
className="card-title",
style={
"fontSize": "14px",
"margin": "5px 0",
"fontWeight": "normal",
"lineHeight": "1.4",
"color": "#aaccff", # Blue color to indicate clickable link
"cursor": "pointer"
}
),
href=paper_url,
target="_blank", # Open in new tab
style={"textDecoration": "none"}
),
], style={"padding": "12px"}),
style={
"marginBottom": "10px",
"backgroundColor": "rgba(40, 45, 60, 0.8)",
"borderRadius": "8px",
"borderLeft": f"4px solid {topic_color}",
"boxShadow": "0px 3px 8px rgba(0, 0, 0, 0.2)",
"transition": "transform 0.2s",
":hover": {
"transform": "translateY(-2px)",
"boxShadow": "0px 5px 10px rgba(0, 0, 0, 0.3)"
}
},
className="paper-card"
)
)
return html.Div([
html.Div([
html.H4(
f"Cluster {clicked_topic}",
style={
"textAlign": "center",
"marginBottom": "5px",
"color": topic_color,
"fontWeight": "bold"
}
),
html.H5(
topic_label,
style={
"textAlign": "center",
"marginBottom": "5px",
"color": "#aaaacc",
"fontStyle": "italic",
"fontWeight": "normal"
}
),
html.Div(
topic_theme,
style={
"textAlign": "center",
"marginBottom": "15px",
"fontSize": "16px",
"fontWeight": "bold"
}
),
html.Hr(style={"borderColor": "rgba(100, 100, 200, 0.3)", "margin": "10px 0 20px 0"}),
html.H5(
f"Papers ({len(papers_in_cluster)})",
style={
"textAlign": "left",
"marginBottom": "15px",
"color": "#ffffff",
"fontWeight": "bold"
}
),
]),
html.Div(
paper_list,
style={"paddingRight": "10px"},
)
])
# Add custom CSS for hover effects
app.index_string = '''
<!DOCTYPE html>
<html>
<head>
{%metas%}
<title>Trend Analysis Clusters Dashboard</title>
{%favicon%}
{%css%}
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">
<style>
.paper-card:hover {
transform: translateY(-2px);
box-shadow: 0px 5px 10px rgba(0, 0, 0, 0.3);
background-color: rgba(50, 55, 70, 0.8) !important;
}
a h6:hover {
color: #ffffff !important;
text-decoration: underline;
}
/* Add subtle scroll bar styling */
::-webkit-scrollbar {
width: 8px;
}
::-webkit-scrollbar-track {
background: rgba(30, 30, 50, 0.3);
border-radius: 10px;
}
::-webkit-scrollbar-thumb {
background: rgba(100, 100, 200, 0.5);
border-radius: 10px;
}
::-webkit-scrollbar-thumb:hover {
background: rgba(120, 120, 220, 0.7);
}
</style>
</head>
<body>
{%app_entry%}
<footer>
{%config%}
{%scripts%}
{%renderer%}
</footer>
</body>
</html>
'''
return app
@router.post("/analyze-trends/")
async def analyze_trends(request: Request, data_request: TrendAnalysisRequest):
TitleName = data_request.topic
Topic_year = data_request.year
# Fetch and process data
df, current_page, total_pages, papers_count, total_papers = await fetch_papers_with_pagination(
request, data_request.userId, data_request.topic, data_request.year, data_request.page
)
if df.empty and total_papers > 0:
raise HTTPException(
status_code=404,
detail=f"No papers found for page {data_request.page + 1}. Valid pages are 1 to {total_pages}."
)
elif df.empty:
raise HTTPException(
status_code=404,
detail=f"No papers found for userId '{data_request.userId}', topic '{data_request.topic}'" +
(f", and year '{data_request.year}'" if data_request.year else "")
)
# Perform the trend analysis
df, topic_labels = perform_trend_analysis(df)
if df.empty:
raise HTTPException(status_code=500, detail="Failed to process embeddings for trend analysis")
# Create cluster statistics
cluster_sizes = df.groupby("topic").size().to_dict()
# Build the dashboard
from app import get_or_create_dash_app
dash_app = get_or_create_dash_app()
# Build the dashboard using existing dash_app
updated_dash_app = build_dashboard(df, TitleName, Topic_year if Topic_year else "", existing_app=dash_app)
# Update the global dash_app in the main app
from app import dash_app as main_dash_app
main_dash_app.layout = updated_dash_app.layout
# Get base URL from request and build complete URL
scheme = request.url.scheme # 'http' or 'https'
base_url = f"{scheme}://{request.headers['host']}"
dashboard_url = f"{base_url}/dash"
    # Open the dashboard in the user's browser shortly after the response is sent
    def open_browser():
        webbrowser.open(dashboard_url, new=2)

    browser_thread = threading.Timer(1.5, open_browser)
    browser_thread.daemon = True
    browser_thread.start()
return {
"message": f"Trend analysis completed for papers (page {current_page + 1} of {total_pages})",
"current_page": current_page,
"total_pages": total_pages,
"papers_count": papers_count,
"total_papers": total_papers,
"cluster_sizes": cluster_sizes,
"cluster_titles": topic_labels,
"dashboard_url": dashboard_url,
"redirect": True # Add a flag to indicate redirect is needed
}
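# Illustrative response shape (values are placeholders):
# {"message": "Trend analysis completed for papers (page 1 of 2)", "current_page": 0,
#  "total_pages": 2, "papers_count": 200, "total_papers": 430,
#  "cluster_sizes": {"0": 120, "1": 80}, "cluster_titles": {"0": "..."},
#  "dashboard_url": "http://localhost:8000/dash", "redirect": true}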
# Direct-access GET routes: these reuse the analyze_trends logic so that opening a
# dashboard URL also triggers the analysis and browser launch.
@router.get("/dashboard/{userId}/{topic}/{year}")
@router.get("/dashboard/{userId}/{topic}")
async def get_dashboard(request: Request, userId: str, topic: str, year: Optional[str] = None):
    # Reuse the request model instead of redefining an identical local class
    data_request = TrendAnalysisRequest(userId=userId, topic=topic, year=year)
# Get base URL from request and build complete URL
base_url = str(request.base_url)
dashboard_url = f"{base_url}dash"
# Open browser in a new thread
def open_browser():
        webbrowser.open(dashboard_url, new=2)
# Start a thread to open the browser after a short delay
browser_thread = threading.Timer(1.5, open_browser)
browser_thread.daemon = True
browser_thread.start()
    # Reuse the analyze_trends logic to create the dashboard, then redirect to it
    await analyze_trends(request, data_request)
    return RedirectResponse(url="/dash")