import multiprocessing
import random
import re
import threading
import webbrowser
from typing import Optional

import dash
import dash_bootstrap_components as dbc
import hdbscan
import numpy as np
import optuna
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import torch
import umap
from bertopic import BERTopic
from bertopic.representation import KeyBERTInspired
from bertopic.vectorizers import ClassTfidfTransformer
from dash import dcc, html, Input, Output, State
from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer, ENGLISH_STOP_WORDS


def set_seed(seed=42):
    """Seed all relevant RNGs so the UMAP/HDBSCAN/Optuna pipeline is reproducible."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


if __name__ == "__main__":
    # When executed directly (e.g. a frozen Windows build), seed the RNGs
    # and enable multiprocessing support.
    set_seed(42)
    multiprocessing.freeze_support()

TitleName = "Dashboard"
router = APIRouter()


class TrendAnalysisRequest(BaseModel):
    userId: str
    topic: str
    year: Optional[str] = None
    page: int = 0
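
# Example request body for the POST /analyze-trends/ route below
# (illustrative values only):
#   {"userId": "u123", "topic": "graph neural networks", "year": "2023", "page": 0}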


async def fetch_papers_with_pagination(request: Request, userId: str, topic: str,
                                       year: Optional[str] = None, page: int = 0):
    query_filter = {"userId": userId, "topic": topic}
    if year:
        query_filter["year"] = year

    # Count all papers matching the filter before paging.
    count_pipeline = [
        {"$match": query_filter},
        {"$unwind": "$papers"},
        {"$count": "total_papers"}
    ]
    collection = request.app.state.collection
    count_result = await collection.aggregate(count_pipeline).to_list(length=1)
    total_papers = count_result[0]["total_papers"] if count_result else 0

    print(f"Total papers matching criteria: {total_papers}")

    if total_papers == 0:
        return pd.DataFrame(), 0, 0, 0, 0

    # Pages hold 200 papers each; a trailing page with fewer than 50 papers
    # is merged into the previous page rather than standing on its own.
    papers_per_page = 200
    min_papers_last_page = 50

    if total_papers <= papers_per_page:
        total_pages = 1
    else:
        full_pages = total_papers // papers_per_page
        remaining = total_papers % papers_per_page
        if remaining >= min_papers_last_page:
            total_pages = full_pages + 1
        else:
            total_pages = full_pages

    if page >= total_pages:
        return pd.DataFrame(), 0, total_pages, 0, total_papers

    if total_pages == 1:
        skip = 0
        limit = total_papers
    elif page < total_pages - 1:
        skip = page * papers_per_page
        limit = papers_per_page
    else:
        # Last page: serve the remainder, absorbing it into a full page
        # when it is too small to stand alone.
        remaining = total_papers % papers_per_page
        if remaining >= min_papers_last_page or remaining == 0:
            skip = page * papers_per_page
            limit = remaining if remaining > 0 else papers_per_page
        else:
            skip = (total_pages - 1) * papers_per_page
            limit = papers_per_page + remaining

    print(f"Pagination: Page {page + 1} of {total_pages}, Skip {skip}, Limit {limit}")

    pipeline = [
        {"$match": query_filter},
        {"$unwind": "$papers"},
        {"$replaceRoot": {"newRoot": "$papers"}},
        {"$project": {
            "_id": 0,
            "paperId": 1,
            "url": 1,
            "title": 1,
            "abstract": 1,
            "citationCount": 1,
            "influentialCitationCount": 1,
            "embedding": 1,
            "publicationDate": 1,
            "authors": 1
        }},
        {"$sort": {"publicationDate": 1}},
        {"$skip": skip},
        {"$limit": limit}
    ]

    cursor = collection.aggregate(pipeline)
    papers = await cursor.to_list(None)

    papers_count = len(papers)
    print(f"Papers retrieved: {papers_count}")

    df = pd.DataFrame(papers)
    df = df.sort_values(by="publicationDate")
    print(df[["paperId", "publicationDate"]].head(10))

    return df, page, total_pages, papers_count, total_papers
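
# Worked example of the paging rule above (illustrative totals):
#   total_papers = 430 -> full_pages = 2, remainder 30 < 50, so total_pages = 2
#   and the last page serves papers 201-430 (limit = 230).
#   total_papers = 450 -> remainder 50 >= 50, so total_pages = 3 with pages of
#   200, 200, and 50 papers.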


def clean_text(text):
    """Lowercase, strip non-alphanumeric characters, and drop English stop words."""
    text = str(text).lower()
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)
    return ' '.join([word for word in text.split() if word not in ENGLISH_STOP_WORDS])
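
# For example: clean_text("A Survey of Graph-Based Methods!") returns
# "survey graphbased methods" -- punctuation is stripped before tokenising,
# so hyphenated words are fused into a single token.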


def perform_trend_analysis(df):
    def convert_embedding(embedding):
        # Embeddings are stored as {"vector": [...]} documents in MongoDB.
        if isinstance(embedding, dict) and "vector" in embedding:
            return np.array(embedding["vector"], dtype=np.float64)
        return None

    df["embedding"] = df["embedding"].apply(convert_embedding)
    df = df.dropna(subset=["embedding"])

    if df.empty:
        return df, {}

    df["clean_text"] = df["abstract"].fillna("").apply(clean_text)

    def objective(trial):
        # Search UMAP and HDBSCAN hyperparameters jointly, scoring with DBCV.
        umap_n_components = trial.suggest_int("umap_n_components", 1, 12)
        umap_min_dist = trial.suggest_float("umap_min_dist", 0.1, 0.8)
        umap_n_neighbors = trial.suggest_int("umap_n_neighbors", 2, 12)
        hdbscan_min_cluster_size = trial.suggest_int("hdbscan_min_cluster_size", 2, 10)
        hdbscan_min_samples = trial.suggest_int("hdbscan_min_samples", 1, 10)
        hdbscan_cluster_selection_epsilon = trial.suggest_float("hdbscan_cluster_selection_epsilon", 0.2, 0.8)
        hdbscan_cluster_selection_method = trial.suggest_categorical(
            "hdbscan_cluster_selection_method", ["eom", "leaf"])

        reducer_high_dim = umap.UMAP(
            n_components=umap_n_components,
            random_state=42,
            min_dist=umap_min_dist,
            n_neighbors=umap_n_neighbors,
            metric="cosine"
        )
        reduced_embeddings_high_dim = reducer_high_dim.fit_transform(
            np.vstack(df["embedding"].values)).astype(np.float64)

        clusterer = hdbscan.HDBSCAN(
            min_cluster_size=hdbscan_min_cluster_size,
            min_samples=hdbscan_min_samples,
            cluster_selection_epsilon=hdbscan_cluster_selection_epsilon,
            cluster_selection_method=hdbscan_cluster_selection_method,
            prediction_data=True,
            core_dist_n_jobs=1
        )
        labels = clusterer.fit_predict(reduced_embeddings_high_dim)

        # A single cluster (or all noise) cannot be scored, so penalise it;
        # validity_index can also raise on degenerate clusterings.
        if len(set(labels)) > 1:
            try:
                dbcv_score = hdbscan.validity.validity_index(reduced_embeddings_high_dim, labels)
            except ValueError:
                dbcv_score = -np.inf
        else:
            dbcv_score = -np.inf

        return dbcv_score

    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=100)

    best_params = study.best_params
    umap_model = umap.UMAP(
        n_components=best_params["umap_n_components"],
        random_state=42,
        min_dist=best_params["umap_min_dist"],
        n_neighbors=best_params["umap_n_neighbors"],
        metric="cosine"
    )
    hdbscan_model = hdbscan.HDBSCAN(
        min_cluster_size=best_params["hdbscan_min_cluster_size"],
        min_samples=best_params["hdbscan_min_samples"],
        cluster_selection_epsilon=best_params["hdbscan_cluster_selection_epsilon"],
        cluster_selection_method=best_params["hdbscan_cluster_selection_method"],
        prediction_data=True,
        core_dist_n_jobs=1
    )

    # Bigram/trigram topic keywords, with stop words removed.
    vectorizer = CountVectorizer(
        stop_words=list(ENGLISH_STOP_WORDS),
        ngram_range=(2, 3)
    )

    representation_model = KeyBERTInspired()
    embedding_model = SentenceTransformer("allenai/specter")
    topic_model = BERTopic(
        vectorizer_model=vectorizer,
        umap_model=umap_model,
        hdbscan_model=hdbscan_model,
        embedding_model=embedding_model,
        nr_topics='auto',
        top_n_words=8,
        representation_model=representation_model,
        ctfidf_model=ClassTfidfTransformer(reduce_frequent_words=False, bm25_weighting=True)
    )

    # Fit on the cleaned abstracts, reusing the precomputed document embeddings.
    topics, _ = topic_model.fit_transform(df["clean_text"], np.vstack(df["embedding"].values))
    df["topic"] = topics
    topic_labels = {t: " | ".join([word for word, _ in topic_model.get_topic(t)][:8]) for t in set(topics)}

    # Separate 2-D projection used only for plotting.
    reduced_embeddings_2d = umap.UMAP(n_components=2, random_state=42).fit_transform(
        np.vstack(df["embedding"].values)).astype(np.float64)
    df["x"] = reduced_embeddings_2d[:, 0]
    df["y"] = reduced_embeddings_2d[:, 1]
    df["topic_label"] = df["topic"].map(topic_labels)

    return df, topic_labels
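
# Usage sketch (the contract implied by the function above): the input frame
# needs "abstract" and "embedding" ({"vector": [...]}) columns; the returned
# frame gains "topic", "topic_label", "x", and "y", e.g.
#
#   df, topic_labels = perform_trend_analysis(df)
#   print(df[["topic", "topic_label"]].drop_duplicates())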


def build_dashboard(df, titleNm, topic_year, existing_app=None):
    TitleName = titleNm + "_" + topic_year
    color_palette = px.colors.qualitative.Vivid
    unique_topics = sorted(df["topic"].unique())
    color_map = {topic: color_palette[i % len(color_palette)] for i, topic in enumerate(unique_topics)}

    df["color"] = df["topic"].map(color_map)

    # One bubble per cluster, sized by how many papers it contains.
    cluster_sizes = df.groupby("topic").size().reset_index(name="paper_count")
    df = df.merge(cluster_sizes, on="topic", how="left")
    app = existing_app if existing_app else dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY])

    min_size = 50
    max_size = 140
    size_range = df["paper_count"].max() - df["paper_count"].min()
    if size_range > 0:
        df["marker_size"] = ((df["paper_count"] - df["paper_count"].min()) / size_range) * (max_size - min_size) + min_size
    else:
        # All clusters are the same size; avoid a divide-by-zero.
        df["marker_size"] = (min_size + max_size) / 2

    # Log-damp raw counts, then shrink each cluster's mean toward the global
    # median so tiny clusters do not dominate the popularity/impact signals.
    df["log_citation"] = np.log1p(df["citationCount"])
    df["log_influence"] = np.log1p(df["influentialCitationCount"])

    global_median_citation = df["log_citation"].median()
    global_median_influence = df["log_influence"].median()
    C = 10  # shrinkage strength: acts like C pseudo-papers at the global median

    def bayesian_shrinkage(group, global_median, C):
        return (group.sum() + C * global_median) / (len(group) + C)

    adjusted_citations = df.groupby("topic")["log_citation"].apply(
        lambda x: bayesian_shrinkage(x, global_median_citation, C))
    adjusted_influence = df.groupby("topic")["log_influence"].apply(
        lambda x: bayesian_shrinkage(x, global_median_influence, C))

    df = df.merge(adjusted_citations.rename("adjusted_citation"), on="topic")
    df = df.merge(adjusted_influence.rename("adjusted_influence"), on="topic")

    citation_25th = df["adjusted_citation"].quantile(0.25)
    citation_75th = df["adjusted_citation"].quantile(0.75)
    influence_25th = df["adjusted_influence"].quantile(0.25)
    influence_75th = df["adjusted_influence"].quantile(0.75)

    def classify_theme(row):
        # Quadrants over the adjusted-citation / adjusted-influence plane.
        if row["adjusted_citation"] >= citation_75th and row["adjusted_influence"] >= influence_75th:
            return "🔥 Hot Topic"
        elif row["adjusted_citation"] <= citation_25th and row["adjusted_influence"] >= influence_75th:
            return "💎 Gap Opportunity"
        elif row["adjusted_citation"] >= citation_75th and row["adjusted_influence"] <= influence_25th:
            return "⚠️ Risky Theme"
        else:
            return "🔄 Neutral"

    df["theme"] = df.apply(classify_theme, axis=1)

    fig = go.Figure()

    fig.update_xaxes(
        showgrid=True,
        gridwidth=0.1,
        gridcolor='rgba(255, 255, 255, 0.05)',
        zeroline=False
    )
    fig.update_yaxes(
        showgrid=True,
        gridwidth=0.1,
        gridcolor='rgba(255, 255, 255, 0.05)',
        zeroline=False
    )

    for topic in unique_topics:
        topic_data = df[df["topic"] == topic]

        # Place one bubble at the cluster centroid.
        center_x = topic_data["x"].mean()
        center_y = topic_data["y"].mean()

        full_topic_formatted = (topic_data['topic_label'].iloc[0]
                                if 'topic_label' in topic_data.columns else f"Cluster {topic}")

        # Soft halo behind the cluster bubble.
        fig.add_trace(
            go.Scatter(
                x=[center_x],
                y=[center_y],
                mode="markers",
                marker=dict(
                    color=color_map[topic],
                    size=topic_data["marker_size"].iloc[0] * 1.2,
                    opacity=0.3,
                    line=dict(width=0),
                    symbol="circle",
                ),
                showlegend=False,
                hoverinfo="none",
            )
        )

        # Main bubble with the cluster id and hover details.
        fig.add_trace(
            go.Scatter(
                x=[center_x],
                y=[center_y],
                mode="markers+text",
                marker=dict(
                    color=color_map[topic],
                    size=topic_data["marker_size"].iloc[0],
                    opacity=0.85,
                    line=dict(width=2, color="white"),
                    symbol="circle",
                ),
                text=[f"{topic}"],
                textposition="middle center",
                textfont=dict(
                    family="Arial Black",
                    size=16,
                    color="white"
                ),
                name=f"{topic}",
                hovertemplate=(
                    "<b>Cluster ID:</b> %{text}<br>" +
                    "<b>Name:</b><br>" + full_topic_formatted + "<br>" +
                    "<b>Papers:</b> " + str(topic_data["paper_count"].iloc[0]) + "<br>" +
                    "<b>Popularity:</b> " + (
                        "🔼 High" if topic_data["adjusted_citation"].iloc[0] >= citation_75th else "🔽 Low") +
                    f" (Adjusted Citation: {topic_data['adjusted_citation'].iloc[0]:.2f})<br>" +
                    "<b>Impactfulness:</b> " + (
                        "🔼 High" if topic_data["adjusted_influence"].iloc[0] >= influence_75th else "🔽 Low") +
                    f" (Adjusted Influence: {topic_data['adjusted_influence'].iloc[0]:.2f})<br>" +
                    "<b>Theme:</b> " + topic_data["theme"].iloc[0] +
                    "<extra></extra>"
                ),
                customdata=[[topic]],
            )
        )

    fig.update_layout(
        shapes=[
            dict(
                type="rect",
                xref="paper",
                yref="paper",
                x0=0,
                y0=0,
                x1=1,
                y1=1,
                fillcolor="rgba(0, 0, 40, 0.95)",
                line_width=0,
                layer="below"
            ),
            dict(
                type="circle",
                xref="paper",
                yref="paper",
                x0=0.3,
                y0=0.3,
                x1=0.7,
                y1=0.7,
                fillcolor="rgba(50, 50, 120, 0.2)",
                line_width=0,
                layer="below"
            )
        ],
        template="plotly_dark",
        title={
            'text': f"<b>{TitleName.title()}</b>",
            'y': 0.97,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(
                family="Arial Black",
                size=28,
                color="white",
            ),
            'xref': 'paper',
            'yref': 'paper',
        },
        margin=dict(l=40, r=40, b=150, t=100),
        hovermode="closest",
        xaxis=dict(showticklabels=False),
        yaxis=dict(showticklabels=False),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        dragmode="pan",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.15,
            xanchor="center",
            x=0.5,
            bgcolor="rgba(30,30,60,0.5)",
            bordercolor="rgba(255,255,255,0.2)",
            borderwidth=1
        ),
    )

    fig.update_layout(
        updatemenus=[
            dict(
                type="buttons",
                showactive=False,
                buttons=[
                    dict(
                        label="Reset View",
                        method="relayout",
                        args=[{"xaxis.range": None, "yaxis.range": None}]
                    ),
                ],
                x=0.05,
                y=0.05,
                xanchor="left",
                yanchor="bottom",
                bgcolor="rgba(50,50,80,0.7)",
                bordercolor="rgba(255,255,255,0.2)",
            )
        ]
    )

    app.layout = dbc.Container(
        fluid=True,
        style={
            "backgroundColor": "#111122",
            "minHeight": "100vh",
            "height": "100%",
            "width": "100%",
            "backgroundImage": "linear-gradient(135deg, #111122 0%, #15162c 100%)",
            "padding": "20px"
        },
        children=[
            dbc.Row([
                dbc.Col(html.H1(
                    "Trend Analysis Dashboard",
                    style={
                        "textAlign": "center",
                        "color": "white",
                        "marginBottom": "5px",
                        "fontFamily": "Arial Black",
                        "textShadow": "2px 2px 8px rgba(0,0,0,0.7)",
                        "letterSpacing": "2px",
                        "fontSize": "42px",
                        "background": "linear-gradient(135deg, #790091 0%, #565cd5 100%)",
                        "WebkitBackgroundClip": "text",
                        "WebkitTextFillColor": "transparent",
                        "paddingTop": "10px"
                    }
                ), width=10),

                dbc.Col([
                    html.Button(
                        [
                            html.I(className="fas fa-download mr-2"),
                            " Save Dashboard"
                        ],
                        id="download-button",
                        className="btn btn-outline-light",
                        style={
                            "marginTop": "10px",
                            "backgroundColor": "rgba(80, 80, 150, 0.4)",
                            "border": "1px solid rgba(100, 100, 200, 0.5)",
                            "borderRadius": "8px",
                            "padding": "8px 15px",
                            "boxShadow": "0px 4px 8px rgba(0, 0, 0, 0.3)",
                            "transition": "all 0.3s ease",
                            "fontSize": "14px",
                            "fontWeight": "bold"
                        }
                    ),
                    dcc.Download(id="download-dashboard")
                ], width=2),

                dbc.Col(html.P(
                    "Interactive visualization of research topics and their relationships",
                    style={
                        "textAlign": "center",
                        "color": "#aaddff",
                        "marginBottom": "15px",
                        "fontStyle": "italic",
                        "fontSize": "16px",
                        "fontWeight": "300",
                        "letterSpacing": "0.5px",
                        "textShadow": "1px 1px 3px rgba(0,0,0,0.5)",
                    }
                ), width=12),
            ]),

            dbc.Row([
                dbc.Col(
                    dbc.Card(
                        dbc.CardBody([
                            dcc.Graph(
                                id="cluster-graph",
                                figure=fig,
                                config={
                                    "scrollZoom": True,
                                    "displayModeBar": True,
                                    "modeBarButtonsToRemove": ["select2d", "lasso2d"]
                                }, style={"height": "80vh", "minHeight": "800px"}
                            )
                        ], style={"height": "80vh", "minHeight": "800px"}),
                        style={
                            "backgroundColor": "rgba(20, 20, 40, 0.7)",
                            "borderRadius": "15px",
                            "boxShadow": "0px 10px 30px rgba(0, 0, 0, 0.5)",
                            "border": "1px solid rgba(100, 100, 200, 0.3)",
                            "height": "80vh",
                            "minHeight": "800px"
                        }
                    ),
                    width=9
                ),

                dbc.Col(
                    dbc.Card(
                        dbc.CardBody([
                            html.H3("Paper List", style={
                                "textAlign": "center",
                                "marginBottom": "15px",
                                "color": "#ffffff",
                                "fontFamily": "Arial",
                                "fontWeight": "bold",
                                "textShadow": "1px 1px 3px rgba(0,0,0,0.3)"
                            }),
                            html.Hr(style={"borderColor": "rgba(100, 100, 200, 0.3)", "margin": "10px 0 20px 0"}),
                            html.Div(
                                id="paper-list",
                                style={
                                    "overflowY": "auto",
                                    "height": "700px",
                                    "padding": "5px"
                                },
                                children=html.Div([
                                    html.Div(
                                        html.I(className="fas fa-mouse-pointer", style={"marginRight": "10px"}),
                                        style={"textAlign": "center", "fontSize": "24px", "marginBottom": "10px",
                                               "color": "#7f8fa6"}
                                    ),
                                    html.P("Click on a cluster to view its papers",
                                           style={"textAlign": "center", "color": "#7f8fa6"})
                                ])
                            ),
                        ],
                        style={
                            "backgroundColor": "rgba(30, 30, 50, 0.8)",
                            "borderRadius": "15px",
                            "padding": "20px",
                            "height": "100%"
                        }),
                        style={
                            "height": "800px",
                            "boxShadow": "0px 10px 30px rgba(0, 0, 0, 0.5)",
                            "border": "1px solid rgba(100, 100, 200, 0.3)",
                            "borderRadius": "15px"
                        }
                    ),
                    width=3
                ),
            ], style={"marginTop": "20px"}),

            dbc.Row([
                dbc.Col(
                    dbc.Card(
                        dbc.CardBody([
                            html.H5("Theme Legend", style={"textAlign": "center", "marginBottom": "15px"}),
                            dbc.Row([
                                dbc.Col(html.Div([
                                    html.Span("🔥", style={"fontSize": "20px", "marginRight": "10px"}),
                                    "Hot Topic: High citations & high influence"
                                ]), width=3),
                                dbc.Col(html.Div([
                                    html.Span("💎", style={"fontSize": "20px", "marginRight": "10px"}),
                                    "Gap Opportunity: Low citations but high influence"
                                ]), width=3),
                                dbc.Col(html.Div([
                                    html.Span("⚠️", style={"fontSize": "20px", "marginRight": "10px"}),
                                    "Risky Theme: High citations but low influence"
                                ]), width=3),
                                dbc.Col(html.Div([
                                    html.Span("🔄", style={"fontSize": "20px", "marginRight": "10px"}),
                                    "Neutral: Average citations and influence"
                                ]), width=3),
                            ])
                        ]),
                        style={
                            "backgroundColor": "rgba(30, 30, 50, 0.8)",
                            "borderRadius": "15px",
                            "marginTop": "20px",
                            "boxShadow": "0px 5px 15px rgba(0, 0, 0, 0.3)",
                            "border": "1px solid rgba(100, 100, 200, 0.3)"
                        }
                    ),
                    width=12
                ),
            ]),

            dcc.Store(id="stored-figure", data=fig)
        ]
    )

    @app.callback(
        Output("download-dashboard", "data"),
        Input("download-button", "n_clicks"),
        State("cluster-graph", "figure"),
        prevent_initial_call=True
    )
    def download_dashboard(n_clicks, figure):
        if n_clicks is None:
            return None

        # Serialize the current figure to a standalone HTML page.
        dashboard_html = pio.to_html(
            figure,
            full_html=True,
            include_plotlyjs='cdn',
            config={'responsive': True}
        )

        return dict(
            content=dashboard_html,
            filename="research_dashboard.html",
            type="text/html",
        )

    @app.callback(
        Output("paper-list", "children"),
        [Input("cluster-graph", "clickData")]
    )
    def update_paper_list(clickData):
        if clickData is None:
            return html.Div([
                html.Div(
                    html.I(className="fas fa-mouse-pointer", style={"marginRight": "10px"}),
                    style={"textAlign": "center", "fontSize": "24px", "marginBottom": "10px", "color": "#7f8fa6"}
                ),
                html.P("Click on a cluster to view its papers",
                       style={"textAlign": "center", "color": "#7f8fa6"})
            ])

        try:
            # customdata carries the topic id of the clicked cluster bubble.
            clicked_topic = clickData["points"][0]["customdata"][0]
            topic_color = color_map[clicked_topic]
            topic_theme = df[df["topic"] == clicked_topic]["theme"].iloc[0]
        except (KeyError, IndexError):
            return html.Div("Error retrieving cluster data.", style={"textAlign": "center", "marginTop": "20px"})

        papers_in_cluster = df[df["topic"] == clicked_topic][["title", "url", "paperId"]]

        if papers_in_cluster.empty:
            return html.Div(f"No papers found for Cluster {clicked_topic}.",
                            style={"textAlign": "center", "marginTop": "20px"})

        topic_label = (df[df["topic"] == clicked_topic]['topic_label'].iloc[0]
                       if 'topic_label' in df.columns else f"Cluster {clicked_topic}")

        paper_list = []
        for i, (_, paper) in enumerate(papers_in_cluster.iterrows()):
            paper_url = paper["url"]
            paper_title = paper["title"]

            paper_list.append(
                dbc.Card(
                    dbc.CardBody([
                        html.A(
                            html.H6(
                                f"{i + 1}. {paper_title}",
                                className="card-title",
                                style={
                                    "fontSize": "14px",
                                    "margin": "5px 0",
                                    "fontWeight": "normal",
                                    "lineHeight": "1.4",
                                    "color": "#aaccff",
                                    "cursor": "pointer"
                                }
                            ),
                            href=paper_url,
                            target="_blank",
                            style={"textDecoration": "none"}
                        ),
                    ], style={"padding": "12px"}),
                    # Hover effects live in the .paper-card:hover CSS rule in
                    # app.index_string; inline styles cannot express :hover.
                    style={
                        "marginBottom": "10px",
                        "backgroundColor": "rgba(40, 45, 60, 0.8)",
                        "borderRadius": "8px",
                        "borderLeft": f"4px solid {topic_color}",
                        "boxShadow": "0px 3px 8px rgba(0, 0, 0, 0.2)",
                        "transition": "transform 0.2s",
                    },
                    className="paper-card"
                )
            )

        return html.Div([
            html.Div([
                html.H4(
                    f"Cluster {clicked_topic}",
                    style={
                        "textAlign": "center",
                        "marginBottom": "5px",
                        "color": topic_color,
                        "fontWeight": "bold"
                    }
                ),
                html.H5(
                    topic_label,
                    style={
                        "textAlign": "center",
                        "marginBottom": "5px",
                        "color": "#aaaacc",
                        "fontStyle": "italic",
                        "fontWeight": "normal"
                    }
                ),
                html.Div(
                    topic_theme,
                    style={
                        "textAlign": "center",
                        "marginBottom": "15px",
                        "fontSize": "16px",
                        "fontWeight": "bold"
                    }
                ),
                html.Hr(style={"borderColor": "rgba(100, 100, 200, 0.3)", "margin": "10px 0 20px 0"}),
                html.H5(
                    f"Papers ({len(papers_in_cluster)})",
                    style={
                        "textAlign": "left",
                        "marginBottom": "15px",
                        "color": "#ffffff",
                        "fontWeight": "bold"
                    }
                ),
            ]),
            html.Div(
                paper_list,
                style={"paddingRight": "10px"},
            )
        ])

    app.index_string = '''
    <!DOCTYPE html>
    <html>
        <head>
            {%metas%}
            <title>Trend Analysis Clusters Dashboard</title>
            {%favicon%}
            {%css%}
            <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">
            <style>
                .paper-card:hover {
                    transform: translateY(-2px);
                    box-shadow: 0px 5px 10px rgba(0, 0, 0, 0.3);
                    background-color: rgba(50, 55, 70, 0.8) !important;
                }
                a h6:hover {
                    color: #ffffff !important;
                    text-decoration: underline;
                }
                /* Subtle scrollbar styling */
                ::-webkit-scrollbar {
                    width: 8px;
                }
                ::-webkit-scrollbar-track {
                    background: rgba(30, 30, 50, 0.3);
                    border-radius: 10px;
                }
                ::-webkit-scrollbar-thumb {
                    background: rgba(100, 100, 200, 0.5);
                    border-radius: 10px;
                }
                ::-webkit-scrollbar-thumb:hover {
                    background: rgba(120, 120, 220, 0.7);
                }
            </style>
        </head>
        <body>
            {%app_entry%}
            <footer>
                {%config%}
                {%scripts%}
                {%renderer%}
            </footer>
        </body>
    </html>
    '''
    return app
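
# Minimal standalone usage sketch (assumes a dataframe already processed by
# perform_trend_analysis; the title and year strings are illustrative):
#
#   df, _ = perform_trend_analysis(df)
#   app = build_dashboard(df, "my topic", "2024")
#   app.run(debug=False)  # Dash serves on http://127.0.0.1:8050 by default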


@router.post("/analyze-trends/")
async def analyze_trends(request: Request, data_request: TrendAnalysisRequest):
    TitleName = data_request.topic
    Topic_year = data_request.year

    df, current_page, total_pages, papers_count, total_papers = await fetch_papers_with_pagination(
        request, data_request.userId, data_request.topic, data_request.year, data_request.page
    )

    if df.empty and total_papers > 0:
        raise HTTPException(
            status_code=404,
            detail=f"No papers found for page {data_request.page + 1}. Valid pages are 1 to {total_pages}."
        )
    elif df.empty:
        raise HTTPException(
            status_code=404,
            detail=f"No papers found for userId '{data_request.userId}', topic '{data_request.topic}'" +
                   (f", and year '{data_request.year}'" if data_request.year else "")
        )

    df, topic_labels = perform_trend_analysis(df)

    if df.empty:
        raise HTTPException(status_code=500, detail="Failed to process embeddings for trend analysis")

    cluster_sizes = df.groupby("topic").size().to_dict()

    # Rebuild the shared Dash app's layout in place so the already-mounted
    # /dash route serves the new dashboard.
    from app import get_or_create_dash_app
    dash_app = get_or_create_dash_app()

    updated_dash_app = build_dashboard(df, TitleName, Topic_year if Topic_year else "", existing_app=dash_app)

    from app import dash_app as main_dash_app
    main_dash_app.layout = updated_dash_app.layout

    scheme = request.url.scheme
    base_url = f"{scheme}://{request.headers['host']}"
    dashboard_url = f"{base_url}/dash"

    # Open the dashboard in the caller's browser shortly after responding.
    def open_browser():
        webbrowser.open(dashboard_url, new=2)

    browser_thread = threading.Timer(1.5, open_browser)
    browser_thread.daemon = True
    browser_thread.start()

    return {
        "message": f"Trend analysis completed for papers (page {current_page + 1} of {total_pages})",
        "current_page": current_page,
        "total_pages": total_pages,
        "papers_count": papers_count,
        "total_papers": total_papers,
        "cluster_sizes": cluster_sizes,
        "cluster_titles": topic_labels,
        "dashboard_url": dashboard_url,
        "redirect": True
    }
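
# Illustrative JSON response shape (all values made up; note that integer
# topic ids serialize to string keys in JSON):
#   {"message": "Trend analysis completed for papers (page 1 of 3)",
#    "current_page": 0, "total_pages": 3, "papers_count": 200,
#    "total_papers": 450, "cluster_sizes": {"0": 120, "1": 80},
#    "cluster_titles": {"0": "graph embedding | node classification | ..."},
#    "dashboard_url": "http://localhost:8000/dash", "redirect": true}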


@router.get("/dashboard/{userId}/{topic}/{year}")
@router.get("/dashboard/{userId}/{topic}")
async def get_dashboard(request: Request, userId: str, topic: str, year: Optional[str] = None):
    from fastapi.responses import RedirectResponse

    data_request = TrendAnalysisRequest(userId=userId, topic=topic, year=year)

    # analyze_trends rebuilds the dashboard and schedules a browser tab itself,
    # so there is no need to open a second one here.
    await analyze_trends(request, data_request)

    return RedirectResponse(url="/dash")
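
# How /dash itself is served is not shown in this module; a typical mounting in
# app.py might look roughly like the sketch below (hypothetical, inferred from
# the get_or_create_dash_app/dash_app imports used above):
#
#   from fastapi import FastAPI
#   from fastapi.middleware.wsgi import WSGIMiddleware
#
#   fastapi_app = FastAPI()
#   fastapi_app.include_router(router)
#   fastapi_app.mount("/dash", WSGIMiddleware(dash_app.server))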