from motor.motor_asyncio import AsyncIOMotorClient
import pandas as pd
import numpy as np
import re
import json
import umap
import plotly.io as pio
import hdbscan
from bertopic import BERTopic
from bertopic.vectorizers import ClassTfidfTransformer
from bertopic.representation import KeyBERTInspired
from sklearn.feature_extraction.text import CountVectorizer, ENGLISH_STOP_WORDS
from skopt import gp_minimize
from sentence_transformers import SentenceTransformer
import torch
import random
import multiprocessing
import optuna
import dash
from dash import dcc, html, Input, Output, State
import plotly.graph_objects as go
import plotly.express as px
import dash_bootstrap_components as dbc
from fastapi import HTTPException, APIRouter, Request
from pydantic import BaseModel
import threading
import time
import webbrowser
import asyncio


# Set seeds across all libraries for reproducibility
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


if __name__ == "__main__":
    set_seed(42)
    multiprocessing.freeze_support()

TitleName = "Dashboard"

router = APIRouter()


class TrendAnalysisRequest(BaseModel):
    userId: str
    topic: str
    year: str = None
    page: int = 0


async def fetch_papers_with_pagination(request: Request, userId: str, topic: str,
                                       year: str = None, page: int = 0):
    # Build the query filter
    query_filter = {"userId": userId, "topic": topic}
    if year:
        query_filter["year"] = year

    # Count total matching documents
    count_pipeline = [
        {"$match": query_filter},
        {"$unwind": "$papers"},
        {"$count": "total_papers"},
    ]
    collection = request.app.state.collection
    count_result = await collection.aggregate(count_pipeline).to_list(length=1)
    total_papers = count_result[0]["total_papers"] if count_result else 0
    print(f"Total papers matching criteria: {total_papers}")

    # If no papers were found, return an empty result
    if total_papers == 0:
        return pd.DataFrame(), 0, 0, 0, 0

    # Pagination constants: full pages hold 200 papers; a trailing partial
    # page is only created if it would hold at least 50 papers, otherwise
    # the remainder is folded into the last full page.
    papers_per_page = 200
    min_papers_last_page = 50

    if total_papers <= papers_per_page:
        # Simple case: all papers fit on one page
        total_pages = 1
    else:
        full_pages = total_papers // papers_per_page
        remaining = total_papers % papers_per_page
        if remaining >= min_papers_last_page:
            # The remainder meets the minimum threshold and gets its own page
            total_pages = full_pages + 1
        else:
            # Otherwise the remainder is appended to the last full page
            total_pages = full_pages

    # Ensure the requested page is within the valid range
    if page >= total_pages:
        return pd.DataFrame(), 0, total_pages, 0, total_papers

    # Calculate skip and limit for the requested page
    if total_pages == 1:
        # Only one page: return all papers
        skip = 0
        limit = total_papers
    elif page < total_pages - 1:
        # Regular full page
        skip = page * papers_per_page
        limit = papers_per_page
    else:
        # Last page: may absorb the remaining papers
        remaining = total_papers % papers_per_page
        if remaining >= min_papers_last_page or remaining == 0:
            # Enough remaining papers for their own page, or an exact division
            skip = page * papers_per_page
            limit = remaining if remaining > 0 else papers_per_page
        else:
            # Remainder below the threshold: extend the last page to cover it
            skip = (total_pages - 1) * papers_per_page
            limit = papers_per_page + remaining

    print(f"Pagination: Page {page + 1} of {total_pages}, Skip {skip}, Limit {limit}")

    # MongoDB aggregation pipeline for the papers on this page
    pipeline = [
        {"$match": query_filter},
        {"$unwind": "$papers"},
        {"$replaceRoot": {"newRoot": "$papers"}},
        {"$project": {
            "_id": 0,
            "paperId": 1,
            "url": 1,
            "title": 1,
            "abstract": 1,
            "citationCount": 1,
            "influentialCitationCount": 1,
            "embedding": 1,
            "publicationDate": 1,
            "authors": 1,
        }},
        {"$sort": {"publicationDate": 1}},
        {"$skip": skip},
        {"$limit": limit},
    ]

    # Execute the aggregation pipeline
    cursor = collection.aggregate(pipeline)
    papers = await cursor.to_list(None)
    papers_count = len(papers)
    print(f"Papers Retrieved: {papers_count}")

    # Convert to a DataFrame sorted by publication date
    df = pd.DataFrame(papers)
    df = df.sort_values(by="publicationDate")
    print(df[["paperId", "publicationDate"]].head(10))

    return df, page, total_pages, papers_count, total_papers
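
# Worked example of the page-split rule above (illustrative totals):
#   total_papers = 450 -> pages of 200, 200, 50  (remainder 50 >= threshold: own page)
#   total_papers = 430 -> pages of 200, 230      (remainder 30 < threshold: folded in)
#   total_papers = 180 -> one page of 180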

# Preprocessing: lowercase, strip non-alphanumerics, drop stop words
def clean_text(text):
    text = str(text).lower()
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)
    return " ".join(word for word in text.split() if word not in ENGLISH_STOP_WORDS)


# Adaptive clustering and topic modeling
def perform_trend_analysis(df):
    # Convert stored embeddings ({"vector": [...]}) into numpy arrays
    def convert_embedding(embedding):
        if isinstance(embedding, dict) and "vector" in embedding:
            return np.array(embedding["vector"], dtype=np.float64)
        return None

    df["embedding"] = df["embedding"].apply(convert_embedding)
    df = df.dropna(subset=["embedding"])
    if df.empty:
        return df, {}

    df["clean_text"] = df["abstract"].fillna("").apply(clean_text)

    # Optuna objective: maximize the DBCV validity index of an HDBSCAN
    # clustering computed on UMAP-reduced embeddings
    def objective(trial):
        umap_n_components = trial.suggest_int("umap_n_components", 1, 12)
        umap_min_dist = trial.suggest_float("umap_min_dist", 0.1, 0.8)
        umap_n_neighbors = trial.suggest_int("umap_n_neighbors", 2, 12)
        hdbscan_min_cluster_size = trial.suggest_int("hdbscan_min_cluster_size", 2, 10)
        hdbscan_min_samples = trial.suggest_int("hdbscan_min_samples", 1, 10)
        hdbscan_cluster_selection_epsilon = trial.suggest_float("hdbscan_cluster_selection_epsilon", 0.2, 0.8)
        hdbscan_cluster_selection_method = trial.suggest_categorical("hdbscan_cluster_selection_method", ["eom", "leaf"])

        reducer_high_dim = umap.UMAP(
            n_components=umap_n_components,
            random_state=42,
            min_dist=umap_min_dist,
            n_neighbors=umap_n_neighbors,
            metric="cosine",
        )
        reduced_embeddings_high_dim = reducer_high_dim.fit_transform(
            np.vstack(df["embedding"].values)).astype(np.float64)

        clusterer = hdbscan.HDBSCAN(
            min_cluster_size=hdbscan_min_cluster_size,
            min_samples=hdbscan_min_samples,
            cluster_selection_epsilon=hdbscan_cluster_selection_epsilon,
            cluster_selection_method=hdbscan_cluster_selection_method,
            prediction_data=True,
            core_dist_n_jobs=1,
        )
        labels = clusterer.fit_predict(reduced_embeddings_high_dim)

        # DBCV is only defined when at least one real cluster exists
        if len(set(labels)) > 1:
            dbcv_score = hdbscan.validity.validity_index(reduced_embeddings_high_dim, labels)
        else:
            dbcv_score = -np.inf
        return dbcv_score

    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=100)
    best_params = study.best_params

    # Refit UMAP and HDBSCAN with the best hyperparameters
    umap_model = umap.UMAP(
        n_components=best_params["umap_n_components"],
        random_state=42,
        min_dist=best_params["umap_min_dist"],
        n_neighbors=best_params["umap_n_neighbors"],
        metric="cosine",
    )
    hdbscan_model = hdbscan.HDBSCAN(
        min_cluster_size=best_params["hdbscan_min_cluster_size"],
        min_samples=best_params["hdbscan_min_samples"],
        cluster_selection_epsilon=best_params["hdbscan_cluster_selection_epsilon"],
        cluster_selection_method=best_params["hdbscan_cluster_selection_method"],
        prediction_data=True,
        core_dist_n_jobs=1,
    )
    vectorizer = CountVectorizer(
        stop_words=list(ENGLISH_STOP_WORDS),
        ngram_range=(2, 3),
    )
    representation_model = KeyBERTInspired()
    embedding_model = SentenceTransformer("allenai/specter")
    topic_model = BERTopic(
        vectorizer_model=vectorizer,
        umap_model=umap_model,
        hdbscan_model=hdbscan_model,
        embedding_model=embedding_model,
        nr_topics="auto",
        top_n_words=8,
        representation_model=representation_model,
        ctfidf_model=ClassTfidfTransformer(reduce_frequent_words=False, bm25_weighting=True),
    )
    topics, _ = topic_model.fit_transform(df["clean_text"], np.vstack(df["embedding"].values))
    df["topic"] = topics
    topic_labels = {t: " | ".join([word for word, _ in topic_model.get_topic(t)][:8]) for t in set(topics)}

    # Separate 2D projection used only for plotting
    reduced_embeddings_2d = umap.UMAP(n_components=2, random_state=42).fit_transform(
        np.vstack(df["embedding"].values)).astype(np.float64)
    df["x"] = reduced_embeddings_2d[:, 0]
    df["y"] = reduced_embeddings_2d[:, 1]
    df["topic_label"] = df["topic"].map(topic_labels)

    return df, topic_labels
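
# Illustrative call (assumes `df` came from fetch_papers_with_pagination,
# with "embedding" stored as {"vector": [...]}):
#
#     clustered_df, labels = perform_trend_analysis(df)
#     print(labels)  # e.g. {0: "ngram one | ngram two | ...", -1: "..."}
#
# Note that topic -1 is HDBSCAN's outlier bucket; it is labeled and plotted
# like any other cluster here.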

def build_dashboard(df, titleNm, topic_year, existing_app=None):
    global dash_app
    TitleName = titleNm + "_" + topic_year
    color_palette = px.colors.qualitative.Vivid
    unique_topics = sorted(df["topic"].unique())
    color_map = {topic: color_palette[i % len(color_palette)] for i, topic in enumerate(unique_topics)}

    # Map colors to topics
    df["color"] = df["topic"].map(color_map)

    # Calculate the number of papers in each cluster
    cluster_sizes = df.groupby("topic").size().reset_index(name="paper_count")
    df = df.merge(cluster_sizes, on="topic", how="left")

    app = existing_app if existing_app else dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY])

    # Scale marker sizes into a readable range (guard against all clusters
    # having the same size, which would otherwise divide by zero)
    min_size = 50
    max_size = 140
    count_range = df["paper_count"].max() - df["paper_count"].min()
    if count_range == 0:
        df["marker_size"] = (min_size + max_size) / 2
    else:
        df["marker_size"] = ((df["paper_count"] - df["paper_count"].min()) /
                             count_range) * (max_size - min_size) + min_size

    # Log-transformed citation and influence counts
    df["log_citation"] = np.log1p(df["citationCount"])
    df["log_influence"] = np.log1p(df["influentialCitationCount"])

    # Bayesian shrinkage: pull per-cluster averages toward the global median
    # so that clusters with few papers cannot dominate the rankings
    global_median_citation = df["log_citation"].median()
    global_median_influence = df["log_influence"].median()
    C = 10  # Shrinkage constant

    def bayesian_shrinkage(group, global_median, C):
        return (group.sum() + C * global_median) / (len(group) + C)

    adjusted_citations = df.groupby("topic")["log_citation"].apply(
        lambda x: bayesian_shrinkage(x, global_median_citation, C))
    adjusted_influence = df.groupby("topic")["log_influence"].apply(
        lambda x: bayesian_shrinkage(x, global_median_influence, C))

    # Merge adjusted metrics back into the dataframe
    df = df.merge(adjusted_citations.rename("adjusted_citation"), on="topic")
    df = df.merge(adjusted_influence.rename("adjusted_influence"), on="topic")

    # Global percentiles used as classification thresholds
    citation_25th = df["adjusted_citation"].quantile(0.25)
    citation_75th = df["adjusted_citation"].quantile(0.75)
    influence_25th = df["adjusted_influence"].quantile(0.25)
    influence_75th = df["adjusted_influence"].quantile(0.75)

    # Theme classification with distinct emojis
    def classify_theme(row):
        if row["adjusted_citation"] >= citation_75th and row["adjusted_influence"] >= influence_75th:
            return "🔥 Hot Topic"
        elif row["adjusted_citation"] <= citation_25th and row["adjusted_influence"] >= influence_75th:
            return "💎 Gap Opportunity"
        elif row["adjusted_citation"] >= citation_75th and row["adjusted_influence"] <= influence_25th:
            return "⚠️ Risky Theme"
        else:
            return "🔄 Neutral"

    df["theme"] = df.apply(classify_theme, axis=1)

    # Build the cluster scatter figure
    fig = go.Figure()
    # Subtle grid lines for reference
    fig.update_xaxes(
        showgrid=True,
        gridwidth=0.1,
        gridcolor='rgba(255, 255, 255, 0.05)',
        zeroline=False
    )
    fig.update_yaxes(
        showgrid=True,
        gridwidth=0.1,
        gridcolor='rgba(255, 255, 255, 0.05)',
        zeroline=False
    )

    for topic in unique_topics:
        topic_data = df[df["topic"] == topic]

        # Cluster center
        center_x = topic_data["x"].mean()
        center_y = topic_data["y"].mean()

        # Cluster label
        full_topic_formatted = (topic_data['topic_label'].iloc[0]
                                if 'topic_label' in topic_data.columns else f"Cluster {topic}")

        # Subtle glow effect: a larger, translucent circle behind the marker
        fig.add_trace(
            go.Scatter(
                x=[center_x],
                y=[center_y],
                mode="markers",
                marker=dict(
                    color=color_map[topic],
                    size=topic_data["marker_size"].iloc[0] * 1.2,  # Slightly larger for the glow
                    opacity=0.3,
                    line=dict(width=0),
                    symbol="circle",
                ),
                showlegend=False,
                hoverinfo="none",
            )
        )

        # Main cluster circle with enhanced styling
        fig.add_trace(
            go.Scatter(
                x=[center_x],
                y=[center_y],
                mode="markers+text",
                marker=dict(
                    color=color_map[topic],
                    size=topic_data["marker_size"].iloc[0],
                    opacity=0.85,
                    line=dict(width=2, color="white"),
                    symbol="circle",
                ),
                text=[f"{topic}"],
                textposition="middle center",
                textfont=dict(
                    family="Arial Black",
                    size=16,
                    color="white"
                ),
                name=f"{topic}",
                hovertemplate=(
                    "Cluster ID: %{text}<br>" +
                    "Name:<br>" + full_topic_formatted + "<br>" +
                    "Papers: " + str(topic_data["paper_count"].iloc[0]) + "<br>" +
                    "Popularity: " +
                    ("🔼 High" if topic_data["adjusted_citation"].iloc[0] >= citation_75th else "🔽 Low") +
                    f" (Adjusted Citation: {topic_data['adjusted_citation'].iloc[0]:.2f})<br>" +
                    "Impactfulness: " +
                    ("🔼 High" if topic_data["adjusted_influence"].iloc[0] >= influence_75th else "🔽 Low") +
                    f" (Adjusted Influence: {topic_data['adjusted_influence'].iloc[0]:.2f})<br>" +
                    "Theme: " + topic_data["theme"].iloc[0] + "<extra></extra>"
                ),
                customdata=[[topic]],
            )
        )

    # Aesthetic background with a gradient
    fig.update_layout(
        shapes=[
            # Gradient background
            dict(
                type="rect",
                xref="paper", yref="paper",
                x0=0, y0=0, x1=1, y1=1,
                fillcolor="rgba(0, 0, 40, 0.95)",
                line_width=0,
                layer="below"
            ),
            # Subtle radial gradient effect
            dict(
                type="circle",
                xref="paper", yref="paper",
                x0=0.3, y0=0.3, x1=0.7, y1=0.7,
                fillcolor="rgba(50, 50, 120, 0.2)",
                line_width=0,
                layer="below"
            )
        ],
        template="plotly_dark",
        title={
            'text': f"{TitleName.title()}",
            'y': 0.97,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(
                family="Arial Black",
                size=28,
                color="white",
            ),
            'xref': 'paper',
            'yref': 'paper',
        },
        margin=dict(l=40, r=40, b=150, t=100),
        hovermode="closest",
        xaxis=dict(showticklabels=False),
        yaxis=dict(showticklabels=False),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        dragmode="pan",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.15,
            xanchor="center",
            x=0.5,
            bgcolor="rgba(30,30,60,0.5)",
            bordercolor="rgba(255,255,255,0.2)",
            borderwidth=1
        ),
    )

    # Reset-view button
    fig.update_layout(
        updatemenus=[
            dict(
                type="buttons",
                showactive=False,
                buttons=[
                    dict(
                        label="Reset View",
                        method="relayout",
                        args=[{"xaxis.range": None, "yaxis.range": None}]
                    ),
                ],
                x=0.05,
                y=0.05,
                xanchor="left",
                yanchor="bottom",
                bgcolor="rgba(50,50,80,0.7)",
                bordercolor="rgba(255,255,255,0.2)",
            )
        ]
    )

    # App layout with modern design elements
    app.layout = dbc.Container(
        fluid=True,
        style={
            "backgroundColor": "#111122",
            "minHeight": "100vh",
            "height": "100%",
            "width": "100%",
            "backgroundImage": "linear-gradient(135deg, #111122 0%, #15162c 100%)",
            "padding": "20px"
        },
        children=[
            dbc.Row([
                dbc.Col(html.H1(
                    "Trend Analysis Dashboard",
                    style={
                        "textAlign": "center",
                        "color": "white",
                        "marginBottom": "5px",
                        "fontFamily": "Arial Black",
                        "textShadow": "2px 2px 8px rgba(0,0,0,0.7)",
                        "letterSpacing": "2px",
                        "fontSize": "42px",
                        "background": "linear-gradient(135deg, #790091 0%, #565cd5 100%)",
                        "WebkitBackgroundClip": "text",
                        "WebkitTextFillColor": "transparent",
                        "paddingTop": "10px"
                    }
                ), width=10),
                dbc.Col([
                    html.Button(
                        [
                            html.I(className="fas fa-download mr-2"),
                            " Save Dashboard"
                        ],
                        id="download-button",
                        className="btn btn-outline-light",
                        style={
                            "marginTop": "10px",
                            "backgroundColor": "rgba(80, 80, 150, 0.4)",
                            "border": "1px solid rgba(100, 100, 200, 0.5)",
                            "borderRadius": "8px",
                            "padding": "8px 15px",
                            "boxShadow": "0px 4px 8px rgba(0, 0, 0, 0.3)",
                            "transition": "all 0.3s ease",
                            "fontSize": "14px",
                            "fontWeight": "bold"
                        }
                    ),
                    # Download target for the save button
                    dcc.Download(id="download-dashboard")
                ], width=2),
                dbc.Col(html.P(
                    "Interactive visualization of research topics and their relationships",
                    style={
                        "textAlign": "center",
                        "color": "#aaddff",
                        "marginBottom": "15px",
                        "fontStyle": "italic",
                        "fontSize": "16px",
                        "fontWeight": "300",
                        "letterSpacing": "0.5px",
                        "textShadow": "1px 1px 3px rgba(0,0,0,0.5)",
                    }
                ), width=12),
            ]),
            dbc.Row([
                dbc.Col(
                    dbc.Card(
                        dbc.CardBody([
                            dcc.Graph(
                                id="cluster-graph",
                                figure=fig,
                                config={
                                    "scrollZoom": True,
                                    "displayModeBar": True,
                                    "modeBarButtonsToRemove": ["select2d", "lasso2d"]
                                },
                                style={"height": "80vh", "minHeight": "800px"}
                            )
                        ], style={"height": "80vh", "minHeight": "800px"}),
                        style={
                            "backgroundColor": "rgba(20, 20, 40, 0.7)",
                            "borderRadius": "15px",
                            "boxShadow": "0px 10px 30px rgba(0, 0, 0, 0.5)",
                            "border": "1px solid rgba(100, 100, 200, 0.3)",
                            "height": "80vh",
                            "minHeight": "800px"  # Ensure minimum height
                        }
                    ),
                    width=9
                ),
                dbc.Col(
                    dbc.Card(
                        dbc.CardBody([
                            html.H3("Paper List", style={
                                "textAlign": "center",
                                "marginBottom": "15px",
                                "color": "#ffffff",
                                "fontFamily": "Arial",
                                "fontWeight": "bold",
                                "textShadow": "1px 1px 3px rgba(0,0,0,0.3)"
                            }),
                            html.Hr(style={"borderColor": "rgba(100, 100, 200, 0.3)", "margin": "10px 0 20px 0"}),
                            html.Div(
                                id="paper-list",
                                style={
                                    "overflowY": "auto",
                                    "height": "700px",
                                    "padding": "5px"
                                },
                                children=html.Div([
                                    html.Div(
                                        html.I(className="fas fa-mouse-pointer",
                                               style={"marginRight": "10px"}),
                                        style={"textAlign": "center", "fontSize": "24px",
                                               "marginBottom": "10px", "color": "#7f8fa6"}
                                    ),
                                    html.P("Click on a cluster to view its papers",
                                           style={"textAlign": "center", "color": "#7f8fa6"})
                                ])
                            ),
                        ], style={
                            "backgroundColor": "rgba(30, 30, 50, 0.8)",
                            "borderRadius": "15px",
                            "padding": "20px",
                            "height": "100%"
                        }),
                        style={
                            "height": "800px",
                            "boxShadow": "0px 10px 30px rgba(0, 0, 0, 0.5)",
                            "border": "1px solid rgba(100, 100, 200, 0.3)",
                            "borderRadius": "15px"
                        }
                    ),
                    width=3
                ),
            ], style={"marginTop": "20px"}),
            # Footer with the theme legend
            dbc.Row([
                dbc.Col(
                    dbc.Card(
                        dbc.CardBody([
                            html.H5("Theme Legend", style={"textAlign": "center", "marginBottom": "15px"}),
                            dbc.Row([
                                dbc.Col(html.Div([
                                    html.Span("🔥", style={"fontSize": "20px", "marginRight": "10px"}),
                                    "Hot Topic: High citations & high influence"
                                ]), width=3),
                                dbc.Col(html.Div([
                                    html.Span("💎", style={"fontSize": "20px", "marginRight": "10px"}),
                                    "Gap Opportunity: Low citations but high influence"
                                ]), width=3),
                                dbc.Col(html.Div([
                                    html.Span("⚠️", style={"fontSize": "20px", "marginRight": "10px"}),
                                    "Risky Theme: High citations but low influence"
                                ]), width=3),
                                dbc.Col(html.Div([
                                    html.Span("🔄", style={"fontSize": "20px", "marginRight": "10px"}),
                                    "Neutral: Average citations and influence"
                                ]), width=3),
                            ])
                        ]),
                        style={
                            "backgroundColor": "rgba(30, 30, 50, 0.8)",
                            "borderRadius": "15px",
                            "marginTop": "20px",
                            "boxShadow": "0px 5px 15px rgba(0, 0, 0, 0.3)",
                            "border": "1px solid rgba(100, 100, 200, 0.3)"
                        }
                    ),
                    width=12
                ),
            ]),
            dcc.Store(id="stored-figure", data=fig)
        ]
    )
    @app.callback(
        Output("download-dashboard", "data"),
        Input("download-button", "n_clicks"),
        State("cluster-graph", "figure"),
        prevent_initial_call=True
    )
    def download_dashboard(n_clicks, figure):
        if n_clicks is None:
            return None

        # Serialize the current figure as a standalone HTML page
        dashboard_html = pio.to_html(
            figure,
            full_html=True,
            include_plotlyjs='cdn',
            config={'responsive': True}
        )

        # Return the dashboard as an HTML file download
        return dict(
            content=dashboard_html,
            filename="research_dashboard.html",
            type="text/html",
        )

    # Update the paper list when a cluster is clicked
    @app.callback(
        Output("paper-list", "children"),
        [Input("cluster-graph", "clickData")]
    )
    def update_paper_list(clickData):
        if clickData is None:
            return html.Div([
                html.Div(
                    html.I(className="fas fa-mouse-pointer", style={"marginRight": "10px"}),
                    style={"textAlign": "center", "fontSize": "24px",
                           "marginBottom": "10px", "color": "#7f8fa6"}
                ),
                html.P("Click on a cluster to view its papers",
                       style={"textAlign": "center", "color": "#7f8fa6"})
            ])

        # Extract the clicked cluster ID
        try:
            clicked_topic = clickData["points"][0]["customdata"][0]
            # Color and theme for this topic, for styling consistency
            topic_color = color_map[clicked_topic]
            topic_theme = df[df["topic"] == clicked_topic]["theme"].iloc[0]
        except (KeyError, IndexError):
            return html.Div("Error retrieving cluster data.",
                            style={"textAlign": "center", "marginTop": "20px"})

        # Filter papers in the clicked cluster, keeping titles and URLs
        papers_in_cluster = df[df["topic"] == clicked_topic][["title", "url", "paperId"]]
        if papers_in_cluster.empty:
            return html.Div(f"No papers found for Cluster {clicked_topic}.",
                            style={"textAlign": "center", "marginTop": "20px"})

        # Topic label
        topic_label = (df[df["topic"] == clicked_topic]['topic_label'].iloc[0]
                       if 'topic_label' in df.columns else f"Cluster {clicked_topic}")

        # Styled list of clickable paper titles; the hover lift comes from the
        # .paper-card CSS defined in app.index_string below
        paper_list = []
        for i, (_, paper) in enumerate(papers_in_cluster.iterrows()):
            paper_url = paper["url"]
            paper_title = paper["title"]

            paper_list.append(
                dbc.Card(
                    dbc.CardBody([
                        html.A(
                            html.H6(
                                f"{i + 1}. {paper_title}",
                                className="card-title",
                                style={
                                    "fontSize": "14px",
                                    "margin": "5px 0",
                                    "fontWeight": "normal",
                                    "lineHeight": "1.4",
                                    "color": "#aaccff",  # Blue to indicate a clickable link
                                    "cursor": "pointer"
                                }
                            ),
                            href=paper_url,
                            target="_blank",  # Open in a new tab
                            style={"textDecoration": "none"}
                        ),
                    ], style={"padding": "12px"}),
                    style={
                        "marginBottom": "10px",
                        "backgroundColor": "rgba(40, 45, 60, 0.8)",
                        "borderRadius": "8px",
                        "borderLeft": f"4px solid {topic_color}",
                        "boxShadow": "0px 3px 8px rgba(0, 0, 0, 0.2)",
                        "transition": "transform 0.2s",
                    },
                    className="paper-card"
                )
            )

        return html.Div([
            html.Div([
                html.H4(
                    f"Cluster {clicked_topic}",
                    style={
                        "textAlign": "center",
                        "marginBottom": "5px",
                        "color": topic_color,
                        "fontWeight": "bold"
                    }
                ),
                html.H5(
                    topic_label,
                    style={
                        "textAlign": "center",
                        "marginBottom": "5px",
                        "color": "#aaaacc",
                        "fontStyle": "italic",
                        "fontWeight": "normal"
                    }
                ),
                html.Div(
                    topic_theme,
                    style={
                        "textAlign": "center",
                        "marginBottom": "15px",
                        "fontSize": "16px",
                        "fontWeight": "bold"
                    }
                ),
                html.Hr(style={"borderColor": "rgba(100, 100, 200, 0.3)", "margin": "10px 0 20px 0"}),
                html.H5(
                    f"Papers ({len(papers_in_cluster)})",
                    style={
                        "textAlign": "left",
                        "marginBottom": "15px",
                        "color": "#ffffff",
                        "fontWeight": "bold"
                    }
                ),
            ]),
            html.Div(
                paper_list,
                style={"paddingRight": "10px"},
            )
        ])
    # Custom index page: sets the tab title and the hover CSS for paper cards
    app.index_string = '''
<!DOCTYPE html>
<html>
    <head>
        {%metas%}
        <title>Trend Analysis Clusters Dashboard</title>
        {%favicon%}
        {%css%}
        <style>
            .paper-card:hover {
                transform: translateY(-2px);
                box-shadow: 0px 5px 10px rgba(0, 0, 0, 0.3);
            }
        </style>
    </head>
    <body>
        {%app_entry%}
        <footer>
            {%config%}
            {%scripts%}
            {%renderer%}
        </footer>
    </body>
</html>
'''

    return app
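
# Illustrative standalone run (assumed values; in the API flow below the
# dashboard is instead mounted on the FastAPI app's existing Dash instance):
#
#     app = build_dashboard(df, "transformers", "2024")
#     app.run(debug=False, port=8050)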

@router.post("/analyze-trends/")
async def analyze_trends(request: Request, data_request: TrendAnalysisRequest):
    global dash_thread
    TitleName = data_request.topic
    Topic_year = data_request.year

    # Fetch and process data
    df, current_page, total_pages, papers_count, total_papers = await fetch_papers_with_pagination(
        request, data_request.userId, data_request.topic, data_request.year, data_request.page
    )

    if df.empty and total_papers > 0:
        raise HTTPException(
            status_code=404,
            detail=f"No papers found for page {data_request.page + 1}. Valid pages are 1 to {total_pages}.",
        )
    elif df.empty:
        raise HTTPException(
            status_code=404,
            detail=f"No papers found for userId '{data_request.userId}', topic '{data_request.topic}'"
                   + (f", and year '{data_request.year}'" if data_request.year else ""),
        )

    # Perform the trend analysis
    df, topic_labels = perform_trend_analysis(df)
    if df.empty:
        raise HTTPException(status_code=500, detail="Failed to process embeddings for trend analysis")

    # Cluster statistics
    cluster_sizes = df.groupby("topic").size().to_dict()

    # Build the dashboard on the app's existing Dash instance
    from app import get_or_create_dash_app
    dash_app = get_or_create_dash_app()
    updated_dash_app = build_dashboard(df, TitleName, Topic_year if Topic_year else "", existing_app=dash_app)

    # Update the global dash_app in the main app
    from app import dash_app as main_dash_app
    main_dash_app.layout = updated_dash_app.layout

    # Build the complete dashboard URL from the incoming request
    scheme = request.url.scheme  # 'http' or 'https'
    base_url = f"{scheme}://{request.headers['host']}"
    dashboard_url = f"{base_url}/dash"

    # Open the dashboard in a browser tab shortly after responding
    def open_browser():
        webbrowser.open(dashboard_url, new=2)

    browser_thread = threading.Timer(1.5, open_browser)
    browser_thread.daemon = True
    browser_thread.start()

    return {
        "message": f"Trend analysis completed for papers (page {current_page + 1} of {total_pages})",
        "current_page": current_page,
        "total_pages": total_pages,
        "papers_count": papers_count,
        "total_papers": total_papers,
        "cluster_sizes": cluster_sizes,
        "cluster_titles": topic_labels,
        "dashboard_url": dashboard_url,
        "redirect": True  # Flag to tell the client a redirect is expected
    }


# Direct dashboard access: reuses the analyze_trends logic so that browser
# opening works here as well, then redirects to the Dash app
@router.get("/dashboard/{userId}/{topic}/{year}")
@router.get("/dashboard/{userId}/{topic}")
async def get_dashboard(request: Request, userId: str, topic: str, year: str = None):
    data_request = TrendAnalysisRequest(userId=userId, topic=topic, year=year, page=0)

    # Build the complete dashboard URL from the incoming request
    base_url = str(request.base_url)
    dashboard_url = f"{base_url}dash"

    # Open the dashboard in a browser tab shortly after responding
    def open_browser():
        webbrowser.open(dashboard_url, new=2)

    browser_thread = threading.Timer(1.5, open_browser)
    browser_thread.daemon = True
    browser_thread.start()

    # Reuse the analyze_trends logic to create the dashboard
    await analyze_trends(request, data_request)

    # Redirect to the Dash app
    from fastapi.responses import RedirectResponse
    return RedirectResponse(url="/dash")
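
# Example request against this router (illustrative values):
#
#   POST /analyze-trends/
#   {"userId": "u123", "topic": "graph neural networks", "year": "2023", "page": 0}
#
# A successful response carries pagination metadata, per-cluster sizes and
# titles, and a "dashboard_url" pointing at the mounted Dash app.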