Ippo987 commited on
Commit
52897d7
·
verified ·
1 Parent(s): 1d51c2f

Update venuAnalysis.py

Browse files
Files changed (1) hide show
  1. venuAnalysis.py +836 -836
venuAnalysis.py CHANGED
@@ -1,837 +1,837 @@
1
- import pandas as pd
2
- import plotly.express as px
3
- import plotly.graph_objects as go
4
- from dash import Dash, dcc, html, Input, Output, State
5
- import numpy as np
6
- import random
7
- import math
8
- from collections import defaultdict
9
- import colorsys
10
- from fastapi import HTTPException
11
- from pydantic import BaseModel
12
- import threading
13
- import webbrowser
14
- import os
15
- import psutil
16
- import socket
17
- from fastapi import HTTPException, APIRouter, Request
18
- router = APIRouter()
19
-
20
- # Global variables to track dashboard state
21
- dashboard_port = 8050
22
- dashboard_process = None
23
-
24
- # MongoDB connection and data loader function
25
- async def load_data_from_mongodb(userId, topic, year, request:Request):
26
- query = {
27
- "userId": userId,
28
- "topic": topic,
29
- "year": year
30
- }
31
- collection = request.app.state.collection2
32
- document = await collection.find_one(query)
33
- if not document:
34
- raise ValueError(f"No data found for userId={userId}, topic={topic}, year={year}")
35
- # Extract metadata and convert to DataFrame
36
- metadata = document.get("metadata", [])
37
- df = pd.DataFrame(metadata)
38
- df['publication_date'] = pd.to_datetime(df['publication_date'])
39
- return df
40
-
41
- # Common functions (unchanged)
42
- def filter_by_date_range(dataframe, start_idx, end_idx):
43
- start_date = date_range[start_idx]
44
- end_date = date_range[end_idx]
45
- return dataframe[(dataframe['publication_date'] >= start_date) &
46
- (dataframe['publication_date'] <= end_date)]
47
-
48
- def generate_vibrant_colors(n):
49
- base_colors = []
50
- for i in range(n):
51
- hue = (i / n) % 1.0
52
- saturation = random.uniform(0.7, 0.9)
53
- value = random.uniform(0.7, 0.9)
54
- r, g, b = colorsys.hsv_to_rgb(hue, saturation, value)
55
- vibrant_color = '#{:02x}{:02x}{:02x}'.format(
56
- int(r * 255),
57
- int(g * 255),
58
- int(b * 255)
59
- )
60
- end_color_r = min(255, int(r * 255 * 1.1))
61
- end_color_g = min(255, int(g * 255 * 1.1))
62
- end_color_b = min(255, int(b * 255 * 1.1))
63
- gradient_end = '#{:02x}{:02x}{:02x}'.format(end_color_r, end_color_g, end_color_b)
64
- base_colors.append({
65
- 'start': vibrant_color,
66
- 'end': gradient_end
67
- })
68
- extended_colors = base_colors * math.ceil(n/10)
69
- final_colors = []
70
- for i in range(n):
71
- color = extended_colors[i]
72
- jitter = random.uniform(0.9, 1.1)
73
- def jitter_color(hex_color):
74
- r, g, b = [min(255, max(0, int(int(hex_color[j:j+2], 16) * jitter))) for j in (1, 3, 5)]
75
- return f'rgba({r}, {g}, {b}, 0.9)'
76
- final_colors.append({
77
- 'start': jitter_color(color['start']),
78
- 'end': jitter_color(color['end']).replace('0.9', '0.8')
79
- })
80
- return final_colors
81
-
82
- # Knowledge map creator function (unchanged)
83
- def create_knowledge_map(filtered_df, view_type='host'):
84
- color_palette = {
85
- 'background': '#1E1E1E', # Dark background (almost black)
86
- 'card_bg': '#1A2238', # Bluish-black for cards (from your image)
87
- 'accent1': '#FF6A3D', # Orange for headings (keeping from original)
88
- 'accent2': '#4ECCA3', # Keeping teal for secondary elements
89
- 'accent3': '#9D84B7', # Keeping lavender for tertiary elements
90
- 'text_light': '#FFFFFF', # White text
91
- 'text_dark': '#E0E0E0', # Light grey text for dark backgrounds
92
- }
93
-
94
- if view_type == 'host':
95
- group_col = 'host_organization_name'
96
- id_col = 'host_organization_id'
97
- title = "Host Organization Clusters"
98
- else:
99
- group_col = 'venue'
100
- id_col = 'venue_id'
101
- title = "Publication Venue Clusters"
102
- summary = filtered_df.groupby(group_col).agg(
103
- paper_count=('id', 'count'),
104
- is_oa=('is_oa', 'mean'),
105
- oa_status=('oa_status', lambda x: x.mode()[0] if not x.mode().empty else None),
106
- entity_id=(id_col, 'first')
107
- ).reset_index()
108
- paper_count_groups = defaultdict(list)
109
- for _, row in summary.iterrows():
110
- paper_count_groups[row['paper_count']].append(row)
111
- knowledge_map_fig = go.Figure()
112
- sorted_counts = sorted(paper_count_groups.keys(), reverse=True)
113
- vibrant_colors = generate_vibrant_colors(len(sorted_counts))
114
- golden_angle = np.pi * (3 - np.sqrt(5))
115
- spiral_coef = 150
116
- cluster_metadata = {}
117
- max_x, max_y = 500, 500
118
- for i, count in enumerate(sorted_counts):
119
- radius = np.sqrt(i) * spiral_coef
120
- theta = golden_angle * i
121
- cluster_x, cluster_y = radius * np.cos(theta), radius * np.sin(theta)
122
- label_offset_angle = theta + np.pi/4
123
- label_offset_distance = 80 + 4 * np.sqrt(len(paper_count_groups[count]))
124
- label_x = cluster_x + label_offset_distance * np.cos(label_offset_angle)
125
- label_y = cluster_y + label_offset_distance * np.sin(label_offset_angle)
126
- cluster_metadata[count] = {
127
- 'center_x': cluster_x,
128
- 'center_y': cluster_y,
129
- 'entities': paper_count_groups[count],
130
- 'color': vibrant_colors[i]
131
- }
132
- entities = paper_count_groups[count]
133
- num_entities = len(entities)
134
- cluster_size = min(200, max(80, 40 + 8 * np.sqrt(num_entities)))
135
- color = vibrant_colors[i]
136
- knowledge_map_fig.add_shape(
137
- type="circle",
138
- x0=cluster_x - cluster_size/2, y0=cluster_y - cluster_size/2,
139
- x1=cluster_x + cluster_size/2, y1=cluster_y + cluster_size/2,
140
- fillcolor=color['end'].replace("0.8", "0.15"),
141
- line=dict(color=color['start'], width=1.5),
142
- opacity=0.7
143
- )
144
- knowledge_map_fig.add_trace(go.Scatter(
145
- x=[cluster_x], y=[cluster_y],
146
- mode='markers',
147
- marker=dict(size=cluster_size, color=color['start'], opacity=0.3),
148
- customdata=[[count, "cluster"]],
149
- hoverinfo='skip'
150
- ))
151
- knowledge_map_fig.add_trace(go.Scatter(
152
- x=[cluster_x, label_x], y=[cluster_y, label_y],
153
- mode='lines',
154
- line=dict(color=color['start'], width=1, dash='dot'),
155
- hoverinfo='skip'
156
- ))
157
- knowledge_map_fig.add_annotation(
158
- x=label_x, y=label_y,
159
- text=f"{count} papers<br>{num_entities} {'orgs' if view_type == 'host' else 'venues'}",
160
- showarrow=False,
161
- font=dict(size=11, color='white'),
162
- bgcolor=color['start'],
163
- bordercolor='white',
164
- borderwidth=1,
165
- opacity=0.9
166
- )
167
- entities_sorted = sorted(entities, key=lambda x: x[group_col])
168
- inner_spiral_coef = 0.4
169
- for j, entity_data in enumerate(entities_sorted):
170
- spiral_radius = np.sqrt(j) * cluster_size * inner_spiral_coef / np.sqrt(num_entities + 1)
171
- spiral_angle = golden_angle * j
172
- jitter_radius = random.uniform(0.9, 1.1) * spiral_radius
173
- jitter_angle = spiral_angle + random.uniform(-0.1, 0.1)
174
- entity_x = cluster_x + jitter_radius * np.cos(jitter_angle)
175
- entity_y = cluster_y + jitter_radius * np.sin(jitter_angle)
176
- node_size = min(18, max(8, np.sqrt(entity_data['paper_count']) * 1.5))
177
- knowledge_map_fig.add_trace(go.Scatter(
178
- x=[entity_x], y=[entity_y],
179
- mode='markers',
180
- marker=dict(
181
- size=node_size,
182
- color=color['start'],
183
- line=dict(color='rgba(255, 255, 255, 0.9)', width=1.5)
184
- ),
185
- customdata=[[
186
- entity_data[group_col],
187
- entity_data['paper_count'],
188
- entity_data['is_oa'],
189
- entity_data['entity_id'],
190
- count,
191
- "entity"
192
- ]],
193
- hovertemplate=(
194
- f"<b>{entity_data[group_col]}</b><br>"
195
- f"Papers: {entity_data['paper_count']}<br>"
196
- f"Open Access: {entity_data['is_oa']:.1%}<extra></extra>"
197
- )
198
- ))
199
- max_x = max([abs(cluster['center_x']) for cluster in cluster_metadata.values()]) + 150 if cluster_metadata else 500
200
- max_y = max([abs(cluster['center_y']) for cluster in cluster_metadata.values()]) + 150 if cluster_metadata else 500
201
- # Update knowledge_map_fig layout
202
- knowledge_map_fig.update_layout(
203
- title=dict(
204
- text=title,
205
- font=dict(size=22, family='"Poppins", sans-serif', color=color_palette['accent1']) # Orange title
206
- ),
207
- plot_bgcolor='rgba(26, 34, 56, 1)', # Bluish-black background
208
- paper_bgcolor='rgba(26, 34, 56, 0.7)',
209
- xaxis=dict(range=[-max(700, max_x), max(700, max_x)], showticklabels=False, showgrid=False),
210
- yaxis=dict(range=[-max(500, max_y), max(500, max_y)], showticklabels=False, showgrid=False),
211
- margin=dict(l=10, r=10, t=60, b=10),
212
- height=700,
213
- hovermode='closest',
214
- showlegend=False,
215
- font=dict(family='"Poppins", sans-serif', color=color_palette['text_light']), # Light text
216
- )
217
- return knowledge_map_fig, cluster_metadata
218
-
219
- # Other chart functions (unchanged)
220
- def create_oa_pie_fig(filtered_df):
221
- color_palette = {
222
- 'background': '#1A2238', # Dark blue background
223
- 'card_bg': '#1A2238', # Changed to match the other chart
224
- 'accent1': '#FF6A3D', # Vibrant orange for highlights
225
- 'accent2': '#4ECCA3', # Teal for secondary elements
226
- 'accent3': '#9D84B7', # Lavender for tertiary elements
227
- 'text_light': '#FFFFFF', # White text
228
- 'text_dark': '#FFFFFF', # Changed to white for better contrast
229
- }
230
-
231
- fig = px.pie(
232
- filtered_df, names='is_oa', title="Overall Open Access Status",
233
- labels={True: "Open Access", False: "Not Open Access"},
234
- color_discrete_sequence=[color_palette['accent2'], color_palette['accent1']]
235
- )
236
-
237
- fig.update_traces(
238
- textinfo='label+percent',
239
- textfont=dict(size=14, family='"Poppins", sans-serif'),
240
- marker=dict(line=dict(color='#1A2238', width=2)) # Match background color
241
- )
242
-
243
- fig.update_layout(
244
- title=dict(
245
- text="Overall Open Access Status",
246
- font=dict(size=18, family='"Poppins", sans-serif', color=color_palette['accent1']) # Orange title
247
- ),
248
- font=dict(family='"Poppins", sans-serif', color=color_palette['text_light']),
249
- paper_bgcolor=color_palette['background'], # Dark background
250
- plot_bgcolor=color_palette['background'], # Dark background
251
- margin=dict(t=50, b=20, l=20, r=20),
252
- legend=dict(
253
- orientation="h",
254
- yanchor="bottom",
255
- y=-0.2,
256
- xanchor="center",
257
- x=0.5,
258
- font=dict(size=12, color=color_palette['text_light'])
259
- )
260
- )
261
-
262
- return fig
263
- def create_oa_status_pie_fig(filtered_df):
264
- custom_colors = [
265
- "#9D84B7",
266
- '#4DADFF',
267
- '#FFD166',
268
- '#06D6A0',
269
- '#EF476F'
270
- ]
271
- fig = px.pie(
272
- filtered_df,
273
- names='oa_status',
274
- title="Open Access Status Distribution",
275
- color_discrete_sequence=custom_colors
276
- )
277
- fig.update_traces(
278
- textinfo='label+percent',
279
- insidetextorientation='radial',
280
- textfont=dict(size=14, family='"Poppins", sans-serif'),
281
- marker=dict(line=dict(color='#FFFFFF', width=2))
282
- )
283
- fig.update_layout(
284
- title=dict(
285
- text="Open Access Status Distribution",
286
- font=dict(size=18, family='"Poppins", sans-serif', color="#FF6A3D")
287
- ),
288
- font=dict(family='"Poppins", sans-serif', color='#FFFFFF'),
289
- paper_bgcolor='#1A2238', # Bluish-black background
290
- plot_bgcolor='#1A2238',
291
- margin=dict(t=50, b=20, l=20, r=20),
292
- legend=dict(
293
- orientation="h",
294
- yanchor="bottom",
295
- y=-0.2,
296
- xanchor="center",
297
- x=0.5,
298
- font=dict(size=12, color='#FFFFFF')
299
- )
300
- )
301
- return fig
302
- def create_type_bar_fig(filtered_df):
303
- type_counts = filtered_df['type'].value_counts()
304
- vibrant_colors = [
305
- '#4361EE', '#3A0CA3', '#4CC9F0',
306
- '#F72585', '#7209B7', '#B5179E',
307
- '#480CA8', '#560BAD', '#F77F00'
308
- ]
309
- fig = px.bar(
310
- type_counts,
311
- title="Publication Types",
312
- labels={'value': 'Count', 'index': 'Type'},
313
- color=type_counts.index,
314
- color_discrete_sequence=vibrant_colors[:len(type_counts)]
315
- )
316
- fig.update_layout(
317
- title=dict(
318
- text="Publication Types",
319
- font=dict(size=20, family='"Poppins", sans-serif', color="#FF6A3D") # Larger font size
320
- ),
321
- xaxis_title="Type",
322
- yaxis_title="Count",
323
- font=dict(family='"Poppins", sans-serif', color="#FFFFFF", size=14), # Increased font size
324
- paper_bgcolor='#1A2238', # Consistent dark background
325
- plot_bgcolor='#1A2238', # Consistent dark background
326
- margin=dict(t=70, b=60, l=60, r=40), # Increased margins
327
- xaxis=dict(
328
- tickfont=dict(size=14, color="#FFFFFF"), # Increased tick font size
329
- tickangle=-45,
330
- gridcolor='rgba(255, 255, 255, 0.1)' # Lighter grid lines
331
- ),
332
- yaxis=dict(
333
- tickfont=dict(size=14, color="#FFFFFF"), # Increased tick font size
334
- gridcolor='rgba(255, 255, 255, 0.1)' # Lighter grid lines
335
- ),
336
- bargap=0.3, # Increased bar gap
337
- )
338
- fig.update_traces(
339
- marker_line_width=1,
340
- marker_line_color='rgba(0, 0, 0, 0.5)',
341
- opacity=0.9,
342
- hovertemplate='%{y} publications<extra></extra>',
343
- texttemplate='%{y}', # Add text labels
344
- textposition='outside', # Position labels outside bars
345
- textfont=dict(size=14, color='white') # Text label formatting
346
- )
347
- return fig
348
-
349
- # Function to check if port is in use
350
- def is_port_in_use(port):
351
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
352
- return s.connect_ex(('localhost', port)) == 0
353
-
354
- # Function to find a free port
355
- def find_free_port(start_port=8050):
356
- port = start_port
357
- while is_port_in_use(port):
358
- port += 1
359
- return port
360
-
361
- # Function to shutdown any existing dashboard
362
- def shutdown_existing_dashboard():
363
- global dashboard_process
364
-
365
- # First, check if our port is in use
366
- if is_port_in_use(dashboard_port):
367
- try:
368
- # Kill processes using the port
369
- for proc in psutil.process_iter(['pid', 'name', 'connections']):
370
- try:
371
- for conn in proc.connections():
372
- if conn.laddr.port == dashboard_port:
373
- print(f"Terminating process {proc.pid} using port {dashboard_port}")
374
- proc.terminate()
375
- proc.wait(timeout=3)
376
- except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
377
- pass
378
- except Exception as e:
379
- print(f"Error freeing port {dashboard_port}: {e}")
380
-
381
- # If we're tracking a dashboard process, try to terminate it
382
- if dashboard_process is not None:
383
- try:
384
- # Kill the process if it's still running
385
- if dashboard_process.is_alive():
386
- parent = psutil.Process(os.getpid())
387
- children = parent.children(recursive=True)
388
- for process in children:
389
- try:
390
- process.terminate()
391
- except:
392
- pass
393
- dashboard_process = None
394
- except Exception as e:
395
- print(f"Error terminating dashboard process: {e}")
396
- dashboard_process = None # Reset the reference anyway
397
-
398
- # Pydantic model for request validation
399
- class DashboardRequest(BaseModel):
400
- userId: str
401
- topic: str
402
- year: int
403
-
404
- @router.post("/load_and_display_dashboard/")
405
- async def load_and_display_dashboard(request: DashboardRequest, req:Request):
406
- global dashboard_process, dashboard_port
407
-
408
- # Make sure any existing dashboard is shut down
409
- shutdown_existing_dashboard()
410
-
411
- # Find a free port
412
- dashboard_port = find_free_port()
413
-
414
- try:
415
- # Load data from MongoDB
416
- df = await load_data_from_mongodb(request.userId, request.topic, request.year, req)
417
-
418
- # Get date range for the slider
419
- global min_date, max_date, date_range, date_marks
420
- min_date = df['publication_date'].min()
421
- max_date = df['publication_date'].max()
422
- date_range = pd.date_range(start=min_date, end=max_date, freq='MS')
423
- date_marks = {i: date.strftime('%b %Y') for i, date in enumerate(date_range)}
424
-
425
- # Function to create and run the dashboard
426
- def create_and_run_dashboard():
427
- # Create a new app instance
428
- app = Dash(__name__, suppress_callback_exceptions=True)
429
- app.cluster_metadata = {}
430
- color_palette = {
431
- 'background': '#1A2238', # Dark blue background
432
- 'card_bg': '#F8F8FF', # Off-white for cards
433
- 'accent1': '#FF6A3D', # Vibrant orange for highlights
434
- 'accent2': '#4ECCA3', # Teal for secondary elements
435
- 'accent3': '#9D84B7', # Lavender for tertiary elements
436
- 'text_light': '#FFFFFF', # White text
437
- 'text_dark': '#2D3748', # Dark gray text
438
- }
439
-
440
- # Define modern styling for containers
441
- container_style = {
442
- 'padding': '5px',
443
- 'backgroundColor': color_palette['text_dark'],
444
- 'borderRadius': '12px',
445
- 'boxShadow': '0 4px 12px rgba(0, 0, 0, 0.15)',
446
- 'marginBottom': '25px',
447
- 'border': f'1px solid rgba(255, 255, 255, 0.2)',
448
-
449
- }
450
-
451
- hidden_style = {**container_style, 'display': 'none'}
452
- visible_style = {**container_style}
453
-
454
- # Create a modern, attractive layout
455
- app.layout = html.Div([
456
- # Header section with gradient background
457
- html.Div([
458
- html.H1(request.topic.capitalize() + " Analytics Dashboard", style={
459
- 'textAlign': 'center',
460
- 'marginBottom': '10px',
461
- 'color': color_palette['accent1'],
462
- 'fontSize': '2.5rem',
463
- 'fontWeight': '700',
464
- 'letterSpacing': '0.5px',
465
- }),
466
- html.Div([
467
- html.P("Research Publication Analysis & Knowledge Mapping", style={
468
- 'textAlign': 'center',
469
- 'color': color_palette['text_light'],
470
- 'opacity': '0.8',
471
- 'fontSize': '1.2rem',
472
- 'marginTop': '0',
473
- })
474
- ])
475
- ], style={
476
- 'background': f'linear-gradient(135deg, {color_palette["background"]}, #364156)',
477
- 'padding': '30px 20px',
478
- 'borderRadius': '12px',
479
- 'marginBottom': '25px',
480
- 'boxShadow': '0 4px 20px rgba(0, 0, 0, 0.2)',
481
- }),
482
-
483
- # Controls section
484
- html.Div([
485
- html.Div([
486
- html.Button(
487
- id='view-toggle',
488
- children='Switch to Venue View',
489
- style={
490
- 'padding': '12px 20px',
491
- 'fontSize': '1rem',
492
- 'borderRadius': '8px',
493
- 'border': 'none',
494
- 'backgroundColor': color_palette['accent1'],
495
- 'color': 'white',
496
- 'cursor': 'pointer',
497
- 'boxShadow': '0 2px 5px rgba(0, 0, 0, 0.1)',
498
- 'transition': 'all 0.3s ease',
499
- 'marginRight': '20px',
500
- 'fontWeight': '500',
501
- }
502
- ),
503
- html.H3("Filter by Publication Date", style={
504
- 'marginBottom': '15px',
505
- 'color': color_palette['text_dark'],
506
- 'fontSize': '1.3rem',
507
- 'fontWeight': '600',
508
- }),
509
- ], style={'display': 'flex', 'alignItems': 'center', 'marginBottom': '15px'}),
510
-
511
- dcc.RangeSlider(
512
- id='date-slider',
513
- min=0,
514
- max=len(date_range) - 1,
515
- value=[0, len(date_range) - 1],
516
- marks=date_marks if len(date_marks) <= 12 else {
517
- i: date_marks[i] for i in range(0, len(date_range), max(1, len(date_range) // 12))
518
- },
519
- step=1,
520
- tooltip={"placement": "bottom", "always_visible": True},
521
- updatemode='mouseup'
522
- ),
523
- html.Div(id='date-range-display', style={
524
- 'textAlign': 'center',
525
- 'marginTop': '12px',
526
- 'fontSize': '1.1rem',
527
- 'fontWeight': '500',
528
- 'color': color_palette['accent1'],
529
- })
530
- ], style={**container_style, 'marginBottom': '25px'}),
531
-
532
- # Knowledge map - main visualization
533
- html.Div([
534
- dcc.Graph(
535
- id='knowledge-map',
536
- style={'width': '100%', 'height': '700px'},
537
- config={'scrollZoom': True, 'displayModeBar': True, 'responsive': True}
538
- )
539
- ], style={
540
- **container_style,
541
- 'height': '750px',
542
- 'marginBottom': '25px',
543
- 'background': f'linear-gradient(to bottom right, {color_palette["card_bg"]}, #F0F0F8)',
544
- }),
545
-
546
- # Details container - appears when clicking elements
547
- html.Div([
548
- html.H3(id='details-title', style={
549
- 'marginBottom': '15px',
550
- 'color': color_palette['accent1'],
551
- 'fontSize': '1.4rem',
552
- 'fontWeight': '600',
553
- }),
554
- html.Div(id='details-content', style={
555
- 'maxHeight': '350px',
556
- 'overflowY': 'auto',
557
- 'padding': '10px',
558
- 'borderRadius': '8px',
559
- 'backgroundColor': 'rgba(255, 255, 255, 0.7)',
560
- })
561
- ], id='details-container', style=hidden_style),
562
-
563
- # Charts in flex container
564
- html.Div([
565
- html.Div([
566
- dcc.Graph(
567
- id='oa-pie-chart',
568
- style={'width': '100%', 'height': '350px'},
569
- config={'displayModeBar': False, 'responsive': True}
570
- )
571
- ], style={
572
- 'flex': 1,
573
- **container_style,
574
- 'margin': '0 10px',
575
- 'height': '400px',
576
- 'transition': 'transform 0.3s ease',
577
- ':hover': {'transform': 'translateY(-5px)'},
578
- }),
579
- html.Div([
580
- dcc.Graph(
581
- id='oa-status-pie-chart',
582
- style={'width': '100%', 'height': '350px'},
583
- config={'displayModeBar': False, 'responsive': True}
584
- )
585
- ], style={
586
- 'flex': 1,
587
- **container_style,
588
- 'margin': '0 10px',
589
- 'height': '400px',
590
- 'transition': 'transform 0.3s ease',
591
- ':hover': {'transform': 'translateY(-5px)'},
592
- })
593
- ], style={'display': 'flex', 'marginBottom': '25px', 'height': '420px'}),
594
-
595
- # Bar chart container
596
- # Increase bar chart height and improve visibility
597
- html.Div([
598
- dcc.Graph(
599
- id='type-bar-chart',
600
- style={'width': '100%', 'height': '50vh'}, # Reduced from 60vh
601
- config={'displayModeBar': False, 'responsive': True}
602
- )
603
- ], style={
604
- **container_style,
605
- 'height': '500px', # Decreased from 650px
606
- 'background': 'rgba(26, 34, 56, 1)',
607
- 'marginBottom': '10px', # Added smaller bottom margin
608
- }),
609
- # Store components for state
610
- dcc.Store(id='filtered-df-info'),
611
- dcc.Store(id='current-view', data='host'),
612
- html.Div(id='load-trigger', children='trigger-initial-load', style={'display': 'none'})
613
- ], style={
614
- 'fontFamily': '"Poppins", "Segoe UI", Arial, sans-serif',
615
- 'backgroundColor': '#121212', # Dark background
616
- 'backgroundImage': 'none', # Remove gradient
617
- 'padding': '30px',
618
- 'maxWidth': '1800px',
619
- 'margin': '0 auto',
620
- 'minHeight': '100vh',
621
- 'color': color_palette['text_light'],
622
- 'paddingBottom': '10px',
623
- })
624
-
625
-
626
-
627
- @app.callback(
628
- [Output('current-view', 'data'),
629
- Output('view-toggle', 'children')],
630
- [Input('view-toggle', 'n_clicks')],
631
- [State('current-view', 'data')]
632
- )
633
- def toggle_view(n_clicks, current_view):
634
- if not n_clicks:
635
- return current_view, 'Switch to Venue View' if current_view == 'host' else 'Switch to Host View'
636
- new_view = 'venue' if current_view == 'host' else 'host'
637
- new_button_text = 'Switch to Host View' if new_view == 'venue' else 'Switch to Venue View'
638
- return new_view, new_button_text
639
-
640
- @app.callback(
641
- Output('date-range-display', 'children'),
642
- [Input('date-slider', 'value')]
643
- )
644
- def update_date_range_display(date_range_indices):
645
- start_date = date_range[date_range_indices[0]]
646
- end_date = date_range[date_range_indices[1]]
647
- return f"Selected period: {start_date.strftime('%b %Y')} to {end_date.strftime('%b %Y')}"
648
-
649
- @app.callback(
650
- [Output('knowledge-map', 'figure'),
651
- Output('oa-pie-chart', 'figure'),
652
- Output('oa-status-pie-chart', 'figure'),
653
- Output('type-bar-chart', 'figure'),
654
- Output('filtered-df-info', 'data'),
655
- Output('details-container', 'style')],
656
- [Input('date-slider', 'value'),
657
- Input('current-view', 'data'),
658
- Input('load-trigger', 'children')] # Added trigger
659
- )
660
- def update_visualizations(date_range_indices, current_view, _):
661
- filtered_df = filter_by_date_range(df, date_range_indices[0], date_range_indices[1])
662
- knowledge_map_fig, cluster_metadata = create_knowledge_map(filtered_df, current_view)
663
- app.cluster_metadata = cluster_metadata
664
- filtered_info = {
665
- 'start_idx': date_range_indices[0],
666
- 'end_idx': date_range_indices[1],
667
- 'start_date': date_range[date_range_indices[0]].strftime('%Y-%m-%d'),
668
- 'end_date': date_range[date_range_indices[1]].strftime('%Y-%m-%d'),
669
- 'record_count': len(filtered_df),
670
- 'view_type': current_view
671
- }
672
- return (
673
- knowledge_map_fig,
674
- create_oa_pie_fig(filtered_df),
675
- create_oa_status_pie_fig(filtered_df),
676
- create_type_bar_fig(filtered_df),
677
- filtered_info,
678
- hidden_style
679
- )
680
-
681
- @app.callback(
682
- [Output('details-container', 'style', allow_duplicate=True),
683
- Output('details-title', 'children'),
684
- Output('details-content', 'children')],
685
- [Input('knowledge-map', 'clickData')],
686
- [State('filtered-df-info', 'data')],
687
- prevent_initial_call=True
688
- )
689
- def display_details(clickData, filtered_info):
690
- if not clickData or not filtered_info:
691
- return hidden_style, "", []
692
- customdata = clickData['points'][0]['customdata']
693
- view_type = filtered_info['view_type']
694
- entity_type = "Organization" if view_type == 'host' else "Venue"
695
- if len(customdata) >= 2 and customdata[-1] == "cluster":
696
- count = customdata[0]
697
- if count not in app.cluster_metadata:
698
- return hidden_style, "", []
699
- entities = app.cluster_metadata[count]['entities']
700
- color = app.cluster_metadata[count]['color']['start']
701
- table_header = [
702
- html.Thead(html.Tr([
703
- html.Th(f"{entity_type} Name", style={'padding': '8px'}),
704
- html.Th(f"{entity_type} ID", style={'padding': '8px'}),
705
- html.Th("Papers", style={'padding': '8px', 'textAlign': 'center'}),
706
- html.Th("Open Access %", style={'padding': '8px', 'textAlign': 'center'})
707
- ], style={'backgroundColor': color_palette['accent1'], 'color': 'white'}))
708
- ]
709
-
710
- # Update row styles
711
- row_style = {'backgroundColor': '#232D42'} if i % 2 == 0 else {'backgroundColor': '#1A2238'}
712
- rows = []
713
- for i, entity in enumerate(sorted(entities, key=lambda x: x['paper_count'], reverse=True)):
714
- row_style = {'backgroundColor': '#f9f9f9'} if i % 2 == 0 else {'backgroundColor': 'white'}
715
- entity_name_link = html.A(
716
- entity[f"{view_type}_organization_name" if view_type == 'host' else "venue"],
717
- href=entity['entity_id'],
718
- target="_blank",
719
- style={'color': color, 'textDecoration': 'underline'}
720
- )
721
- entity_id_link = html.A(
722
- entity['entity_id'].split('/')[-1],
723
- href=entity['entity_id'],
724
- target="_blank",
725
- style={'color': color, 'textDecoration': 'underline'}
726
- )
727
- rows.append(html.Tr([
728
- html.Td(entity_name_link, style={'padding': '8px'}),
729
- html.Td(entity_id_link, style={'padding': '8px'}),
730
- html.Td(entity['paper_count'], style={'padding': '8px', 'textAlign': 'center'}),
731
- html.Td(f"{entity['is_oa']:.1%}", style={'padding': '8px', 'textAlign': 'center'})
732
- ], style=row_style))
733
- table = html.Table(table_header + [html.Tbody(rows)], style={
734
- 'width': '100%',
735
- 'borderCollapse': 'collapse',
736
- 'boxShadow': '0 1px 3px rgba(0,0,0,0.1)'
737
- })
738
- return (
739
- visible_style,
740
- f"{entity_type}s with {count} papers",
741
- [html.P(f"Showing {len(entities)} {entity_type.lower()}s during selected period"), table]
742
- )
743
- elif len(customdata) >= 6 and customdata[-1] == "entity":
744
- entity_name = customdata[0]
745
- entity_id = customdata[3]
746
- cluster_count = customdata[4]
747
- color = app.cluster_metadata[cluster_count]['color']['start']
748
- if view_type == 'host':
749
- entity_papers = df[df['host_organization_name'] == entity_name].copy()
750
- else:
751
- entity_papers = df[df['venue'] == entity_name].copy()
752
- entity_papers = entity_papers[
753
- (entity_papers['publication_date'] >= pd.to_datetime(filtered_info['start_date'])) &
754
- (entity_papers['publication_date'] <= pd.to_datetime(filtered_info['end_date']))
755
- ]
756
- entity_name_link = html.A(
757
- entity_name,
758
- href=entity_id,
759
- target="_blank",
760
- style={'color': color, 'textDecoration': 'underline', 'fontSize': '1.2em'}
761
- )
762
- entity_id_link = html.A(
763
- entity_id.split('/')[-1],
764
- href=entity_id,
765
- target="_blank",
766
- style={'color': color, 'textDecoration': 'underline'}
767
- )
768
- header = [
769
- html.Div([
770
- html.Span("Name: ", style={'fontWeight': 'bold'}),
771
- entity_name_link
772
- ], style={'marginBottom': '10px'}),
773
- html.Div([
774
- html.Span("ID: ", style={'fontWeight': 'bold'}),
775
- entity_id_link
776
- ], style={'marginBottom': '10px'}),
777
- html.Div([
778
- html.Span(f"Papers: {len(entity_papers)}", style={'marginRight': '20px'}),
779
- ], style={'marginBottom': '20px'})
780
- ]
781
- table_header = [
782
- html.Thead(html.Tr([
783
- html.Th("Paper ID", style={'padding': '8px'}),
784
- html.Th("Type", style={'padding': '8px'}),
785
- html.Th("OA Status", style={'padding': '8px', 'textAlign': 'center'}),
786
- html.Th("Publication Date", style={'padding': '8px', 'textAlign': 'center'})
787
- ], style={'backgroundColor': color, 'color': 'white'}))
788
- ]
789
- rows = []
790
- for i, (_, paper) in enumerate(entity_papers.sort_values('publication_date', ascending=False).iterrows()):
791
- row_style = {'backgroundColor': '#232D42'} if i % 2 == 0 else {'backgroundColor': '#1A2238'}
792
- paper_link = html.A(
793
- paper['id'],
794
- href=paper['id'],
795
- target="_blank",
796
- style={'color': color, 'textDecoration': 'underline'}
797
- )
798
- rows.append(html.Tr([
799
- html.Td(paper_link, style={'padding': '8px'}),
800
- html.Td(paper['type'], style={'padding': '8px'}),
801
- html.Td(paper['oa_status'], style={'padding': '8px', 'textAlign': 'center'}),
802
- html.Td(paper['publication_date'].strftime('%Y-%m-%d'), style={'padding': '8px', 'textAlign': 'center'})
803
- ], style=row_style))
804
- table = html.Table(table_header + [html.Tbody(rows)], style={
805
- 'width': '100%',
806
- 'borderCollapse': 'collapse',
807
- 'boxShadow': '0 1px 3px rgba(0,0,0,0.1)'
808
- })
809
- with open("dashboard.html", "w") as f:
810
- f.write(app.index())
811
- print("yup saved!!")
812
- return visible_style, f"{entity_type} Papers", header + [table]
813
- return hidden_style, "", []
814
-
815
- # Start the Dash app
816
- app.run_server(debug=False, port=dashboard_port, use_reloader=False)
817
-
818
- # Run the dashboard in a separate process
819
- dashboard_process = threading.Thread(target=create_and_run_dashboard)
820
- dashboard_process.daemon = True
821
- dashboard_process.start()
822
-
823
- # Open the browser after a delay
824
- def open_browser():
825
- try:
826
- webbrowser.open_new(f"http://127.0.0.1:{dashboard_port}/")
827
- except:
828
- pass
829
-
830
- threading.Timer(1.5, open_browser).start()
831
-
832
- return {"status": "success", "message": f"Dashboard loaded successfully on port {dashboard_port}."}
833
-
834
- except Exception as e:
835
- # Clean up in case of failure
836
- shutdown_existing_dashboard()
837
  raise HTTPException(status_code=400, detail=str(e))
 
1
+ import pandas as pd
2
+ import plotly.express as px
3
+ import plotly.graph_objects as go
4
+ from dash import Dash, dcc, html, Input, Output, State
5
+ import numpy as np
6
+ import random
7
+ import math
8
+ from collections import defaultdict
9
+ import colorsys
10
+ from fastapi import HTTPException
11
+ from pydantic import BaseModel
12
+ import threading
13
+ import webbrowser
14
+ import os
15
+ import psutil
16
+ import socket
17
+ from fastapi import HTTPException, APIRouter, Request
18
+ router = APIRouter()
19
+
20
+ # Global variables to track dashboard state
21
+ dashboard_port = 8050
22
+ dashboard_process = None
23
+
24
+ # MongoDB connection and data loader function
25
async def load_data_from_mongodb(userId, topic, year, request:Request):
    """Fetch one stored result document and return its papers as a DataFrame.

    Args:
        userId: Value matched against the document's ``userId`` field.
        topic: Value matched against the document's ``topic`` field.
        year: Value matched against the document's ``year`` field.
        request: Incoming request; the collection is read from
            ``request.app.state.collection2``.

    Returns:
        A DataFrame built from the document's ``metadata`` list, with the
        ``publication_date`` column converted to pandas datetimes.

    Raises:
        ValueError: If no document matches the query, or the matching
            document carries no records with a ``publication_date``.
    """
    query = {
        "userId": userId,
        "topic": topic,
        "year": year
    }
    collection = request.app.state.collection2
    document = await collection.find_one(query)
    if not document:
        raise ValueError(f"No data found for userId={userId}, topic={topic}, year={year}")

    # Extract metadata and convert to DataFrame
    metadata = document.get("metadata") or []
    df = pd.DataFrame(metadata)
    # An empty frame has no columns, so fail with a readable message instead
    # of the bare KeyError the date conversion below would raise.
    if df.empty or 'publication_date' not in df.columns:
        raise ValueError(
            f"No publication metadata found for userId={userId}, topic={topic}, year={year}"
        )
    df['publication_date'] = pd.to_datetime(df['publication_date'])
    return df
40
+
41
+ # Common functions (unchanged)
42
def filter_by_date_range(dataframe, start_idx, end_idx):
    """Return the rows published within the selected span of months.

    ``start_idx`` and ``end_idx`` index into the module-level ``date_range``
    (month-start timestamps published by ``load_and_display_dashboard``).
    Both ends are inclusive as whole months.

    Args:
        dataframe: Frame with a datetime ``publication_date`` column.
        start_idx: Position in ``date_range`` of the first selected month.
        end_idx: Position in ``date_range`` of the last selected month.

    Returns:
        The subset of ``dataframe`` whose ``publication_date`` falls between
        the start of the first month and the end of the last month.
    """
    start_date = date_range[start_idx]
    # The entries are month starts, so comparing with "<= end" would drop
    # every paper published after the 1st of the last selected month. Use the
    # first instant of the following month as an exclusive upper bound.
    end_exclusive = date_range[end_idx] + pd.offsets.MonthBegin(1)
    published = dataframe['publication_date']
    return dataframe[(published >= start_date) & (published < end_exclusive)]
47
+
48
def generate_vibrant_colors(n):
    """Build ``n`` start/end colour pairs spread evenly around the hue wheel.

    Each pair is a dict with ``'start'`` and ``'end'`` keys holding
    ``rgba(r, g, b, a)`` strings (alpha 0.9 and 0.8 respectively). Saturation,
    brightness and a final per-pair brightness jitter are drawn from
    ``random``, so results vary between calls unless the generator is seeded.
    """
    def as_hex(channels):
        return '#{:02x}{:02x}{:02x}'.format(*channels)

    def jittered_rgba(hex_color, factor, alpha):
        # Scale each two-digit hex channel by the factor, clamped to 0..255.
        red, green, blue = [
            min(255, max(0, int(int(hex_color[pos:pos + 2], 16) * factor)))
            for pos in (1, 3, 5)
        ]
        return f'rgba({red}, {green}, {blue}, {alpha})'

    # First pass: one base gradient per hue step.
    gradients = []
    for index in range(n):
        hue = (index / n) % 1.0
        saturation = random.uniform(0.7, 0.9)
        value = random.uniform(0.7, 0.9)
        rgb = colorsys.hsv_to_rgb(hue, saturation, value)
        start_hex = as_hex([int(channel * 255) for channel in rgb])
        # The gradient end is 10% brighter, capped at full intensity.
        end_hex = as_hex([min(255, int(channel * 255 * 1.1)) for channel in rgb])
        gradients.append((start_hex, end_hex))

    # Second pass: apply one shared brightness jitter to both ends of a pair.
    final_colors = []
    for start_hex, end_hex in gradients:
        factor = random.uniform(0.9, 1.1)
        final_colors.append({
            'start': jittered_rgba(start_hex, factor, '0.9'),
            'end': jittered_rgba(end_hex, factor, '0.8'),
        })
    return final_colors
81
+
82
+ # Knowledge map creator function (unchanged)
83
def create_knowledge_map(filtered_df, view_type='host'):
    """Draw organisations or venues as clusters placed on a golden-angle spiral.

    Entities are grouped by how many papers they have; each distinct paper
    count becomes one cluster, with the largest counts nearest the centre.

    Args:
        filtered_df: Paper records; the ``id``, ``is_oa`` and ``oa_status``
            columns plus the name/id columns of the chosen view are read.
        view_type: ``'host'`` groups by host organisation; any other value
            groups by venue.

    Returns:
        ``(figure, cluster_metadata)`` where ``cluster_metadata`` maps each
        paper count to a dict with ``center_x``, ``center_y``, ``entities``
        (the summary rows in that cluster) and ``color``.
    """
    color_palette = {
        'background': '#1E1E1E',  # Dark background (almost black)
        'card_bg': '#1A2238',     # Bluish-black for cards
        'accent1': '#FF6A3D',     # Orange for headings
        'accent2': '#4ECCA3',     # Teal for secondary elements
        'accent3': '#9D84B7',     # Lavender for tertiary elements
        'text_light': '#FFFFFF',  # White text
        'text_dark': '#E0E0E0',   # Light grey text for dark backgrounds
    }

    if view_type == 'host':
        group_col, id_col = 'host_organization_name', 'host_organization_id'
        title = "Host Organization Clusters"
        unit_label = 'orgs'
    else:
        group_col, id_col = 'venue', 'venue_id'
        title = "Publication Venue Clusters"
        unit_label = 'venues'

    def most_common(values):
        modes = values.mode()
        return None if modes.empty else modes[0]

    # One summary row per entity.
    summary = (
        filtered_df.groupby(group_col)
        .agg(
            paper_count=('id', 'count'),
            is_oa=('is_oa', 'mean'),
            oa_status=('oa_status', most_common),
            entity_id=(id_col, 'first'),
        )
        .reset_index()
    )

    # Entities sharing a paper count form one cluster.
    clusters_by_count = defaultdict(list)
    for _, summary_row in summary.iterrows():
        clusters_by_count[summary_row['paper_count']].append(summary_row)

    figure = go.Figure()
    ordered_counts = sorted(clusters_by_count, reverse=True)
    palette = generate_vibrant_colors(len(ordered_counts))
    golden_angle = np.pi * (3 - np.sqrt(5))
    spiral_coef = 150
    inner_spiral_coef = 0.4
    cluster_metadata = {}

    for rank, (count, color) in enumerate(zip(ordered_counts, palette)):
        members = clusters_by_count[count]
        num_entities = len(members)

        # Cluster centre on the outer spiral.
        radius = np.sqrt(rank) * spiral_coef
        theta = golden_angle * rank
        cluster_x, cluster_y = radius * np.cos(theta), radius * np.sin(theta)

        # The label sits off to the side, further out for crowded clusters.
        label_offset_angle = theta + np.pi/4
        label_offset_distance = 80 + 4 * np.sqrt(num_entities)
        label_x = cluster_x + label_offset_distance * np.cos(label_offset_angle)
        label_y = cluster_y + label_offset_distance * np.sin(label_offset_angle)

        cluster_metadata[count] = {
            'center_x': cluster_x,
            'center_y': cluster_y,
            'entities': members,
            'color': color,
        }

        cluster_size = min(200, max(80, 40 + 8 * np.sqrt(num_entities)))

        # Translucent disc marking the cluster extent.
        figure.add_shape(
            type="circle",
            x0=cluster_x - cluster_size/2, y0=cluster_y - cluster_size/2,
            x1=cluster_x + cluster_size/2, y1=cluster_y + cluster_size/2,
            fillcolor=color['end'].replace("0.8", "0.15"),
            line=dict(color=color['start'], width=1.5),
            opacity=0.7
        )
        # Marker carrying the cluster's customdata.
        figure.add_trace(go.Scatter(
            x=[cluster_x], y=[cluster_y],
            mode='markers',
            marker=dict(size=cluster_size, color=color['start'], opacity=0.3),
            customdata=[[count, "cluster"]],
            hoverinfo='skip'
        ))
        # Dotted leader line from the cluster centre to its label.
        figure.add_trace(go.Scatter(
            x=[cluster_x, label_x], y=[cluster_y, label_y],
            mode='lines',
            line=dict(color=color['start'], width=1, dash='dot'),
            hoverinfo='skip'
        ))
        figure.add_annotation(
            x=label_x, y=label_y,
            text=f"{count} papers<br>{num_entities} {unit_label}",
            showarrow=False,
            font=dict(size=11, color='white'),
            bgcolor=color['start'],
            bordercolor='white',
            borderwidth=1,
            opacity=0.9
        )

        # Entities spiral outwards from the cluster centre in name order.
        for slot, entity_data in enumerate(sorted(members, key=lambda row: row[group_col])):
            spiral_radius = np.sqrt(slot) * cluster_size * inner_spiral_coef / np.sqrt(num_entities + 1)
            spiral_angle = golden_angle * slot
            jitter_radius = random.uniform(0.9, 1.1) * spiral_radius
            jitter_angle = spiral_angle + random.uniform(-0.1, 0.1)
            entity_x = cluster_x + jitter_radius * np.cos(jitter_angle)
            entity_y = cluster_y + jitter_radius * np.sin(jitter_angle)
            node_size = min(18, max(8, np.sqrt(entity_data['paper_count']) * 1.5))
            figure.add_trace(go.Scatter(
                x=[entity_x], y=[entity_y],
                mode='markers',
                marker=dict(
                    size=node_size,
                    color=color['start'],
                    line=dict(color='rgba(255, 255, 255, 0.9)', width=1.5)
                ),
                customdata=[[
                    entity_data[group_col],
                    entity_data['paper_count'],
                    entity_data['is_oa'],
                    entity_data['entity_id'],
                    count,
                    "entity"
                ]],
                hovertemplate=(
                    f"<b>{entity_data[group_col]}</b><br>"
                    f"Papers: {entity_data['paper_count']}<br>"
                    f"Open Access: {entity_data['is_oa']:.1%}<extra></extra>"
                )
            ))

    # Axis extents: furthest cluster centre plus padding, with a floor applied below.
    if cluster_metadata:
        max_x = max([abs(cluster['center_x']) for cluster in cluster_metadata.values()]) + 150
        max_y = max([abs(cluster['center_y']) for cluster in cluster_metadata.values()]) + 150
    else:
        max_x = 500
        max_y = 500

    figure.update_layout(
        title=dict(
            text=title,
            font=dict(size=22, family='"Poppins", sans-serif', color=color_palette['accent1'])  # Orange title
        ),
        plot_bgcolor='rgba(26, 34, 56, 1)',  # Bluish-black background
        paper_bgcolor='rgba(26, 34, 56, 0.7)',
        xaxis=dict(range=[-max(700, max_x), max(700, max_x)], showticklabels=False, showgrid=False),
        yaxis=dict(range=[-max(500, max_y), max(500, max_y)], showticklabels=False, showgrid=False),
        margin=dict(l=10, r=10, t=60, b=10),
        height=700,
        hovermode='closest',
        showlegend=False,
        font=dict(family='"Poppins", sans-serif', color=color_palette['text_light']),  # Light text
    )
    return figure, cluster_metadata
218
+
219
+ # Other chart functions (unchanged)
220
def create_oa_pie_fig(filtered_df):
    """Build the pie chart of open-access versus non-open-access papers.

    Args:
        filtered_df: Paper records; only the boolean ``is_oa`` column is read.

    Returns:
        A Plotly figure with one slice per access category, styled for the
        dashboard's dark theme.
    """
    color_palette = {
        'background': '#1A2238',  # Dark blue background
        'card_bg': '#1A2238',
        'accent1': '#FF6A3D',     # Vibrant orange for highlights
        'accent2': '#4ECCA3',     # Teal for secondary elements
        'accent3': '#9D84B7',     # Lavender for tertiary elements
        'text_light': '#FFFFFF',  # White text
        'text_dark': '#FFFFFF',
    }

    # Plotly Express `labels=` renames columns, not values, so the readable
    # names have to be mapped onto the data before plotting.
    access_labels = {True: "Open Access", False: "Not Open Access"}
    access = filtered_df['is_oa'].map(access_labels).fillna("Unknown")
    plot_df = pd.DataFrame({'access': access})

    # An explicit colour per category keeps slice colours stable regardless
    # of which value happens to appear first in the data.
    category_colors = {
        "Open Access": color_palette['accent2'],
        "Not Open Access": color_palette['accent1'],
        "Unknown": color_palette['accent3'],
    }

    fig = px.pie(
        plot_df, names='access', title="Overall Open Access Status",
        color='access',
        color_discrete_map=category_colors,
        labels={'access': 'Access'}
    )

    fig.update_traces(
        textinfo='label+percent',
        textfont=dict(size=14, family='"Poppins", sans-serif'),
        marker=dict(line=dict(color='#1A2238', width=2))  # Match background color
    )

    fig.update_layout(
        title=dict(
            text="Overall Open Access Status",
            font=dict(size=18, family='"Poppins", sans-serif', color=color_palette['accent1'])  # Orange title
        ),
        font=dict(family='"Poppins", sans-serif', color=color_palette['text_light']),
        paper_bgcolor=color_palette['background'],  # Dark background
        plot_bgcolor=color_palette['background'],   # Dark background
        margin=dict(t=50, b=20, l=20, r=20),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.2,
            xanchor="center",
            x=0.5,
            font=dict(size=12, color=color_palette['text_light'])
        )
    )

    return fig
263
def create_oa_status_pie_fig(filtered_df):
    """Build the pie chart of papers per ``oa_status`` value, dark-themed."""
    font_family = '"Poppins", sans-serif'
    surface = '#1A2238'  # Bluish-black background
    white = '#FFFFFF'
    chart_title = "Open Access Status Distribution"
    slice_palette = ["#9D84B7", '#4DADFF', '#FFD166', '#06D6A0', '#EF476F']

    fig = px.pie(
        filtered_df,
        names='oa_status',
        title=chart_title,
        color_discrete_sequence=slice_palette,
    )

    # Slice text and the white separators between slices.
    fig.update_traces(
        textinfo='label+percent',
        insidetextorientation='radial',
        textfont={'size': 14, 'family': font_family},
        marker={'line': {'color': white, 'width': 2}},
    )

    # Horizontal legend centred underneath the pie.
    legend_style = {
        'orientation': "h",
        'yanchor': "bottom",
        'y': -0.2,
        'xanchor': "center",
        'x': 0.5,
        'font': {'size': 12, 'color': white},
    }
    fig.update_layout(
        title={
            'text': chart_title,
            'font': {'size': 18, 'family': font_family, 'color': "#FF6A3D"},
        },
        font={'family': font_family, 'color': white},
        paper_bgcolor=surface,
        plot_bgcolor=surface,
        margin={'t': 50, 'b': 20, 'l': 20, 'r': 20},
        legend=legend_style,
    )
    return fig
302
def create_type_bar_fig(filtered_df):
    """Build the bar chart counting papers per publication ``type``, dark-themed."""
    font_family = '"Poppins", sans-serif'
    surface = '#1A2238'                    # Consistent dark background
    white = "#FFFFFF"
    grid_color = 'rgba(255, 255, 255, 0.1)'  # Faint grid lines
    chart_title = "Publication Types"
    bar_palette = [
        '#4361EE', '#3A0CA3', '#4CC9F0',
        '#F72585', '#7209B7', '#B5179E',
        '#480CA8', '#560BAD', '#F77F00'
    ]

    type_counts = filtered_df['type'].value_counts()
    colors_in_use = bar_palette[:len(type_counts)]

    fig = px.bar(
        type_counts,
        title=chart_title,
        labels={'value': 'Count', 'index': 'Type'},
        color=type_counts.index,
        color_discrete_sequence=colors_in_use
    )

    tick_font = {'size': 14, 'color': white}
    fig.update_layout(
        title={
            'text': chart_title,
            'font': {'size': 20, 'family': font_family, 'color': "#FF6A3D"},
        },
        xaxis_title="Type",
        yaxis_title="Count",
        font={'family': font_family, 'color': white, 'size': 14},
        paper_bgcolor=surface,
        plot_bgcolor=surface,
        margin={'t': 70, 'b': 60, 'l': 60, 'r': 40},
        xaxis={'tickfont': dict(tick_font), 'tickangle': -45, 'gridcolor': grid_color},
        yaxis={'tickfont': dict(tick_font), 'gridcolor': grid_color},
        bargap=0.3,
    )

    # Count labels above each bar, and a compact hover text.
    fig.update_traces(
        marker_line_width=1,
        marker_line_color='rgba(0, 0, 0, 0.5)',
        opacity=0.9,
        hovertemplate='%{y} publications<extra></extra>',
        texttemplate='%{y}',
        textposition='outside',
        textfont={'size': 14, 'color': 'white'}
    )
    return fig
348
+
349
+ # Function to check if port is in use
350
def is_port_in_use(port):
    """Report whether a TCP connection to ``localhost:port`` succeeds."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex reports failure through its return code instead of raising.
        status = probe.connect_ex(('localhost', port))
    finally:
        probe.close()
    return status == 0
353
+
354
+ # Function to find a free port
355
def find_free_port(start_port=7860):
    """Return the first port at or above ``start_port`` that is not in use.

    Ports are probed one at a time in ascending order with
    ``is_port_in_use``.
    """
    candidate = start_port
    while True:
        if not is_port_in_use(candidate):
            return candidate
        candidate += 1
360
+
361
+ # Function to shutdown any existing dashboard
362
def shutdown_existing_dashboard():
    """Best-effort cleanup of whatever is serving the previous dashboard.

    Two steps, both tolerant of failure:

    1. If ``dashboard_port`` is in use, terminate every *other* process that
       has a connection bound to that local port.
    2. If a dashboard thread is being tracked and is still alive, terminate
       the child processes of this process, then drop the reference.

    The dashboard itself runs in a thread of this process, so this process is
    explicitly excluded from step 1; terminating it would take the API server
    down along with the dashboard.
    """
    global dashboard_process

    own_pid = os.getpid()

    # First, check if our port is in use
    if is_port_in_use(dashboard_port):
        try:
            for proc in psutil.process_iter(['pid', 'name']):
                if proc.pid == own_pid:
                    # Never signal ourselves (see docstring).
                    continue
                try:
                    owns_port = any(
                        conn.laddr and conn.laddr.port == dashboard_port
                        for conn in proc.connections()
                    )
                    if not owns_port:
                        continue
                    print(f"Terminating process {proc.pid} using port {dashboard_port}")
                    proc.terminate()
                    try:
                        proc.wait(timeout=3)
                    except psutil.TimeoutExpired:
                        # Keep scanning the remaining processes instead of
                        # abandoning the whole cleanup on one slow exit.
                        print(f"Process {proc.pid} did not exit within 3 seconds")
                except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                    # The process vanished or is not ours to inspect.
                    continue
        except Exception as e:
            print(f"Error freeing port {dashboard_port}: {e}")

    # If we're tracking a dashboard thread, try to clean up after it
    if dashboard_process is not None:
        try:
            if dashboard_process.is_alive():
                for child in psutil.Process(own_pid).children(recursive=True):
                    try:
                        child.terminate()
                    except psutil.Error:
                        # Child already gone or not permitted; nothing to do.
                        pass
        except Exception as e:
            print(f"Error terminating dashboard process: {e}")
        finally:
            # Reset the reference whether or not cleanup succeeded.
            dashboard_process = None
397
+
398
+ # Pydantic model for request validation
399
class DashboardRequest(BaseModel):
    """Request body accepted by ``load_and_display_dashboard``."""
    userId: str  # matched against the stored document's ``userId`` field
    topic: str  # matched against ``topic``; also shown in the dashboard heading
    year: int  # matched against the stored document's ``year`` field
403
+
404
@router.post("/load_and_display_dashboard/")
async def load_and_display_dashboard(request: DashboardRequest, req:Request):
    """Load a user's publication data and serve it as a Dash dashboard.

    Any previous dashboard is shut down, a free port is chosen, the data is
    loaded from MongoDB, and a Dash app is started in a daemon thread. A
    browser tab pointing at the dashboard is opened shortly afterwards.

    Args:
        request: Identifies the stored document (``userId``, ``topic``,
            ``year``); ``topic`` is also used in the page heading.
        req: The incoming request, passed on to the data loader.

    Returns:
        A dict with ``status`` and a ``message`` naming the dashboard port.

    Raises:
        HTTPException: Status 400 carrying the message of any error raised
            while loading the data or starting the dashboard.
    """
    global dashboard_process, dashboard_port
    # Published as module globals because filter_by_date_range reads them.
    global min_date, max_date, date_range, date_marks

    # Make sure any existing dashboard is shut down
    shutdown_existing_dashboard()

    # Find a free port
    dashboard_port = find_free_port()

    try:
        # Load data from MongoDB
        df = await load_data_from_mongodb(request.userId, request.topic, request.year, req)

        # Get date range for the slider
        min_date = df['publication_date'].min()
        max_date = df['publication_date'].max()
        if pd.isna(min_date) or pd.isna(max_date):
            raise ValueError("No valid publication dates found in the loaded data")
        # freq='MS' only yields month starts on or after `start`. Anchoring
        # the start at the first day of min_date's month keeps the earliest
        # papers selectable and guarantees at least one slider position.
        range_start = min_date.replace(day=1).normalize()
        date_range = pd.date_range(start=range_start, end=max_date, freq='MS')
        date_marks = {i: date.strftime('%b %Y') for i, date in enumerate(date_range)}

        # Function to create and run the dashboard
        def create_and_run_dashboard():
            # Create a new app instance
            app = Dash(__name__, suppress_callback_exceptions=True)
            # Cluster lookup shared between the figure callback and the
            # click callback.
            app.cluster_metadata = {}
            color_palette = {
                'background': '#1A2238',  # Dark blue background
                'card_bg': '#F8F8FF',     # Off-white for cards
                'accent1': '#FF6A3D',     # Vibrant orange for highlights
                'accent2': '#4ECCA3',     # Teal for secondary elements
                'accent3': '#9D84B7',     # Lavender for tertiary elements
                'text_light': '#FFFFFF',  # White text
                'text_dark': '#2D3748',   # Dark gray text
            }

            # Define modern styling for containers
            container_style = {
                'padding': '5px',
                'backgroundColor': color_palette['text_dark'],
                'borderRadius': '12px',
                'boxShadow': '0 4px 12px rgba(0, 0, 0, 0.15)',
                'marginBottom': '25px',
                'border': '1px solid rgba(255, 255, 255, 0.2)',
            }

            hidden_style = {**container_style, 'display': 'none'}
            visible_style = {**container_style}

            # Both pie-chart cards share one style.
            chart_card_style = {
                'flex': 1,
                **container_style,
                'margin': '0 10px',
                'height': '400px',
                'transition': 'transform 0.3s ease',
                ':hover': {'transform': 'translateY(-5px)'},
            }

            # Label at most about a dozen slider positions.
            if len(date_marks) <= 12:
                slider_marks = date_marks
            else:
                slider_marks = {
                    i: date_marks[i] for i in range(0, len(date_range), max(1, len(date_range) // 12))
                }

            # Create a modern, attractive layout
            app.layout = html.Div([
                # Header section with gradient background
                html.Div([
                    html.H1(request.topic.capitalize() + " Analytics Dashboard", style={
                        'textAlign': 'center',
                        'marginBottom': '10px',
                        'color': color_palette['accent1'],
                        'fontSize': '2.5rem',
                        'fontWeight': '700',
                        'letterSpacing': '0.5px',
                    }),
                    html.Div([
                        html.P("Research Publication Analysis & Knowledge Mapping", style={
                            'textAlign': 'center',
                            'color': color_palette['text_light'],
                            'opacity': '0.8',
                            'fontSize': '1.2rem',
                            'marginTop': '0',
                        })
                    ])
                ], style={
                    'background': f'linear-gradient(135deg, {color_palette["background"]}, #364156)',
                    'padding': '30px 20px',
                    'borderRadius': '12px',
                    'marginBottom': '25px',
                    'boxShadow': '0 4px 20px rgba(0, 0, 0, 0.2)',
                }),

                # Controls section
                html.Div([
                    html.Div([
                        html.Button(
                            id='view-toggle',
                            children='Switch to Venue View',
                            style={
                                'padding': '12px 20px',
                                'fontSize': '1rem',
                                'borderRadius': '8px',
                                'border': 'none',
                                'backgroundColor': color_palette['accent1'],
                                'color': 'white',
                                'cursor': 'pointer',
                                'boxShadow': '0 2px 5px rgba(0, 0, 0, 0.1)',
                                'transition': 'all 0.3s ease',
                                'marginRight': '20px',
                                'fontWeight': '500',
                            }
                        ),
                        html.H3("Filter by Publication Date", style={
                            'marginBottom': '15px',
                            'color': color_palette['text_dark'],
                            'fontSize': '1.3rem',
                            'fontWeight': '600',
                        }),
                    ], style={'display': 'flex', 'alignItems': 'center', 'marginBottom': '15px'}),

                    dcc.RangeSlider(
                        id='date-slider',
                        min=0,
                        max=len(date_range) - 1,
                        value=[0, len(date_range) - 1],
                        marks=slider_marks,
                        step=1,
                        tooltip={"placement": "bottom", "always_visible": True},
                        updatemode='mouseup'
                    ),
                    html.Div(id='date-range-display', style={
                        'textAlign': 'center',
                        'marginTop': '12px',
                        'fontSize': '1.1rem',
                        'fontWeight': '500',
                        'color': color_palette['accent1'],
                    })
                ], style={**container_style, 'marginBottom': '25px'}),

                # Knowledge map - main visualization
                html.Div([
                    dcc.Graph(
                        id='knowledge-map',
                        style={'width': '100%', 'height': '700px'},
                        config={'scrollZoom': True, 'displayModeBar': True, 'responsive': True}
                    )
                ], style={
                    **container_style,
                    'height': '750px',
                    'marginBottom': '25px',
                    'background': f'linear-gradient(to bottom right, {color_palette["card_bg"]}, #F0F0F8)',
                }),

                # Details container - appears when clicking elements
                html.Div([
                    html.H3(id='details-title', style={
                        'marginBottom': '15px',
                        'color': color_palette['accent1'],
                        'fontSize': '1.4rem',
                        'fontWeight': '600',
                    }),
                    html.Div(id='details-content', style={
                        'maxHeight': '350px',
                        'overflowY': 'auto',
                        'padding': '10px',
                        'borderRadius': '8px',
                        'backgroundColor': 'rgba(255, 255, 255, 0.7)',
                    })
                ], id='details-container', style=hidden_style),

                # Charts in flex container
                html.Div([
                    html.Div([
                        dcc.Graph(
                            id='oa-pie-chart',
                            style={'width': '100%', 'height': '350px'},
                            config={'displayModeBar': False, 'responsive': True}
                        )
                    ], style=dict(chart_card_style)),
                    html.Div([
                        dcc.Graph(
                            id='oa-status-pie-chart',
                            style={'width': '100%', 'height': '350px'},
                            config={'displayModeBar': False, 'responsive': True}
                        )
                    ], style=dict(chart_card_style))
                ], style={'display': 'flex', 'marginBottom': '25px', 'height': '420px'}),

                # Bar chart container
                html.Div([
                    dcc.Graph(
                        id='type-bar-chart',
                        style={'width': '100%', 'height': '50vh'},
                        config={'displayModeBar': False, 'responsive': True}
                    )
                ], style={
                    **container_style,
                    'height': '500px',
                    'background': 'rgba(26, 34, 56, 1)',
                    'marginBottom': '10px',
                }),
                # Store components for state
                dcc.Store(id='filtered-df-info'),
                dcc.Store(id='current-view', data='host'),
                html.Div(id='load-trigger', children='trigger-initial-load', style={'display': 'none'})
            ], style={
                'fontFamily': '"Poppins", "Segoe UI", Arial, sans-serif',
                'backgroundColor': '#121212',  # Dark background
                'backgroundImage': 'none',     # Remove gradient
                'padding': '30px',
                'maxWidth': '1800px',
                'margin': '0 auto',
                'minHeight': '100vh',
                'color': color_palette['text_light'],
                'paddingBottom': '10px',
            })

            @app.callback(
                [Output('current-view', 'data'),
                 Output('view-toggle', 'children')],
                [Input('view-toggle', 'n_clicks')],
                [State('current-view', 'data')]
            )
            def toggle_view(n_clicks, current_view):
                # Flip between the host and venue views; the button always
                # names the view it would switch to.
                if not n_clicks:
                    return current_view, 'Switch to Venue View' if current_view == 'host' else 'Switch to Host View'
                new_view = 'venue' if current_view == 'host' else 'host'
                new_button_text = 'Switch to Host View' if new_view == 'venue' else 'Switch to Venue View'
                return new_view, new_button_text

            @app.callback(
                Output('date-range-display', 'children'),
                [Input('date-slider', 'value')]
            )
            def update_date_range_display(date_range_indices):
                start_date = date_range[date_range_indices[0]]
                end_date = date_range[date_range_indices[1]]
                return f"Selected period: {start_date.strftime('%b %Y')} to {end_date.strftime('%b %Y')}"

            @app.callback(
                [Output('knowledge-map', 'figure'),
                 Output('oa-pie-chart', 'figure'),
                 Output('oa-status-pie-chart', 'figure'),
                 Output('type-bar-chart', 'figure'),
                 Output('filtered-df-info', 'data'),
                 Output('details-container', 'style')],
                [Input('date-slider', 'value'),
                 Input('current-view', 'data'),
                 Input('load-trigger', 'children')]  # Added trigger
            )
            def update_visualizations(date_range_indices, current_view, _):
                # Rebuild every chart for the selected period and hide any
                # details panel left over from the previous selection.
                filtered_df = filter_by_date_range(df, date_range_indices[0], date_range_indices[1])
                knowledge_map_fig, cluster_metadata = create_knowledge_map(filtered_df, current_view)
                app.cluster_metadata = cluster_metadata
                filtered_info = {
                    'start_idx': date_range_indices[0],
                    'end_idx': date_range_indices[1],
                    'start_date': date_range[date_range_indices[0]].strftime('%Y-%m-%d'),
                    'end_date': date_range[date_range_indices[1]].strftime('%Y-%m-%d'),
                    'record_count': len(filtered_df),
                    'view_type': current_view
                }
                return (
                    knowledge_map_fig,
                    create_oa_pie_fig(filtered_df),
                    create_oa_status_pie_fig(filtered_df),
                    create_type_bar_fig(filtered_df),
                    filtered_info,
                    hidden_style
                )

            @app.callback(
                [Output('details-container', 'style', allow_duplicate=True),
                 Output('details-title', 'children'),
                 Output('details-content', 'children')],
                [Input('knowledge-map', 'clickData')],
                [State('filtered-df-info', 'data')],
                prevent_initial_call=True
            )
            def display_details(clickData, filtered_info):
                # Show a table for the clicked cluster or entity; anything
                # unrecognised hides the panel.
                if not clickData or not filtered_info:
                    return hidden_style, "", []
                customdata = clickData['points'][0]['customdata']
                view_type = filtered_info['view_type']
                entity_type = "Organization" if view_type == 'host' else "Venue"
                table_style = {
                    'width': '100%',
                    'borderCollapse': 'collapse',
                    'boxShadow': '0 1px 3px rgba(0,0,0,0.1)'
                }

                if len(customdata) >= 2 and customdata[-1] == "cluster":
                    count = customdata[0]
                    if count not in app.cluster_metadata:
                        return hidden_style, "", []
                    entities = app.cluster_metadata[count]['entities']
                    color = app.cluster_metadata[count]['color']['start']
                    name_col = 'host_organization_name' if view_type == 'host' else 'venue'
                    table_header = [
                        html.Thead(html.Tr([
                            html.Th(f"{entity_type} Name", style={'padding': '8px'}),
                            html.Th(f"{entity_type} ID", style={'padding': '8px'}),
                            html.Th("Papers", style={'padding': '8px', 'textAlign': 'center'}),
                            html.Th("Open Access %", style={'padding': '8px', 'textAlign': 'center'})
                        ], style={'backgroundColor': color_palette['accent1'], 'color': 'white'}))
                    ]

                    rows = []
                    for i, entity in enumerate(sorted(entities, key=lambda x: x['paper_count'], reverse=True)):
                        # Alternating dark stripes, matching the entity table
                        # below. The stripe must be chosen inside the loop:
                        # `i` does not exist before it.
                        row_style = {'backgroundColor': '#232D42'} if i % 2 == 0 else {'backgroundColor': '#1A2238'}
                        entity_name_link = html.A(
                            entity[name_col],
                            href=entity['entity_id'],
                            target="_blank",
                            style={'color': color, 'textDecoration': 'underline'}
                        )
                        entity_id_link = html.A(
                            entity['entity_id'].split('/')[-1],
                            href=entity['entity_id'],
                            target="_blank",
                            style={'color': color, 'textDecoration': 'underline'}
                        )
                        rows.append(html.Tr([
                            html.Td(entity_name_link, style={'padding': '8px'}),
                            html.Td(entity_id_link, style={'padding': '8px'}),
                            html.Td(entity['paper_count'], style={'padding': '8px', 'textAlign': 'center'}),
                            html.Td(f"{entity['is_oa']:.1%}", style={'padding': '8px', 'textAlign': 'center'})
                        ], style=row_style))
                    table = html.Table(table_header + [html.Tbody(rows)], style=table_style)
                    return (
                        visible_style,
                        f"{entity_type}s with {count} papers",
                        [html.P(f"Showing {len(entities)} {entity_type.lower()}s during selected period"), table]
                    )

                elif len(customdata) >= 6 and customdata[-1] == "entity":
                    entity_name = customdata[0]
                    entity_id = customdata[3]
                    cluster_count = customdata[4]
                    # The map may have been rebuilt since this point was
                    # drawn; treat a missing cluster like the branch above.
                    cluster = app.cluster_metadata.get(cluster_count)
                    if cluster is None:
                        return hidden_style, "", []
                    color = cluster['color']['start']
                    if view_type == 'host':
                        entity_papers = df[df['host_organization_name'] == entity_name].copy()
                    else:
                        entity_papers = df[df['venue'] == entity_name].copy()
                    # Use the same helper as the charts so the paper list
                    # always agrees with what the map shows.
                    entity_papers = filter_by_date_range(
                        entity_papers, filtered_info['start_idx'], filtered_info['end_idx']
                    )
                    entity_name_link = html.A(
                        entity_name,
                        href=entity_id,
                        target="_blank",
                        style={'color': color, 'textDecoration': 'underline', 'fontSize': '1.2em'}
                    )
                    entity_id_link = html.A(
                        entity_id.split('/')[-1],
                        href=entity_id,
                        target="_blank",
                        style={'color': color, 'textDecoration': 'underline'}
                    )
                    header = [
                        html.Div([
                            html.Span("Name: ", style={'fontWeight': 'bold'}),
                            entity_name_link
                        ], style={'marginBottom': '10px'}),
                        html.Div([
                            html.Span("ID: ", style={'fontWeight': 'bold'}),
                            entity_id_link
                        ], style={'marginBottom': '10px'}),
                        html.Div([
                            html.Span(f"Papers: {len(entity_papers)}", style={'marginRight': '20px'}),
                        ], style={'marginBottom': '20px'})
                    ]
                    table_header = [
                        html.Thead(html.Tr([
                            html.Th("Paper ID", style={'padding': '8px'}),
                            html.Th("Type", style={'padding': '8px'}),
                            html.Th("OA Status", style={'padding': '8px', 'textAlign': 'center'}),
                            html.Th("Publication Date", style={'padding': '8px', 'textAlign': 'center'})
                        ], style={'backgroundColor': color, 'color': 'white'}))
                    ]
                    rows = []
                    newest_first = entity_papers.sort_values('publication_date', ascending=False)
                    for i, (_, paper) in enumerate(newest_first.iterrows()):
                        row_style = {'backgroundColor': '#232D42'} if i % 2 == 0 else {'backgroundColor': '#1A2238'}
                        paper_link = html.A(
                            paper['id'],
                            href=paper['id'],
                            target="_blank",
                            style={'color': color, 'textDecoration': 'underline'}
                        )
                        rows.append(html.Tr([
                            html.Td(paper_link, style={'padding': '8px'}),
                            html.Td(paper['type'], style={'padding': '8px'}),
                            html.Td(paper['oa_status'], style={'padding': '8px', 'textAlign': 'center'}),
                            html.Td(paper['publication_date'].strftime('%Y-%m-%d'), style={'padding': '8px', 'textAlign': 'center'})
                        ], style=row_style))
                    table = html.Table(table_header + [html.Tbody(rows)], style=table_style)
                    # NOTE(review): this rewrites dashboard.html in the
                    # working directory on every entity click and looks like
                    # leftover debugging; kept in case something reads the
                    # file - confirm before removing.
                    with open("dashboard.html", "w") as f:
                        f.write(app.index())
                    print("yup saved!!")
                    return visible_style, f"{entity_type} Papers", header + [table]

                return hidden_style, "", []

            # Start the Dash app
            app.run_server(debug=False, port=dashboard_port, use_reloader=False)

        # Run the dashboard in a separate daemon thread
        dashboard_process = threading.Thread(target=create_and_run_dashboard)
        dashboard_process.daemon = True
        dashboard_process.start()

        # Open the browser after a delay
        def open_browser():
            try:
                webbrowser.open_new(f"http://127.0.0.1:{dashboard_port}/")
            except Exception:
                # Opening a browser is a convenience only; a machine without
                # one must not fail the request.
                pass

        threading.Timer(1.5, open_browser).start()

        return {"status": "success", "message": f"Dashboard loaded successfully on port {dashboard_port}."}

    except Exception as e:
        # Clean up in case of failure
        shutdown_existing_dashboard()
        raise HTTPException(status_code=400, detail=str(e)) from e