File size: 35,886 Bytes
4bfc55c
4435d63
5d5bc80
e186be4
5d5bc80
4435d63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521e71f
e5730a3
 
4435d63
e1e6335
 
4435d63
6fc0a4d
52413b9
34f85e7
 
 
 
 
 
 
 
6fc0a4d
 
19d49ec
7e8aad4
8dc99e2
77d278c
6fc0a4d
 
 
 
 
 
 
 
34f85e7
6fc0a4d
 
4435d63
6fc0a4d
 
34f85e7
6fc0a4d
 
4435d63
6fc0a4d
 
34f85e7
6fc0a4d
 
 
 
 
 
 
 
 
 
 
 
 
 
34f85e7
6fc0a4d
 
 
 
 
 
 
 
 
34f85e7
6fc0a4d
 
 
 
 
 
 
 
 
34f85e7
6fc0a4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34f85e7
6fc0a4d
 
 
34f85e7
6fc0a4d
 
 
34f85e7
6fc0a4d
 
 
 
 
 
 
 
 
 
 
34f85e7
 
 
4bfc55c
26c620e
 
 
4bfc55c
 
 
 
 
bc8f9bb
29d5f65
4bfc55c
6e8471f
 
4bfc55c
 
34f85e7
f390488
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6fc0a4d
9d5ea5c
 
 
 
 
2c1efa2
7b88260
34f85e7
9d5ea5c
 
5d5bc80
9d5ea5c
88dc9d6
 
9d5ea5c
34f85e7
9d5ea5c
 
6fc0a4d
34f85e7
 
 
9d5ea5c
c803036
6fc0a4d
 
34f85e7
 
83f4ed1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b841058
6fc0a4d
fba3086
f5ffc1d
b841058
fba3086
 
f5ffc1d
84fc798
fba3086
 
f5ffc1d
84fc798
fba3086
 
f5ffc1d
 
 
 
 
 
 
 
 
 
 
 
 
b841058
fba3086
 
f5ffc1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17044f7
fba3086
 
f5ffc1d
 
 
 
 
 
 
 
 
 
 
 
 
6fc0a4d
a985384
f5ffc1d
b841058
a985384
 
f5ffc1d
133ff3e
a985384
 
f5ffc1d
a985384
 
 
f5ffc1d
133ff3e
a985384
 
f5ffc1d
133ff3e
a985384
 
f5ffc1d
 
 
 
 
b841058
f5ffc1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adfe2a3
f5ffc1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6fc0a4d
4bfc55c
 
 
d96c2fb
4bfc55c
 
 
 
 
30c96c5
4bfc55c
 
6fc0a4d
e5730a3
 
b52ed25
 
e1e6335
 
 
 
 
 
 
 
b5ad794
4ae3280
e1e6335
 
 
 
ff02658
dfb00c6
e1e6335
 
dfb00c6
 
 
17d13e8
 
dfb00c6
 
 
 
4ae3280
0572d22
2b24d04
ce2da58
 
18a06db
 
 
 
2ff9fdc
18a06db
 
a5bbf1e
18a06db
 
 
 
 
 
ce2da58
 
981b62f
f6fef17
04c77dd
f6fef17
981b62f
 
a5bbf1e
 
 
 
1f9da65
f6fef17
2a66489
4ae3280
ce2da58
f6fef17
 
2a66489
b5ad794
e1e6335
 
19def65
2652201
c1ce398
2d5a65e
ef12036
f6fef17
 
e1e6335
 
26c0bdd
0572d22
f6fef17
 
0572d22
 
6fc0a4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e380a56
34f85e7
 
5d5bc80
34f85e7
 
 
4bfc55c
34f85e7
6fc0a4d
 
 
4424c93
6fc0a4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e380a56
 
6fc0a4d
 
f87790d
a985384
88cdcd8
34f85e7
4bfc55c
6fc0a4d
 
 
e380a56
6fc0a4d
 
5d5bc80
 
 
e186be4
6fc0a4d
 
 
5d5bc80
 
e186be4
5d5bc80
 
 
e186be4
5d5bc80
 
 
 
e186be4
5d5bc80
 
 
 
 
 
 
e186be4
5d5bc80
88cdcd8
6fc0a4d
5d5bc80
e186be4
 
6fc0a4d
 
b2817b2
c2c3b43
b2817b2
 
 
e186be4
6fc0a4d
34f85e7
 
 
 
 
 
dad694e
88cdcd8
34f85e7
 
e380a56
 
 
 
e186be4
e380a56
 
ab25b50
e380a56
88cdcd8
34f85e7
88cdcd8
 
 
 
e186be4
88cdcd8
 
 
 
 
34f85e7
6fc0a4d
 
 
 
e186be4
6fc0a4d
2c6b734
6fc0a4d
 
34f85e7
6fc0a4d
 
fba3086
6fc0a4d
4435d63
6fc0a4d
a985384
34f85e7
6fc0a4d
a985384
 
 
 
 
4bfc55c
 
 
 
090bdce
4bfc55c
 
f390488
4bfc55c
f390488
4bfc55c
f390488
 
 
 
4bfc55c
f6fef17
 
 
 
 
 
4bfc55c
f390488
 
 
 
f6fef17
 
 
 
2b24d04
f6fef17
2b24d04
f390488
64a5566
d85563e
f6fef17
d85563e
090bdce
f390488
 
 
 
f6fef17
 
f390488
4bfc55c
 
 
45d7b4b
 
 
 
 
4bfc55c
 
 
 
45d7b4b
 
4bfc55c
 
45d7b4b
 
 
 
4bfc55c
d6debf6
4bfc55c
b329a41
 
 
 
 
4bfc55c
 
 
d6debf6
4bfc55c
91bd078
6fc0a4d
e380a56
6fc0a4d
 
11ac7d8
a985384
34f85e7
4bfc55c
 
6fc0a4d
 
 
4bfc55c
6fc0a4d
 
 
 
 
5d5bc80
6fc0a4d
 
 
 
 
 
 
 
 
fba3086
6fc0a4d
 
 
 
4435d63
6fc0a4d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
import json
import warnings

from bokeh.models import DatetimeTicker, DatetimeTickFormatter

warnings.filterwarnings("ignore")
import io
import os
import time
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
import pandas as pd
import csv
import ast
from tqdm import tqdm
from operator import itemgetter
import numpy as np
import re
import datetime
import html
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
#plt.style.use('seaborn-paper')
import holoviews as hv
from holoviews import opts, dim
from bokeh.sampledata.les_mis import data
from bokeh.io import show
from bokeh.sampledata.les_mis import data
import panel as pn
import bokeh
from bokeh.resources import INLINE
from holoviews.operation.timeseries import rolling, rolling_outlier_std
hv.extension('bokeh')
from scipy.ndimage import gaussian_filter1d
from bokeh.models import Legend, LegendItem


import os
os.environ['MPLCONFIGDIR'] = '/tmp/matplotlib'

## LOAD DATASETS

# Root folder for every data file read by this module (relative to the
# working directory the app is started from).
data_folder = './data'

# Country code -> country name lookup built from the Country_Code and
# Country_Name columns of country_name_map.tsv.
country_name_df = pd.read_csv(os.path.join(data_folder, 'country_name_map.tsv'),  header=0, sep='\t', lineterminator='\n', low_memory=False)
country_name_map = dict(zip(country_name_df.Country_Code, country_name_df.Country_Name))

# Publication time series loaded at import time.
# NOTE: create_publication_curve_chart() re-reads the same files into its own
# local variables, so these module-level frames are never log-transformed.
total_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'total_publications_time_indexed.tsv'),  header=0, sep='\t', lineterminator='\n', low_memory=False)
country_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'country_publications_time_indexed.tsv'),  header=0, sep='\t', lineterminator='\n', low_memory=False)


## AECO topic over time html file:
# Paths of pre-rendered HTML assets.
# NOTE(review): the dendrogram path ends in '.htm' while the other two end in
# '.html' -- confirm this matches the actual asset file name.
AECO_topics_over_time_file_path = '/assets/optimized_merged_AECO_topics_over_time_2D_gpt_labels.html'
AECO_topics_dendogram_file_path = '/assets/topic_hierarchy_optimal_params.htm'
AECO_topic_map_path = '/assets/document_datamap_ver2.html'


# Region codes offered by the region selector widgets.
regions = ['eu', 'us', 'eu_us']

# Entity-type -> frequency maps, one per region; filled by read_top_ent_types().
sorted_ent_type_freq_map_eu=dict()
sorted_ent_type_freq_map_us=dict()
sorted_ent_type_freq_map_eu_us=dict()

def _load_ent_type_freq_map(file_name, target, max_rows=20):
    """Copy the first ``max_rows`` rows of ``file_name`` into ``target``.

    Each row must hold exactly two fields, ``key, integer_value``. After
    loading, the ``'Entity'`` key is removed from ``target``.

    Raises:
        ValueError: if a row does not have exactly two fields or the second
            field is not an integer.
        KeyError: if no ``'Entity'`` key was loaded.
    """
    # NOTE(review): the files carry a .tsv extension but are parsed with
    # csv.reader's default (comma) delimiter, exactly as before -- confirm
    # this matches the real file format.
    with open(os.path.join(data_folder, file_name), 'r') as handle:
        for row_number, row in enumerate(csv.reader(handle)):
            if row_number >= max_rows:
                # Only the leading rows are used; no need to scan the rest.
                break
            key, value = row
            target[key] = int(value)
    del target['Entity']


def read_top_ent_types():
    """Fill the three per-region entity-type frequency maps from disk.

    Populates ``sorted_ent_type_freq_map_eu``, ``sorted_ent_type_freq_map_us``
    and ``sorted_ent_type_freq_map_eu_us`` in place from the matching
    ``sorted_ent_type_freq_map_<region>.tsv`` files under ``data_folder``.
    Files are opened in a ``with`` block so the handles are always closed.
    """
    sources = (
        ('sorted_ent_type_freq_map_eu.tsv', sorted_ent_type_freq_map_eu),
        ('sorted_ent_type_freq_map_us.tsv', sorted_ent_type_freq_map_us),
        ('sorted_ent_type_freq_map_eu_us.tsv', sorted_ent_type_freq_map_eu_us),
    )
    for file_name, target in sources:
        _load_ent_type_freq_map(file_name, target)

# Populate the sorted_ent_type_freq_map_* dictionaries at import time.
read_top_ent_types()

# Entity types offered per region; retrieveRegionTypes() returns these lists
# and read_type_filtered_triples() loads one TSV file per listed type.
top_type_filtered_eu = ['DBpedia:Country', 'DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:Person', 'DBpedia:Disease', 'DBpedia:ChemicalSubstance', 'DBpedia:Drug', 'DBpedia:GovernmentAgency', 'DBpedia:City', 'DBpedia:MonoclonalAntibody']
top_type_filtered_us = ['DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:Disease', 'DBpedia:ChemicalSubstance', 'DBpedia:Person', 'DBpedia:Drug', 'DBpedia:Country', 'DBpedia:Region', 'DBpedia:MonoclonalAntibody', 'DBpedia:City', 'DBpedia:Biomolecule']
top_type_filtered_eu_us =  ['DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:ChemicalSubstance', 'DBpedia:Drug', 'DBpedia:Country', 'DBpedia:Person', 'DBpedia:Disease', 'DBpedia:MonoclonalAntibody', 'DBpedia:GovernmentAgency', 'DBpedia:Biomolecule', 'DBpedia:Gene']


def _load_ent_freq_map(file_name, target):
    """Load one tab-separated ``key<TAB>pair-list`` file into ``target``.

    The second field of every row is a Python literal holding a sequence of
    ``(entity, frequency)`` pairs. Duplicate entities keep the last frequency
    seen. The value stored under ``key`` is a list of ``(entity, frequency)``
    tuples sorted by frequency, highest first.

    Raises:
        ValueError: if a row does not have exactly two fields, or the second
            field is not a valid Python literal.
    """
    with open(os.path.join(data_folder, file_name), 'r') as handle:
        for key, raw_pairs in csv.reader(handle, delimiter='\t'):
            # literal_eval only accepts literals, so the file cannot run code.
            pairs = ast.literal_eval(raw_pairs)
            frequencies = {pair[0]: pair[1] for pair in pairs}
            target[key] = sorted(frequencies.items(), key=itemgetter(1), reverse=True)


def read_top_ent_maps():
    """Fill the three per-region entity frequency maps from disk.

    Populates ``ent_freq_maps_eu``, ``ent_freq_maps_us`` and
    ``ent_freq_maps_eu_us`` in place from the matching
    ``sorted_ent_freq_map_<region>.tsv`` files under ``data_folder``.
    Files are opened in a ``with`` block so the handles are always closed.
    """
    sources = (
        ('sorted_ent_freq_map_eu.tsv', ent_freq_maps_eu),
        ('sorted_ent_freq_map_us.tsv', ent_freq_maps_us),
        ('sorted_ent_freq_map_eu_us.tsv', ent_freq_maps_eu_us),
    )
    for file_name, target in sources:
        _load_ent_freq_map(file_name, target)

# Per-region maps: key -> list of (entity, frequency) tuples sorted by
# descending frequency. They must exist before read_top_ent_maps() runs,
# because that function fills them in place.
ent_freq_maps_eu = dict()
ent_freq_maps_us = dict()
ent_freq_maps_eu_us = dict()

# Populate the three maps at import time.
read_top_ent_maps()


def read_type_filtered_triples():
    """Load one DataFrame per (region, entity type) into the region dicts.

    For every entity type listed in ``top_type_filtered_<region>`` the file
    ``<data_folder>/filtered_rows/<region>/<type with ':' replaced by '_'>.tsv``
    is read as tab-separated text with a header row. The ``'Unnamed: 0'``
    column is dropped and the frame is stored under the entity type in
    ``top_type_filtered_triples_<region>``.

    Raises:
        FileNotFoundError: if a per-type file is missing.
        KeyError: if a file has no ``'Unnamed: 0'`` column.
    """
    per_region = (
        ('eu', top_type_filtered_eu, top_type_filtered_triples_eu),
        ('us', top_type_filtered_us, top_type_filtered_triples_us),
        ('eu_us', top_type_filtered_eu_us, top_type_filtered_triples_eu_us),
    )
    for region, entity_types, target in per_region:
        for entity_type in entity_types:
            file_name = entity_type.replace(':', '_') + '.tsv'
            path = os.path.join(data_folder, 'filtered_rows', region, file_name)
            # header=0 is what pandas infers by default; stated explicitly so
            # all three regions are read the same way.
            frame = pd.read_csv(path, sep='\t', header=0)
            target[entity_type] = frame.drop(columns=['Unnamed: 0'])



# Per-region maps: entity type -> DataFrame of filtered rows. They must exist
# before read_type_filtered_triples() runs, because it fills them in place.
top_type_filtered_triples_eu = dict()
top_type_filtered_triples_us = dict()
top_type_filtered_triples_eu_us = dict()

# Populate the three maps at import time.
read_type_filtered_triples()

# Relation table behind the chord diagrams: filter_region() reads its 'region'
# and 'value' columns, plot_chord0_new() its 'source', 'target' and 'value'.
grouping_filtered = pd.read_csv(os.path.join(data_folder, 'dna_relations.tsv'), sep="	")


def load_topic2toptasks():
    """Return the parsed content of ``time_series/topic2toptasks.json``.

    The file is read as UTF-8 from below ``data_folder``.
    """
    json_path = os.path.join(data_folder+'/time_series', 'topic2toptasks.json')
    with open(json_path, "r", encoding="utf-8") as handle:
        return json.load(handle)



def loadTaskMethodTimeSeries(topic,task):
    """Load ``<topic>_<task>_time_series.csv`` from the time-series folder.

    Returns:
        The CSV content as a DataFrame indexed by its first column.
    """
    csv_name = f"{topic}_{task}_time_series.csv"
    csv_path = os.path.join(data_folder+'/time_series', csv_name)
    frame = pd.read_csv(csv_path, header=0, sep=',', lineterminator='\n', low_memory=False)
    first_column = frame.columns[0]
    return frame.set_index(first_column)


def loadTaskTimeSeries(topic):
    """Load ``cluster_<topic>_TASK_time_series.csv`` from the time-series folder.

    Returns:
        The CSV content as a DataFrame indexed by its first column.
    """
    csv_name = f"cluster_{topic}_TASK_time_series.csv"
    csv_path = os.path.join(data_folder+'/time_series', csv_name)
    frame = pd.read_csv(csv_path, header=0, sep=',', lineterminator='\n', low_memory=False)
    first_column = frame.columns[0]
    return frame.set_index(first_column)

def loadMethodTimeSeries(topic):
    """Load ``cluster_<topic>_METHOD_time_series.csv`` from the time-series folder.

    Returns:
        The CSV content as a DataFrame indexed by its first column.
    """
    csv_name = f"cluster_{topic}_METHOD_time_series.csv"
    csv_path = os.path.join(data_folder+'/time_series', csv_name)
    frame = pd.read_csv(csv_path, header=0, sep=',', lineterminator='\n', low_memory=False)
    first_column = frame.columns[0]
    return frame.set_index(first_column)


################################# CREATE CHARTS ############################
################################# CREATE CHARTS ############################

# Hook function to customize x-axis for Bokeh
def customize_x_axis_bokeh(plot, element):
    """HoloViews hook: set the x-axis tick format and requested tick count.

    Args:
        plot: HoloViews plot wrapper; its ``state`` is the Bokeh figure.
        element: the element being rendered (unused).
    """
    figure = plot.state
    month_formatter = DatetimeTickFormatter(months='%m%Y')
    figure.xaxis.formatter = month_formatter
    # Ask the ticker for roughly one tick per month of a year.
    figure.xaxis.ticker.desired_num_ticks = 12
def create_publication_curve_chart():
    """Build the overlay of monthly publication curves on a log1p scale.

    The three TSV files are re-read from ``data_folder`` on every call, so
    the module-level frames of the same name are left untouched. The total
    series (column ``id``) and every numeric cell of the per-country table
    are transformed with ``log1p``. One curve per country code found in
    ``country_name_map.tsv`` is overlaid on the total curve, all sharing the
    ``month_bin`` values of the totals file as x coordinates.

    Returns:
        A HoloViews overlay with the legend on the right, sized 1400x900.
    """
    read_options = dict(header=0, sep='\t', lineterminator='\n', low_memory=False)

    country_names = pd.read_csv(os.path.join(data_folder, 'country_name_map.tsv'), **read_options)
    code_to_name = dict(zip(country_names.Country_Code, country_names.Country_Name))

    totals = pd.read_csv(os.path.join(data_folder, 'total_publications_time_indexed.tsv'), **read_options)
    totals['month_bin'] = pd.to_datetime(totals['month_bin'])
    totals['id'] = np.log1p(totals['id'])

    per_country = pd.read_csv(os.path.join(data_folder, 'country_publications_time_indexed.tsv'), **read_options)
    # DataFrame.applymap is deprecated in favour of DataFrame.map; use the new
    # name when this pandas has it and fall back otherwise. Looking it up on
    # the class avoids clashing with a column that happens to be called "map".
    elementwise = getattr(pd.DataFrame, 'map', None) or pd.DataFrame.applymap
    per_country = elementwise(
        per_country,
        lambda x: np.log1p(x) if np.issubdtype(type(x), np.number) else x,
    )

    overlay = hv.Curve((totals.month_bin, totals.id), 'Time', 'Publication Counts (log)', label='Total')
    for code, name in code_to_name.items():
        overlay = overlay * hv.Curve((totals.month_bin, per_country[code]), label=name)

    overlay.opts(show_legend=True, legend_position='right', width=1400, height=900, hooks=[customize_x_axis_bokeh])
    return overlay



# Macro-topic label -> integer id (0-15).
# NOTE(review): presumably the id is the cluster number used in the
# time-series file names -- confirm against the callers.
macro_topics_mapping = {"Energy Efficiency and Thermal Comfort in Building Environments":0,
                        "Indoor Air Quality and Sustainable Air Conditioning Systems":1,
                        "Urban Development Strategies and Sustainable City Planning":2,
                        "Enhancing Child-Friendly Urban Spaces Through Design":3,
                        "Smart city development and urban data management":4,
                        "Urban Resilience and Green Infrastructure in Climate Change Planning":5,
                        "Architectural Integration of Solar Photovoltaic Systems in Buildings":6,
                        "Preservation and Evolution of Traditional Architecture in Modern Contexts":7,
                        "Sustainable Building Construction and Design with Environmental Assessment":8,
                        "Landscape Planning and Design Theory":9,
                        "Urban Sound Environment Research in Architectural Design":10,
                        "Sustainable Construction Materials and Technologies":11,
                        "Utilizing BIM in Construction and Building Information Modeling Industry":12,
                        "Urban Agriculture and Sustainable Food Systems":13,
                        "Sustainable Bridge Design and Construction":14,
                        "Investigation of Cavity Dynamics and Heat Transfer in Various Flow Scenarios":15}

# The macro topics offered as options by the macro-topic Select widget.
macro_topics_active_subset = ["Energy Efficiency and Thermal Comfort in Building Environments","Architectural Integration of Solar Photovoltaic Systems in Buildings","Utilizing BIM in Construction and Building Information Modeling Industry"]

# Macro topics that have an institute collaboration network, mapped to the URL
# embedded in the iframe. Every other known macro topic gets an empty ``src``.
_INSTITUTE_NETWORK_URLS = {
    'Energy Efficiency and Thermal Comfort in Building Environments':
        'https://tinyurl.com/2d4gl4tl',
    'Indoor Air Quality and Sustainable Air Conditioning Systems':
        'https://app.vosviewer.com/?json=https%3A%2F%2Fdrive.google.com%2Fuc%3Fid%3D1rqPx3X_9Hnv9mTq2bMCbWWh5VIOw9CRh',
    'Architectural Integration of Solar Photovoltaic Systems in Buildings':
        'https://tinyurl.com/2a2ha2r8',
    'Utilizing BIM in Construction and Building Information Modeling Industry':
        'https://app.vosviewer.com/?json=https%3A%2F%2Fdrive.google.com%2Fuc%3Fid%3D1V-Cto19dxV_GR3MtNP6Yk642CnTQkjEK',
}


def load_institute_network(topic, **kwargs):
    """Return an HTML pane embedding the institute network of ``topic``.

    Args:
        topic: macro-topic label, one of the keys of ``macro_topics_mapping``.
        **kwargs: accepted and ignored.

    Returns:
        A ``pn.pane.HTML`` holding a 1000x800 iframe. Its ``src`` is empty for
        known topics without a network. ``None`` for an unknown topic.
    """
    if topic not in macro_topics_mapping:
        return None
    src = _INSTITUTE_NETWORK_URLS.get(topic, '')
    # Named iframe_markup so the imported ``html`` module is not shadowed.
    iframe_markup = f'<iframe src="{src}" width="1000" height="800"></iframe>'
    return pn.pane.HTML(iframe_markup)


# Macro topics that have a country collaboration network, mapped to the URL
# embedded in the iframe. Every other known macro topic gets an empty ``src``.
_COUNTRY_NETWORK_URLS = {
    'Energy Efficiency and Thermal Comfort in Building Environments':
        'https://tinyurl.com/2b7sqbdc',
    'Architectural Integration of Solar Photovoltaic Systems in Buildings':
        'https://tinyurl.com/29mkxzep',
    'Utilizing BIM in Construction and Building Information Modeling Industry':
        'https://tinyurl.com/2ynebkcr',
}


def load_country_network(topic, **kwargs):
    """Return an HTML pane embedding the country network of ``topic``.

    Args:
        topic: macro-topic label, one of the keys of ``macro_topics_mapping``.
        **kwargs: accepted and ignored.

    Returns:
        A ``pn.pane.HTML`` holding a 1000x800 iframe. Its ``src`` is empty for
        known topics without a network. ``None`` for an unknown topic.
    """
    if topic not in macro_topics_mapping:
        return None
    src = _COUNTRY_NETWORK_URLS.get(topic, '')
    # Named iframe_markup so the imported ``html`` module is not shadowed.
    iframe_markup = f'<iframe src="{src}" width="1000" height="800"></iframe>'
    return pn.pane.HTML(iframe_markup)


def create_overlay_plot(subject_df):
    """Overlay one curve per column of ``subject_df`` against its index.

    Args:
        subject_df: DataFrame whose index supplies the 'Time' axis and whose
            columns each become one 'Frequency' curve labelled by column name.

    Returns:
        The overlay, with the legend on the right and a 1400x900 canvas.
    """
    combined = hv.Overlay()
    for column_name in subject_df.columns:
        column_curve = hv.Curve(
            (subject_df.index, subject_df[column_name]),
            'Time',
            'Frequency',
            label=column_name,
        )
        combined = combined * column_curve

    display_options = dict(show_legend=True, legend_position='right', width=1400, height=900)
    combined.opts(**display_options)
    return combined





def hook1(plot, element):
    """Plot hook: make both axes' labels black and fully opaque.

    For the 'xaxis' and 'yaxis' entries of ``plot.handles`` this sets the
    label text colour to black and the label and axis-line alpha to 1.0.

    Args:
        plot: object whose ``handles`` mapping holds the two axis objects.
        element: the element being rendered (unused).
    """
    for axis_key in ('xaxis', 'yaxis'):
        axis = plot.handles[axis_key]
        axis.axis_label_text_color = 'black'
        axis.axis_label_text_alpha = 1.0
        axis.axis_line_alpha = 1.0


'''
def legend_hook(plot, element):
    p = plot.state
    if p.legend:
        legend = p.legend[0]
        legend.orientation = 'horizontal'
        legend.location = 'center'
        legend.background_fill_color = 'white'
        legend.border_line_color = 'black'
        legend.label_text_font_size = '12pt'
        legend.label_text_color = 'black'
        legend.spacing = 10
        legend.label_standoff = 5,
        legend.margin = 20
        # Distribute legend items in multiple columns
        legend.columns = 8  # ⚠ This sets 8 columns for Bokeh >= 2.4
        # Move legend below the plot
        p.add_layout(legend, 'below')
'''

def create_overlay_plots(df):
    """Overlay one log1p-scaled curve per column of ``df`` against its index.

    Curves cycle through four line-dash styles so neighbouring series stay
    distinguishable. The y axis is labelled 'Occurrence Ratio' although the
    plotted values are ``log1p`` of the column values.

    Args:
        df: DataFrame whose index supplies the 'Time' axis and whose columns
            each become one curve labelled by column name.

    Returns:
        An 1850x900 overlay on a white background with a grid, whose legend
        is moved below the plot by a hook.
    """
    def move_legend_below(plot, element):
        # Restyle the legend as a 4-column horizontal block and re-attach it
        # underneath the plot area.
        p = plot.state
        if p.legend:
            legend = p.legend[0]
            legend.orientation = 'horizontal'
            legend.location = 'center'  # center of the below layout, not plot area
            legend.ncols = 4
            legend.background_fill_color = 'white'
            legend.border_line_color = 'black'
            legend.label_text_font_size = '20pt'
            legend.label_text_color = 'black'
            legend.spacing = 10
            legend.margin = 10
            legend.label_standoff = 5

            p.add_layout(legend, 'below')

    line_styles = ['solid', 'dashed', 'dashdot', 'dotted']
    curves = []
    for i, obj_column in enumerate(df.columns):
        linestyle = line_styles[i % len(line_styles)]
        curve = hv.Curve(
            (df.index, np.log1p(df[obj_column])),
            'Time',
            'Occurrence Ratio',
            label=obj_column,
        ).opts(
            show_legend=True,
            line_dash=linestyle,
            fontsize={'xlabel': 18, 'ylabel': 18, 'xticks': 18, 'yticks': 18},
        )
        curves.append(curve)

    overlay = hv.Overlay(curves).opts(
        opts.Overlay(
            bgcolor='white',
            show_legend=True,
            width=1850,
            height=900,
            hooks=[move_legend_below, hook1, lambda p, _: p.state.update(border_fill_color='white')],
        ),
        opts.Curve(
            show_grid=True,
            line_width=2,
        ),
    )
    return overlay


############################# WIDGETS & CALLBACK ###########################################

def filter_data0(df, min_value):
    """Return the rows of ``df`` whose 'value' is at least ``min_value``.

    The original index labels of the kept rows are preserved.
    """
    keep_row = df['value'] >= min_value
    return df[keep_row]


def plot_chord0_new(df,min_value):
    """Build a chord diagram of the relations with ``value >= min_value``.

    Args:
        df: relation table with 'source', 'target' and 'value' columns.
        min_value: minimum 'value' a row needs to be drawn.

    Returns:
        An 800x800 ``hv.Chord`` with hover enabled, nodes coloured and
        labelled by their index and edges coloured by their source.
    """
    filtered_df = filter_data0(df, min_value)
    # The original also built an hv.Dataset and called .head() on it; neither
    # result was used, so that dead code is gone.
    chord = hv.Chord(filtered_df, ['source', 'target'], ['value'])
    return chord.opts(opts.Chord(cmap='Category20', edge_cmap='Category20', label_text_color="white",  node_color = hv.dim('index').str(),  edge_color = hv.dim('source').str(), labels = 'index', tools=['hover'],   width=800, height=800))


def retrieveRegionTypes(region):
    """Return the entity-type list for ``region`` ('eu', 'us' or 'eu_us').

    Returns ``None`` for any other value.
    """
    types_by_region = (
        ('eu', top_type_filtered_eu),
        ('us', top_type_filtered_us),
        ('eu_us', top_type_filtered_eu_us),
    )
    for region_code, entity_types in types_by_region:
        if region == region_code:
            return entity_types
    return None


def filter_region(region):
    """Build the chord-diagram HoloMap for one region.

    The rows of ``grouping_filtered`` whose 'region' column equals ``region``
    are selected, and one chord diagram is created per distinct 'value' in
    that selection, keyed by that value as the minimum support threshold.

    Args:
        region: one of the codes in ``regions``.

    Returns:
        An ``hv.HoloMap`` with the single key dimension
        'Show triples with support greater than'.

    Raises:
        ValueError: if ``region`` is not a known region code. Previously this
            ended in an UnboundLocalError.
    """
    if region not in regions:
        raise ValueError(f"Unknown region {region!r}; expected one of {regions}")

    region_grouping = grouping_filtered[grouping_filtered['region'] == region]

    # Candidate thresholds for the slider, in ascending order.
    min_value_range = np.sort(region_grouping['value'].unique())

    # NOTE: plot_chord0_new keeps rows with value >= threshold, so the
    # threshold itself is included despite the "greater than" wording.
    holomap = hv.HoloMap(
        {min_value: plot_chord0_new(region_grouping, min_value)
         for min_value in min_value_range},
        kdims=['Show triples with support greater than'],
    )
    return holomap


# Define a function to generate Entity List RadioButtonGroup based on Region selection
def generate_radio_buttons(value):
    """Create the vertical entity-type RadioButtonGroup for a region.

    The options come from ``retrieveRegionTypes(value)``; the widget is named
    after the region and starts on a region-specific default type.

    Returns:
        The widget, or ``None`` when ``value`` is not 'eu', 'us' or 'eu_us'.
    """
    default_type_by_region = (
        ('eu', 'DBpedia:Company'),
        ('us', 'DBpedia:Disease'),
        ('eu_us', 'DBpedia:Person'),
    )
    for region_code, default_type in default_type_by_region:
        if value == region_code:
            return pn.widgets.RadioButtonGroup(
                options=retrieveRegionTypes(value),
                value=default_type,
                name=region_code,
                orientation='vertical',
            )
    return None



# Navigation buttons, one per dashboard page. Icon names come from
# https://tabler-icons.io/
button0 = pn.widgets.Button(name="Introduction", button_type="warning", icon="file-info", styles={"width": "100%"})
button1 = pn.widgets.Button(name="Publication Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
button2 = pn.widgets.Button(name="Topic Map", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
button3 = pn.widgets.Button(name="AECO Macro Topic Hierarchy", button_type="warning",  icon="chart-dots-3", styles={"width": "100%"})
button4 = pn.widgets.Button(name="AECO Macro Topics Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
button5 = pn.widgets.Button(name="Research Collaboration Networks: Institutes", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
button6 = pn.widgets.Button(name="Research Collaboration Networks: Countries", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
button7 = pn.widgets.Button(name="Research Tasks and Methods Trends", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})


# Region selector over the codes in `regions`.
region1 = pn.widgets.RadioButtonGroup(name='### Select News Region', options=regions)

# Macro-topic selector, limited to the active subset and starting on the
# first entry of that subset.
macro_topics_button = pn.widgets.Select(name='Select Macro Topic', value='Energy Efficiency and Thermal Comfort in Building Environments', options=macro_topics_active_subset)


# Initial RadioButtonGroup
radio_buttons_regions =  pn.widgets.RadioButtonGroup(options=regions,value='eu',name='Select region')
# Generate initial dynamic RadioButtonGroup (entity types for the 'eu' default)
radio_buttons_types  = generate_radio_buttons(radio_buttons_regions.value)



# Define a callback function to update the panel dynamically
def update_radio_group(event):
    """Replace the type selector's options with those of the new region.

    Args:
        event: Change event whose ``new`` attribute holds the selected region.
    """
    region_types = retrieveRegionTypes(event.new)
    radio_buttons_types.options = region_types


# Register update_radio_group as a watcher on the region selector's 'value'
# parameter, so that picking a region refreshes the type selector's options.
radio_buttons_regions.param.watch(update_radio_group, 'value')

# Define the callback function to update the HoloMap
def update_holomap(event):
    """Point ``initial_holomap.object`` at the result for the new region.

    Args:
        event: Change event whose ``new`` attribute holds the selected region.
    """
    refreshed_view = filter_region(event.new)
    initial_holomap.object = refreshed_view

region_radio_button = pn.widgets.RadioButtonGroup(
    options=regions,
    value='eu',
    name='Select Region',
)

# Build the view for the region that is selected at start-up.
initial_holomap = filter_region(region_radio_button.value)

# Call update_holomap whenever the selected region changes.
region_radio_button.param.watch(update_holomap, 'value')



def show_page(page_key):
    """Display the page registered under ``page_key`` in the main area.

    The page is looked up before the main area is emptied, so an unknown key
    raises ``KeyError`` while leaving the currently displayed page in place
    instead of blanking the main area first.

    Args:
        page_key: Key into the module-level ``mapping`` dict (e.g. "Page0").

    Raises:
        KeyError: If ``page_key`` is not a key of ``mapping``.
    """
    page = mapping[page_key]
    main_area.clear()
    main_area.append(page)
    
# Wire every sidebar button to its page: buttonN shows the page keyed "PageN".
for _page_number, _page_button in enumerate(
    (button0, button1, button2, button3, button4, button5, button6, button7)
):
    # Bind the key as a default argument so each callback keeps its own page.
    _page_button.on_click(lambda event, _key=f"Page{_page_number}": show_page(_key))


### CREATE PAGE LAYOUTS
def CreatePage0():
    """Build the introduction page.

    Returns a centred column holding a single 1000px-wide Markdown pane that
    describes the dashboard and each of its panels.
    """
    intro_markdown = """


## Introduction

This is a dashboard for a Data Analytics project regarding research publications in the AECO domain. The source data consists of around 267k English-language research papers gathered from the openalex.org graph database, covering a timeframe from 2011 through early 2024.

---------------------------

## Publication Trends

In the "Publication Trends" panel we show monthly time series of the total number of publications and the number of publications per country (both in log scale), for the top 20 countries by number of publications in the dataset.


## Topic Map
In the "Topic Map" panel we show a UMAP reduced 2-dimensional visualization of the optimized 52 topic clusters of AECO research papers, embedded using Sentence Transformer model, with the descriptive labels overlayed on the clusters being generated by LLama 2 Large Language Model. Each point in the space represent a paper from the dataset. 
Hovering over it has the paper title popping up, while clicking on it redirects to the corresponding OpenAlex paper entry page.
 
 
## AECO Macro Topic Hierarchy
The "AECO Macro Topic Hierarchy" panel allows to explore the dendrogram representation of the optimized clustering, with the leaves of the tree representing the 51 clusters, the intermediate nodes representing merged clusters and the height of the merging (distance from the leaves) indicating topic
similarity as based on the cosine distance matrix between topic embeddings.
 
 
## AECO Macro Topic Trends
The "AECO Macro Topic Trends" panel shows the evolution over time of the 16 AECO macro topics by plotting the semi-annual time series of the absolute numbers of publications per topic.

## Research Collaboration Networks: Institutes
The "Research Collaboration Networks: Institutes" panel contains VOSViewer-generated network representations of the research institutions co-authorship connections, for each of the 16 macro clusters. By selecting a macro-cluster from by the drop-down menu, a graph is loaded whose nodes represent research institutions, edges represent co-authorships relations (with edge thickness being proportional to the frequency of the relations) and the color code clustering highlights the partition of the graph in highly interconnected node groups.
Open the VOSViewer left panel to customize the visualization and/or search for a target institution in the graph.


## Research Collaboration Networks: Countries
The "Research Collaboration Networks: Countries" panel contains VOSViewer-generated network representations of the authors/institutions' country co-authorship connections, for each of the 16 macro clusters. By selecting a macro-cluster from by the drop-down menu, a graph is loaded whose nodes represent authors/institutions' countries, edges represent co-authorships relations (with edge thickness being proportional to the frequency of the relations) and the color code clustering highlights the partition of the graph in highly interconnected node  groups. 
Open the VOSViewer left panel to customize the visualization and/or search for a target country in the graph.


## Research Tasks and Methods Trend
The "Research Tasks and Methods Trend" shows the evolution over time of the most prominent TASK and METHOD entities extracted from research papers for each of the 16 AECO macro topics. It plots annual time series of the occurrence ratio for each triple target entity (Task or Method) to the number of papers of the macro topic.



"""
    intro_pane = pn.pane.Markdown(intro_markdown, width=1000)
    return pn.Column(intro_pane, align="center")

def CreatePage1():
    """Build the "Publication Trends" page: a heading above the publication curve chart."""
    heading = pn.pane.Markdown("## Publication Trends ")
    trend_chart = create_publication_curve_chart()
    return pn.Column(heading, trend_chart, align="center")



def CreatePage2():
    """Build the "AECO Topic Map" page.

    The page is a centred column with a heading and an HTML pane whose iframe
    points at ``AECO_topic_map_path``.
    """
    # The document is embedded by reference (iframe src), not read into memory.
    embed_markup = f'<iframe src="{AECO_topic_map_path}" width="1400px" height="1200px"></iframe>'
    topic_map_view = pn.pane.HTML(embed_markup, sizing_mode='stretch_both')
    heading = pn.pane.Markdown(" ## AECO Topic Map ")
    return pn.Column(heading, topic_map_view, align="center")

    
def CreatePage3():
    """Build the macro-topic dendrogram page.

    The page is a centred column with a heading and an HTML pane whose iframe
    points at ``AECO_topics_dendogram_file_path``.
    """
    # The document is embedded by reference (iframe src), not read into memory.
    embed_markup = f'<iframe src="{AECO_topics_dendogram_file_path}" width="1400px" height="1200px"></iframe>'
    dendrogram_view = pn.pane.HTML(embed_markup, sizing_mode='stretch_both')
    heading = pn.pane.Markdown(" ## AECO Macro Topics Dendogram ")
    return pn.Column(heading, dendrogram_view, align="center")


def CreatePage4():
    """Build the macro-topics-over-time page.

    The page is a centred column with a heading and an HTML pane whose iframe
    points at ``AECO_topics_over_time_file_path``.
    """
    # The document is embedded by reference (iframe src), not read into memory.
    embed_markup = f'<iframe src="{AECO_topics_over_time_file_path}" width="1400px" height="1200px"></iframe>'
    trends_view = pn.pane.HTML(embed_markup, sizing_mode='stretch_both')
    heading = pn.pane.Markdown(" ## AECO Macro Topics ")
    return pn.Column(heading, trends_view, align="center")

def CreatePage5():
    """Build the institutes collaboration-network page.

    The macro-topic drop-down sits above the output of
    ``load_institute_network``, which is bound to the drop-down's value.
    """
    network_view = pn.bind(load_institute_network, macro_topics_button)
    return pn.Column(macro_topics_button, network_view, align="center")


def CreatePage6():
    """Build the countries collaboration-network page.

    The macro-topic drop-down sits above the output of
    ``load_country_network``, which is bound to the drop-down's value.
    """
    network_view = pn.bind(load_country_network, macro_topics_button)
    return pn.Column(macro_topics_button, network_view, align="center")

def CreatePage7():
    """Build the research tasks and methods trends page.

    The macro-topic drop-down sits above the output of
    ``load_Task_Method_trends``, which is bound to the drop-down's value.
    """
    trends_view = pn.bind(load_Task_Method_trends, macro_topics_button)
    return pn.Column(macro_topics_button, trends_view, align="center")

def load_Task_Method_trends(topic, **kwargs):
    """Build the TASK/METHOD trend view for one macro topic.

    Args:
        topic: Macro-topic label; must be a key of ``macro_topics_mapping``.
        **kwargs: Accepted for call compatibility and ignored.

    Returns:
        A centred ``pn.Column`` holding a TASK/METHOD drop-down and the result
        of ``create_overlay_plots`` for the currently selected entity type.

    Raises:
        ValueError: If ``topic`` is not a key of ``macro_topics_mapping``.
    """
    # Reject labels that have no entry in the mapping before loading anything.
    if topic not in macro_topics_mapping:
        raise ValueError(f"Topic '{topic}' not found in macro_topics_mapping")
    macro_topic_str = str(macro_topics_mapping[topic])

    # Drop-down that switches between the two entity types.
    dropdown = pn.widgets.Select(
        name="Select:",
        options=["TASK", "METHOD"]
    )

    # Load both time series once, up front, so that switching the drop-down
    # only re-plots and does not reload.
    task_ts_df = loadTaskTimeSeries(macro_topic_str)
    method_ts_df = loadMethodTimeSeries(macro_topic_str)

    def load_overlays(value):
        """Return the overlay plots for the selected entity type."""
        if value == "TASK":
            return create_overlay_plots(task_ts_df)
        if value == "METHOD":
            return create_overlay_plots(method_ts_df)
        # Neither drop-down option matched; nothing to render.
        return None

    return pn.Column(
        dropdown,
        pn.bind(load_overlays, dropdown),
        align="center"
    )





def load_Task_Method_triple_trends(topic, **kwargs):
    """Build the per-task trend view for one macro topic.

    Args:
        topic: Macro-topic label; must be a key of ``macro_topics_mapping``.
        **kwargs: Accepted for call compatibility and ignored.

    Returns:
        A ``pn.Column`` holding a task drop-down and, bound to it, either the
        result of ``create_overlay_plot`` for the selected task or a
        "No data available" text element.

    Raises:
        ValueError: If ``topic`` is not a key of ``macro_topics_mapping``.
    """
    if topic not in macro_topics_mapping:
        raise ValueError(f"Topic '{topic}' not found in macro_topics_mapping")
    macro_topic_str = str(macro_topics_mapping[topic])

    # One time series per task listed for this topic (none if the topic is absent).
    top_tasks = load_topic2toptasks().get(macro_topic_str, [])
    series_by_task = {
        task_name: loadTaskMethodTimeSeries(macro_topic_str, task_name)
        for task_name in top_tasks
    }

    # Fall back to a single placeholder option when no task has data.
    task_selector = pn.widgets.Select(
        name="Select TASK",
        options=list(series_by_task) or ["No available tasks"]
    )

    def render_task(task):
        """Return the overlay plot for ``task``, or a placeholder text element."""
        if task in series_by_task:
            return create_overlay_plot(series_by_task[task])
        return hv.Text(0.5, 0.5, "No data available", halign="center")

    return pn.Column(task_selector, pn.bind(render_task, task_selector))
    
# Build every page once at start-up: key "PageN" holds the layout returned by
# CreatePageN(), constructed in ascending order of N.
mapping = {
    f"Page{page_number}": build_page()
    for page_number, build_page in enumerate(
        (
            CreatePage0,
            CreatePage1,
            CreatePage2,
            CreatePage3,
            CreatePage4,
            CreatePage5,
            CreatePage6,
            CreatePage7,
        )
    )
}

#################### SIDEBAR LAYOUT ##########################
# Sidebar: a "Panels" heading followed by the eight navigation buttons.
sidebar = pn.Column(
    pn.pane.Markdown("## Panels"),
    button0,
    button1,
    button2,
    button3,
    button4,
    button5,
    button6,
    button7,
    styles={"width": "100%", "padding": "15px"},
)

#################### MAIN AREA LAYOUT ##########################
# The main area starts on the introduction page ("Page0").
main_area = pn.Column(
    mapping["Page0"],
    styles={"width": "100%"},
)

###################### APP LAYOUT ##############################
# Indicator handed to the template as its busy indicator.
_busy_indicator = pn.indicators.BooleanStatus(value=True)

# Dark-themed Bootstrap template combining the sidebar and the main area.
template = pn.template.BootstrapTemplate(
    title=" AECO Tech Dashboard",
    sidebar=[sidebar],
    main=[main_area],
    header_background="black",
    theme=pn.template.DarkTheme,
    sidebar_width=330,  # Default is 330
    busy_indicator=_busy_indicator,
)

### DEPLOY APP

# Mark the assembled template as servable so the Panel server
# (e.g. `panel serve`) renders it as the app.
template.servable()