# Install a blanket "ignore" warning filter before the remaining imports run
# (presumably so that import-time warnings from the libraries below are
# silenced as well -- NOTE(review): confirm this ordering is intentional).
import warnings
warnings.filterwarnings("ignore")
import io
import os
import time
import warnings
# Explicitly ignore FutureWarning and RuntimeWarning categories as well.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
import pandas as pd
import csv
import ast
from tqdm import tqdm
from operator import itemgetter
import numpy as np
import re
import datetime
import html
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
#plt.style.use('seaborn-paper')
import holoviews as hv
from holoviews import opts, dim
from bokeh.sampledata.les_mis import data
from bokeh.io import show
from bokeh.sampledata.les_mis import data
import panel as pn
import bokeh
from bokeh.resources import INLINE
from holoviews.operation.timeseries import rolling, rolling_outlier_std

# Select Bokeh as the HoloViews plotting backend for every chart below.
hv.extension('bokeh')

## LOAD DATASETS
# Folder (relative to the working directory) holding every data file read below.
dna_folder = './data'

## AECO topic over time html file:
AECO_topics_over_time_file_path = '/assets/optimized_merged_AECO_topics_over_time_2D.html'
AECO_topics_dendogram_file_path = '/assets/topic_hierarchy_optimal_params.htm'

#### full data unfiltered:
# One tab-separated table per region ('eu', 'us', 'eu_us'); the first row is the header.
dna_articles_unfiltered_eu_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_articles_unfiltered_eu_time_indexed_resampled.tsv'),sep='\t',header=0)
dna_articles_unfiltered_us_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_articles_unfiltered_us_time_indexed_resampled.tsv'),sep='\t',header=0)
dna_articles_unfiltered_eu_us_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_articles_unfiltered_eu_us_time_indexed_resampled.tsv'),sep='\t',header=0)

#### classifier filtered articles:
# Same layout as above; create_curve_chart() divides the `ids` column of these
# tables by the `ids` column of the matching unfiltered table.
dh_ration_df_eu = pd.read_csv(os.path.join(dna_folder, 'dh_ration_df_eu.tsv'),sep='\t',header=0)
dh_ration_df_us = pd.read_csv(os.path.join(dna_folder, 'dh_ration_df_us.tsv'),sep='\t',header=0)
dh_ration_df_eu_us = pd.read_csv(os.path.join(dna_folder, 'dh_ration_df_eu_us.tsv'),sep='\t',header=0)

# Region keys used as widget options and as lookup keys throughout the file.
regions = ['eu', 'us', 'eu_us']
# Entity-type -> frequency maps, one per region; filled by read_top_ent_types().
sorted_ent_type_freq_map_eu = dict()
sorted_ent_type_freq_map_us = dict()
sorted_ent_type_freq_map_eu_us = dict()


def _load_top_ent_types(file_name, target, limit=20):
    """Fill *target* with the first *limit* ``type,count`` rows of *file_name*.

    The file is looked up inside ``dna_folder`` and parsed with the csv
    module's default (comma) delimiter, exactly as before.  Each row must
    hold exactly two fields; the second is converted with ``int``.  The
    generic ``'Entity'`` key is removed afterwards.

    Raises:
        ValueError: if a row does not have two fields or the count is not an
            integer.
        KeyError: if ``'Entity'`` is not among the rows that were read.
    """
    # newline='' is what the csv module asks for; the context manager closes
    # the handle, which the previous bare open() never did.
    with open(os.path.join(dna_folder, file_name), 'r', newline='') as handle:
        for position, row in enumerate(csv.reader(handle)):
            if position >= limit:
                # Only the leading rows are used; no need to scan the rest.
                break
            ent_type, count = row
            target[ent_type] = int(count)
    del target['Entity']


def read_top_ent_types():
    """Populate the three module-level ``sorted_ent_type_freq_map_*`` dicts.

    Reads one file per region (eu, us, eu_us) and keeps the first 20 rows of
    each, minus the ``'Entity'`` entry.  Returns nothing; the dicts are
    updated in place.
    """
    per_region = (
        ('sorted_ent_type_freq_map_eu.tsv', sorted_ent_type_freq_map_eu),
        ('sorted_ent_type_freq_map_us.tsv', sorted_ent_type_freq_map_us),
        ('sorted_ent_type_freq_map_eu_us.tsv', sorted_ent_type_freq_map_eu_us),
    )
    for file_name, target in per_region:
        _load_top_ent_types(file_name, target)


read_top_ent_types()

# Entity types offered in the UI for each region (hand-picked lists).
top_type_filtered_eu = ['DBpedia:Country', 'DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:Person', 'DBpedia:Disease', 'DBpedia:ChemicalSubstance', 'DBpedia:Drug', 'DBpedia:GovernmentAgency', 'DBpedia:City', 'DBpedia:MonoclonalAntibody']
top_type_filtered_us = ['DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:Disease', 'DBpedia:ChemicalSubstance', 'DBpedia:Person', 'DBpedia:Drug', 'DBpedia:Country', 'DBpedia:Region', 'DBpedia:MonoclonalAntibody', 'DBpedia:City', 'DBpedia:Biomolecule']
top_type_filtered_eu_us = ['DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:ChemicalSubstance', 'DBpedia:Drug', 'DBpedia:Country', 'DBpedia:Person', 'DBpedia:Disease', 'DBpedia:MonoclonalAntibody', 'DBpedia:GovernmentAgency', 'DBpedia:Biomolecule', 'DBpedia:Gene']

# Per-region article tables; generate_entity_curves() uses their index and
# their `ids` column as the denominator of the entity-occurrence ratio.
dna_healthtech_articles_eu_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_healthtech_articles_eu_time_indexed_resampled.tsv'), sep='\t', header=0)
dna_healthtech_articles_us_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_healthtech_articles_us_time_indexed_resampled.tsv'), sep='\t', header=0)
dna_healthtech_articles_eu_us_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_healthtech_articles_eu_us_time_indexed_resampled.tsv'), sep='\t', header=0)


def _load_top_ent_map(file_name, target):
    """Fill *target* from a tab-separated ``key<TAB>literal`` file.

    The second field of every row is a Python literal holding a sequence of
    ``(entity, frequency)`` pairs.  Pairs are de-duplicated by entity (the
    last occurrence wins) and stored as a list of ``(entity, frequency)``
    tuples sorted by descending frequency.

    Raises:
        ValueError: if a row does not have exactly two fields, or the second
            field is not a valid Python literal.
    """
    # The context manager closes the handle; the previous bare open() leaked it.
    with open(os.path.join(dna_folder, file_name), 'r', newline='') as handle:
        for row in csv.reader(handle, delimiter='\t'):
            key, literal = row
            # literal_eval only accepts literals, so no code from the file runs.
            freq_by_entity = {pair[0]: pair[1] for pair in ast.literal_eval(literal)}
            target[key] = sorted(freq_by_entity.items(), key=itemgetter(1), reverse=True)


def read_top_ent_maps():
    """Populate the three module-level ``ent_freq_maps_*`` dicts in place."""
    per_region = (
        ('sorted_ent_freq_map_eu.tsv', ent_freq_maps_eu),
        ('sorted_ent_freq_map_us.tsv', ent_freq_maps_us),
        ('sorted_ent_freq_map_eu_us.tsv', ent_freq_maps_eu_us),
    )
    for file_name, target in per_region:
        _load_top_ent_map(file_name, target)


# key -> [(entity, frequency), ...] sorted by descending frequency, per region.
ent_freq_maps_eu = dict()
ent_freq_maps_us = dict()
ent_freq_maps_eu_us = dict()
read_top_ent_maps()


def read_type_filtered_triples():
    """Load one triples table per (region, entity type) into the module dicts.

    For every type listed in ``top_type_filtered_<region>`` the file
    ``<dna_folder>/filtered_rows/<region>/<type with ':' -> '_'>.tsv`` is
    read, its ``'Unnamed: 0'`` column is dropped, and the frame is stored in
    ``top_type_filtered_triples_<region>[type]``.

    Raises:
        KeyError: if a file has no ``'Unnamed: 0'`` column.
    """
    per_region = (
        ('eu', top_type_filtered_eu, top_type_filtered_triples_eu),
        ('us', top_type_filtered_us, top_type_filtered_triples_us),
        ('eu_us', top_type_filtered_eu_us, top_type_filtered_triples_eu_us),
    )
    for region, ent_types, target in per_region:
        for ent_type in ent_types:
            path = dna_folder + '/filtered_rows/' + region + '/' + ent_type.replace(':', '_') + '.tsv'
            # NOTE(review): the separator is a single space although the files
            # are named .tsv -- possibly a literal tab that was lost; confirm
            # against the data before changing it.
            df = pd.read_csv(path, sep=" ", header=0)
            df.drop(columns=['Unnamed: 0'], inplace=True)
            target[ent_type] = df


# entity type -> triples DataFrame for the 'eu' region.
top_type_filtered_triples_eu = dict()
# entity type -> triples DataFrame for the 'us' and 'eu_us' regions.
top_type_filtered_triples_us = dict()
top_type_filtered_triples_eu_us = dict()
read_type_filtered_triples()

# Relation table used by the chord diagrams; filter_region() and
# filter_data0() read its 'region' and 'value' columns.
# NOTE(review): the separator is a single space although the file is named
# .tsv -- possibly a literal tab that was lost; confirm against the data.
grouping_filtered = pd.read_csv(os.path.join(dna_folder, 'dna_relations.tsv'), sep=" ")


################################# CREATE CHARTS ############################
def create_curve_chart():
    """Return an overlay of three ratio curves (EU, US, EU-US).

    Each curve plots ``filtered.ids / unfiltered.ids`` against the index of
    the filtered table, for the matching pair of per-region tables.
    """
    def _ratio_curve(filtered, unfiltered, label):
        return hv.Curve((filtered.index, filtered.ids / unfiltered.ids),
                        'Time', 'Digital Health News Ratio', label=label)

    # Create the 3 line plots
    curve_eu = _ratio_curve(dh_ration_df_eu, dna_articles_unfiltered_eu_time_indexed_resampled, 'EU')
    curve_us = _ratio_curve(dh_ration_df_us, dna_articles_unfiltered_us_time_indexed_resampled, 'US')
    curve_eu_us = _ratio_curve(dh_ration_df_eu_us, dna_articles_unfiltered_eu_us_time_indexed_resampled, 'EU-US')

    # Overlay the line plots
    overlay = curve_eu * curve_us * curve_eu_us
    overlay.opts(show_legend=True, legend_position='top_left', width=1200, height=600)
    return overlay


macro_topics = [
    "Energy-Efficient Building Design for Thermal Comfort and Sustainability",
    "Indoor Air Quality and Energy Efficiency in Low-Energy Houses",
    "Urban Planning and Development in China\'s Cities",
    "Design Thinking and Sustainable Product Development",
    "Smart Cities and Urban Computing",
    "Urban Resilience and Water Management",
    "Renewable Energy Systems: Solar PV & Building Applications",
    "Exploring the Intersection of Traditional Heritage and Modern Steel Architecture in Historical Buildings",
    "Green Building Assessment and Design",
    "Landscape Design, Planning, and Research: Integrating Cultural, Ecological, and Rural Perspectives",
    "Noise and Acoustic Design in Urban Development",
    "Sustainable Building Materials: Wood & 3D Printing Innovations",
    "BIM in AEC: Trends, Challenges, and Opportunities",
    "Urban Food Systems: Community Development and Social Sustainability in Cities",
    "Innovative Bridge Design and Construction: Trends and Case Studies",
    "Cavity Flow and Heat Transfer",
]

# Topics offered in the "Select Macro Topic" widget.
macro_topics_active_subset = [
    "Energy-Efficient Building Design for Thermal Comfort and Sustainability",
    "Indoor Air Quality and Energy Efficiency in Low-Energy Houses",
    "Smart Cities and Urban Computing",
    "Renewable Energy Systems: Solar PV & Building Applications",
    "BIM in AEC: Trends, Challenges, and Opportunities",
]

# Per-topic HTML snippets for the two network pages.  Every snippet is
# currently an empty string, exactly as in the previous if/elif chains.
# NOTE(review): the empty literals look like placeholders for embedded
# network views -- confirm what markup belongs here.
_INSTITUTE_NETWORK_HTML = {
    'Energy-Efficient Building Design for Thermal Comfort and Sustainability': """""",
    'Indoor Air Quality and Energy Efficiency in Low-Energy Houses': """""",
    'Smart Cities and Urban Computing': """""",
    'Renewable Energy Systems: Solar PV & Building Applications': """""",
    'BIM in AEC: Trends, Challenges, and Opportunities': """""",
}

_COUNTRY_NETWORK_HTML = {
    'Energy-Efficient Building Design for Thermal Comfort and Sustainability': """""",
    'Indoor Air Quality and Energy Efficiency in Low-Energy Houses': """""",
    'Smart Cities and Urban Computing': """""",
    'Renewable Energy Systems: Solar PV & Building Applications': """""",
    'BIM in AEC: Trends, Challenges, and Opportunities': """""",
}


def load_institute_network(topic, **kwargs):
    """Return a new HTML pane for *topic*'s institute network.

    Returns ``None`` when *topic* is not one of the five supported topics.
    Extra keyword arguments are accepted and ignored.
    """
    if topic not in _INSTITUTE_NETWORK_HTML:
        return None
    return pn.pane.HTML(_INSTITUTE_NETWORK_HTML[topic])


def load_country_network(topic, **kwargs):
    """Return a new HTML pane for *topic*'s country network.

    Returns ``None`` when *topic* is not one of the five supported topics.
    Extra keyword arguments are accepted and ignored.
    """
    if topic not in _COUNTRY_NETWORK_HTML:
        return None
    return pn.pane.HTML(_COUNTRY_NETWORK_HTML[topic])


def _entity_curves_overlay(top_ents, triples, articles):
    """Build one curve per entity in *top_ents* and overlay them.

    *top_ents* is a sequence of ``(entity, frequency)`` pairs, *triples* the
    DataFrame of triples for the selected type, and *articles* the per-region
    article table whose index and ``ids`` column normalise the counts.
    """
    curves = []
    for ent in top_ents:
        name = ent[0]
        # Keep the triples mentioning the entity as subject or as object.
        mentions = triples[(triples['subjEntityLinks'] == name) | (triples['objEntityLinks'] == name)]
        time_indexed = mentions.set_index(pd.DatetimeIndex(mentions['timestamp']), inplace=False)
        del time_indexed['timestamp']
        # Yearly counts.  This step was missing from the 'us' and 'eu_us'
        # branches, which therefore failed on an unassigned variable.
        yearly = time_indexed.resample("Y").count()
        yearly = yearly.reindex(articles.index, fill_value=0)
        curve = hv.Curve((yearly.index, (yearly['doc_id'] / articles['ids'])),
                         'Time', 'Key Entity Occurrence', label=name)
        curve.opts(autorange='y')
        curves.append(curve)
    overlay = hv.Overlay(curves)
    overlay.opts(legend_muted=False, legend_cols=4, show_legend=True, legend_position='top_left',
                 fontsize={'legend': 13}, width=1200, height=800)
    return overlay


# Define a function to generate Curve based on selected values
def generate_entity_curves(region_value, type_value, **kwargs):
    """Return an overlay of entity-occurrence curves for a region and type.

    Args:
        region_value: ``'eu'``, ``'us'`` or ``'eu_us'``.
        type_value: entity type key, looked up in the region's
            ``ent_freq_maps_*`` and ``top_type_filtered_triples_*`` dicts.
        **kwargs: accepted and ignored.

    Returns:
        An ``hv.Overlay`` with one curve per entity, or ``None`` when
        *region_value* is not a known region.

    Raises:
        KeyError: if *type_value* is unknown for the selected region.
    """
    sources = {
        'eu': (ent_freq_maps_eu, top_type_filtered_triples_eu,
               dna_healthtech_articles_eu_time_indexed_resampled),
        'us': (ent_freq_maps_us, top_type_filtered_triples_us,
               dna_healthtech_articles_us_time_indexed_resampled),
        'eu_us': (ent_freq_maps_eu_us, top_type_filtered_triples_eu_us,
                  dna_healthtech_articles_eu_us_time_indexed_resampled),
    }
    if region_value not in sources:
        return None
    ent_maps, triples_by_type, articles = sources[region_value]
    return _entity_curves_overlay(ent_maps[type_value], triples_by_type[type_value], articles)


############################# WIDGETS & CALLBACK ###########################################
def filter_data0(df, min_value):
    """Return the rows of *df* whose ``'value'`` is at least *min_value*."""
    return df[df['value'] >= min_value]


def plot_chord0_new(df, min_value):
    """Return a styled chord diagram of the rows of *df* with value >= *min_value*.

    Edges run from the ``'source'`` to the ``'target'`` column and carry
    ``'value'`` as their value dimension.
    """
    filtered_df = filter_data0(df, min_value)
    # Create a Holoviews Dataset for nodes
    # NOTE(review): `nodes` is never passed to the chord; kept because it is
    # unclear whether constructing it is relied upon as a check -- confirm.
    nodes = hv.Dataset(filtered_df, 'index')
    nodes.data.head()
    chord = hv.Chord(filtered_df, ['source', 'target'], ['value'])
    return chord.opts(opts.Chord(cmap='Category20', edge_cmap='Category20', label_text_color="white",
                                 node_color=hv.dim('index').str(), edge_color=hv.dim('source').str(),
                                 labels='index', tools=['hover'], width=800, height=800))


def retrieveRegionTypes(region):
    """Return the entity-type list for *region*, or ``None`` if unknown."""
    types_by_region = {
        'eu': top_type_filtered_eu,
        'us': top_type_filtered_us,
        'eu_us': top_type_filtered_eu_us,
    }
    return types_by_region.get(region)


def filter_region(region):
    """Return a HoloMap of chord diagrams for *region*.

    The map has one frame per distinct ``'value'`` found in the region's rows
    of ``grouping_filtered`` (ascending); each frame shows the relations
    whose value is at least that threshold.

    Raises:
        ValueError: if *region* is not ``'eu'``, ``'us'`` or ``'eu_us'``
            (previously this surfaced as an unbound-local error).
    """
    if region not in ('eu', 'us', 'eu_us'):
        raise ValueError(f"unknown region: {region!r}")
    region_grouping = grouping_filtered[grouping_filtered['region'] == region]
    # Define range for minimum value slider
    min_value_range = region_grouping['value'].unique()
    min_value_range.sort()
    # Define HoloMap with minimum value and attribute as key dimensions
    holomap = hv.HoloMap(
        {min_value: plot_chord0_new(region_grouping, min_value) for min_value in min_value_range},
        kdims=['Show triples with support greater than'],
    )
    return holomap


# Define a function to generate Entity List RadioButtonGroup based on Region selection
def generate_radio_buttons(value):
    """Return the entity-type RadioButtonGroup for region *value*.

    Each region has its own pre-selected type; ``None`` is returned for an
    unknown region.
    """
    default_type = {
        'eu': 'DBpedia:Company',
        'us': 'DBpedia:Disease',
        'eu_us': 'DBpedia:Person',
    }
    if value not in default_type:
        return None
    return pn.widgets.RadioButtonGroup(options=retrieveRegionTypes(value), value=default_type[value],
                                       name=value, orientation='vertical')


# https://tabler-icons.io/
button0 = pn.widgets.Button(name="Introduction", button_type="warning", icon="file-info", styles={"width": "100%"})
button1 = pn.widgets.Button(name="AECO Macro Topics Hierarchy", button_type="warning", icon="file-info", styles={"width": "100%"})
button2 = pn.widgets.Button(name="AECO Macro Topics Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
button3 = pn.widgets.Button(name="Research Collaboration Networks: Institutes", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
button4 = pn.widgets.Button(name="Research Collaboration Networks: Countries", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
#button5 = pn.widgets.Button(name="Entity Chord Diagrams", button_type="warning", icon="chart-dots-filled", styles={"width": "100%"})

region1 = pn.widgets.RadioButtonGroup(name='### Select News Region', options=regions)
macro_topics_button = pn.widgets.Select(name='Select Macro Topic', value='Energy-Efficient Building Design for Thermal Comfort and Sustainability', options=macro_topics_active_subset)

# Initial RadioButtonGroup
radio_buttons_regions = pn.widgets.RadioButtonGroup(options=regions, value='eu', name='Select region')
# Generate initial dynamic RadioButtonGroup
radio_buttons_types = generate_radio_buttons(radio_buttons_regions.value)


# Define a callback function to update the panel dynamically
def update_radio_group(event):
    """Swap the entity-type options to those of the newly selected region."""
    radio_buttons_types.options = retrieveRegionTypes(event.new)


# bind the function to the widget(s)
dmap2 = hv.DynamicMap(pn.bind(generate_entity_curves, radio_buttons_regions, radio_buttons_types))
# Bind the selected value of the first RadioButtonGroup to update the second RadioButtonGroup
radio_buttons_regions.param.watch(update_radio_group, 'value')


# Define the callback function to update the HoloMap
def update_holomap(event):
    """Assign the HoloMap for the newly selected region to ``initial_holomap.object``."""
    initial_holomap.object = filter_region(event.new)


region_radio_button = pn.widgets.RadioButtonGroup(options=regions, value='eu', name='Select Region')
# Create the initial HoloMap
initial_holomap = filter_region(region_radio_button.value)
# Bind the callback function to the value change event of the RadioButton widget
region_radio_button.param.watch(update_holomap, 'value')


def show_page(page_key):
    """Replace the content of ``main_area`` with the page stored under *page_key*."""
    main_area.clear()
    main_area.append(mapping[page_key])


button0.on_click(lambda event: show_page("Page0"))
button1.on_click(lambda event: show_page("Page1"))
button2.on_click(lambda event: show_page("Page2"))
# Wire the remaining sidebar buttons to their pages.  The page key is bound
# as a default argument so each callback keeps its own key.
for _nav_button, _nav_key in ((button3, "Page3"), (button4, "Page4")):
    _nav_button.on_click(lambda event, _nav_key=_nav_key: show_page(_nav_key))
#button5.on_click(lambda event: show_page("Page5"))
#button6.on_click(lambda event: show_page("Page6"))


### CREATE PAGE LAYOUTS
def CreatePage0():
    """Build the introduction page: one centred, 800px-wide Markdown pane."""
    # NOTE(review): the text below is a single line, so the '---' rule and the
    # '##' headings are probably not rendered as such -- confirm whether line
    # breaks were lost.
    intro_text = """ This is a dashboard for a Research Analysis project regarding research and technology in the AECO domain. The source data consists of around 276k English-language research papers gathered from the openalex.org graph database, covering a timeframe from 2011 through 2024. --------------------------- ## AECO Macro Topics In the AECO Macro Topics panel we present the 6-month-sampled time series depicting the number of published research papers for the 16 macro-topics automatically detected by an optimized BerTopic model and ppst-processed for manual topic merging. ### Research Collaboration Networks: Institutes ### Research Collaboration Networks: Authors """
    intro_pane = pn.pane.Markdown(intro_text, width=800)
    return pn.Column(intro_pane, align="center")


def CreatePage1():
    """Build the topic-dendogram page: a heading above a stretched HTML pane."""
    # Load the HTML content from the local file
    #with open(AECO_topics_over_time_file_path, 'r', encoding='utf-8') as file:
    #    html_content = file.read()
    # Use an iframe to load the local HTML file
    # NOTE(review): the markup is currently an empty string -- confirm the
    # intended iframe source.
    iframe_html = f''
    heading = pn.pane.Markdown(" ## AECO Macro Topics Dendogram ")
    # Create an HTML pane to render the content
    html_pane = pn.pane.HTML(iframe_html, sizing_mode='stretch_both')
    page = pn.Column(heading, html_pane, align="center")
    return page


def CreatePage2():
    """Build the topics-over-time page: a heading above a stretched HTML pane."""
    # Load the HTML content from the local file
    #with open(AECO_topics_over_time_file_path, 'r', encoding='utf-8') as file:
    #    html_content = file.read()
    # Use an iframe to load the local HTML file
    # NOTE(review): the markup is currently an empty string -- confirm the
    # intended iframe source.
    iframe_html = f''
    heading = pn.pane.Markdown(" ## AECO Macro Topics ")
    # Create an HTML pane to render the content
    html_pane = pn.pane.HTML(iframe_html, sizing_mode='stretch_both')
    page = pn.Column(heading, html_pane, align="center")
    return page


def CreatePage3():
    """Build the institute-network page: topic selector plus the bound network view."""
    network_view = pn.bind(load_institute_network, macro_topics_button)
    return pn.Column(macro_topics_button, network_view, align="center")


def CreatePage4():
    """Build the country-network page: topic selector plus the bound network view."""
    network_view = pn.bind(load_country_network, macro_topics_button)
    return pn.Column(macro_topics_button, network_view, align="center")


def CreatePage5():
    """Build the chord-diagram page (not registered in ``mapping`` below)."""
    heading = pn.pane.Markdown("## Entity Chord Diagrams ")
    chord_row = pn.Row(region_radio_button, pn.bind(filter_region, region_radio_button))
    return pn.Column(heading, chord_row, align="center")


def CreatePage6():
    """Return an HTML pane with empty content (not registered in ``mapping`` below)."""
    #url = 'https://app.vosviewer.com/?json=https%3A%2F%2Fdrive.google.com%2Fuc%3Fid%3D16q1oLQyEeMosAgeD9UkC9hSrpzAYX_-n'
    return pn.pane.HTML("""""")
    #panel.show()
    #return pn.Column(
    #    pn.pane.Markdown("## VOSViewer Network "),
    #    pn.Row(panel)
    #    )


# Page key -> page layout.  Pages are built once, in order, at import time.
mapping = {
    f"Page{_page_number}": _build_page()
    for _page_number, _build_page in enumerate(
        (CreatePage0, CreatePage1, CreatePage2, CreatePage3, CreatePage4)
        #CreatePage5, CreatePage6
    )
}

#################### SIDEBAR LAYOUT ##########################
_sidebar_items = [
    pn.pane.Markdown("## Pages"),
    button0,
    button1,
    button2,
    button3,
    button4,
    #button5,
    #button6,
]
sidebar = pn.Column(*_sidebar_items, styles={"width": "100%", "padding": "15px"})

#################### MAIN AREA LAYOUT ##########################
# The dashboard opens on Page1; show_page() swaps the content afterwards.
main_area = pn.Column(mapping["Page1"], styles={"width":"100%"})

###################### APP LAYOUT ##############################
_template_options = dict(
    title=" AECO Tech Dashboard",
    sidebar=[sidebar],
    main=[main_area],
    header_background="black",
    #site="Charting the Landscape of AECO Research",
    theme=pn.template.DarkTheme,
    sidebar_width=330,  ## Default is 330
    busy_indicator=pn.indicators.BooleanStatus(value=True),
)
template = pn.template.BootstrapTemplate(**_template_options)

### DEPLOY APP
# Serve the Panel app
template.servable()