acmc committed
Commit 5479ffa · verified · 1 Parent(s): d54e177

Update streamlit_app.py

Files changed (1):
  1. streamlit_app.py +546 -10
streamlit_app.py CHANGED
@@ -82,7 +82,7 @@ class AttentionResultsExplorer:
             st.warning(f"Could not load cached config, downloading fresh: {str(e)}")
 
         # Download from GitHub
-        config_url = f"https://raw.githubusercontent.com/{self.github_repo}/refs/heads/master/experiment_config.yaml"
+        config_url = f"https://raw.githubusercontent.com/{self.github_repo}/master/experiment_config.yaml"
         response = self._make_github_request(config_url, "experiment configuration file")
 
         if response is None:
@@ -207,8 +207,9 @@ class AttentionResultsExplorer:
 
     def _ensure_specific_data_downloaded(self, language, config, model):
         """Download specific files for a language/config/model combination if not cached"""
+        folder_model_name = self._model_name_to_folder_name(model)
         base_path = f"results_{language}/{config}/{model}"
-        local_path = self.base_path / f"results_{language}" / config / model
+        local_path = self.base_path / f"results_{language}" / config / folder_model_name
 
         # Check if we already have this specific combination cached
         if local_path.exists() and self.use_cache:
@@ -227,7 +228,8 @@ class AttentionResultsExplorer:
 
     def _download_specific_model_data(self, language, config, model):
         """Download only the specific model data needed"""
-        base_remote_path = f"results_{language}/{config}/{model}"
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_remote_path = f"results_{language}/{config}/{folder_model_name}"
 
         # List of essential directories to download for a model
         essential_dirs = ["metadata", "uas_scores", "number_of_heads_matching", "variability", "figures"]
@@ -251,7 +253,8 @@ class AttentionResultsExplorer:
             contents = response.json()
 
             # Create local directory
-            local_dir = self.base_path / f"results_{language}" / config / model / dir_name
+            folder_model_name = self._model_name_to_folder_name(model)
+            local_dir = self.base_path / f"results_{language}" / config / folder_model_name / dir_name
             local_dir.mkdir(parents=True, exist_ok=True)
 
             # Download all files in this directory
@@ -518,7 +521,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)
 
-        metadata_path = self.base_path / f"results_{language}" / config / model / "metadata" / "metadata.json"
+        folder_model_name = self._model_name_to_folder_name(model)
+        metadata_path = self.base_path / f"results_{language}" / config / folder_model_name / "metadata" / "metadata.json"
         if metadata_path.exists():
             with open(metadata_path, 'r') as f:
                 return json.load(f)
@@ -529,7 +533,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)
 
-        uas_dir = self.base_path / f"results_{language}" / config / model / "uas_scores"
+        folder_model_name = self._model_name_to_folder_name(model)
+        uas_dir = self.base_path / f"results_{language}" / config / folder_model_name / "uas_scores"
         if not uas_dir.exists():
             return {}
 
@@ -564,7 +569,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)
 
-        heads_dir = self.base_path / f"results_{language}" / config / model / "number_of_heads_matching"
+        folder_model_name = self._model_name_to_folder_name(model)
+        heads_dir = self.base_path / f"results_{language}" / config / folder_model_name / "number_of_heads_matching"
         if not heads_dir.exists():
             return {}
 
@@ -577,7 +583,7 @@ class AttentionResultsExplorer:
                 status_text = st.empty()
 
                 for i, csv_file in enumerate(csv_files):
-                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{model}", "")
+                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{folder_model_name}", "")
                     status_text.text(f"Loading head matching data: {relation}")
 
                     try:
@@ -599,7 +605,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)
 
-        var_path = self.base_path / f"results_{language}" / config / model / "variability" / "variability_list.csv"
+        folder_model_name = self._model_name_to_folder_name(model)
+        var_path = self.base_path / f"results_{language}" / config / folder_model_name / "variability" / "variability_list.csv"
         if var_path.exists():
             try:
                 return pd.read_csv(var_path, index_col=0)
@@ -612,7 +619,536 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)
 
-        figures_dir = self.base_path / f"results_{language}" / config / model / "figures"
+        folder_model_name = self._model_name_to_folder_name(model)
+        figures_dir = self.base_path / f"results_{language}" / config / folder_model_name / "figures"
+        if not figures_dir.exists():
+            return []
+        return list(figures_dir.glob("*.pdf"))
+
+    def _handle_rate_limit_error(self, response):
+        """Handle GitHub API rate limit errors with detailed user feedback"""
+        if response.status_code in (403, 429):
+            # Check if it's a rate limit error
+            if 'rate limit' in response.text.lower() or 'api rate limit' in response.text.lower():
+                # Extract rate limit information from headers
+                remaining = response.headers.get('x-ratelimit-remaining', 'unknown')
+                reset_timestamp = response.headers.get('x-ratelimit-reset')
+                limit = response.headers.get('x-ratelimit-limit', 'unknown')
+
+                # Calculate reset time
+                reset_time_str = "unknown"
+                if reset_timestamp:
+                    try:
+                        reset_time = datetime.fromtimestamp(int(reset_timestamp), tz=timezone.utc)
+                        reset_time_str = reset_time.strftime("%Y-%m-%d %H:%M:%S UTC")
+
+                        # Calculate time until reset
+                        now = datetime.now(timezone.utc)
+                        time_until_reset = reset_time - now
+                        minutes_until_reset = int(time_until_reset.total_seconds() / 60)
+
+                        if minutes_until_reset > 0:
+                            reset_time_str += f" (in {minutes_until_reset} minutes)"
+                    except (ValueError, TypeError):
+                        pass
+
+                # Display comprehensive rate limit information
+                st.error("🚫 **GitHub API Rate Limit Exceeded**")
+
+                with st.expander("📊 Rate Limit Details", expanded=True):
+                    col1, col2 = st.columns(2)
+
+                    with col1:
+                        st.metric("Requests Remaining", remaining)
+                        st.metric("Rate Limit", limit)
+
+                    with col2:
+                        st.metric("Reset Time", reset_time_str)
+                        if reset_timestamp:
+                            try:
+                                reset_time = datetime.fromtimestamp(int(reset_timestamp), tz=timezone.utc)
+                                now = datetime.now(timezone.utc)
+                                time_until_reset = reset_time - now
+                                if time_until_reset.total_seconds() > 0:
+                                    st.metric("Time Until Reset", f"{int(time_until_reset.total_seconds() / 60)} minutes")
+                            except (ValueError, TypeError):
+                                pass
+
+                return True  # Indicates rate limit error was handled
+
+        return False  # Not a rate limit error
+
+    def _make_github_request(self, url, description="GitHub API request", silent_404=False):
+        """Make a GitHub API request with rate limit handling"""
+        try:
+            # Add GitHub token if available
+            headers = {}
+            github_token = os.environ.get('GITHUB_TOKEN')
+            if github_token:
+                headers['Authorization'] = f'token {github_token}'
+
+            response = requests.get(url, headers=headers)
+
+            # Check for rate limit before raising for status
+            if self._handle_rate_limit_error(response):
+                return None  # Rate limit handled, return None
+
+            # Handle 404 errors silently if requested (for optional directories)
+            if response.status_code == 404 and silent_404:
+                return None
+
+            response.raise_for_status()
+            return response
+
+        except requests.exceptions.RequestException as e:
+            if hasattr(e, 'response') and e.response is not None:
+                # Handle 404 silently if requested
+                if e.response.status_code == 404 and silent_404:
+                    return None
+
+                if not self._handle_rate_limit_error(e.response):
+                    st.warning(f"Request failed for {description}: {str(e)}")
+            else:
+                st.warning(f"Network error for {description}: {str(e)}")
+            return None
+
+    def _model_name_to_folder_name(self, model_name):
+        """Convert model name from config format to folder format
+
+        Examples:
+        - 'PlanTL-GOB-ES/roberta-base-ca' -> 'roberta-base-ca'
+        - 'microsoft/deberta-v3-base' -> 'deberta-v3-base'
+        - 'bert-base-uncased' -> 'bert-base-uncased' (no change)
+        """
+        if '/' in model_name:
+            return model_name.split('/')[-1]
+        return model_name
+
+    def _get_available_languages_local(self):
+        """Get available languages from local cache"""
+        if not self.base_path.exists():
+            return []
+        result_dirs = [d.name for d in self.base_path.iterdir()
+                       if d.is_dir() and d.name.startswith("results_")]
+        languages = [d.replace("results_", "") for d in result_dirs]
+        return sorted(languages)
+
+    def _ensure_specific_data_downloaded(self, language, config, model):
+        """Download specific files for a language/config/model combination if not cached"""
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_path = f"results_{language}/{config}/{model}"
+        local_path = self.base_path / f"results_{language}" / config / folder_model_name
+
+        # Check if we already have this specific combination cached
+        if local_path.exists() and self.use_cache:
+            # Quick check if essential files exist
+            metadata_path = local_path / "metadata" / "metadata.json"
+            if metadata_path.exists():
+                return  # Already have the data
+
+        with st.spinner(f"📥 Downloading data for {language.upper()}/{config}/{model}..."):
+            try:
+                self._download_specific_model_data(language, config, model)
+                st.success(f"✅ Downloaded {language.upper()}/{model} data!")
+            except Exception as e:
+                st.error(f"❌ Failed to download specific data: {str(e)}")
+                raise
+
+    def _download_specific_model_data(self, language, config, model):
+        """Download only the specific model data needed"""
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_remote_path = f"results_{language}/{config}/{folder_model_name}"
+
+        # List of essential directories to download for a model
+        essential_dirs = ["metadata", "uas_scores", "number_of_heads_matching", "variability", "figures"]
+
+        for dir_name in essential_dirs:
+            remote_path = f"{base_remote_path}/{dir_name}"
+            try:
+                self._download_directory_targeted(dir_name, remote_path, language, config, model)
+            except Exception as e:
+                st.warning(f"Could not download {dir_name} for {model}: {str(e)}")
+
+    def _download_directory_targeted(self, dir_name, remote_path, language, config, model):
+        """Download a specific directory for a model"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/{remote_path}"
+
+        response = self._make_github_request(api_url, f"directory {dir_name}", silent_404=True)
+        if response is None:
+            return  # Rate limit, 404, or other error
+
+        try:
+            contents = response.json()
+
+            # Create local directory
+            folder_model_name = self._model_name_to_folder_name(model)
+            local_dir = self.base_path / f"results_{language}" / config / folder_model_name / dir_name
+            local_dir.mkdir(parents=True, exist_ok=True)
+
+            # Download all files in this directory
+            for item in contents:
+                if item['type'] == 'file':
+                    self._download_file(item, local_dir)
+
+        except Exception as e:
+            st.warning(f"Could not download directory {dir_name}: {str(e)}")
+
+    def _get_available_configs_from_github(self, language):
+        """Get available configurations for a language from GitHub"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}"
+
+        response = self._make_github_request(api_url, f"configurations for {language}")
+        if response is None:
+            return []
+
+        try:
+            contents = response.json()
+            configs = [item['name'] for item in contents if item['type'] == 'dir']
+            return sorted(configs)
+
+        except Exception as e:
+            st.warning(f"Could not parse configurations for {language}: {str(e)}")
+            return []
+
+    def _discover_config_parameters(self, language=None):
+        """Dynamically discover configuration parameters from available configs
+
+        Now uses the first language-model pair from experiment config to discover
+        valid configuration parameters, since configurations are consistent across
+        all language-model combinations.
+        """
+        try:
+            # Get the first language-model pair from experiment config
+            if language is None:
+                language, model = self._get_first_language_model_pair()
+                if language is None or model is None:
+                    st.warning("Could not find any language-model pairs in experiment config")
+                    return {}
+                st.info(f"🔍 Discovering configurations using {language.upper()}/{model} (configurations are consistent across all languages and models)")
+            else:
+                # If language is specified, try to get first model for that language
+                models = self._get_models_for_language(language)
+                if not models:
+                    st.warning(f"No models found for language {language}")
+                    return {}
+                model = models[0]
+
+            available_configs = self._get_experimental_configs(language)
+            if not available_configs:
+                return {}
+
+            # Parse all configurations to extract unique parameters
+            all_params = set()
+            param_values = {}
+
+            for config in available_configs:
+                params = self._parse_config_params(config)
+                for param, value in params.items():
+                    all_params.add(param)
+                    if param not in param_values:
+                        param_values[param] = set()
+                    param_values[param].add(value)
+
+            # Convert sets to sorted lists for consistent UI
+            return {param: sorted(list(values)) for param, values in param_values.items()}
+
+        except Exception as e:
+            st.warning(f"Could not discover configuration parameters: {str(e)}")
+            return {}
+
+    def _build_config_from_params(self, param_dict):
+        """Build configuration string from parameter dictionary"""
+        config_parts = []
+        for param, value in sorted(param_dict.items()):
+            config_parts.append(f"{param}_{value}")
+        return "+".join(config_parts)
+
+    def _find_best_matching_config(self, language, target_params):
+        """Find the configuration that best matches the target parameters"""
+        available_configs = self._get_experimental_configs(language)
+
+        best_match = None
+        best_score = -1
+
+        for config in available_configs:
+            config_params = self._parse_config_params(config)
+
+            # Calculate match score
+            score = 0
+            total_params = len(target_params)
+
+            for param, target_value in target_params.items():
+                if param in config_params and config_params[param] == target_value:
+                    score += 1
+
+            # Prefer configs with exact parameter count
+            if len(config_params) == total_params:
+                score += 0.5
+
+            if score > best_score:
+                best_score = score
+                best_match = config
+
+        return best_match, best_score == len(target_params)
+
+    def _download_repository(self):
+        """Download repository data from GitHub"""
+        st.info("🔄 Downloading results data from GitHub... This may take a moment.")
+
+        # GitHub API to get the repository contents
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents"
+
+        try:
+            # Get list of result directories
+            response = requests.get(api_url)
+            response.raise_for_status()
+            contents = response.json()
+
+            result_dirs = [item['name'] for item in contents
+                           if item['type'] == 'dir' and item['name'].startswith('results_')]
+
+            st.write(f"Found {len(result_dirs)} result directories: {', '.join(result_dirs)}")
+
+            # Download each result directory
+            progress_bar = st.progress(0)
+            for i, result_dir in enumerate(result_dirs):
+                st.write(f"Downloading {result_dir}...")
+                self._download_directory(result_dir)
+                progress_bar.progress((i + 1) / len(result_dirs))
+
+            st.success("✅ Download completed!")
+
+        except Exception as e:
+            st.error(f"❌ Error downloading repository: {str(e)}")
+            st.error("Please check the repository URL and your internet connection.")
+            raise
+
+    def _parse_config_params(self, config_name):
+        """Parse configuration parameters into a dictionary"""
+        parts = config_name.split('+')
+        params = {}
+        for part in parts:
+            if '_' in part:
+                key_parts = part.split('_')
+                if len(key_parts) >= 2:
+                    key = '_'.join(key_parts[:-1])
+                    value = key_parts[-1]
+                    params[key] = value == 'True'
+        return params
+
+    def _download_directory(self, dir_name, path=""):
+        """Recursively download a directory from GitHub"""
+        url = f"https://api.github.com/repos/{self.github_repo}/contents/{path}{dir_name}"
+
+        try:
+            response = requests.get(url)
+            response.raise_for_status()
+            contents = response.json()
+
+            local_dir = self.cache_dir / path / dir_name
+            local_dir.mkdir(parents=True, exist_ok=True)
+
+            for item in contents:
+                if item['type'] == 'file':
+                    self._download_file(item, local_dir)
+                elif item['type'] == 'dir':
+                    self._download_directory(item['name'], f"{path}{dir_name}/")
+
+        except Exception as e:
+            st.warning(f"Could not download {dir_name}: {str(e)}")
+
+    def _download_file(self, file_info, local_dir):
+        """Download a single file from GitHub"""
+        try:
+            # Use the rate limit handling for file downloads too
+            file_response = self._make_github_request(file_info['download_url'], f"file {file_info['name']}")
+            if file_response is None:
+                return  # Rate limit or other error
+
+            # Save to local cache
+            local_file = local_dir / file_info['name']
+
+            # Handle different file types
+            if file_info['name'].endswith(('.csv', '.json')):
+                with open(local_file, 'w', encoding='utf-8') as f:
+                    f.write(file_response.text)
+            else:  # Binary files like PDFs
+                with open(local_file, 'wb') as f:
+                    f.write(file_response.content)
+
+        except Exception as e:
+            st.warning(f"Could not download file {file_info['name']}: {str(e)}")
+
+    def _get_available_languages(self):
+        """Get all available language directories"""
+        return self.available_languages
+
+    def _get_experimental_configs(self, language):
+        """Get all experimental configurations for a language from GitHub API"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}"
+        response = self._make_github_request(api_url, f"experimental configs for {language}")
+
+        if response is not None:
+            try:
+                contents = response.json()
+                configs = [item['name'] for item in contents if item['type'] == 'dir']
+                return sorted(configs)
+            except Exception as e:
+                st.warning(f"Could not parse experimental configs for {language}: {str(e)}")
+
+        # Fallback to local cache if available
+        lang_dir = self.base_path / f"results_{language}"
+        if lang_dir.exists():
+            configs = [d.name for d in lang_dir.iterdir() if d.is_dir()]
+            return sorted(configs)
+        return []
+
+    def _find_matching_config(self, language, target_params):
+        """Find the first matching configuration from target parameters"""
+        return self._find_best_matching_config(language, target_params)
+
+    def _get_models(self, language, config):
+        """Get all models for a language and configuration from experiment config"""
+        # First try to get models from experiment config
+        models = self._get_models_for_language(language)
+
+        if models:
+            return models
+
+        # Fallback to GitHub API directory listing if config unavailable
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}/{config}"
+        response = self._make_github_request(api_url, f"models for {language}/{config}")
+
+        if response is not None:
+            try:
+                contents = response.json()
+                models = [item['name'] for item in contents if item['type'] == 'dir']
+                return sorted(models)
+            except Exception as e:
+                st.warning(f"Could not parse models for {language}/{config}: {str(e)}")
+
+        # Final fallback to local cache if available
+        config_dir = self.base_path / f"results_{language}" / config
+        if config_dir.exists():
+            models = [d.name for d in config_dir.iterdir() if d.is_dir()]
+            return sorted(models)
+        return []
+
+    def _parse_config_name(self, config_name):
+        """Parse configuration name into readable format"""
+        parts = config_name.split('+')
+        config_dict = {}
+        for part in parts:
+            if '_' in part:
+                key, value = part.split('_', 1)
+                config_dict[key.replace('_', ' ').title()] = value
+        return config_dict
+
+    def _load_metadata(self, language, config, model):
+        """Load metadata for a specific combination"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        metadata_path = self.base_path / f"results_{language}" / config / folder_model_name / "metadata" / "metadata.json"
+        if metadata_path.exists():
+            with open(metadata_path, 'r') as f:
+                return json.load(f)
+        return None
+
+    def _load_uas_scores(self, language, config, model):
+        """Load UAS scores data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        uas_dir = self.base_path / f"results_{language}" / config / folder_model_name / "uas_scores"
+        if not uas_dir.exists():
+            return {}
+
+        uas_data = {}
+        csv_files = list(uas_dir.glob("uas_*.csv"))
+
+        if csv_files:
+            with st.spinner("Loading UAS scores data..."):
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+
+                for i, csv_file in enumerate(csv_files):
+                    relation = csv_file.stem.replace("uas_", "")
+                    status_text.text(f"Loading UAS data: {relation}")
+
+                    try:
+                        df = pd.read_csv(csv_file, index_col=0)
+                        uas_data[relation] = df
+                    except Exception as e:
+                        st.warning(f"Could not load {csv_file.name}: {e}")
+
+                    progress_bar.progress((i + 1) / len(csv_files))
+                    time.sleep(0.01)  # Small delay for smoother progress
+
+                progress_bar.empty()
+                status_text.empty()
+
+        return uas_data
+
+    def _load_head_matching(self, language, config, model):
+        """Load head matching data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        heads_dir = self.base_path / f"results_{language}" / config / folder_model_name / "number_of_heads_matching"
+        if not heads_dir.exists():
+            return {}
+
+        heads_data = {}
+        csv_files = list(heads_dir.glob("heads_matching_*.csv"))
+
+        if csv_files:
+            with st.spinner("Loading head matching data..."):
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+
+                for i, csv_file in enumerate(csv_files):
+                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{folder_model_name}", "")
+                    status_text.text(f"Loading head matching data: {relation}")
+
+                    try:
+                        df = pd.read_csv(csv_file, index_col=0)
+                        heads_data[relation] = df
+                    except Exception as e:
+                        st.warning(f"Could not load {csv_file.name}: {e}")
+
+                    progress_bar.progress((i + 1) / len(csv_files))
+                    time.sleep(0.01)  # Small delay for smoother progress
+
+                progress_bar.empty()
+                status_text.empty()
+
+        return heads_data
+
+    def _load_variability(self, language, config, model):
+        """Load variability data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        var_path = self.base_path / f"results_{language}" / config / folder_model_name / "variability" / "variability_list.csv"
+        if var_path.exists():
+            try:
+                return pd.read_csv(var_path, index_col=0)
+            except Exception as e:
+                st.warning(f"Could not load variability data: {e}")
+        return None
+
+    def _get_available_figures(self, language, config, model):
+        """Get all available figure files"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        figures_dir = self.base_path / f"results_{language}" / config / folder_model_name / "figures"
         if not figures_dir.exists():
             return []
         return list(figures_dir.glob("*.pdf"))
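Note: every path change in this diff hinges on the new _model_name_to_folder_name helper. A minimal standalone sketch of its behavior (hypothetical free function mirroring the docstring in the diff, not imported from the app):

    def model_name_to_folder_name(model_name):
        # Keep only the part after the last '/', as the helper in this commit does
        if '/' in model_name:
            return model_name.split('/')[-1]
        return model_name

    assert model_name_to_folder_name("PlanTL-GOB-ES/roberta-base-ca") == "roberta-base-ca"
    assert model_name_to_folder_name("microsoft/deberta-v3-base") == "deberta-v3-base"
    assert model_name_to_folder_name("bert-base-uncased") == "bert-base-uncased"

The rate-limit handler reads GitHub's standard x-ratelimit-* response headers; x-ratelimit-reset is a Unix epoch timestamp, so the conversion it performs reduces to the following sketch (the header value here is made up for illustration, not from a real response):

    from datetime import datetime, timezone

    reset_timestamp = "1735689600"  # example value only
    reset_time = datetime.fromtimestamp(int(reset_timestamp), tz=timezone.utc)
    print(reset_time.strftime("%Y-%m-%d %H:%M:%S UTC"))  # 2025-01-01 00:00:00 UTC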