Update streamlit_app.py

streamlit_app.py (CHANGED): 546 additions, 10 deletions
@@ -82,7 +82,7 @@ class AttentionResultsExplorer:
             st.warning(f"Could not load cached config, downloading fresh: {str(e)}")
 
         # Download from GitHub
-        config_url = f"https://raw.githubusercontent.com/{self.github_repo}/
+        config_url = f"https://raw.githubusercontent.com/{self.github_repo}/master/experiment_config.yaml"
         response = self._make_github_request(config_url, "experiment configuration file")
 
         if response is None:
@@ -207,8 +207,9 @@ class AttentionResultsExplorer:
 
     def _ensure_specific_data_downloaded(self, language, config, model):
         """Download specific files for a language/config/model combination if not cached"""
+        folder_model_name = self._model_name_to_folder_name(model)
         base_path = f"results_{language}/{config}/{model}"
-        local_path = self.base_path / f"results_{language}" / config /
+        local_path = self.base_path / f"results_{language}" / config / folder_model_name
 
         # Check if we already have this specific combination cached
         if local_path.exists() and self.use_cache:
@@ -227,7 +228,8 @@ class AttentionResultsExplorer:
 
     def _download_specific_model_data(self, language, config, model):
         """Download only the specific model data needed"""
-
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_remote_path = f"results_{language}/{config}/{folder_model_name}"
 
         # List of essential directories to download for a model
         essential_dirs = ["metadata", "uas_scores", "number_of_heads_matching", "variability", "figures"]
@@ -251,7 +253,8 @@ class AttentionResultsExplorer:
             contents = response.json()
 
             # Create local directory
-
+            folder_model_name = self._model_name_to_folder_name(model)
+            local_dir = self.base_path / f"results_{language}" / config / folder_model_name / dir_name
             local_dir.mkdir(parents=True, exist_ok=True)
 
             # Download all files in this directory
@@ -518,7 +521,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)
 
-
+        folder_model_name = self._model_name_to_folder_name(model)
+        metadata_path = self.base_path / f"results_{language}" / config / folder_model_name / "metadata" / "metadata.json"
         if metadata_path.exists():
            with open(metadata_path, 'r') as f:
                 return json.load(f)
@@ -529,7 +533,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)
 
-
+        folder_model_name = self._model_name_to_folder_name(model)
+        uas_dir = self.base_path / f"results_{language}" / config / folder_model_name / "uas_scores"
         if not uas_dir.exists():
             return {}
 
@@ -564,7 +569,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)
 
-
+        folder_model_name = self._model_name_to_folder_name(model)
+        heads_dir = self.base_path / f"results_{language}" / config / folder_model_name / "number_of_heads_matching"
         if not heads_dir.exists():
             return {}
 
@@ -577,7 +583,7 @@ class AttentionResultsExplorer:
                 status_text = st.empty()
 
                 for i, csv_file in enumerate(csv_files):
-                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{
+                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{folder_model_name}", "")
                     status_text.text(f"Loading head matching data: {relation}")
 
                     try:
@@ -599,7 +605,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)
 
-
+        folder_model_name = self._model_name_to_folder_name(model)
+        var_path = self.base_path / f"results_{language}" / config / folder_model_name / "variability" / "variability_list.csv"
         if var_path.exists():
             try:
                 return pd.read_csv(var_path, index_col=0)
@@ -612,7 +619,536 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)
 
-
+        folder_model_name = self._model_name_to_folder_name(model)
+        figures_dir = self.base_path / f"results_{language}" / config / folder_model_name / "figures"
+        if not figures_dir.exists():
+            return []
+        return list(figures_dir.glob("*.pdf"))
+
+    def _handle_rate_limit_error(self, response):
+        """Handle GitHub API rate limit errors with detailed user feedback"""
+        if response.status_code in (403, 429):
+            # Check if it's a rate limit error
+            if 'rate limit' in response.text.lower() or 'api rate limit' in response.text.lower():
+                # Extract rate limit information from headers
+                remaining = response.headers.get('x-ratelimit-remaining', 'unknown')
+                reset_timestamp = response.headers.get('x-ratelimit-reset')
+                limit = response.headers.get('x-ratelimit-limit', 'unknown')
+
+                # Calculate reset time
+                reset_time_str = "unknown"
+                if reset_timestamp:
+                    try:
+                        reset_time = datetime.fromtimestamp(int(reset_timestamp), tz=timezone.utc)
+                        reset_time_str = reset_time.strftime("%Y-%m-%d %H:%M:%S UTC")
+
+                        # Calculate time until reset
+                        now = datetime.now(timezone.utc)
+                        time_until_reset = reset_time - now
+                        minutes_until_reset = int(time_until_reset.total_seconds() / 60)
+
+                        if minutes_until_reset > 0:
+                            reset_time_str += f" (in {minutes_until_reset} minutes)"
+                    except (ValueError, TypeError):
+                        pass
+
+                # Display comprehensive rate limit information
+                st.error("🚫 **GitHub API Rate Limit Exceeded**")
+
+                with st.expander("📊 Rate Limit Details", expanded=True):
+                    col1, col2 = st.columns(2)
+
+                    with col1:
+                        st.metric("Requests Remaining", remaining)
+                        st.metric("Rate Limit", limit)
+
+                    with col2:
+                        st.metric("Reset Time", reset_time_str)
+                        if reset_timestamp:
+                            try:
+                                reset_time = datetime.fromtimestamp(int(reset_timestamp), tz=timezone.utc)
+                                now = datetime.now(timezone.utc)
+                                time_until_reset = reset_time - now
+                                if time_until_reset.total_seconds() > 0:
+                                    st.metric("Time Until Reset", f"{int(time_until_reset.total_seconds() / 60)} minutes")
+                            except (ValueError, TypeError):
+                                pass
+
+                return True  # Indicates rate limit error was handled
+
+        return False  # Not a rate limit error
+
+    def _make_github_request(self, url, description="GitHub API request", silent_404=False):
+        """Make a GitHub API request with rate limit handling"""
+        try:
+            # Add GitHub token if available
+            headers = {}
+            github_token = os.environ.get('GITHUB_TOKEN')
+            if github_token:
+                headers['Authorization'] = f'token {github_token}'
+
+            response = requests.get(url, headers=headers)
+
+            # Check for rate limit before raising for status
+            if self._handle_rate_limit_error(response):
+                return None  # Rate limit handled, return None
+
+            # Handle 404 errors silently if requested (for optional directories)
+            if response.status_code == 404 and silent_404:
+                return None
+
+            response.raise_for_status()
+            return response
+
+        except requests.exceptions.RequestException as e:
+            if hasattr(e, 'response') and e.response is not None:
+                # Handle 404 silently if requested
+                if e.response.status_code == 404 and silent_404:
+                    return None
+
+                if not self._handle_rate_limit_error(e.response):
+                    st.warning(f"Request failed for {description}: {str(e)}")
+            else:
+                st.warning(f"Network error for {description}: {str(e)}")
+            return None
+
+    def _model_name_to_folder_name(self, model_name):
+        """Convert model name from config format to folder format
+
+        Examples:
+        - 'PlanTL-GOB-ES/roberta-base-ca' -> 'roberta-base-ca'
+        - 'microsoft/deberta-v3-base' -> 'deberta-v3-base'
+        - 'bert-base-uncased' -> 'bert-base-uncased' (no change)
+        """
+        if '/' in model_name:
+            return model_name.split('/')[-1]
+        return model_name
+
+    def _get_available_languages_local(self):
+        """Get available languages from local cache"""
+        if not self.base_path.exists():
+            return []
+        result_dirs = [d.name for d in self.base_path.iterdir()
+                       if d.is_dir() and d.name.startswith("results_")]
+        languages = [d.replace("results_", "") for d in result_dirs]
+        return sorted(languages)
+
+    def _ensure_specific_data_downloaded(self, language, config, model):
+        """Download specific files for a language/config/model combination if not cached"""
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_path = f"results_{language}/{config}/{model}"
+        local_path = self.base_path / f"results_{language}" / config / folder_model_name
+
+        # Check if we already have this specific combination cached
+        if local_path.exists() and self.use_cache:
+            # Quick check if essential files exist
+            metadata_path = local_path / "metadata" / "metadata.json"
+            if metadata_path.exists():
+                return  # Already have the data
+
+        with st.spinner(f"📥 Downloading data for {language.upper()}/{config}/{model}..."):
+            try:
+                self._download_specific_model_data(language, config, model)
+                st.success(f"✅ Downloaded {language.upper()}/{model} data!")
+            except Exception as e:
+                st.error(f"❌ Failed to download specific data: {str(e)}")
+                raise
+
+    def _download_specific_model_data(self, language, config, model):
+        """Download only the specific model data needed"""
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_remote_path = f"results_{language}/{config}/{folder_model_name}"
+
+        # List of essential directories to download for a model
+        essential_dirs = ["metadata", "uas_scores", "number_of_heads_matching", "variability", "figures"]
+
+        for dir_name in essential_dirs:
+            remote_path = f"{base_remote_path}/{dir_name}"
+            try:
+                self._download_directory_targeted(dir_name, remote_path, language, config, model)
+            except Exception as e:
+                st.warning(f"Could not download {dir_name} for {model}: {str(e)}")
+
+    def _download_directory_targeted(self, dir_name, remote_path, language, config, model):
+        """Download a specific directory for a model"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/{remote_path}"
+
+        response = self._make_github_request(api_url, f"directory {dir_name}", silent_404=True)
+        if response is None:
+            return  # Rate limit, 404, or other error
+
+        try:
+            contents = response.json()
+
+            # Create local directory
+            folder_model_name = self._model_name_to_folder_name(model)
+            local_dir = self.base_path / f"results_{language}" / config / folder_model_name / dir_name
+            local_dir.mkdir(parents=True, exist_ok=True)
+
+            # Download all files in this directory
+            for item in contents:
+                if item['type'] == 'file':
+                    self._download_file(item, local_dir)
+
+        except Exception as e:
+            st.warning(f"Could not download directory {dir_name}: {str(e)}")
+
+    def _get_available_configs_from_github(self, language):
+        """Get available configurations for a language from GitHub"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}"
+
+        response = self._make_github_request(api_url, f"configurations for {language}")
+        if response is None:
+            return []
+
+        try:
+            contents = response.json()
+            configs = [item['name'] for item in contents if item['type'] == 'dir']
+            return sorted(configs)
+
+        except Exception as e:
+            st.warning(f"Could not parse configurations for {language}: {str(e)}")
+            return []
+
+    def _discover_config_parameters(self, language=None):
+        """Dynamically discover configuration parameters from available configs
+
+        Now uses the first language-model pair from experiment config to discover
+        valid configuration parameters, since configurations are consistent across
+        all language-model combinations.
+        """
+        try:
+            # Get the first language-model pair from experiment config
+            if language is None:
+                language, model = self._get_first_language_model_pair()
+                if language is None or model is None:
+                    st.warning("Could not find any language-model pairs in experiment config")
+                    return {}
+                st.info(f"🔍 Discovering configurations using {language.upper()}/{model} (configurations are consistent across all languages and models)")
+            else:
+                # If language is specified, try to get first model for that language
+                models = self._get_models_for_language(language)
+                if not models:
+                    st.warning(f"No models found for language {language}")
+                    return {}
+                model = models[0]
+
+            available_configs = self._get_experimental_configs(language)
+            if not available_configs:
+                return {}
+
+            # Parse all configurations to extract unique parameters
+            all_params = set()
+            param_values = {}
+
+            for config in available_configs:
+                params = self._parse_config_params(config)
+                for param, value in params.items():
+                    all_params.add(param)
+                    if param not in param_values:
+                        param_values[param] = set()
+                    param_values[param].add(value)
+
+            # Convert sets to sorted lists for consistent UI
+            return {param: sorted(list(values)) for param, values in param_values.items()}
+
+        except Exception as e:
+            st.warning(f"Could not discover configuration parameters: {str(e)}")
+            return {}
+
+    def _build_config_from_params(self, param_dict):
+        """Build configuration string from parameter dictionary"""
+        config_parts = []
+        for param, value in sorted(param_dict.items()):
+            config_parts.append(f"{param}_{value}")
+        return "+".join(config_parts)
+
+    def _find_best_matching_config(self, language, target_params):
+        """Find the configuration that best matches the target parameters"""
+        available_configs = self._get_experimental_configs(language)
+
+        best_match = None
+        best_score = -1
+
+        for config in available_configs:
+            config_params = self._parse_config_params(config)
+
+            # Calculate match score
+            score = 0
+            total_params = len(target_params)
+
+            for param, target_value in target_params.items():
+                if param in config_params and config_params[param] == target_value:
+                    score += 1
+
+            # Prefer configs with exact parameter count
+            if len(config_params) == total_params:
+                score += 0.5
+
+            if score > best_score:
+                best_score = score
+                best_match = config
+
+        return best_match, best_score == len(target_params)
+
+    def _download_repository(self):
+        """Download repository data from GitHub"""
+        st.info("🔄 Downloading results data from GitHub... This may take a moment.")
+
+        # GitHub API to get the repository contents
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents"
+
+        try:
+            # Get list of result directories
+            response = requests.get(api_url)
+            response.raise_for_status()
+            contents = response.json()
+
+            result_dirs = [item['name'] for item in contents
+                           if item['type'] == 'dir' and item['name'].startswith('results_')]
+
+            st.write(f"Found {len(result_dirs)} result directories: {', '.join(result_dirs)}")
+
+            # Download each result directory
+            progress_bar = st.progress(0)
+            for i, result_dir in enumerate(result_dirs):
+                st.write(f"Downloading {result_dir}...")
+                self._download_directory(result_dir)
+                progress_bar.progress((i + 1) / len(result_dirs))
+
+            st.success("✅ Download completed!")
+
+        except Exception as e:
+            st.error(f"❌ Error downloading repository: {str(e)}")
+            st.error("Please check the repository URL and your internet connection.")
+            raise
+
+    def _parse_config_params(self, config_name):
+        """Parse configuration parameters into a dictionary"""
+        parts = config_name.split('+')
+        params = {}
+        for part in parts:
+            if '_' in part:
+                key_parts = part.split('_')
+                if len(key_parts) >= 2:
+                    key = '_'.join(key_parts[:-1])
+                    value = key_parts[-1]
+                    params[key] = value == 'True'
+        return params
+
+    def _download_directory(self, dir_name, path=""):
+        """Recursively download a directory from GitHub"""
+        url = f"https://api.github.com/repos/{self.github_repo}/contents/{path}{dir_name}"
+
+        try:
+            response = requests.get(url)
+            response.raise_for_status()
+            contents = response.json()
+
+            local_dir = self.cache_dir / path / dir_name
+            local_dir.mkdir(parents=True, exist_ok=True)
+
+            for item in contents:
+                if item['type'] == 'file':
+                    self._download_file(item, local_dir)
+                elif item['type'] == 'dir':
+                    self._download_directory(item['name'], f"{path}{dir_name}/")
+
+        except Exception as e:
+            st.warning(f"Could not download {dir_name}: {str(e)}")
+
+    def _download_file(self, file_info, local_dir):
+        """Download a single file from GitHub"""
+        try:
+            # Use the rate limit handling for file downloads too
+            file_response = self._make_github_request(file_info['download_url'], f"file {file_info['name']}")
+            if file_response is None:
+                return  # Rate limit or other error
+
+            # Save to local cache
+            local_file = local_dir / file_info['name']
+
+            # Handle different file types
+            if file_info['name'].endswith(('.csv', '.json')):
+                with open(local_file, 'w', encoding='utf-8') as f:
+                    f.write(file_response.text)
+            else:  # Binary files like PDFs
+                with open(local_file, 'wb') as f:
+                    f.write(file_response.content)
+
+        except Exception as e:
+            st.warning(f"Could not download file {file_info['name']}: {str(e)}")
+
+    def _get_available_languages(self):
+        """Get all available language directories"""
+        return self.available_languages
+
+    def _get_experimental_configs(self, language):
+        """Get all experimental configurations for a language from GitHub API"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}"
+        response = self._make_github_request(api_url, f"experimental configs for {language}")
+
+        if response is not None:
+            try:
+                contents = response.json()
+                configs = [item['name'] for item in contents if item['type'] == 'dir']
+                return sorted(configs)
+            except Exception as e:
+                st.warning(f"Could not parse experimental configs for {language}: {str(e)}")
+
+        # Fallback to local cache if available
+        lang_dir = self.base_path / f"results_{language}"
+        if lang_dir.exists():
+            configs = [d.name for d in lang_dir.iterdir() if d.is_dir()]
+            return sorted(configs)
+        return []
+
+    def _find_matching_config(self, language, target_params):
+        """Find the first matching configuration from target parameters"""
+        return self._find_best_matching_config(language, target_params)
+
+    def _get_models(self, language, config):
+        """Get all models for a language and configuration from experiment config"""
+        # First try to get models from experiment config
+        models = self._get_models_for_language(language)
+
+        if models:
+            return models
+
+        # Fallback to GitHub API directory listing if config unavailable
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}/{config}"
+        response = self._make_github_request(api_url, f"models for {language}/{config}")
+
+        if response is not None:
+            try:
+                contents = response.json()
+                models = [item['name'] for item in contents if item['type'] == 'dir']
+                return sorted(models)
+            except Exception as e:
+                st.warning(f"Could not parse models for {language}/{config}: {str(e)}")
+
+        # Final fallback to local cache if available
+        config_dir = self.base_path / f"results_{language}" / config
+        if config_dir.exists():
+            models = [d.name for d in config_dir.iterdir() if d.is_dir()]
+            return sorted(models)
+        return []
+
+    def _parse_config_name(self, config_name):
+        """Parse configuration name into readable format"""
+        parts = config_name.split('+')
+        config_dict = {}
+        for part in parts:
+            if '_' in part:
+                key, value = part.split('_', 1)
+                config_dict[key.replace('_', ' ').title()] = value
+        return config_dict
+
+    def _load_metadata(self, language, config, model):
+        """Load metadata for a specific combination"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        metadata_path = self.base_path / f"results_{language}" / config / folder_model_name / "metadata" / "metadata.json"
+        if metadata_path.exists():
+            with open(metadata_path, 'r') as f:
+                return json.load(f)
+        return None
+
+    def _load_uas_scores(self, language, config, model):
+        """Load UAS scores data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        uas_dir = self.base_path / f"results_{language}" / config / folder_model_name / "uas_scores"
+        if not uas_dir.exists():
+            return {}
+
+        uas_data = {}
+        csv_files = list(uas_dir.glob("uas_*.csv"))
+
+        if csv_files:
+            with st.spinner("Loading UAS scores data..."):
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+
+                for i, csv_file in enumerate(csv_files):
+                    relation = csv_file.stem.replace("uas_", "")
+                    status_text.text(f"Loading UAS data: {relation}")
+
+                    try:
+                        df = pd.read_csv(csv_file, index_col=0)
+                        uas_data[relation] = df
+                    except Exception as e:
+                        st.warning(f"Could not load {csv_file.name}: {e}")
+
+                    progress_bar.progress((i + 1) / len(csv_files))
+                    time.sleep(0.01)  # Small delay for smoother progress
+
+                progress_bar.empty()
+                status_text.empty()
+
+        return uas_data
+
+    def _load_head_matching(self, language, config, model):
+        """Load head matching data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        heads_dir = self.base_path / f"results_{language}" / config / folder_model_name / "number_of_heads_matching"
+        if not heads_dir.exists():
+            return {}
+
+        heads_data = {}
+        csv_files = list(heads_dir.glob("heads_matching_*.csv"))
+
+        if csv_files:
+            with st.spinner("Loading head matching data..."):
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+
+                for i, csv_file in enumerate(csv_files):
+                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{folder_model_name}", "")
+                    status_text.text(f"Loading head matching data: {relation}")
+
+                    try:
+                        df = pd.read_csv(csv_file, index_col=0)
+                        heads_data[relation] = df
+                    except Exception as e:
+                        st.warning(f"Could not load {csv_file.name}: {e}")
+
+                    progress_bar.progress((i + 1) / len(csv_files))
+                    time.sleep(0.01)  # Small delay for smoother progress
+
+                progress_bar.empty()
+                status_text.empty()
+
+        return heads_data
+
+    def _load_variability(self, language, config, model):
+        """Load variability data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        var_path = self.base_path / f"results_{language}" / config / folder_model_name / "variability" / "variability_list.csv"
+        if var_path.exists():
+            try:
+                return pd.read_csv(var_path, index_col=0)
+            except Exception as e:
+                st.warning(f"Could not load variability data: {e}")
+        return None
+
+    def _get_available_figures(self, language, config, model):
+        """Get all available figure files"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        figures_dir = self.base_path / f"results_{language}" / config / folder_model_name / "figures"
         if not figures_dir.exists():
             return []
         return list(figures_dir.glob("*.pdf"))