# doctorecord/src/config/config_manager.py
# levalencia's picture
# feat: enhance app.py with session state management and extraction strategy selection
# 2d87de0
# raw
# history blame
# 7.48 kB
"""Configuration manager for file type configurations."""
import yaml
import os
from pathlib import Path
from typing import Dict, Any, List, Optional
import logging
# Module-level logger, named after this module (__name__).
logger = logging.getLogger(__name__)
class ConfigManager:
    """Manage named file type configurations persisted in a YAML file.

    Configurations are kept in memory in ``self.configs`` (a mapping of
    configuration name to configuration dict) and mirrored to
    ``self.config_file``. Mutating operations write the file first and only
    update the in-memory mapping when the write succeeded, so memory and disk
    do not drift apart.
    """

    # Keys every configuration passed to save_config() must contain.
    _REQUIRED_FIELDS = ('name', 'fields', 'field_descriptions', 'unique_indices_descriptions')

    def __init__(self, config_file: str = "file_types.yaml"):
        """Resolve *config_file* next to this module and load it.

        Args:
            config_file: File name (or path) of the YAML file, joined onto the
                directory containing this module.
        """
        self.config_file = Path(__file__).parent / config_file
        self.configs = self._load_configs()

    def _load_configs(self) -> Dict[str, Any]:
        """Load configurations from the YAML file.

        Returns:
            The mapping read from the file (``{}`` for an empty file). When
            the file does not exist, the default configuration is created,
            written to disk and returned. When the file exists but cannot be
            read or parsed, or does not contain a mapping at the top level,
            the default configuration is returned in memory only.
        """
        try:
            if not self.config_file.exists():
                logger.warning(f"Configuration file {self.config_file} not found, creating default")
                return self._create_default_config()

            with open(self.config_file, 'r', encoding='utf-8') as f:
                configs = yaml.safe_load(f) or {}

            # Every other method treats the result as a dict; reject lists,
            # scalars, etc. up front instead of failing later on .keys()/.get().
            if not isinstance(configs, dict):
                raise ValueError(
                    f"expected a mapping at the top level, got {type(configs).__name__}"
                )

            logger.info(f"Loaded {len(configs)} configurations from {self.config_file}")
            return configs
        except Exception as e:
            logger.error(f"Error loading configurations: {e}")
            # Deliberately do NOT write the defaults here: the existing file
            # may hold user configurations that are only temporarily
            # unreadable (e.g. a syntax error), and overwriting it would
            # destroy them.
            return self._build_default_config()

    @staticmethod
    def _build_default_config() -> Dict[str, Any]:
        """Return a fresh copy of the built-in default configuration (no I/O)."""
        return {
            "default_type": {
                "name": "Default Type",
                "description": "Default configuration for biotech documents",
                "fields": "Chain, Percentage, Seq Loc",
                "field_descriptions": {
                    "Chain": {
                        "description": "Refers to either the heavy chain (HC) or light chain (LC) of an antibody or protein construct, each analyzed separately for structural integrity and chemical modifications.",
                        "format": "String",
                        "examples": "Heavy",
                        "possible_values": "Heavy, Light"
                    },
                    "Percentage": {
                        "description": "The relative abundance of a specific modification or peptide, typically quantified using extracted ion chromatograms (EICs) and expressed as a percentage of the total signal.",
                        "format": "Float",
                        "examples": "90.0",
                        "possible_values": ""
                    },
                    "Seq Loc": {
                        "description": "The specific amino acid position(s) within the protein sequence where a peptide or modification is located, often denoted by residue numbers and chain type (e.g., HC(88–125)).",
                        "format": "String",
                        "examples": "HC(1-31)",
                        "possible_values": ""
                    }
                },
                "unique_indices_descriptions": {
                    "Protein Lot": {
                        "description": "Protein lots are batches of protein constructs analyzed to detect potential liabilities affecting stability, efficacy, and safety. Key liabilities include clipping events, deamidation, cyclization, oxidation, thioether bond formation, and glycation. Analytical methods such as reduced protein analysis by RPLC-UV-MS and peptide map analysis in reducing conditions are used to identify and quantify these modifications.",
                        "format": "String",
                        "examples": "P066_FH0.7-0-hulgG-LALAPG-FJB",
                        "possible_values": ""
                    },
                    "Peptide": {
                        "description": "A fragment of the protein sequence, typically derived from enzymatic digestion, used to detect and localize specific modifications or features.",
                        "format": "String",
                        "examples": "QVQLQQSGPGLVQPSQSLSITCTVSDFSLAR",
                        "possible_values": ""
                    },
                    "Timepoint": {
                        "description": "A designated sampling moment in a stability or stress study, used to track changes in the protein over time under specific conditions.",
                        "format": "String",
                        "examples": "0w",
                        "possible_values": "0w, 2w, 4w, 6w"
                    },
                    "Modification": {
                        "description": "Any chemical or structural alteration to the protein or peptide, such as deamidation, oxidation, clipping, or glycation, which may affect function or stability.",
                        "format": "String",
                        "examples": "deamidation",
                        "possible_values": "Deamidation, Oxidation, Truncation, pyroE, Isomerization, N-glycosylation, NonConforming, pyroQ, Thioether, Clipping, O-glycosylation, Double deamidation"
                    }
                }
            }
        }

    def _create_default_config(self) -> Dict[str, Any]:
        """Create the default configuration and persist it.

        Used when the configuration file doesn't exist. Persisting is
        best-effort: the defaults are returned even if the write fails (the
        failure is logged by ``_save_configs``).
        """
        default_config = self._build_default_config()
        # Save the default configuration
        self._save_configs(default_config)
        return default_config

    def _save_configs(self, configs: Dict[str, Any]) -> bool:
        """Write *configs* to the YAML file.

        Returns:
            True when the file was written, False when an error occurred
            (the error is logged, never raised).
        """
        try:
            with open(self.config_file, 'w', encoding='utf-8') as f:
                yaml.dump(configs, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
        except Exception as e:
            logger.error(f"Error saving configurations: {e}")
            return False
        logger.info(f"Saved {len(configs)} configurations to {self.config_file}")
        return True

    def get_config_names(self) -> List[str]:
        """Return the names of all loaded configurations."""
        return list(self.configs)

    def get_config(self, config_name: str) -> Optional[Dict[str, Any]]:
        """Return the configuration stored under *config_name*, or None."""
        return self.configs.get(config_name)

    def save_config(self, config_name: str, config_data: Dict[str, Any]) -> bool:
        """Add or replace a configuration and persist all configurations.

        Args:
            config_name: Key under which the configuration is stored.
            config_data: Configuration dict; must contain the keys 'name',
                'fields', 'field_descriptions' and
                'unique_indices_descriptions'.

        Returns:
            True when the configuration was validated and written to disk.
            False when a required key is missing or the write failed; in
            both cases the in-memory configurations are left unchanged.
        """
        try:
            # Validate required fields
            for field in self._REQUIRED_FIELDS:
                if field not in config_data:
                    logger.error(f"Missing required field: {field}")
                    return False

            # Write the would-be state first; commit to memory only when the
            # file really holds it, so a failed write is not reported as
            # success.
            updated = {**self.configs, config_name: config_data}
            if not self._save_configs(updated):
                return False
            self.configs[config_name] = config_data
            logger.info(f"Saved configuration: {config_name}")
            return True
        except Exception as e:
            logger.error(f"Error saving configuration {config_name}: {e}")
            return False

    def delete_config(self, config_name: str) -> bool:
        """Delete a configuration and persist the remaining ones.

        Returns:
            True when the configuration existed and the file was rewritten
            without it. False when it does not exist or the write failed; in
            both cases the in-memory configurations are left unchanged.
        """
        try:
            if config_name not in self.configs:
                logger.warning(f"Configuration {config_name} not found")
                return False

            # Same write-then-commit order as save_config().
            remaining = {
                name: data for name, data in self.configs.items() if name != config_name
            }
            if not self._save_configs(remaining):
                return False
            del self.configs[config_name]
            logger.info(f"Deleted configuration: {config_name}")
            return True
        except Exception as e:
            logger.error(f"Error deleting configuration {config_name}: {e}")
            return False

    def reload_configs(self) -> None:
        """Discard the in-memory configurations and reload them from file."""
        self.configs = self._load_configs()
# Global instance: a module-level ConfigManager built at import time with the
# default file name ("file_types.yaml", resolved next to this module).
# Constructing it runs _load_configs(), so importing this module performs
# file I/O.
config_manager = ConfigManager()