from dataclasses import dataclass, field
from typing import Optional

from utils.snowflake_id import snowflake_id_str


@dataclass
class BaseBioDocument:
    """Base class for biomedical documents.

    Holds the fields shared by every search type.
    """

    # Filled in per instance by calling snowflake_id_str() when the caller
    # does not supply a value.
    bio_id: Optional[str] = field(default_factory=snowflake_id_str)
    title: Optional[str] = None
    text: Optional[str] = None
    source: Optional[str] = None
    source_id: Optional[str] = None


@dataclass
class PubMedDocument(BaseBioDocument):
    """PubMed academic literature document.

    Adds the fields specific to academic literature.
    """

    abstract: Optional[str] = None
    authors: Optional[str] = None
    doi: Optional[str] = None
    journal: Optional[str] = None
    pub_date: Optional[str] = None
    if_score: Optional[float] = None
    url: Optional[str] = None

    def __post_init__(self):
        """Default ``source`` to ``"pubmed"`` when it was left as ``None``."""
        if self.source is None:
            self.source = "pubmed"


@dataclass
class PersonalDocument(BaseBioDocument):
    """Personal vector-search document.

    Adds the fields specific to personal documents.
    """

    if_score: Optional[float] = None
    doc_id: Optional[str] = None
    index: Optional[int] = 0
    user_id: Optional[str] = None
    file_name: Optional[str] = None

    def __post_init__(self):
        """Default ``source`` to ``"personal_vector"`` when left as ``None``."""
        if self.source is None:
            self.source = "personal_vector"


@dataclass
class WebDocument(BaseBioDocument):
    """Web search document.

    Adds the fields specific to web page content.
    """

    url: Optional[str] = None
    description: Optional[str] = None

    def __post_init__(self):
        """Default ``source`` to ``"web"`` when it was left as ``None``."""
        if self.source is None:
            self.source = "web"


# The original BioDocument class is kept for backward compatibility.
@dataclass
class BioDocument(BaseBioDocument):
    """Generic biomedical document (backward compatible).

    Carries the same extra fields as ``PubMedDocument`` plus ``doc_id``.
    Using one of the specialised document types is recommended instead.
    Unlike the specialised types, this class does not assign a default
    ``source``.
    """

    abstract: Optional[str] = None
    authors: Optional[str] = None
    doi: Optional[str] = None
    journal: Optional[str] = None
    pub_date: Optional[str] = None
    if_score: Optional[float] = None
    url: Optional[str] = None
    doc_id: Optional[str] = None


# Factory function: builds the document object matching the source type.
def create_bio_document(source: str, **kwargs) -> BaseBioDocument:
    """Create the document object that corresponds to ``source``.

    Args:
        source: Document source type ("pubmed", "personal_vector", "web").
            Any other value produces a generic ``BioDocument``.
        **kwargs: Document fields, forwarded to the chosen dataclass
            constructor unchanged.

    Returns:
        The document object for the given source type.

    Raises:
        TypeError: If ``kwargs`` contains a name that is not a field of the
            chosen document class (raised by the dataclass constructor).
    """
    # For the three recognised types, ``source`` is not forwarded: each class
    # fills it in from its own ``__post_init__``.
    if source == "pubmed":
        return PubMedDocument(**kwargs)
    if source == "personal_vector":
        return PersonalDocument(**kwargs)
    if source == "web":
        return WebDocument(**kwargs)

    # Fall back to the generic BioDocument. It has no ``__post_init__``, so
    # the requested source must be passed explicitly or it would be lost and
    # the document would end up with ``source=None``.
    return BioDocument(source=source, **kwargs)