"""Streamlit app that assigns arXiv-style topic categories to an article.

The user enters a title and a short description; a sequence-classification
model scores every category in ``id_to_cat`` and the app lists the
top-scoring categories, with a toggle to reveal the full ranking.
"""

import streamlit as st
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)

# Session-state defaults: Streamlit re-executes this script on every
# interaction, so anything that must survive a rerun lives in session_state.
if 'show_all' not in st.session_state:
    st.session_state.show_all = False
if 'results' not in st.session_state:
    st.session_state.results = []

# Class index -> human-readable category name.
id_to_cat = {
    0: 'Performance',
    1: 'Molecular Networks',
    2: 'Operating Systems',
    3: 'High Energy Astrophysical Phenomena',
    4: 'Computational Finance',
    5: 'General Finance',
    6: 'Astrophysics of Galaxies',
    7: 'Portfolio Management',
    8: 'Functional Analysis',
    9: 'Quantitative Methods',
    10: 'Mathematical Software',
    11: 'Computation',
    12: 'Chemical Physics',
    13: 'Information Theory',
    14: 'Classical Physics',
    15: 'Subcellular Processes',
    16: 'Medical Physics',
    17: 'Differential Geometry',
    18: 'Biomolecules',
    19: 'Metric Geometry',
    20: 'Cryptography and Security',
    21: 'Instrumentation and Methods for Astrophysics',
    22: 'General Mathematics',
    23: 'Computational Complexity',
    24: 'Soft Condensed Matter',
    25: 'Analysis of PDEs',
    26: 'Human-Computer Interaction',
    27: 'Classical Analysis and ODEs',
    28: 'Genomics',
    29: 'Optimization and Control',
    30: 'Applied Physics',
    31: 'Computational Engineering, Finance, and Science',
    32: 'Quantum Algebra',
    33: 'Other Condensed Matter',
    34: 'Category Theory',
    35: 'Popular Physics',
    36: 'General Topology',
    37: 'Algebraic Topology',
    38: 'Trading and Market Microstructure',
    39: 'Numerical Analysis',
    40: 'Applications',
    41: 'Group Theory',
    42: 'Cosmology and Nongalactic Astrophysics',
    43: 'Mathematical Physics',
    44: 'Econometrics',
    45: 'Systems and Control',
    46: 'Graphics',
    47: 'Data Structures and Algorithms',
    48: 'Operator Algebras',
    49: 'Number Theory',
    50: 'Robotics',
    51: 'Nuclear Theory',
    52: 'Neural and Evolutionary Computing',
    53: 'Multimedia',
    54: 'Information Retrieval',
    55: 'Image and Video Processing',
    56: 'Rings and Algebras',
    57: 'Instrumentation and Detectors',
    58: 'Social and Information Networks',
    59: 'High Energy Physics - Lattice',
    60: 'Emerging Technologies',
    61: 'Strongly Correlated Electrons',
    62: 'Representation Theory',
    63: 'Space Physics',
    64: 'Risk Management',
    65: 'Disordered Systems and Neural Networks',
    66: 'Databases',
    67: 'Networking and Internet Architecture',
    68: 'Computers and Society',
    69: 'Hardware Architecture',
    70: 'Chaotic Dynamics',
    71: 'Mesoscale and Nanoscale Physics',
    72: 'Computational Geometry',
    73: 'Commutative Algebra',
    74: 'Statistics Theory',
    75: 'General Literature',
    76: 'Physics and Society',
    77: 'Geophysics',
    78: 'Economics',
    79: 'Quantum Physics',
    80: 'Symbolic Computation',
    81: 'Computational Physics',
    82: 'Sound',
    83: 'Multiagent Systems',
    84: 'Signal Processing',
    85: 'Adaptation and Self-Organizing Systems',
    86: 'Other Computer Science',
    87: 'Other Quantitative Biology',
    88: 'Formal Languages and Automata Theory',
    89: 'Populations and Evolution',
    90: 'Spectral Theory',
    91: 'Pattern Formation and Solitons',
    92: 'Methodology',
    93: 'Biological Physics',
    94: 'General Physics',
    95: 'Logic in Computer Science',
    96: 'Complex Variables',
    97: 'Optics',
    98: 'Discrete Mathematics',
    99: 'History and Overview',
    100: 'Programming Languages',
    101: 'Audio and Speech Processing',
    102: 'Algebraic Geometry',
    103: 'Neurons and Cognition',
    104: 'High Energy Physics - Phenomenology',
    105: 'History and Philosophy of Physics',
    106: 'Earth and Planetary Astrophysics',
    107: 'Pricing of Securities',
    108: 'Distributed, Parallel, and Cluster Computing',
    109: 'Tissues and Organs',
    110: 'Cellular Automata and Lattice Gases',
    111: 'Statistical Finance',
    112: 'Materials Science',
    113: 'High Energy Physics - Theory',
    114: 'Digital Libraries',
    115: 'Other Statistics',
    116: 'Superconductivity',
    117: 'Cell Behavior',
    118: 'General Relativity and Quantum Cosmology',
    119: 'Dynamical Systems',
    120: 'Statistical Mechanics',
    121: 'Fluid Dynamics',
    122: 'Computer Science and Game Theory',
    123: 'Logic',
    124: 'Computer Vision and Pattern Recognition',
    125: 'Solar and Stellar Astrophysics',
    126: 'High Energy Physics - Experiment',
    127: 'Software Engineering',
    128: 'Combinatorics',
    129: 'Data Analysis, Statistics and Probability',
    130: 'Machine Learning',
    131: 'Probability',
    132: 'Atmospheric and Oceanic Physics',
    133: 'Geometric Topology',
    134: 'Computation and Language',
    135: 'Quantum Gases',
    136: 'Nuclear Experiment',
    137: 'Artificial Intelligence',
}

# Path or hub id of the fine-tuned classification checkpoint.
model_name = 'checkpoint'

# Results are listed until their scores add up to this value, unless the
# user asks for the full ranking.
CUMULATIVE_THRESHOLD = 0.95


def _cache_resource(func):
    """Wrap *func* with ``st.cache_resource`` when this Streamlit has it.

    Older Streamlit releases lack ``cache_resource``; there the function is
    returned unchanged and simply runs on every script execution.
    """
    decorator = getattr(st, "cache_resource", None)
    return decorator(func) if decorator is not None else func


def _rerun():
    """Restart the script run using whichever rerun API Streamlit exposes."""
    # Newer Streamlit provides `st.rerun`; older releases only have
    # `st.experimental_rerun`.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()


@_cache_resource
def _load_classifier():
    """Load tokenizer, model and pipeline once instead of on every rerun.

    Returns:
        A ``(tokenizer, model, classifier)`` tuple, where ``classifier`` is
        a text-classification pipeline asked for a score per category.

    Raises:
        OSError: propagated from ``from_pretrained`` when loading fails.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained('distilbert-base-cased')
    loaded_model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(id_to_cat),
        problem_type="multi_label_classification",
    )
    classifier = pipeline(
        "text-classification",
        model=loaded_model,
        tokenizer=loaded_tokenizer,
        top_k=len(id_to_cat),
    )
    return loaded_tokenizer, loaded_model, classifier


# Load the model; on failure show the error and halt this script run.
try:
    tokenizer, model, topic_classifier = _load_classifier()
except OSError as e:
    st.error(f"Ошибка загрузки модели: {e}. Убедитесь, что модель доступна или укажите другую.")
    st.stop()


def classify_text(title: str, description: str) -> list:
    """Score every category for an article.

    Args:
        title: Article title (may be empty).
        description: Short article description (may be empty).

    Returns:
        A list of ``(category_name, score)`` tuples sorted by descending
        score. On any classification error the error is shown in the UI
        and an empty list is returned.
    """
    text = f"{title} {description}"
    # The broad except is deliberate: this is the UI boundary, and a failed
    # classification should surface as a message rather than a traceback.
    try:
        # truncation=True is forwarded to the tokenizer so that text longer
        # than its maximum length is cut instead of failing in the model.
        raw = topic_classifier(text, truncation=True)
        # Depending on the transformers version, a single string yields
        # either a flat list of dicts or that list nested in an outer list.
        items = raw[0] if raw and isinstance(raw[0], list) else raw
        processed = []
        for item in items:
            # Labels are expected in the default "LABEL_<id>" form.
            label_id = int(item['label'].split('_')[1])
            processed.append((id_to_cat[label_id], item['score']))
        return sorted(processed, key=lambda pair: pair[1], reverse=True)
    except Exception as e:
        st.error(f"Ошибка классификации: {e}")
        return []


def _select_visible(results, show_all, threshold=CUMULATIVE_THRESHOLD):
    """Return the leading results to display.

    With ``show_all`` every result is returned. Otherwise results are taken
    in order until their running score total reaches ``threshold``; the
    result that crosses the threshold is included.
    """
    if show_all:
        return list(results)
    # NOTE(review): with problem_type="multi_label_classification" the
    # pipeline presumably applies a sigmoid, so scores need not sum to 1 --
    # confirm that a cumulative cutoff is the intended display rule.
    visible = []
    cumulative = 0.0
    for label, score in results:
        if cumulative >= threshold:
            break
        visible.append((label, score))
        cumulative += score
    return visible


# --- Streamlit interface ---
st.title("Классификация статей 1")

# Input fields
title = st.text_input("Заголовок статьи")
description = st.text_area("Краткое описание статьи", height=150)

# Classification button
if st.button("Классифицировать"):
    # Whitespace-only input counts as empty.
    if not title.strip() and not description.strip():
        st.warning("Пожалуйста, заполните хотя бы одно поле.")
    else:
        with st.spinner("Идет классификация..."):
            st.session_state.results = classify_text(title, description)
            st.session_state.show_all = False

# Result display
if st.session_state.results:
    st.subheader("Результаты классификации:")

    shown_results = _select_visible(
        st.session_state.results, st.session_state.show_all
    )
    for label, score in shown_results:
        st.write(f"- **{label}**: {score:.4f}")

    # Toggle between the truncated and the full ranking. The flag changes
    # after the list above was rendered, so a rerun is needed to redraw it.
    if st.session_state.show_all:
        if st.button("Скрыть подробности"):
            st.session_state.show_all = False
            _rerun()
    else:
        if st.button("Показать все категории"):
            st.session_state.show_all = True
            _rerun()
# Remind the user to classify once some text was entered but no results exist.
elif title or description:
    st.warning("Пожалуйста, нажмите кнопку 'Классифицировать'")