1NEYRON1's picture
Update app.py
0855d6f
raw
history blame
7.84 kB
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
# Initialise session state: seed the keys the UI reads so that later lookups
# never hit a missing key on the very first script run.
if 'show_all' not in st.session_state:
    st.session_state['show_all'] = False
if 'results' not in st.session_state:
    st.session_state['results'] = []
# Maps the classifier's integer label id to a human-readable category name.
# The number of entries is also used as ``num_labels`` when loading the model
# and as ``top_k`` when building the pipeline, so it must match the checkpoint.
id_to_cat = {
    0: 'Performance',
    1: 'Molecular Networks',
    2: 'Operating Systems',
    3: 'High Energy Astrophysical Phenomena',
    4: 'Computational Finance',
    5: 'General Finance',
    6: 'Astrophysics of Galaxies',
    7: 'Portfolio Management',
    8: 'Functional Analysis',
    9: 'Quantitative Methods',
    10: 'Mathematical Software',
    11: 'Computation',
    12: 'Chemical Physics',
    13: 'Information Theory',
    14: 'Classical Physics',
    15: 'Subcellular Processes',
    16: 'Medical Physics',
    17: 'Differential Geometry',
    18: 'Biomolecules',
    19: 'Metric Geometry',
    20: 'Cryptography and Security',
    21: 'Instrumentation and Methods for Astrophysics',
    22: 'General Mathematics',
    23: 'Computational Complexity',
    24: 'Soft Condensed Matter',
    25: 'Analysis of PDEs',
    26: 'Human-Computer Interaction',
    27: 'Classical Analysis and ODEs',
    28: 'Genomics',
    29: 'Optimization and Control',
    30: 'Applied Physics',
    31: 'Computational Engineering, Finance, and Science',
    32: 'Quantum Algebra',
    33: 'Other Condensed Matter',
    34: 'Category Theory',
    35: 'Popular Physics',
    36: 'General Topology',
    37: 'Algebraic Topology',
    38: 'Trading and Market Microstructure',
    39: 'Numerical Analysis',
    40: 'Applications',
    41: 'Group Theory',
    42: 'Cosmology and Nongalactic Astrophysics',
    43: 'Mathematical Physics',
    44: 'Econometrics',
    45: 'Systems and Control',
    46: 'Graphics',
    47: 'Data Structures and Algorithms',
    48: 'Operator Algebras',
    49: 'Number Theory',
    50: 'Robotics',
    51: 'Nuclear Theory',
    52: 'Neural and Evolutionary Computing',
    53: 'Multimedia',
    54: 'Information Retrieval',
    55: 'Image and Video Processing',
    56: 'Rings and Algebras',
    57: 'Instrumentation and Detectors',
    58: 'Social and Information Networks',
    59: 'High Energy Physics - Lattice',
    60: 'Emerging Technologies',
    61: 'Strongly Correlated Electrons',
    62: 'Representation Theory',
    63: 'Space Physics',
    64: 'Risk Management',
    65: 'Disordered Systems and Neural Networks',
    66: 'Databases',
    67: 'Networking and Internet Architecture',
    68: 'Computers and Society',
    69: 'Hardware Architecture',
    70: 'Chaotic Dynamics',
    71: 'Mesoscale and Nanoscale Physics',
    72: 'Computational Geometry',
    73: 'Commutative Algebra',
    74: 'Statistics Theory',
    75: 'General Literature',
    76: 'Physics and Society',
    77: 'Geophysics',
    78: 'Economics',
    79: 'Quantum Physics',
    80: 'Symbolic Computation',
    81: 'Computational Physics',
    82: 'Sound',
    83: 'Multiagent Systems',
    84: 'Signal Processing',
    85: 'Adaptation and Self-Organizing Systems',
    86: 'Other Computer Science',
    87: 'Other Quantitative Biology',
    88: 'Formal Languages and Automata Theory',
    89: 'Populations and Evolution',
    90: 'Spectral Theory',
    91: 'Pattern Formation and Solitons',
    92: 'Methodology',
    93: 'Biological Physics',
    94: 'General Physics',
    95: 'Logic in Computer Science',
    96: 'Complex Variables',
    97: 'Optics',
    98: 'Discrete Mathematics',
    99: 'History and Overview',
    100: 'Programming Languages',
    101: 'Audio and Speech Processing',
    102: 'Algebraic Geometry',
    103: 'Neurons and Cognition',
    104: 'High Energy Physics - Phenomenology',
    105: 'History and Philosophy of Physics',
    106: 'Earth and Planetary Astrophysics',
    107: 'Pricing of Securities',
    108: 'Distributed, Parallel, and Cluster Computing',
    109: 'Tissues and Organs',
    110: 'Cellular Automata and Lattice Gases',
    111: 'Statistical Finance',
    112: 'Materials Science',
    113: 'High Energy Physics - Theory',
    114: 'Digital Libraries',
    115: 'Other Statistics',
    116: 'Superconductivity',
    117: 'Cell Behavior',
    118: 'General Relativity and Quantum Cosmology',
    119: 'Dynamical Systems',
    120: 'Statistical Mechanics',
    121: 'Fluid Dynamics',
    122: 'Computer Science and Game Theory',
    123: 'Logic',
    124: 'Computer Vision and Pattern Recognition',
    125: 'Solar and Stellar Astrophysics',
    126: 'High Energy Physics - Experiment',
    127: 'Software Engineering',
    128: 'Combinatorics',
    129: 'Data Analysis, Statistics and Probability',
    130: 'Machine Learning',
    131: 'Probability',
    132: 'Atmospheric and Oceanic Physics',
    133: 'Geometric Topology',
    134: 'Computation and Language',
    135: 'Quantum Gases',
    136: 'Nuclear Experiment',
    137: 'Artificial Intelligence',
}
# Load the model.
# Streamlit re-executes this script from the top on every widget interaction,
# so the (expensive) checkpoint load is wrapped in ``st.cache_resource`` and
# performed only once per server process.
model_name = 'checkpoint'


@st.cache_resource(show_spinner=False)
def _load_tokenizer_and_model(checkpoint, num_labels):
    """Return ``(tokenizer, model)`` for the multi-label topic classifier.

    Args:
        checkpoint: Path or hub id passed to
            ``AutoModelForSequenceClassification.from_pretrained``.
        num_labels: Number of output labels the classification head has.

    Raises:
        OSError: Propagated from ``from_pretrained`` when the tokenizer or
            checkpoint cannot be found/loaded.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained('distilbert-base-cased')
    loaded_model = AutoModelForSequenceClassification.from_pretrained(
        checkpoint,
        num_labels=num_labels,
        problem_type="multi_label_classification",
    )
    return loaded_tokenizer, loaded_model


try:
    tokenizer, model = _load_tokenizer_and_model(model_name, len(id_to_cat))
except OSError as e:
    # Without a model nothing else on the page can work: report and halt
    # this script run.
    st.error(f"Ошибка загрузки модели: {e}. Убедитесь, что модель доступна или укажите другую.")
    st.stop()
def classify_text(title, description):
    """Score an article against every category in ``id_to_cat``.

    The title and description are joined with a single space and fed to a
    ``text-classification`` pipeline built from the module-level ``model``
    and ``tokenizer``.

    Args:
        title: Article title (may be empty).
        description: Short description / abstract of the article (may be
            empty).

    Returns:
        A list of ``(category_name, score)`` tuples sorted by descending
        score. An empty list is returned if classification fails; in that
        case the error is reported to the user through ``st.error``.
    """
    text = f"{title} {description}"
    topic_classifier = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        top_k=len(id_to_cat),
    )
    try:
        # Truncate over-long inputs to the tokenizer's maximum length instead
        # of letting the model fail on a long abstract.
        results = topic_classifier(text, truncation=True)
        # For a single input string the pipeline wraps the per-label list in
        # an outer list; accept a flat list as well so an output-shape change
        # does not break the loop below.
        items = results[0] if results and isinstance(results[0], list) else results
        # Labels are expected in the form "LABEL_<id>"; anything else raises
        # and is reported by the handler below.
        processed = [
            (id_to_cat[int(item['label'].split('_')[1])], item['score'])
            for item in items
        ]
        return sorted(processed, key=lambda pair: pair[1], reverse=True)
    except Exception as e:
        # UI boundary: surface the problem to the user rather than crashing
        # the whole page.
        st.error(f"Ошибка классификации: {e}")
        return []
# --- Streamlit interface ---
st.title("Классификация статей 1")

# Input fields
title = st.text_input("Заголовок статьи")
description = st.text_area("Краткое описание статьи", height=150)

# Classify button: needs at least one non-empty field to run the model.
if st.button("Классифицировать"):
    if title or description:
        with st.spinner("Идет классификация..."):
            st.session_state.results = classify_text(title, description)
        # A fresh classification always starts in the collapsed view.
        st.session_state.show_all = False
    else:
        st.warning("Пожалуйста, заполните хотя бы одно поле.")
# Display the results
if st.session_state.results:
    st.subheader("Результаты классификации:")

    # Decide which rows to show. In the collapsed view only the top
    # categories are listed, stopping once their cumulative score reaches
    # 0.95; in the expanded view every category is listed.
    if st.session_state.show_all:
        shown_results = list(st.session_state.results)
    else:
        shown_results = []
        cumulative = 0.0
        for label, score in st.session_state.results:
            if cumulative >= 0.95:
                break
            shown_results.append((label, score))
            cumulative += score

    # Render the selected rows
    for label, score in shown_results:
        st.write(f"- **{label}**: {score:.4f}")

    # ``st.experimental_rerun`` is deprecated in newer Streamlit releases in
    # favour of ``st.rerun``; prefer the new name and fall back to the old
    # one so the app runs on either version.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun

    # Toggle between the collapsed and the expanded view. The list above was
    # already rendered for this run, so a rerun is requested to redraw it.
    if st.session_state.show_all:
        if st.button("Скрыть подробности"):
            st.session_state.show_all = False
            rerun()
    else:
        if st.button("Показать все категории"):
            st.session_state.show_all = True
            rerun()
# No results yet but something has been typed: prompt the user to classify.
elif title or description:
    st.warning("Пожалуйста, нажмите кнопку 'Классифицировать'")