# 1NEYRON1's picture
# Update app.py
# c4aa0f7
# raw
# history blame
# 8.05 kB
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
# Seed the session-state keys this script reads. A key that is already
# present (stored by an earlier rerun of the same session) is left untouched.
for _state_key, _initial_value in (('show_all', False), ('results', [])):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Maps the integer class index produced by the classifier to a human-readable
# category name. The position of a name in the tuple below IS its index, so
# the order must not change. (The names look like arXiv subject categories.)
id_to_cat = dict(enumerate((
    # 0-9
    'Performance',
    'Molecular Networks',
    'Operating Systems',
    'High Energy Astrophysical Phenomena',
    'Computational Finance',
    'General Finance',
    'Astrophysics of Galaxies',
    'Portfolio Management',
    'Functional Analysis',
    'Quantitative Methods',
    # 10-19
    'Mathematical Software',
    'Computation',
    'Chemical Physics',
    'Information Theory',
    'Classical Physics',
    'Subcellular Processes',
    'Medical Physics',
    'Differential Geometry',
    'Biomolecules',
    'Metric Geometry',
    # 20-29
    'Cryptography and Security',
    'Instrumentation and Methods for Astrophysics',
    'General Mathematics',
    'Computational Complexity',
    'Soft Condensed Matter',
    'Analysis of PDEs',
    'Human-Computer Interaction',
    'Classical Analysis and ODEs',
    'Genomics',
    'Optimization and Control',
    # 30-39
    'Applied Physics',
    'Computational Engineering, Finance, and Science',
    'Quantum Algebra',
    'Other Condensed Matter',
    'Category Theory',
    'Popular Physics',
    'General Topology',
    'Algebraic Topology',
    'Trading and Market Microstructure',
    'Numerical Analysis',
    # 40-49
    'Applications',
    'Group Theory',
    'Cosmology and Nongalactic Astrophysics',
    'Mathematical Physics',
    'Econometrics',
    'Systems and Control',
    'Graphics',
    'Data Structures and Algorithms',
    'Operator Algebras',
    'Number Theory',
    # 50-59
    'Robotics',
    'Nuclear Theory',
    'Neural and Evolutionary Computing',
    'Multimedia',
    'Information Retrieval',
    'Image and Video Processing',
    'Rings and Algebras',
    'Instrumentation and Detectors',
    'Social and Information Networks',
    'High Energy Physics - Lattice',
    # 60-69
    'Emerging Technologies',
    'Strongly Correlated Electrons',
    'Representation Theory',
    'Space Physics',
    'Risk Management',
    'Disordered Systems and Neural Networks',
    'Databases',
    'Networking and Internet Architecture',
    'Computers and Society',
    'Hardware Architecture',
    # 70-79
    'Chaotic Dynamics',
    'Mesoscale and Nanoscale Physics',
    'Computational Geometry',
    'Commutative Algebra',
    'Statistics Theory',
    'General Literature',
    'Physics and Society',
    'Geophysics',
    'Economics',
    'Quantum Physics',
    # 80-89
    'Symbolic Computation',
    'Computational Physics',
    'Sound',
    'Multiagent Systems',
    'Signal Processing',
    'Adaptation and Self-Organizing Systems',
    'Other Computer Science',
    'Other Quantitative Biology',
    'Formal Languages and Automata Theory',
    'Populations and Evolution',
    # 90-99
    'Spectral Theory',
    'Pattern Formation and Solitons',
    'Methodology',
    'Biological Physics',
    'General Physics',
    'Logic in Computer Science',
    'Complex Variables',
    'Optics',
    'Discrete Mathematics',
    'History and Overview',
    # 100-109
    'Programming Languages',
    'Audio and Speech Processing',
    'Algebraic Geometry',
    'Neurons and Cognition',
    'High Energy Physics - Phenomenology',
    'History and Philosophy of Physics',
    'Earth and Planetary Astrophysics',
    'Pricing of Securities',
    'Distributed, Parallel, and Cluster Computing',
    'Tissues and Organs',
    # 110-119
    'Cellular Automata and Lattice Gases',
    'Statistical Finance',
    'Materials Science',
    'High Energy Physics - Theory',
    'Digital Libraries',
    'Other Statistics',
    'Superconductivity',
    'Cell Behavior',
    'General Relativity and Quantum Cosmology',
    'Dynamical Systems',
    # 120-129
    'Statistical Mechanics',
    'Fluid Dynamics',
    'Computer Science and Game Theory',
    'Logic',
    'Computer Vision and Pattern Recognition',
    'Solar and Stellar Astrophysics',
    'High Energy Physics - Experiment',
    'Software Engineering',
    'Combinatorics',
    'Data Analysis, Statistics and Probability',
    # 130-137
    'Machine Learning',
    'Probability',
    'Atmospheric and Oceanic Physics',
    'Geometric Topology',
    'Computation and Language',
    'Quantum Gases',
    'Nuclear Experiment',
    'Artificial Intelligence',
)))
# Load the model (replace with your own model if needed).
@st.cache_resource
def load_model():
    """Create the sequence-classification model and its tokenizer.

    The tokenizer is fetched under the name 'distilbert-base-cased'; the model
    weights are read from 'checkpoint'. The model is configured with one
    output per entry of ``id_to_cat`` and the multi-label problem type.

    Returns:
        tuple: ``(model, tokenizer)``, in that order.
    """
    label_count = len(id_to_cat)
    text_tokenizer = AutoTokenizer.from_pretrained('distilbert-base-cased')
    classifier = AutoModelForSequenceClassification.from_pretrained(
        'checkpoint',
        num_labels=label_count,
        problem_type="multi_label_classification",
    )
    return classifier, text_tokenizer
# Obtain the (cached) model and tokenizer. If loading raises OSError, show
# the problem in the page and stop this script run instead of crashing.
try:
    model, tokenizer = load_model()
except OSError as load_error:
    st.error(f"Ошибка загрузки модели: {load_error}. Убедитесь, что модель доступна или укажите другую.")
    st.stop()
def _category_for_label(raw_label):
    """Translate a pipeline label such as 'LABEL_12' into a category name.

    The trailing number is looked up in ``id_to_cat``. A label that carries no
    numeric suffix, or whose index is unknown, is returned unchanged so that
    an unexpected label format never aborts the whole classification.
    """
    index_text = raw_label.rpartition('_')[2]
    try:
        return id_to_cat[int(index_text)]
    except (KeyError, ValueError):
        return raw_label


def classify_text(title, description):
    """Classify an article and return a score for every category.

    Args:
        title (str): Article title.
        description (str): Short description (abstract) of the article.

    Returns:
        list: ``(category_name, score)`` tuples in the order produced by the
        pipeline (highest score first when ``top_k`` is set, per the
        Transformers documentation). An empty list is returned when the
        classification fails; the error is then shown in the page.
    """
    text = f"{title} {description}"  # Title and description are classified together
    topic_classifier = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        top_k=len(id_to_cat),  # ask for a score for every category
    )
    try:
        # truncation=True cuts over-long input down to the tokenizer's maximum
        # length instead of letting the model fail on it.
        results = topic_classifier(text, truncation=True)
    except Exception as e:  # UI boundary: report the problem, do not crash the app
        st.error(f"Ошибка классификации: {e}")
        return []
    if not results:
        return []
    # For a single input string the pipeline wraps the per-label dicts in an
    # outer list; accept a flat list as well in case that wrapping is absent.
    scored_labels = results[0] if isinstance(results[0], list) else results
    return [(_category_for_label(item['label']), item['score']) for item in scored_labels]
# --- Streamlit interface ---
st.title("Классификация статей 1")

# Input fields
title = st.text_input("Заголовок статьи")
description = st.text_area("Краткое описание статьи", height=150)

# "Classify" button. The click is remembered for the rest of this run so that
# the "please press the button" hint below is not shown right after a click.
classify_clicked = st.button("Классифицировать")
if classify_clicked:
    if not title and not description:
        st.warning("Пожалуйста, заполните хотя бы одно поле.")
    else:
        with st.spinner("Идет классификация..."):  # progress indicator
            st.session_state.results = classify_text(title, description)
            st.session_state.show_all = False  # start again from the shortened view

# Display results if available
if st.session_state.results:
    if st.session_state.show_all:
        st.subheader("Полные результаты классификации:")
        for label, score in st.session_state.results:
            st.write(f"- **{label}**: {score:.4f}")
    else:
        st.subheader("Результаты классификации (top 95%):")
        # Print entries until their running score total reaches the cut-off.
        # NOTE(review): the model is loaded in multi-label mode, so the scores
        # may not sum to 1 and this is a cumulative-score threshold rather
        # than a probability mass - confirm that this is intended.
        cumulative_threshold = 0.95
        cumulative_prob = 0
        for label, score in st.session_state.results:
            st.write(f"- **{label}**: {score:.4f}")
            cumulative_prob += score
            if cumulative_prob >= cumulative_threshold:
                break
        # "Show all" button: switch to the full list and redraw the page
        if st.button("Покажи все"):
            st.session_state.show_all = True
            # st.experimental_rerun is deprecated in favour of st.rerun; use
            # the new name when this Streamlit version has it and fall back
            # to the old one otherwise.
            rerun_page = getattr(st, "rerun", None) or st.experimental_rerun
            rerun_page()
elif (title or description) and not classify_clicked:
    # Text was entered but nothing has been classified yet. The hint is
    # skipped on the run where the button was pressed, so a failed
    # classification shows only its own error message.
    st.warning("Пожалуйста, нажмите кнопку 'Классифицировать'.")