Spaces:
Running
Running
Add Space config for Hugging Face5
Browse files- README.md +9 -3
- app.py +28 -17
- requirements.txt +6 -7
README.md
CHANGED
@@ -1,9 +1,15 @@
|
|
1 |
---
|
2 |
-
title: Capybara Classifier
|
3 |
emoji: 🦫
|
4 |
-
colorFrom:
|
5 |
colorTo: blue
|
6 |
-
|
7 |
app_file: app.py
|
8 |
pinned: false
|
9 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Capybara Classifier
|
3 |
emoji: 🦫
|
4 |
+
colorFrom: gray
|
5 |
colorTo: blue
|
6 |
+
sdk: gradio
|
7 |
app_file: app.py
|
8 |
pinned: false
|
9 |
---
|
10 |
+
|
11 |
+
Capybara-3B — модель для классификации текста как `запрос` или `реклама`.
|
12 |
+
|
13 |
+
Пример:
|
14 |
+
`Сколько стоит консультация?` → **запрос**
|
15 |
+
`Скидка 50% на айфон!` → **реклама**
|
app.py
CHANGED
@@ -1,37 +1,48 @@
|
|
1 |
-
# app.py
|
2 |
import os
|
|
|
|
|
3 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
4 |
-
from fastapi import FastAPI
|
5 |
-
from pydantic import BaseModel
|
6 |
|
7 |
-
|
8 |
-
|
|
|
9 |
|
|
|
10 |
tokenizer = AutoTokenizer.from_pretrained(
|
11 |
model_path,
|
12 |
token=hf_token,
|
13 |
trust_remote_code=True,
|
14 |
-
use_fast=
|
15 |
)
|
16 |
|
17 |
model = AutoModelForCausalLM.from_pretrained(
|
18 |
model_path,
|
19 |
token=hf_token,
|
|
|
20 |
device_map="auto",
|
21 |
-
|
22 |
-
trust_remote_code=True
|
23 |
)
|
24 |
|
|
|
25 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
26 |
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
|
33 |
-
async def classify(input: Input):
|
34 |
-
prompt = f"### Вопрос:\n{input.text}\n\n### Класс:"
|
35 |
-
output = pipe(prompt, max_new_tokens=10, do_sample=False)[0]["generated_text"]
|
36 |
-
label = output.split("### Класс:")[-1].strip().split()[0].lower()
|
37 |
-
return {"label": label}
|
|
|
|
|
1 |
import os
|
2 |
+
import gradio as gr
|
3 |
+
import torch
|
4 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
|
|
|
|
5 |
|
6 |
+
# Access token for the Hub; only needed when the model repository is private.
hf_token = os.environ.get("HF_TOKEN")
# Local checkpoint directory, or a Hub repo id such as "NousResearch/Nous-Capybara-3B-V1.9".
model_path = "./capybara-finetuned"

# Load the tokenizer and the model from the same location with the same token.
tokenizer = AutoTokenizer.from_pretrained(
    model_path, token=hf_token, trust_remote_code=True, use_fast=True
)

# float16 when CUDA is available, float32 otherwise.
if torch.cuda.is_available():
    _weights_dtype = torch.float16
else:
    _weights_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    token=hf_token,
    torch_dtype=_weights_dtype,
    device_map="auto",
    trust_remote_code=True,
)

# Build the text-generation pipeline used by the classifier.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
|
28 |
|
29 |
+
# Функция классификации
|
30 |
+
def classify(text):
    """Classify a message with the text-generation pipeline and format the result.

    Wraps ``text`` in the "### Вопрос: ... ### Класс:" prompt, runs the
    module-level ``pipe`` with ``max_new_tokens=10`` and ``do_sample=False``,
    and takes the first whitespace-delimited token after the last
    "### Класс:" marker, lower-cased, as the label.

    Args:
        text: The message to classify (the value of the Gradio textbox).

    Returns:
        A Markdown string: the predicted label on success, a prompt to enter
        text when the input is blank, or an error message when generation
        fails or yields no label.
    """
    # A blank message has nothing to classify; skip the model call entirely.
    if not text or not text.strip():
        return "❗ Введите сообщение для классификации."

    prompt = f"### Вопрос:\n{text}\n\n### Класс:"

    # Only the pipeline call sits in the try block, so parsing problems are
    # reported explicitly below instead of as a generic exception message.
    try:
        result = pipe(prompt, max_new_tokens=10, do_sample=False)[0]["generated_text"]
    except Exception as e:
        return f"❌ Ошибка: {e}"

    # The generated text may contain the marker more than once (e.g. when the
    # user's message includes it); the label follows the last occurrence.
    tokens = result.split("### Класс:")[-1].split()
    if not tokens:
        # The model produced nothing after the marker; previously this
        # surfaced as "list index out of range".
        return "❌ Ошибка: модель не вернула класс"

    label = tokens[0].lower()
    return f"🔍 Класс: **{label}**"
|
38 |
|
39 |
+
# Gradio interface: one multi-line textbox in, Markdown out.
_message_box = gr.Textbox(lines=4, placeholder="Введите сообщение...")

iface = gr.Interface(
    fn=classify,
    inputs=_message_box,
    outputs="markdown",
    title="Capybara Text Classifier 🦫",
    description="Классификация текста как 'запрос' или 'реклама' с помощью Capybara-3B",
)

# Start the app.
iface.launch()
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
peft
|
5 |
-
|
6 |
-
|
7 |
-
fastapi
|
|
|
1 |
+
gradio
|
2 |
+
torch
|
3 |
+
transformers
|
4 |
+
peft
|
5 |
+
accelerate
|
6 |
+
bitsandbytes
|
|