Spaces:

Fas1
/

capybara_fas_ai

Runtime error

App Files Files Community

Fas1 committed 21 days ago

Commit

903b686

verified ·

1 Parent(s): 63c5e51

old

Browse files

Files changed (1) hide show

app.py +9 -82

app.py CHANGED Viewed

@@ -8,110 +8,35 @@ hf_token = os.getenv("HF_TOKEN", None)
 model_path = "./capybara-finetuned"  # или HF-репозиторий, например: "NousResearch/Nous-Capybara-3B-V1.9"
 # Загружаем модель и токенизатор
 tokenizer = AutoTokenizer.from_pretrained(
     model_path,
     token=hf_token,
     trust_remote_code=True,
     use_fast=True,
 )
-# Ensure essential special tokens exist; add if missing and remember to resize embeddings
-_added_specials = False
-if tokenizer.eos_token is None:
-    tokenizer.add_special_tokens({"eos_token": "</s>"})
-    _added_specials = True
-if tokenizer.pad_token is None:
-    # Prefer a distinct PAD token; do not alias to eos to avoid None ids
-    tokenizer.add_special_tokens({"pad_token": "<pad>"})
-    _added_specials = True
-# Choose safe dtype depending on device
-if torch.cuda.is_available():
-    try:
-        bf16_ok = torch.cuda.is_bf16_supported()
-    except Exception:
-        bf16_ok = False
-    _dtype = torch.bfloat16 if bf16_ok else torch.float16
-else:
-    _dtype = torch.float32
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
     token=hf_token,
-    torch_dtype=_dtype,
-    device_map="cpu",          # force full CPU materialization
     trust_remote_code=True,
-    low_cpu_mem_usage=False,    # avoid meta tensors and lazy init on CPU
 )
-# Make generation config consistent with tokenizer and resize embeddings if we added tokens
-model.config.pad_token_id = tokenizer.pad_token_id
-model.config.eos_token_id = tokenizer.eos_token_id
-if '_added_specials' in globals() and _added_specials:
-    try:
-        model.resize_token_embeddings(len(tokenizer))
-    except Exception as _resize_err:
-        print("[warn] resize_token_embeddings failed:", _resize_err)
-# Set model to eval mode
-model.eval()
 os.makedirs("offload", exist_ok=True)
-# Optional warm-up to catch config/runtime issues early
-try:
-    _ = AutoTokenizer
-    # Minimal no-op generation; will use defaults
-    # We keep it extremely small to avoid heavy compute
-    # and we swallow errors to not crash the app
-    pass
-except Exception as _warm_err:
-    print("[warmup] warning:", _warm_err)
 # Создаём пайплайн
-pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-)
-# Функция классификации (ручной вызов generate, явная передача тензоров)
 def classify(text):
-    if not text or not str(text).strip():
-        return "⚠️ Пустой ввод. Введите сообщение."
     prompt = f"### Вопрос:\n{text}\n\n### Класс:"
     try:
-        enc = tokenizer(
-            prompt,
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            max_length=min(2048, getattr(tokenizer, "model_max_length", 2048) or 2048),
-        )
-        input_ids = enc["input_ids"]
-        attention_mask = enc.get("attention_mask")
-        if attention_mask is None:
-            attention_mask = torch.ones_like(input_ids)
-        gen_kwargs = dict(
-            max_new_tokens=16,
-            do_sample=False,
-            pad_token_id=tokenizer.pad_token_id,
-            eos_token_id=tokenizer.eos_token_id,
-            use_cache=True,
-        )
-        with torch.no_grad():
-            out = model.generate(input_ids=input_ids, attention_mask=attention_mask, **gen_kwargs)
-        gen_only = out[:, input_ids.shape[1]:]
-        generated = tokenizer.decode(gen_only[0], skip_special_tokens=True)
-        label = (generated.strip().split()[0].lower() if generated.strip() else "unknown")
         return f"🔍 Класс: **{label}**"
     except Exception as e:
-        import traceback
-        tb = traceback.format_exc(limit=5)
-        return f"❌ Ошибка: {str(e)}\n\n<details><summary>trace</summary>\n\n{tb}\n\n</details>"
 # Интерфейс Gradio
 iface = gr.Interface(
@@ -120,6 +45,8 @@ iface = gr.Interface(
     outputs="markdown",
     title="Capybara Text Classifier 🦫",
     description="Классификация текста как 'запрос' или 'реклама' с помощью Capybara-3B",
 )
 app, local_url, share_url = iface.launch(share=True, ssr_mode=False)

 # Module-level setup: load the tokenizer and causal LM from `model_path`
 # and wrap them in a text-generation pipeline bound to `pipe`.
 model_path = "./capybara-finetuned"  # or an HF repository id, e.g. "NousResearch/Nous-Capybara-3B-V1.9"
 # Load the model and the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(
     model_path,
     token=hf_token,
     trust_remote_code=True,
     use_fast=True,
 )
 # Weights are requested as float16 when CUDA is available, float32 otherwise.
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
     token=hf_token,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    device_map="auto",
     trust_remote_code=True,
 )
 # NOTE(review): this directory is created, but nothing in the visible code
 # passes it to from_pretrained (e.g. as an offload folder) - confirm it is used.
 os.makedirs("offload", exist_ok=True)
 # Create the pipeline
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+# Функция классификации
 def classify(text):
     """Classify *text* with the text-generation pipeline and return Markdown.

     Builds a "### Вопрос / ### Класс" prompt, generates up to 10 new tokens
     without sampling, and reports the first word of the completion as the
     label.

     Args:
         text: User message to classify.

     Returns:
         A Markdown string: the predicted label on success, a warning for
         blank input, or an error message if the pipeline call raises.
     """
     # Blank input gives the model nothing to classify; answer right away
     # instead of spending a generation call on it.
     if not text or not str(text).strip():
         return "⚠️ Пустой ввод. Введите сообщение."
     prompt = f"### Вопрос:\n{text}\n\n### Класс:"
     try:
         result = pipe(prompt, max_new_tokens=10, do_sample=False)[0]["generated_text"]
     except Exception as e:
         # UI boundary: show the failure to the user instead of crashing.
         return f"❌ Ошибка: {str(e)}"
     # The output is expected to start with the prompt. Cutting it off by
     # position means a "### Класс:" marker inside the user text or repeated
     # in the continuation cannot shift which word is read as the label.
     if result.startswith(prompt):
         completion = result[len(prompt):]
     else:
         # Prompt not echoed verbatim: fall back to the marker-based parsing.
         completion = result.split("### Класс:")[-1]
     words = completion.split()
     # An empty completion has no first word; use a placeholder label
     # rather than raising IndexError.
     label = words[0].lower() if words else "unknown"
     return f"🔍 Класс: **{label}**"
 # Интерфейс Gradio
 iface = gr.Interface(
     outputs="markdown",
     title="Capybara Text Classifier 🦫",
     description="Классификация текста как 'запрос' или 'реклама' с помощью Capybara-3B",
+    # enable_api=True,           # Разрешаем вызывать данный Interface извне
+    # api_name="/classify"       # Название эндпоинта (путь для client.predict)
 )
 app, local_url, share_url = iface.launch(share=True, ssr_mode=False)