import os
import gradio as gr
from huggingface_hub import InferenceClient
from huggingface_hub.utils import HfHubHTTPError

# Mistral Instruct model available on the Hub
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

# token taken from the Space's HF_TOKEN secret (or a local env var)
token = os.environ.get("HF_TOKEN")

# Client (if token is None, the client falls back to locally cached credentials)
client = InferenceClient(model=MODEL_ID, token=token)
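
# Note: recent huggingface_hub releases also expose client.chat_completion(),
# which applies the model's chat template server-side; this app instead builds
# a plain prompt and calls the lower-level text_generation() endpoint.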

def _extract_text_from_response(resp):
    """
    Tenta extrair texto de várias possíveis formas de retorno da API.
    Retorna string sempre.
    """
    # plain string
    if isinstance(resp, str):
        return resp

    # dataclass-like objects
    try:
        # some SDK versions return an object with a 'generated_text' or 'text' attribute
        if hasattr(resp, "generated_text"):
            return getattr(resp, "generated_text") or ""
        if hasattr(resp, "text"):
            return getattr(resp, "text") or ""
    except Exception:
        pass

    # common dict-like shapes
    if isinstance(resp, dict):
        # obvious keys
        for key in ("generated_text", "generated_texts", "text", "output_text", "result"):
            if key in resp:
                v = resp[key]
                if isinstance(v, list) and v:
                    return v[0] if isinstance(v[0], str) else str(v[0])
                if isinstance(v, str):
                    return v

        # choices -> message -> content (chat-style format)
        if "choices" in resp and isinstance(resp["choices"], list) and resp["choices"]:
            first = resp["choices"][0]
            if isinstance(first, dict):
                # try message.content
                if "message" in first and isinstance(first["message"], dict) and "content" in first["message"]:
                    maybe = first["message"]["content"]
                    if isinstance(maybe, str):
                        return maybe
                # try text or content directly
                for k in ("text", "content", "generated_text"):
                    if k in first and isinstance(first[k], str):
                        return first[k]

    # fallback
    try:
        return str(resp)
    except Exception:
        return "<unable to decode response>"

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # validate the token
    if not token:
        yield "ERROR: environment variable HF_TOKEN not found. Add an HF_TOKEN secret in the Space's Settings."
        return

    # build a simple chat-style prompt
    prompt = f"{system_message}\n\n"
    for user_msg, bot_msg in history:
        if user_msg:
            prompt += f"User: {user_msg}\n"
        if bot_msg:
            prompt += f"Assistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    try:
        # non-streaming call (returns the full completion at once)
        out = client.text_generation(
            prompt,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            do_sample=True,
        )
    except HfHubHTTPError as e:
        # catch Hugging Face HTTP errors and return a readable message
        yield f"ERROR in the inference call: {e}\n(check HF_TOKEN, its permissions, and whether the model is available via the Inference API)"
        return
    except Exception as e:
        yield f"Erro inesperado ao chamar a API: {e}"
        return

    # extract the text (robust to several response formats)
    text = _extract_text_from_response(out)
    yield text
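
    # A streaming variant would look roughly like this (a sketch; assumes the
    # backend exposes token streaming through the Inference API):
    #
    #     partial = ""
    #     for chunk in client.text_generation(
    #         prompt,
    #         max_new_tokens=int(max_tokens),
    #         temperature=float(temperature),
    #         top_p=float(top_p),
    #         stream=True,
    #     ):
    #         partial += chunk
    #         yield partial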


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        # minimum 0.1: the generation backend rejects temperature=0.0 when sampling
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="Temperature"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    title="Chat com Mistral-7B",
)

if __name__ == "__main__":
    demo.launch()
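
# Local run (a sketch; the HF_TOKEN value below is a placeholder):
#   export HF_TOKEN=hf_...
#   python app.py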