import os

import gradio as gr
from huggingface_hub import login
from llama_cpp import Llama

# Authenticate with the Hugging Face Hub using the Space's "hf_token" secret
# so the Gemma GGUF weights can be downloaded.
my_hf_token = os.getenv("hf_token")
login(token=my_hf_token)

print("Gradio version:", gr.__version__)

# Load the model once at startup (the GGUF weights are pulled from the Hugging Face Hub)
llm = Llama.from_pretrained(
    repo_id="google/gemma-3-1b-it-qat-q4_0-gguf",
    filename="gemma-3-1b-it-q4_0.gguf",
    n_ctx=500,      # context window size in tokens
    verbose=False,  # mute llama.cpp logs
)
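
# If the GGUF file is already on disk, an equivalent load is possible with the plain
# constructor (a sketch; the local path below is an assumption, not part of this Space):
#
#   llm = Llama(
#       model_path="./models/gemma-3-1b-it-q4_0.gguf",  # hypothetical local path
#       n_ctx=500,
#       verbose=False,
#   )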

# Define the function that runs the model
def chat_with_gemma(user_input, temperature, top_p, frequency_penalty, presence_penalty):
    """Run one chat turn through the local Gemma model and return the reply text."""
    full_prompt = f"{user_input}\nAnswer in no more than 150 words."

    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": full_prompt}],
        max_tokens=200,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty
    )

    return response["choices"][0]["message"]["content"].strip()
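
# Quick local test of the function (a sketch; uncommenting runs one generation outside the UI):
#
#   print(chat_with_gemma("What is top-p sampling?", 0.7, 0.9, 0.4, 0.2))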

with gr.Blocks() as demo:
    user_input = gr.Textbox(label="Enter your message to Gemma. To use this Space via the API, duplicate it and look in app.py for an easy (and secure) way to enable API access :)")
    temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p (Nucleus Sampling)")
    freq_penalty = gr.Slider(0.0, 2.0, value=0.4, step=0.1, label="Frequency Penalty")
    pres_penalty = gr.Slider(0.0, 2.0, value=0.2, step=0.1, label="Presence Penalty")
    output = gr.Textbox(label="Gemma's Response", lines=8)

    submit_button = gr.Button("Submit")

    submit_button.click(
        chat_with_gemma,
        inputs=[user_input, temperature, top_p, freq_penalty, pres_penalty],
        outputs=output,
        api_name=False  # hides this endpoint from the API; set a string (e.g. "chat_with_gemma") to expose it
    )

demo.launch(share=True)

# demo.launch(auth=("username", "password"))
# To enable API usage: remove the demo.launch(share=True) call above, change api_name=False to a
# named endpoint (e.g. api_name="chat_with_gemma"), and launch with the auth argument so callers
# must supply a username and password. "username" / "password" are placeholder credentials only;
# change them to prevent someone else from using your API.
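
# A sketch of how a client could then call the secured endpoint with the gradio_client package
# (assumes api_name="chat_with_gemma" was enabled and the placeholder credentials were changed):
#
#   from gradio_client import Client
#
#   client = Client("<your-username>/<your-space-name>", auth=("username", "password"))
#   reply = client.predict(
#       "Explain nucleus sampling in two sentences.",  # user_input
#       0.7,   # temperature
#       0.9,   # top_p
#       0.4,   # frequency_penalty
#       0.2,   # presence_penalty
#       api_name="/chat_with_gemma",
#   )
#   print(reply)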