Gemma1B / app.py
Dagriffpatchfan's picture
Update app.py
b0099e4 verified
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import login
import os
# Authenticate with the Hugging Face Hub using the token stored in the
# `hf_token` environment variable.
my_hf_token = os.getenv("hf_token")
if my_hf_token:
    login(token=my_hf_token)
else:
    # login(token=None) falls back to an interactive prompt, which cannot be
    # answered in a headless deployment. Skip the call and rely on whatever
    # credentials are already cached on the machine, if any.
    print("Warning: environment variable 'hf_token' is not set; skipping Hugging Face login.")
import gradio
# Log the installed Gradio version to make deployment issues easier to debug.
print("Gradio version:", gradio.__version__)
# Settings for the one-time model load at import time: which GGUF file to
# fetch from the Hub and how to configure the llama.cpp runtime.
_MODEL_SETTINGS = {
    "repo_id": "google/gemma-3-1b-it-qat-q4_0-gguf",
    "filename": "gemma-3-1b-it-q4_0.gguf",
    "n_ctx": 500,
    "verbose": False,  # Mute llama.cpp logs
}
# Shared model instance used by every request.
llm = Llama.from_pretrained(**_MODEL_SETTINGS)
def chat_with_gemma(
    user_input: str,
    temperature: float,
    top_p: float,
    frequency_penalty: float,
    presence_penalty: float,
) -> str:
    """Run one single-turn chat completion against the shared model.

    The user's text is sent as a single ``user`` message with an instruction
    appended asking for an answer of no more than 150 words. Generation is
    capped at 200 tokens.

    Args:
        user_input: The message typed by the user.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        frequency_penalty: Frequency penalty forwarded to the model.
        presence_penalty: Presence penalty forwarded to the model.

    Returns:
        The model's reply with surrounding whitespace removed, or a short
        prompt asking for input when ``user_input`` is empty.

    Raises:
        gr.Error: If the model rejects the request with a ``ValueError``.
    """
    # Nothing to answer: avoid spending an inference call on blank input.
    if not user_input or not user_input.strip():
        return "Please enter a message."

    full_prompt = f"{user_input}\nAnswer in no more than 150 words."
    try:
        response = llm.create_chat_completion(
            messages=[{"role": "user", "content": full_prompt}],
            max_tokens=200,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
        )
    except ValueError as err:
        # Raised by the model wrapper for requests it cannot serve, e.g. a
        # prompt that does not fit in the context window. Surface the reason
        # in the UI instead of an opaque failure.
        raise gr.Error(f"The model could not process this message: {err}") from err

    content = response["choices"][0]["message"]["content"]
    # Guard against a missing content field so .strip() cannot fail on None.
    return (content or "").strip()
# Build the web UI: one prompt box, four sampling controls, an output box and
# a submit button wired to chat_with_gemma.
with gr.Blocks() as demo:
    user_input = gr.Textbox(
        label="Enter your message to Gemma. If you want to use this space via api, duplicate it and look in the app.py file to see an easy (and secure) way to enable api :)",
    )

    # Sampling controls; each value is passed straight through to the model.
    temperature = gr.Slider(
        minimum=0.0, maximum=1.5, value=0.7, step=0.05, label="Temperature"
    )
    top_p = gr.Slider(
        minimum=0.0, maximum=1.0, value=0.9, step=0.05, label="Top-p (Nucleus Sampling)"
    )
    freq_penalty = gr.Slider(
        minimum=0.0, maximum=2.0, value=0.4, step=0.1, label="Frequency Penalty"
    )
    pres_penalty = gr.Slider(
        minimum=0.0, maximum=2.0, value=0.2, step=0.1, label="Presence Penalty"
    )

    output = gr.Textbox(label="Gemma's Response", lines=8)
    submit_button = gr.Button(value="Submit")

    # The order of `inputs` must match chat_with_gemma's parameter order.
    submit_button.click(
        fn=chat_with_gemma,
        inputs=[user_input, temperature, top_p, freq_penalty, pres_penalty],
        outputs=output,
        api_name=False,  # Disables the API for this endpoint
    )

# Start the server and request a public share link.
demo.launch(share=True)
#demo.launch(auth=("username", "password"))
# To enable API usage: enable the line above, remove the current demo.launch settings, and set the api option to true in the submit_button.click call above. Keep a username and password enabled to prevent someone from using your API. They are currently set to the default username 'username' and the default password 'password'.