Spaces:
Running
Running
import contextlib
import io
import os

import gradio as gr
from huggingface_hub import login
from llama_cpp import Llama
# Authenticate with the Hugging Face Hub before the model is fetched.
# The token is read from the HF_TOKEN environment variable (on a Space this
# is configured as a secret) instead of relying on an undefined global.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "HF_TOKEN environment variable is not set; "
        "add it as a secret so the app can log in to the Hugging Face Hub."
    )

# Discard anything login() writes to stdout so it does not end up in the logs.
with contextlib.redirect_stdout(io.StringIO()):
    login(token=hf_token)
# Build the single, module-level Llama instance that every request reuses.
# The options are collected first so the model source and runtime settings
# can be read at a glance.
_MODEL_OPTIONS = {
    "repo_id": "google/gemma-3-1b-it-qat-q4_0-gguf",
    "filename": "gemma-3-1b-it-q4_0.gguf",
    "n_ctx": 32768,
    "verbose": False,  # Mute llama.cpp logs
}
llm = Llama.from_pretrained(**_MODEL_OPTIONS)
def chat_with_gemma(user_input, temperature, top_p, frequency_penalty, presence_penalty):
    """Send one user message to the loaded model and return its reply text.

    The message is suffixed with an instruction to answer in at most
    150 words, sent as a single-turn chat completion capped at 200 tokens,
    and the first choice's content is returned with surrounding whitespace
    stripped.

    Args:
        user_input: Text entered by the user.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        frequency_penalty: Frequency penalty forwarded to the model.
        presence_penalty: Presence penalty forwarded to the model.

    Returns:
        The stripped reply string from the first completion choice.
    """
    prompt = f"{user_input}\nAnswer in no more than 150 words."
    sampling = {
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
    }
    completion = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=200,
        **sampling,
    )
    reply = completion["choices"][0]["message"]["content"]
    return reply.strip()
# Build the Gradio UI: one text prompt plus four sampling sliders, in the
# same order as chat_with_gemma's parameters, feeding a single output box.
demo = gr.Interface(
    fn=chat_with_gemma,
    inputs=[
        gr.Textbox(label="Enter your message to Gemma"),
        gr.Slider(
            minimum=0.0,
            maximum=1.5,
            value=0.7,
            step=0.05,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top-p (Nucleus Sampling)",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=2.0,
            value=0.4,
            step=0.1,
            label="Frequency Penalty",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=2.0,
            value=0.2,
            step=0.1,
            label="Presence Penalty",
        ),
    ],
    outputs=gr.Textbox(label="Gemma's Response", lines=8),
    title="Talk to Gemma",
    description="Generate short responses using Google's Gemma model with adjustable settings. If you want to use this space via api, duplicate it, then look at the end of the app.py file to find a commented out line that you can renable to turn on api useage and protect it with a username and password so trolls don't ruin your api :)",
)
# Start the web app. share=True asks Gradio for a public share link;
# enable_api=False is meant to keep programmatic API access turned off.
# NOTE(review): confirm the installed Gradio version accepts `enable_api`.
demo.launch(
    share=True,
    enable_api=False,
)

# To enable API usage, replace the launch call above with the line below.
# Change the placeholder credentials ('username' / 'password') first so that
# nobody else can use your API.
# demo.launch(auth=("username", "password"))