Spaces:
Running
Running
File size: 2,188 Bytes
89aa92f 145e38d 5f26a0b 71b8478 382a945 145e38d b0099e4 145e38d 89aa92f 145e38d 89aa92f 145e38d 89aa92f 145e38d 8aaf228 bf2a6ba 8aaf228 145e38d bf2a6ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import login
import os
# Authenticate with the Hugging Face Hub using the token stored in the
# "hf_token" environment variable.
my_hf_token = os.getenv("hf_token")
if my_hf_token:
    login(token=my_hf_token)
else:
    # Per the huggingface_hub docs, login() without a token falls back to an
    # interactive prompt, which cannot be answered in a headless deployment.
    # Skip the call and say why, so a later download failure is easy to trace.
    print(
        "Warning: environment variable 'hf_token' is not set; "
        "skipping Hugging Face login."
    )
import gradio
print("Gradio version:", gradio.__version__)
# Model is loaded a single time at module import and reused by every request.
MODEL_REPO_ID = "google/gemma-3-1b-it-qat-q4_0-gguf"
MODEL_FILENAME = "gemma-3-1b-it-q4_0.gguf"
MODEL_N_CTX = 500  # passed straight through as n_ctx

llm = Llama.from_pretrained(
    repo_id=MODEL_REPO_ID,
    filename=MODEL_FILENAME,
    n_ctx=MODEL_N_CTX,
    verbose=False,  # keep llama.cpp logging quiet
)
# Define the function that runs the model
def chat_with_gemma(
    user_input: str,
    temperature: float,
    top_p: float,
    frequency_penalty: float,
    presence_penalty: float,
) -> str:
    """Send a single user message to the module-level ``llm`` and return its reply.

    The message is suffixed with an instruction to answer in no more than
    150 words, sent as a one-turn chat completion capped at 200 generated
    tokens, and the reply text is returned with surrounding whitespace
    stripped.

    Args:
        user_input: The message typed by the user.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        frequency_penalty: Frequency penalty forwarded to the model.
        presence_penalty: Presence penalty forwarded to the model.

    Returns:
        The model's reply, or a short hint when ``user_input`` is empty,
        whitespace-only, or ``None``.
    """
    # Guard: without real input the prompt would contain only the word-limit
    # instruction, so skip the (slow) generation and tell the user instead.
    if user_input is None or not user_input.strip():
        return "Please enter a message."

    full_prompt = f"{user_input}\nAnswer in no more than 150 words."
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": full_prompt}],
        max_tokens=200,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty,
    )
    # Chat completions return a list of choices; only the first is used.
    return response["choices"][0]["message"]["content"].strip()
# Build the page: a prompt box, four sampling sliders, an output box and a
# submit button that routes everything through chat_with_gemma.
with gr.Blocks() as demo:
    user_input = gr.Textbox(
        label="Enter your message to Gemma. If you want to use this space via api, duplicate it and look in the app.py file to see an easy (and secure) way to enable api :)",
    )

    # Sampling controls, created in the order they appear on the page.
    temperature = gr.Slider(
        minimum=0.0, maximum=1.5, value=0.7, step=0.05, label="Temperature"
    )
    top_p = gr.Slider(
        minimum=0.0, maximum=1.0, value=0.9, step=0.05, label="Top-p (Nucleus Sampling)"
    )
    freq_penalty = gr.Slider(
        minimum=0.0, maximum=2.0, value=0.4, step=0.1, label="Frequency Penalty"
    )
    pres_penalty = gr.Slider(
        minimum=0.0, maximum=2.0, value=0.2, step=0.1, label="Presence Penalty"
    )

    output = gr.Textbox(label="Gemma's Response", lines=8)
    submit_button = gr.Button(value="Submit")

    # Input order must match chat_with_gemma's positional parameters.
    sampling_controls = [temperature, top_p, freq_penalty, pres_penalty]
    submit_button.click(
        fn=chat_with_gemma,
        inputs=[user_input, *sampling_controls],
        outputs=output,
        api_name=False,  # disables the API for this endpoint
    )

demo.launch(share=True)
#demo.launch(auth=("username", "password"))
# To enable API usage: uncomment the line above, remove the current
# demo.launch(share=True) call, and enable the API on the click handler above
# (it is currently disabled with api_name=False). Set your own username and
# password to prevent someone from using your API; the placeholders are
# currently username 'username' and password 'password'.