Update app.py
app.py CHANGED
@@ -17,7 +17,7 @@ if not os.path.exists(MODEL_PATH):
 
 llm = Llama(
     model_path=MODEL_PATH,
-    n_ctx=4096,
+    n_ctx=4096,
     n_threads=2,
     n_threads_batch=2,
     verbose=False,
@@ -38,7 +38,8 @@ def generate_response(message, history, system_prompt, max_tokens, temperature,
         top_p=top_p,
         echo=False,
     )
-
+    history.append({"role": "assistant", "content": output["choices"][0]["text"].strip()})
+    return history, history
 
 with gr.Blocks() as iface:
     system_prompt_input = gr.Textbox(
@@ -50,7 +51,7 @@ with gr.Blocks() as iface:
         label="Max Tokens",
         minimum=32,
         maximum=4096,
-        value=
+        value=1024,
     step=32,
     )
     temperature_slider = gr.Slider(