Qwen-2-llamacpp

Sleeping

TobDeBer committed on Aug 16, 2024

Commit

a0e3a54

1 Parent(s): 672d17d

local models

Files changed (1) hide show

app_local.py CHANGED Viewed

@@ -10,38 +10,6 @@ from llama_cpp_agent.providers import LlamaCppPythonProvider
 from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
-from huggingface_hub import hf_hub_download
-huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
-#hf_hub_download(
-#    repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
-#    filename="qwen2-0_5b-instruct-q4_k_m.gguf",
-#    local_dir="./models"
-#)
-#hf_hub_download(
-#    repo_id="TobDeBer/gpt2-Q4_K_M-GGUF",
-#    filename="gpt2-q4_k_m.gguf",
-#    local_dir="./models"
-#)
-#hf_hub_download(
-#    repo_id="TobDeBer/Meta-Llama-3.1-8B-Instruct-Q4_K_M-GGUF",
-#    filename="meta-llama-3.1-8b-instruct-q4_k_m.gguf",
-#    local_dir="./models",
-#    token=huggingface_token
-#)
-# 5GB
-# RichardErkhov/ibm-granite_-_granite-7b-base-gguf
-# granite-7b-base.Q4_K_M.gguf
-# 4GB
-# TobDeBer/granite-8b-code-instruct-128k-Q4_K_M-GGUF
-# granite-8b-code-instruct-128k-q4_k_m.gguf
-# 5GB
 llm = None
 llm_model = None
@@ -128,8 +96,10 @@ model_selection = gr.Interface(
     inputs=[
         gr.Dropdown([
             'qwen2-0_5b-instruct-q4_k_m.gguf',
-            'gpt2-q4_k_m.gguf',
-            'meta-llama-3.1-8b-instruct-q4_k_m.gguf',
         ],
         value="qwen2-0_5b-instruct-q4_k_m.gguf",
         label="Model"

 from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
 llm = None
 llm_model = None
     inputs=[
         gr.Dropdown([
             'qwen2-0_5b-instruct-q4_k_m.gguf',
+            'mistrallite.Q4_K_M.gguf',
+            'zephyr_Q3_K.gguf',
+            'zephyr_Q3_K_M.gguf',
+            'zephyr_int8.gguf',
         ],
         value="qwen2-0_5b-instruct-q4_k_m.gguf",
         label="Model"