GGUF_CPU_Test_bench

Sleeping

App Files Files Community

DreadPoor committed on Apr 5

Commit

3605ca4

verified ·

1 Parent(s): 27be339

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -90

app.py CHANGED Viewed

@@ -6,111 +6,76 @@ import sys
 import time
 import requests
 from tqdm import tqdm  # For progress bars
-from typing import Optional, List, Dict
-MODEL_PATH = "./"  # Default model path
-llm = None  # Initialize llm outside the try block
-api = HfApi() #initialize
-def download_file(url: str, local_filename: str) -> bool:
     """Downloads a file from a URL with a progress bar."""
     try:
         with requests.get(url, stream=True) as r:
-            r.raise_for_status()  # Raise an exception for bad status codes
             total_length = int(r.headers.get("content-length"))
             with open(local_filename, "wb") as f:
                 with tqdm(total=total_length, unit="B", unit_scale=True, desc=local_filename) as pbar:
                     for chunk in r.iter_content(chunk_size=8192):
-                        if chunk:  # filter out keep-alive new chunks
                             f.write(chunk)
                             pbar.update(len(chunk))
-        return True  # Return True on success
     except Exception as e:
-        error_message = f"Error downloading {url}: {e}"
-        print(error_message)
-        return False  # Return False on failure
-def get_gguf_files_from_repo(repo_url: str) -> List[Dict[str, str]]:
     """
-    Retrieves a list of GGUF files from a Hugging Face repository.
-    Args:
-        repo_url (str): The URL of the Hugging Face repository.
-    Returns:
-        List[Dict[str, str]]: A list of dictionaries, where each dictionary contains the file name
-                            and its full URL. Returns an empty list if no GGUF files are found or an error occurs.
     """
-    gguf_files: List[Dict[str, str]] = []
     try:
         repo_id = repo_url.replace("https://huggingface.co/", "")
         files = api.list_repo_files(repo_id=repo_id, repo_type="model")
         for file_info in files:
-            if file_info.name.endswith(".gguf"):
-                file_url = f"https://huggingface.co/{repo_id}/resolve/main/{file_info.name}"
-                gguf_files.append({"name": file_info.name, "url": file_url})
-        return gguf_files
     except Exception as e:
-        print(f"Error retrieving GGUF files from {repo_url}: {e}")
-        return []
-def find_best_gguf_model(repo_url: str, quant_type: str = "Q4_K_M") -> Optional[str]:
-    """
-    Intelligently finds the "best" GGUF model file from a Hugging Face repository,
-    prioritizing the specified quantization type.
-    Args:
-        repo_url (str): The URL of the Hugging Face repository.
-        quant_type (str): The desired quantization type (e.g., "Q4_K_M", "Q8_0").
-            Defaults to "Q4_K_M".
-    Returns:
-        Optional[str]: The URL of the best GGUF model file, or None if no suitable file is found.
-    """
-    gguf_files = get_gguf_files_from_repo(repo_url)
-    if not gguf_files:
         return None
-    # 1. Priority to exact quant type match
-    for file_data in gguf_files:
-        if quant_type.lower() in file_data["name"].lower():
-            print(f"Found exact match: {file_data['url']}")
-            return file_data["url"]
-    # 2.  Fallback:  Find any GGUF file (if no exact match) -  Less ideal, but handles cases where the user doesn't specify.
-    if gguf_files:
-        print(f"Found a  GGUF file: {gguf_files[0]['url']}")
-        return gguf_files[0]["url"]
-    print(f"No suitable GGUF model found in {repo_url} for quant type {quant_type}")
-    return None
-def load_model(repo_url: Optional[str] = None, quant_type: str = "Q4_K_M") -> str:
-    """Loads the Llama model, downloading the specified  version from a repository."""
     global llm
-    global MODEL_PATH  # Use the global MODEL_PATH
     try:
         if repo_url:
-            # 1. Find the  model URL
-            model_url = find_best_gguf_model(repo_url, quant_type)
             if model_url is None:
-                return f"No suitable model found in the repository."
-            # 2. Download the model
             print(f"Downloading model from {model_url}...")
             downloaded_model_name = os.path.basename(model_url)
             download_success = download_file(model_url, downloaded_model_name)
             if not download_success:
                 return "Model download failed."
-            model_path = downloaded_model_name #set model path
         else:
             model_path = MODEL_PATH + MODEL_FILENAME
         if not os.path.exists(model_path):
-            return f"Model file not found at {model_path}."
         print(f"Loading model from {model_path}...")
         llm = Llama(
@@ -128,16 +93,11 @@ def load_model(repo_url: Optional[str] = None, quant_type: str = "Q4_K_M") -> st
         llm = None
         return error_message
-DEFAULT_SYSTEM_PROMPT = "You are Doll, a smart and capable AI; A silly, obliging and affable slave, dedicated to serving and caring for your master."
-def generate_response(message: str, history: List[List[str]], system_prompt: str = DEFAULT_SYSTEM_PROMPT, temperature: float = 0.9, top_p: float = 0.9):
     """Generates a response from the Llama model."""
     if llm is None:
         yield "Model failed to load. Please check the console for error messages."
         return
     messages = [{"role": "system", "content": system_prompt}]
     for human, assistant in history:
         messages.append({"role": "user", "content": human})
@@ -160,29 +120,26 @@ def generate_response(message: str, history: List[List[str]], system_prompt: str
         print(error_message)
         yield error_message
-def chat(message: str, history: List[List[str]], system_prompt: str, temperature: float, top_p: float) -> str:
     """Wrapper function for the chat interface."""
     return generate_response(message, history, system_prompt, temperature, top_p)
 def main():
     """Main function to load the model and launch the Gradio interface."""
-    # Use a function to load the model, and pass the repo_url from the input.
-    def load_model_and_launch(repo_url: str, quant_type: str):
         model_load_message = load_model(repo_url, quant_type)
         return model_load_message
     with gr.Blocks() as iface:
         gr.Markdown("## llama.cpp Chat")
-        status_label = gr.Label(label="Model Loading Status")  # Add a status label
         repo_url_input = gr.Textbox(label="Repository URL", placeholder="Enter repository URL")
         quant_type_input = gr.Dropdown(
             label="Quantization Type",
-            choices=["Q4_K_M", "Q8_0", "Q4_K_S"],  # Add more options as needed
-            value="Q4_K_M",  # Default value
         )
-        load_button = gr.Button("Load Model")  # added load button
         chat_interface = gr.ChatInterface(
             fn=chat,
             description="Test a GGUF model. Chats aren't persistent.",
@@ -193,14 +150,13 @@ def main():
             ],
             cache_examples=False,
         )
-        load_button.click(  # on click, load the model.
             load_model_and_launch,
-            inputs=[repo_url_input, quant_type_input],  # Get input from the textbox
-            outputs=status_label,  # update the status label.
         )
     iface.launch()
 if __name__ == "__main__":
-    main()

 import time
 import requests
 from tqdm import tqdm  # For progress bars
+MODEL_PATH = "./"
+llm = None
+api = HfApi()
+DEFAULT_SYSTEM_PROMPT = "You are Doll, a smart and capable AI; A silly, obliging and affable slave, dedicated to serving and caring for your master."
+def download_file(url, local_filename):
     """Downloads a file from a URL with a progress bar."""
     try:
         with requests.get(url, stream=True) as r:
+            r.raise_for_status()
             total_length = int(r.headers.get("content-length"))
             with open(local_filename, "wb") as f:
                 with tqdm(total=total_length, unit="B", unit_scale=True, desc=local_filename) as pbar:
                     for chunk in r.iter_content(chunk_size=8192):
+                        if chunk:
                             f.write(chunk)
                             pbar.update(len(chunk))
+        return True
     except Exception as e:
+        print(f"Error downloading {url}: {e}")
+        return False
+def find_quantized_model_url(repo_url, quant_type="Q4_K_M"):
     """
+    Finds the URL of a specific quantized GGUF model file within a Hugging Face repository.
     """
     try:
         repo_id = repo_url.replace("https://huggingface.co/", "")
         files = api.list_repo_files(repo_id=repo_id, repo_type="model")
         for file_info in files:
+            if file_info.name.endswith(".gguf") and quant_type.lower() in file_info.name.lower():
+                model_url = f"https://huggingface.co/{repo_id}/resolve/main/{file_info.name}"
+                print(f"Found quantized model URL: {model_url}")
+                return model_url
+        print(f"Quantized model with type {quant_type} not found in repository {repo_url}")
+        return None
     except Exception as e:
+        print(f"Error finding quantized model: {e}")
         return None
+def load_model(repo_url=None, quant_type="Q4_K_M"):
+    """Loads the Llama model, downloading the specified quantized version from a repository."""
     global llm
+    global MODEL_PATH
     try:
         if repo_url:
+            model_url = find_quantized_model_url(repo_url, quant_type)
             if model_url is None:
+                return f"Quantized model ({quant_type}) not found in the repository."
             print(f"Downloading model from {model_url}...")
             downloaded_model_name = os.path.basename(model_url)
             download_success = download_file(model_url, downloaded_model_name)
             if not download_success:
                 return "Model download failed."
+            model_path = downloaded_model_name
         else:
             model_path = MODEL_PATH + MODEL_FILENAME
         if not os.path.exists(model_path):
+            if not repo_url: # only try to download if a repo_url was not provided
+                hf_hub_download(
+                    repo_id=MODEL_REPO,
+                    filename=MODEL_FILENAME,
+                    repo_type="model",
+                    local_dir=".",
+                )
+            if not os.path.exists(model_path): # check again after attempting download
+                return f"Model file not found at {model_path}."
         print(f"Loading model from {model_path}...")
         llm = Llama(
         llm = None
         return error_message
+def generate_response(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT, temperature=0.7, top_p=0.9):
     """Generates a response from the Llama model."""
     if llm is None:
         yield "Model failed to load. Please check the console for error messages."
         return
     messages = [{"role": "system", "content": system_prompt}]
     for human, assistant in history:
         messages.append({"role": "user", "content": human})
         print(error_message)
         yield error_message
+def chat(message, history, system_prompt, temperature, top_p):
     """Wrapper function for the chat interface."""
     return generate_response(message, history, system_prompt, temperature, top_p)
 def main():
     """Main function to load the model and launch the Gradio interface."""
+    def load_model_and_launch(repo_url, quant_type):
         model_load_message = load_model(repo_url, quant_type)
         return model_load_message
     with gr.Blocks() as iface:
         gr.Markdown("## llama.cpp Chat")
+        status_label = gr.Label(label="Model Loading Status")
         repo_url_input = gr.Textbox(label="Repository URL", placeholder="Enter repository URL")
         quant_type_input = gr.Dropdown(
             label="Quantization Type",
+            choices=["Q4_K_M", "Q6", "Q4_K_S"],
+            value="Q4_K_M",
         )
+        load_button = gr.Button("Load Model")
         chat_interface = gr.ChatInterface(
             fn=chat,
             description="Test a GGUF model. Chats aren't persistent.",
             ],
             cache_examples=False,
         )
+        load_button.click(
             load_model_and_launch,
+            inputs=[repo_url_input, quant_type_input],
+            outputs=status_label,
         )
     iface.launch()
 if __name__ == "__main__":
+    main()