GGUF_CPU_Test_bench

Sleeping

App Files Files Community

DreadPoor committed on Apr 5

Commit

27be339

verified ·

1 Parent(s): 370ab1e

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -25

app.py CHANGED Viewed

@@ -6,12 +6,13 @@ import sys
 import time
 import requests
 from tqdm import tqdm  # For progress bars
 MODEL_PATH = "./"  # Default model path
 llm = None  # Initialize llm outside the try block
 api = HfApi() #initialize
-def download_file(url, local_filename):
     """Downloads a file from a URL with a progress bar."""
     try:
         with requests.get(url, stream=True) as r:
@@ -28,44 +29,74 @@ def download_file(url, local_filename):
         error_message = f"Error downloading {url}: {e}"
         print(error_message)
         return False  # Return False on failure
-def find_quantized_model_url(repo_url, quant_type="Q4_K_M"):
     """
-    Finds the URL of a specific quantized GGUF model file within a Hugging Face repository.
     Args:
         repo_url (str): The URL of the Hugging Face repository.
-        quant_type (str): The quantization type (e.g., "Q4_K_M", "Q8_0").  Defaults to "Q4_K_M".
     Returns:
-        str: The URL of the quantized model file, or None if not found.
     """
     try:
         repo_id = repo_url.replace("https://huggingface.co/", "")
         files = api.list_repo_files(repo_id=repo_id, repo_type="model")
         for file_info in files:
-            if file_info.name.endswith(".gguf") and quant_type.lower() in file_info.name.lower():
-                # Construct the full URL.  This is crucial.
-                model_url = f"https://huggingface.co/{repo_id}/resolve/main/{file_info.name}"
-                print(f"Found quantized model URL: {model_url}")
-                return model_url
-        print(f"Quantized model with type {quant_type} not found in repository {repo_url}")
-        return None
     except Exception as e:
-        print(f"Error finding quantized model: {e}")
         return None
-def load_model(repo_url=None, quant_type="Q4_K_M"):
-    """Loads the Llama model, downloading the specified quantized version from a repository."""
     global llm
     global MODEL_PATH  # Use the global MODEL_PATH
     try:
         if repo_url:
-            # 1. Find the quantized model URL
-            model_url = find_quantized_model_url(repo_url, quant_type)
             if model_url is None:
-                return f"Quantized model ({quant_type}) not found in the repository."
             # 2. Download the model
             print(f"Downloading model from {model_url}...")
@@ -101,7 +132,7 @@ def load_model(repo_url=None, quant_type="Q4_K_M"):
 DEFAULT_SYSTEM_PROMPT = "You are Doll, a smart and capable AI; A silly, obliging and affable slave, dedicated to serving and caring for your master."
-def generate_response(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT, temperature=0.9, top_p=0.9):
     """Generates a response from the Llama model."""
     if llm is None:
         yield "Model failed to load. Please check the console for error messages."
@@ -130,15 +161,15 @@ def generate_response(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT, tem
         yield error_message
-def chat(message, history, system_prompt, temperature, top_p):
     """Wrapper function for the chat interface."""
     return generate_response(message, history, system_prompt, temperature, top_p)
 def main():
     """Main function to load the model and launch the Gradio interface."""
-    # Use a function to load the model, and pass the model_url from the text box.
-    def load_model_and_launch(repo_url, quant_type):
         model_load_message = load_model(repo_url, quant_type)
         return model_load_message
@@ -148,7 +179,7 @@ def main():
         repo_url_input = gr.Textbox(label="Repository URL", placeholder="Enter repository URL")
         quant_type_input = gr.Dropdown(
             label="Quantization Type",
-            choices=["Q4_K_M", "Q6_K", "Q4_K_S"],  # Add more options as needed
             value="Q4_K_M",  # Default value
         )
         load_button = gr.Button("Load Model")  # added load button
@@ -172,4 +203,4 @@ def main():
 if __name__ == "__main__":
-    main()

import time
from typing import Dict, Iterator, List, Optional

import requests
from tqdm import tqdm  # For progress bars
 MODEL_PATH = "./"  # Default model path
 llm = None  # Initialize llm outside the try block
 api = HfApi() #initialize
+def download_file(url: str, local_filename: str) -> bool:
     """Downloads a file from a URL with a progress bar."""
     try:
         with requests.get(url, stream=True) as r:
         error_message = f"Error downloading {url}: {e}"
         print(error_message)
         return False  # Return False on failure
def get_gguf_files_from_repo(repo_url: str) -> List[Dict[str, str]]:
    """
    Retrieves a list of GGUF files from a Hugging Face repository.

    Args:
        repo_url (str): The URL of the Hugging Face repository, e.g.
            "https://huggingface.co/<user>/<repo>". Only the
            "https://huggingface.co/" prefix is removed, so a bare
            "<user>/<repo>" id passes through unchanged.

    Returns:
        List[Dict[str, str]]: One dictionary per GGUF file, with the keys "name"
            (the file path inside the repository) and "url" (its download URL on
            the "main" revision). Returns an empty list if no GGUF files are
            found or an error occurs.
    """
    try:
        # Tolerate surrounding whitespace and a trailing slash pasted along
        # with the URL; neither is part of a valid repository id.
        repo_id = repo_url.strip().replace("https://huggingface.co/", "").strip("/")
        # list_repo_files() yields plain path strings, not objects with a
        # ``.name`` attribute, so each entry is used directly as the file name.
        filenames = api.list_repo_files(repo_id=repo_id, repo_type="model")
        return [
            {
                "name": filename,
                "url": f"https://huggingface.co/{repo_id}/resolve/main/{filename}",
            }
            for filename in filenames
            if filename.lower().endswith(".gguf")
        ]
    except Exception as e:
        # Best-effort lookup: any failure (bad URL, network, API error) is
        # reported on the console and surfaced to the caller as "no files".
        print(f"Error retrieving GGUF files from {repo_url}: {e}")
        return []
def find_best_gguf_model(repo_url: str, quant_type: str = "Q4_K_M") -> Optional[str]:
    """
    Finds the "best" GGUF model file from a Hugging Face repository,
    prioritizing the specified quantization type.

    Selection order:
      1. The first GGUF file whose name contains ``quant_type``
         (case-insensitive substring match).
      2. Otherwise the first GGUF file listed for the repository.

    Args:
        repo_url (str): The URL of the Hugging Face repository.
        quant_type (str): The desired quantization type (e.g., "Q4_K_M", "Q8_0").
            Defaults to "Q4_K_M".

    Returns:
        Optional[str]: The URL of the chosen GGUF model file, or None if the
            repository listing yielded no GGUF files.
    """
    gguf_files = get_gguf_files_from_repo(repo_url)
    if not gguf_files:
        # This is the only path on which nothing can be returned, so the
        # "not found" message belongs here rather than after the fallback.
        print(f"No suitable GGUF model found in {repo_url} for quant type {quant_type}")
        return None

    # 1. Priority to a file whose name mentions the requested quant type.
    #    A None quant_type is treated as "no preference".
    wanted = (quant_type or "").lower()
    for file_data in gguf_files:
        if wanted in file_data["name"].lower():
            print(f"Found exact match: {file_data['url']}")
            return file_data["url"]

    # 2. Fallback: no file matched the requested quant type, so take the
    #    first GGUF file available. The list is known to be non-empty here.
    fallback = gguf_files[0]
    print(f"Found a  GGUF file: {fallback['url']}")
    return fallback["url"]
+def load_model(repo_url: Optional[str] = None, quant_type: str = "Q4_K_M") -> str:
+    """Loads the Llama model, downloading the specified  version from a repository."""
     global llm
     global MODEL_PATH  # Use the global MODEL_PATH
     try:
         if repo_url:
+            # 1. Find the  model URL
+            model_url = find_best_gguf_model(repo_url, quant_type)
             if model_url is None:
+                return f"No suitable model found in the repository."
             # 2. Download the model
             print(f"Downloading model from {model_url}...")
 DEFAULT_SYSTEM_PROMPT = "You are Doll, a smart and capable AI; A silly, obliging and affable slave, dedicated to serving and caring for your master."
+def generate_response(message: str, history: List[List[str]], system_prompt: str = DEFAULT_SYSTEM_PROMPT, temperature: float = 0.9, top_p: float = 0.9):
     """Generates a response from the Llama model."""
     if llm is None:
         yield "Model failed to load. Please check the console for error messages."
         yield error_message
def chat(message: str, history: List[List[str]], system_prompt: str, temperature: float, top_p: float) -> Iterator[str]:
    """Wrapper function for the chat interface.

    Streams every chunk produced by ``generate_response`` for the given
    message, history and sampling settings.

    This is deliberately a generator function (``yield from``) rather than a
    plain function returning the generator object: callers that decide whether
    a handler streams by inspecting the function itself (e.g. with
    ``inspect.isgeneratorfunction``) then recognise it as streaming.
    NOTE(review): confirm how the UI wires this handler up; that code is not
    visible here.

    Args:
        message (str): The latest user message.
        history (List[List[str]]): Previous conversation turns, passed through
            unchanged.
        system_prompt (str): System prompt passed through to the model.
        temperature (float): Sampling temperature.
        top_p (float): Nucleus sampling threshold.

    Yields:
        str: Each value yielded by ``generate_response``.
    """
    yield from generate_response(message, history, system_prompt, temperature, top_p)
 def main():
     """Main function to load the model and launch the Gradio interface."""
+    # Use a function to load the model, and pass the repo_url from the input.
+    def load_model_and_launch(repo_url: str, quant_type: str):
         model_load_message = load_model(repo_url, quant_type)
         return model_load_message
         repo_url_input = gr.Textbox(label="Repository URL", placeholder="Enter repository URL")
         quant_type_input = gr.Dropdown(
             label="Quantization Type",
+            choices=["Q4_K_M", "Q8_0", "Q4_K_S"],  # Add more options as needed
             value="Q4_K_M",  # Default value
         )
         load_button = gr.Button("Load Model")  # added load button
 if __name__ == "__main__":
+    main()