Spaces:

dylanebert
/

huggingface-mcp

Running

File size: 10,287 Bytes

import json
from typing import Literal
from datetime import datetime

import gradio as gr
from huggingface_hub import list_models, model_info, hf_hub_download


def _split_csv(value):
    """Turn a comma-separated string (or a list of strings) into a clean list.

    Whitespace around each entry is stripped and empty entries are dropped.
    Returns None when nothing usable remains, so that no filter value is
    forwarded for that argument.
    """
    if not value:
        return None
    parts = value.split(",") if isinstance(value, str) else value
    cleaned = [part.strip() for part in parts if part and part.strip()]
    return cleaned or None


def search_models(
    search: str = None,
    library: str = None,
    tags: str = None,
    pipeline_tag: str = None,
    sort: Literal[
        "trending_score", "last_modified", "created_at", "downloads", "likes"
    ] = "trending_score",
    direction: Literal["descending", "ascending"] = "descending",
    limit: int = 20,
) -> str:
    """
    Search models on Hugging Face Hub.

    Use this tool to search for models by name, tags, or other filters, and to get a list of model IDs.
    This is the first step when you need to find a specific model before retrieving its details.

    Parameters:
        search (str, optional): A string to search for in model IDs or names (e.g., "deepseek").
        library (str, optional): Comma-separated libraries the models use (e.g., "pytorch,tensorflow").
        tags (str, optional): Comma-separated tags to filter models by (e.g., "text-generation,llama").
        pipeline_tag (str, optional): Filter by pipeline tag (e.g., "text-generation").
        sort (Literal["trending_score", "last_modified", "created_at", "downloads", "likes"], default="trending_score"): Sort models by the specified key.
        direction (Literal["descending", "ascending"], default="descending"): Sort direction.
        limit (int, default=20): Maximum number of models to return.

    Returns:
        str: A JSON-encoded list of model IDs matching the search criteria, or a string starting with "Error: " if the search fails.

    Examples:
        - To find trending models: search_models(sort="trending_score", limit=10)
        - To search for models related to "deepseek": search_models(search="deepseek", sort="likes", limit=5)
        - To filter by tag: search_models(tags="text-generation", pipeline_tag="text-generation")
    """
    try:
        # Normalise every argument before touching the Hub client so that bad
        # input is reported as an input error rather than a request failure.
        library_filter = _split_csv(library)
        tag_filter = _split_csv(tags)
        # UI text boxes submit "" for "not set"; forward None instead.
        search_term = search or None
        pipeline_filter = pipeline_tag or None
        # The Hub client takes -1 for descending; any other value is ascending.
        sort_direction = -1 if direction == "descending" else 1
        # Numeric UI inputs may arrive as floats (e.g. 20.0); the limit must be
        # an integer. None is preserved and means "no limit".
        max_results = int(limit) if limit is not None else None

        models = list_models(
            library=library_filter,
            tags=tag_filter,
            search=search_term,
            pipeline_tag=pipeline_filter,
            sort=sort,
            direction=sort_direction,
            limit=max_results,
        )
        return json.dumps([model.id for model in models])
    except Exception as e:
        # Tool contract: failures are returned as text, never raised.
        return f"Error: {e}"


def _entries_to_jsonable(entries):
    """Convert a list of Hub entries (strings or objects) to JSON-friendly values.

    Strings are kept as-is. Any other object is flattened to a dict of its
    public attributes, with datetime values rendered as strings.
    """
    converted = []
    for entry in entries:
        if isinstance(entry, str):
            converted.append(entry)
        else:
            converted.append({
                key: str(value) if isinstance(value, datetime) else value
                for key, value in vars(entry).items()
                if not key.startswith("_")
            })
    return converted


def get_model_info(model_id: str) -> str:
    """
    Get structured metadata about a model on the Hugging Face Hub.

    Use this when you need specific fields like downloads, tags, or other metadata.
    For comprehensive model information, use `get_model_card`.

    This tool requires the exact model ID, which can be obtained using `search_models`.
    If you have a partial name or tag, use `search_models` first to find the exact ID.

    Parameters:
        model_id (str): The exact model ID in the format "organization/model-name" (e.g., "DeepSeek/DeepSeek-R1").

    Returns:
        str: A JSON-encoded object with the fields that are available for the model, or a string starting with "Error: " if the model_id is invalid or not found (use search_models to find the correct ID). Possible fields:
            - id: The model ID
            - author: The author of the model
            - created_at: The creation date
            - last_modified: The last modified date
            - downloads: Number of downloads
            - likes: Number of likes
            - tags: List of tags
            - pipeline_tag: The pipeline tag
            - library_name: The library name
            - license: The model license
            - base_model: The base model (if available)
            - datasets: Datasets used to train the model (if available)
            - siblings: List of repository files (if available)
            - spaces: List of spaces using this model (if available)
            - xet_enabled: Whether XET is enabled (if available)

    Example:
        - First, find the model ID: search_models(search="deepseek", sort="likes", limit=1)
        - Then, get the model info: get_model_info("DeepSeek/DeepSeek-R1")
    """
    # Reject missing/blank IDs up front with a message that says what to do.
    if not isinstance(model_id, str) or not model_id.strip():
        return (
            "Error: model_id must be a non-empty string. "
            "Use search_models to find the correct ID."
        )

    # Fields copied straight from the model object, in output order.
    top_level_fields = (
        "id",
        "author",
        "created_at",
        "last_modified",
        "downloads",
        "likes",
        "tags",
        "pipeline_tag",
        "library_name",
    )
    # These are rendered with str() so they can be JSON-encoded.
    timestamp_fields = ("created_at", "last_modified")
    # Fields read from the model card metadata rather than the model itself.
    card_fields = ("license", "base_model", "datasets")

    try:
        model = model_info(model_id.strip())
        result = {}

        # Only fields that exist and are not None end up in the output.
        for name in top_level_fields:
            value = getattr(model, name, None)
            if value is not None:
                result[name] = str(value) if name in timestamp_fields else value

        card_data = getattr(model, "card_data", None)
        if card_data is not None:
            for name in card_fields:
                value = getattr(card_data, name, None)
                if value is not None:
                    result[name] = value

        for name in ("siblings", "spaces"):
            entries = getattr(model, name, None)
            if entries is not None:
                result[name] = _entries_to_jsonable(entries)

        xet_enabled = getattr(model, "xet_enabled", None)
        if xet_enabled is not None:
            result["xet_enabled"] = xet_enabled

        # default=str is a deliberate best-effort fallback: one value that is
        # not natively JSON-serializable should not discard the whole answer.
        return json.dumps(result, default=str)
    except Exception as e:
        # Tool contract: failures are returned as text, never raised.
        return f"Error: {e}"


def get_model_card(model_id: str) -> str:
    """
    Fetch the full model card (README.md) of a model on Hugging Face Hub.

    Choose this tool when you want the complete written documentation of a model: usage examples, limitations, and so on.
    If you only need structured metadata, call `get_model_info` instead.

    The exact model ID is required; `search_models` returns such IDs.
    Starting from a partial name or a tag, run `search_models` first to resolve the exact ID.

    Args:
        model_id (str): The model ID in the format "organization/model-name" (e.g., "DeepSeek/DeepSeek-R1").

    Returns:
        str: The markdown content of the model card, or a string starting with "Error: " if it could not be retrieved.

    Example:
        - First, find the model ID: search_models(search="deepseek", sort="likes", limit=1)
        - Then, get the model card: get_model_card("DeepSeek/DeepSeek-R1")
    """
    try:
        # Resolve README.md to a local file path, then read it as UTF-8 text.
        readme_path = hf_hub_download(model_id, "README.md")
        with open(readme_path, "r", encoding="utf-8") as readme:
            return readme.read()
    except Exception as err:
        # Download and read failures alike are reported as text.
        return f"Error: {err}"


# HTML blurb passed as `description=` to every gr.Interface below, so the same
# tool overview and Cursor setup steps appear on each tab. The SSE URL inside
# the sample config is hard-coded to this Space's address.
description_html = """
<h1>🤗 Hugging Face MCP Server</h1>
<p>Use AI Agents to interact with the Hugging Face Hub.</p>

<h3>Available tools:</h3>
<ul>
    <li><strong>search_models</strong>: Find models by name, tags, etc.</li>
    <li><strong>get_model_info</strong>: Get model metadata</li>
    <li><strong>get_model_card</strong>: View model documentation</li>
</ul>

<h3>Setup (for Cursor):</h3>
<ol>
    <li>Click <strong>"Use via API"</strong> → <strong>"MCP"</strong> tab</li>
    <li>Copy SSE config:
    <pre>
{
  "mcpServers": {
    "gradio": {
      "url": "https://dylanebert-huggingface-mcp.hf.space/gradio_api/mcp/sse"
    }
  }
}
</pre>
    </li>
    <li>Paste in: <strong>Cursor Settings</strong> → <strong>MCP</strong> → <strong>Add Global MCP Server</strong></li>
    <li>Chat in agent mode with prompts like: "list top 10 trending models on Hugging Face"</li>
</ol>
<hr>
"""

# One gr.Interface per tool function; each becomes a tab in the app below.
# NOTE: every assignment rebinds the tool function's name to the Interface that
# wraps it. The function itself has already been captured through `fn=`, but
# after this point the module-level names refer to Interface objects.
search_models = gr.Interface(
    fn=search_models,
    description=description_html,
    inputs=[
        # Text boxes default to "", which the tool treats as "not set".
        gr.Textbox(label="search", value=""),
        gr.Textbox(label="library", value=""),
        gr.Textbox(label="tags", value=""),
        gr.Textbox(label="pipeline_tag", value=""),
        gr.Radio(
            label="sort",
            choices=["trending_score", "last_modified", "created_at", "downloads", "likes"],
            value="trending_score",
        ),
        gr.Radio(
            label="direction",
            choices=["descending", "ascending"],
            value="descending",
        ),
        # precision=0 makes Gradio round the value and pass it as an int,
        # matching the `limit: int` parameter of the wrapped function.
        gr.Number(label="limit", value=20, precision=0),
    ],
    outputs="text",
)

get_model_info = gr.Interface(
    fn=get_model_info,
    description=description_html,
    inputs=[
        gr.Textbox(label="model_id", value=""),
    ],
    outputs="text",
)

get_model_card = gr.Interface(
    fn=get_model_card,
    description=description_html,
    inputs=[
        gr.Textbox(label="model_id", value=""),
    ],
    outputs="text",
)

# Combine the three interfaces into one tabbed app; tab names mirror the tools.
demo = gr.TabbedInterface(
    interface_list=[search_models, get_model_info, get_model_card],
    tab_names=["search_models", "get_model_info", "get_model_card"],
)
# mcp_server=True asks Gradio to serve the app as an MCP server as well.
demo.launch(mcp_server=True)