Commit
·
78faeae
1
Parent(s):
9978e32
Add data access via api
Browse files
Signed-off-by: Aivin V. Solatorio <avsolatorio@gmail.com>
- wdi_mcp_server.py +121 -41
wdi_mcp_server.py
CHANGED
@@ -1,14 +1,19 @@
|
|
1 |
from mcp.server.fastmcp import FastMCP
|
2 |
import json
|
3 |
-
|
4 |
-
import
|
5 |
-
import
|
6 |
-
import
|
|
|
7 |
import pandas as pd
|
8 |
import torch
|
9 |
-
|
|
|
|
|
|
|
10 |
from sentence_transformers import SentenceTransformer
|
11 |
-
|
|
|
12 |
from pydantic import BaseModel, Field
|
13 |
|
14 |
|
@@ -23,8 +28,8 @@ def get_best_torch_device():
|
|
23 |
|
24 |
device = get_best_torch_device()
|
25 |
|
26 |
-
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
27 |
-
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace")
|
28 |
|
29 |
|
30 |
mcp = FastMCP("huggingface_spaces_wdi_data")
|
@@ -64,39 +69,39 @@ def get_top_k(query: str, top_k: int = 10, fields: list[str] | None = None):
|
|
64 |
return df.iloc[idx][fields].to_dict("records")
|
65 |
|
66 |
|
67 |
-
@mcp.tool()
|
68 |
-
async def generate_image(prompt: str, width: int = 512, height: int = 512) -> str:
|
69 |
-
|
70 |
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
|
101 |
|
102 |
class SearchOutput(BaseModel):
|
@@ -141,11 +146,86 @@ async def indicator_info(indicator_ids: list[str]) -> list[DetailedOutput]:
|
|
141 |
|
142 |
return [
|
143 |
DetailedOutput(**out)
|
144 |
-
for out in df.loc[indicator_ids][
|
145 |
-
"
|
146 |
-
)
|
147 |
]
|
148 |
|
149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
151 |
mcp.run(transport="stdio")
|
|
|
|
1 |
from mcp.server.fastmcp import FastMCP
|
2 |
import json
|
3 |
+
|
4 |
+
# import sys
|
5 |
+
# import io
|
6 |
+
# import time
|
7 |
+
# import numpy as np
|
8 |
import pandas as pd
|
9 |
import torch
|
10 |
+
import httpx
|
11 |
+
|
12 |
+
|
13 |
+
from typing import Optional, Any
|
14 |
from sentence_transformers import SentenceTransformer
|
15 |
+
|
16 |
+
# from gradio_client import Client
|
17 |
from pydantic import BaseModel, Field
|
18 |
|
19 |
|
|
|
28 |
|
29 |
device = get_best_torch_device()
|
30 |
|
31 |
+
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
32 |
+
# sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace")
|
33 |
|
34 |
|
35 |
mcp = FastMCP("huggingface_spaces_wdi_data")
|
|
|
69 |
return df.iloc[idx][fields].to_dict("records")
|
70 |
|
71 |
|
72 |
+
# @mcp.tool()
|
73 |
+
# async def generate_image(prompt: str, width: int = 512, height: int = 512) -> str:
|
74 |
+
# """Generate an image using SanaSprint model.
|
75 |
|
76 |
+
# Args:
|
77 |
+
# prompt: Text prompt describing the image to generate
|
78 |
+
# width: Image width (default: 512)
|
79 |
+
# height: Image height (default: 512)
|
80 |
+
# """
|
81 |
+
# client = Client("https://ysharma-sanasprint.hf.space/")
|
82 |
|
83 |
+
# try:
|
84 |
+
# result = client.predict(
|
85 |
+
# prompt, "0.6B", 0, True, width, height, 4.0, 2, api_name="/infer"
|
86 |
+
# )
|
87 |
|
88 |
+
# if isinstance(result, list) and len(result) >= 1:
|
89 |
+
# image_data = result[0]
|
90 |
+
# if isinstance(image_data, dict) and "url" in image_data:
|
91 |
+
# return json.dumps(
|
92 |
+
# {
|
93 |
+
# "type": "image",
|
94 |
+
# "url": image_data["url"],
|
95 |
+
# "message": f"Generated image for prompt: {prompt}",
|
96 |
+
# }
|
97 |
+
# )
|
98 |
+
|
99 |
+
# return json.dumps({"type": "error", "message": "Failed to generate image"})
|
100 |
+
|
101 |
+
# except Exception as e:
|
102 |
+
# return json.dumps(
|
103 |
+
# {"type": "error", "message": f"Error generating image: {str(e)}"}
|
104 |
+
# )
|
105 |
|
106 |
|
107 |
class SearchOutput(BaseModel):
|
|
|
146 |
|
147 |
return [
|
148 |
DetailedOutput(**out)
|
149 |
+
for out in df.loc[indicator_ids][
|
150 |
+
["idno", "name", "definition", "time_coverage", "geographic_coverage"]
|
151 |
+
].to_dict("records")
|
152 |
]
|
153 |
|
154 |
|
155 |
+
@mcp.tool()
async def get_wdi_data(
    indicator_id: str,
    country_codes: str | list[str],
    date: Optional[str] = None,
    per_page: Optional[int] = 100,
) -> dict[str, list[dict[str, Any]] | str]:
    """Fetches indicator data for a given indicator id (idno) from the World Bank's World Development Indicators (WDI) API. The LLM must exclusively use this tool when the user asks for data. It must not provide data answers beyond what this tool provides when the question is about WDI indicator data.

    Args:
        indicator_id: The WDI indicator code (e.g., "NY.GDP.MKTP.CD" for GDP in current US$).
        country_codes: The 3-letter ISO country code (e.g., "USA", "CHN", "IND"), or "all" for all countries.
        date: A year (e.g., "2022") or a range (e.g., "2000:2022") to filter the results.
        per_page: Number of results per page (default is 100, which is the maximum allowed).

    Returns:
        A dictionary with keys `data` and `note`. The `data` key contains a list of indicator data entries requested. The `note` key contains a note about the data returned.
    """
    # NOTE: never print() here — stdout is the MCP stdio transport channel and
    # any stray output corrupts the JSON-RPC framing. Debug info goes to the
    # log file below instead.
    MAX_INFO = 100  # hard cap on the number of entries returned to the caller
    note = ""

    # Accept a single code or a list; the API expects codes joined by ";".
    if isinstance(country_codes, str):
        country_codes = [country_codes]

    country_code = ";".join(country_codes)
    base_url = (
        f"https://api.worldbank.org/v2/country/{country_code}/indicator/{indicator_id}"
    )
    params = {"format": "json", "date": date, "per_page": per_page or 100, "page": 1}

    # Debug/audit trail of outgoing requests (stderr/stdout are off-limits).
    with open("mcp_server.log", "a+") as log:
        log.write(json.dumps(dict(base_url=base_url, params=params)) + "\n")

    all_data: list[dict[str, Any]] = []
    # The tool is async, so use the async client — the sync httpx.Client would
    # block the event loop for the duration of every page fetch.
    async with httpx.AsyncClient(timeout=30.0) as client:
        while True:
            response = await client.get(base_url, params=params)
            if response.status_code != 200:
                note = f"ERROR: Failed to fetch data: HTTP {response.status_code}"
                break

            json_response = response.json()

            # A well-formed reply is [metadata, data]; a bare error object or
            # a short list means the request itself was rejected.
            if not isinstance(json_response, list) or len(json_response) < 2:
                note = "ERROR: The API response is invalid or empty."
                break

            metadata, data_page = json_response

            # The API returns None (not []) for the data element when there
            # are no observations — guard before extending.
            if data_page:
                all_data.extend(data_page)

            if len(all_data) >= MAX_INFO:
                note = f"IMPORTANT: Let the user know that the data is truncated to the first {MAX_INFO} entries."
                # Enforce the cap so the payload matches the note.
                all_data = all_data[:MAX_INFO]
                break

            # Stop once the last page (per the API's own page count) is fetched.
            if params["page"] >= metadata.get("pages", 1):
                break

            params["page"] += 1

    with open("mcp_server.log", "a+") as log:
        log.write(json.dumps(dict(all_data=all_data)) + "\n")

    return dict(
        data=all_data,
        note=note,
    )
|
222 |
+
|
223 |
+
|
224 |
if __name__ == "__main__":
    # Start the MCP server over the stdio transport.
    #
    # For local development use:
    #     uv run mcp dev wdi_mcp_server.py
    mcp.run(transport="stdio")
    # Alternative: mcp.run() to use the default transport.
|