File size: 1,277 Bytes
f016beb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import io, os, mimetypes, requests, pandas as pd
from langchain.tools import StructuredTool
def _load_dataset(url_or_path: str, head_only: bool = True) -> str:
    """Load a CSV or JSON dataset and summarise it as a markdown table.

    Args:
        url_or_path: An ``http://`` / ``https://`` URL, or a local file path
            (a leading ``~`` is expanded).
        head_only: When true (the default), render ``DataFrame.head()``;
            otherwise render ``DataFrame.describe(include="all")``.

    Returns:
        The selected table as produced by ``DataFrame.to_markdown``.

    Raises:
        requests.HTTPError: If the remote fetch returns an error status.
        OSError: If the local file cannot be opened or read.
        ValueError: If the target is not recognised as CSV or JSON.
    """
    # Pull the whole payload into memory up front. For local files this lets
    # the handle be closed by the ``with`` block no matter how the function
    # exits (the previous bare ``open`` was never closed).
    if url_or_path.startswith(("http://", "https://")):
        resp = requests.get(url_or_path, timeout=10)
        resp.raise_for_status()
        payload = resp.content
    else:  # local
        with open(os.path.expanduser(url_or_path), "rb") as fh:
            payload = fh.read()
    raw = io.BytesIO(payload)

    # Detect the format from the guessed MIME type, falling back to the
    # file suffix when the MIME guess is missing or inconclusive.
    mime = mimetypes.guess_type(url_or_path)[0] or ""
    lowered = url_or_path.lower()
    if "json" in mime or lowered.endswith(".json"):
        df = pd.read_json(raw)
    elif "csv" in mime or lowered.endswith(".csv"):
        df = pd.read_csv(raw)
    else:
        raise ValueError("Only CSV or JSON supported.")

    if head_only:
        return df.head().to_markdown(index=False)
    return df.describe(include="all").to_markdown()
# Wrap the dataset loader as a LangChain structured tool.
file_loader_tool = StructuredTool.from_function(
    func=_load_dataset,
    name="load_dataset",
    description=(
        "Load a CSV or JSON file from a URL or local path. Returns a "
        "markdown preview of the data."
    ),
    # Return the tool output directly, skipping any extra narration.
    return_direct=True,
)
|