import io
import mimetypes
import os

import pandas as pd
import requests
from langchain.tools import StructuredTool


def _read_frame(raw: io.BufferedIOBase, url_or_path: str) -> pd.DataFrame:
    """Parse an open binary stream as JSON or CSV, chosen from the source name."""
    # Prefer the guessed MIME type; fall back to the literal file extension.
    mime = mimetypes.guess_type(url_or_path)[0] or ""
    lowered = url_or_path.lower()
    if "json" in mime or lowered.endswith(".json"):
        return pd.read_json(raw)
    if "csv" in mime or lowered.endswith(".csv"):
        return pd.read_csv(raw)
    raise ValueError("Only CSV or JSON supported.")


def _load_dataset(url_or_path: str, head_only: bool = True) -> str:
    """Fetch a CSV/JSON file (local or remote) and render it as markdown.

    Args:
        url_or_path: An ``http://`` / ``https://`` URL, or a local filesystem
            path (a leading ``~`` is expanded).
        head_only: When true (the default), return the first rows as given by
            ``DataFrame.head()``; otherwise return ``describe(include="all")``
            summary statistics.

    Returns:
        A markdown table. The row index is omitted for the preview and kept
        for the statistics table, where it names each statistic.

    Raises:
        requests.HTTPError: The remote server answered with an error status.
        OSError: The local file could not be opened.
        ValueError: The source is neither recognised as CSV nor as JSON.

    Note:
        ``DataFrame.to_markdown`` relies on the optional ``tabulate`` package.
    """
    if url_or_path.startswith(("http://", "https://")):
        resp = requests.get(url_or_path, timeout=10)
        resp.raise_for_status()
        # The whole payload is already in memory, so there is no handle to close.
        df = _read_frame(io.BytesIO(resp.content), url_or_path)
    else:
        # Context manager guarantees the handle is closed even if parsing or
        # type detection raises.
        with open(os.path.expanduser(url_or_path), "rb") as fh:
            df = _read_frame(fh, url_or_path)

    if head_only:
        return df.head().to_markdown(index=False)
    return df.describe(include="all").to_markdown()


file_loader_tool = StructuredTool.from_function(
    name="load_dataset",
    description=(
        "Load a CSV or JSON file from a URL or local path. "
        "Returns a markdown preview of the data."
    ),
    func=_load_dataset,
    return_direct=True,  # skip extra narration
)