File size: 1,277 Bytes
f016beb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import io, os, mimetypes, requests, pandas as pd
from langchain.tools import StructuredTool

def _load_dataset(url_or_path: str, head_only: bool = True) -> str:
    """Load a CSV or JSON dataset and render a summary as markdown.

    Args:
        url_or_path: Either an ``http://`` / ``https://`` URL or a local
            filesystem path (a leading ``~`` is expanded).
        head_only: When true (the default), return ``DataFrame.head()``
            rendered without the index column. Otherwise return
            ``DataFrame.describe(include="all")``.

    Returns:
        The selected table as produced by ``DataFrame.to_markdown``.

    Raises:
        ValueError: If neither the guessed MIME type nor the file extension
            identifies the source as CSV or JSON.
        requests.HTTPError: If a remote fetch returns an error status.
        OSError: If a local path cannot be opened.
    """
    # NOTE(review): the location is used exactly as supplied (any URL, any
    # local path). If this is reachable from untrusted input, the caller
    # should restrict what may be requested.
    if url_or_path.startswith(("http://", "https://")):
        resp = requests.get(url_or_path, timeout=10)
        resp.raise_for_status()
        raw = io.BytesIO(resp.content)
    else:  # local
        raw = open(os.path.expanduser(url_or_path), "rb")

    # Both BytesIO and real file objects are context managers, so a single
    # ``with`` guarantees the handle is closed on success *and* when the
    # type check or the parser raises (previously the local file leaked).
    with raw:
        mime = mimetypes.guess_type(url_or_path)[0] or ""
        lowered = url_or_path.lower()
        # The extension check is a fallback for when the MIME guess alone
        # does not mention "json"/"csv".
        if "json" in mime or lowered.endswith(".json"):
            df = pd.read_json(raw)
        elif "csv" in mime or lowered.endswith(".csv"):
            df = pd.read_csv(raw)
        else:
            raise ValueError("Only CSV or JSON supported.")

    if head_only:
        return df.head().to_markdown(index=False)
    return df.describe(include="all").to_markdown()

# Wrap the dataset loader as a LangChain StructuredTool, giving it an
# explicit tool name and description.
file_loader_tool = StructuredTool.from_function(
    func=_load_dataset,
    name="load_dataset",
    description=(
        "Load a CSV or JSON file from a URL or local path. Returns a "
        "markdown preview of the data."
    ),
    # Return the tool's output directly, skipping any extra narration.
    return_direct=True,
)