Spaces:
Sleeping
Sleeping
Commit
·
51b48a8
1
Parent(s):
62b003e
Updated the smolagents tools: added docstrings to clean_data, extract_features, and save_to_csv
Browse files
- Oracle/DataSmolAgent.py +28 -3
Oracle/DataSmolAgent.py
CHANGED
@@ -5,14 +5,30 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
5 |
|
6 |
@tool
|
7 |
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
|
8 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
df.columns = df.columns.str.strip()
|
10 |
df = df.dropna(how="all")
|
11 |
return df
|
12 |
|
13 |
@tool
|
14 |
def extract_features(df: pd.DataFrame) -> pd.DataFrame:
|
15 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
# Numeric columns: log transformation
|
17 |
numeric_cols = df.select_dtypes(include=[np.number]).columns.to_list()
|
18 |
for col in numeric_cols:
|
@@ -45,7 +61,16 @@ def extract_features(df: pd.DataFrame) -> pd.DataFrame:
|
|
45 |
|
46 |
@tool
|
47 |
def save_to_csv(df: pd.DataFrame, filename: str = "output.csv") -> str:
|
48 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
df.to_csv(filename, index=False)
|
50 |
return filename
|
51 |
|
|
|
5 |
|
6 |
@tool
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Cleans the input DataFrame by stripping whitespace from column names and dropping rows that are completely empty.

    The input DataFrame is not modified; a new DataFrame is returned.

    Args:
        df: The input DataFrame containing the raw data.

    Returns:
        A cleaned DataFrame with stripped column names and without completely empty rows.
    """
    # rename() returns a new frame, so the caller's column labels are never
    # mutated in place. Non-string labels (e.g. integer positions from a
    # header-less load) pass through unchanged instead of breaking on the
    # string accessor.
    renamed = df.rename(
        columns=lambda label: label.strip() if isinstance(label, str) else label
    )
    # how="all" drops only the rows in which every value is missing.
    return renamed.dropna(how="all")
|
20 |
|
21 |
@tool
|
22 |
def extract_features(df: pd.DataFrame) -> pd.DataFrame:
|
23 |
+
"""
|
24 |
+
Dynamically extracts features from the input DataFrame.
|
25 |
+
|
26 |
+
Args:
|
27 |
+
df: The input DataFrame containing the raw data.
|
28 |
+
|
29 |
+
Returns:
|
30 |
+
The DataFrame updated with new dynamically engineered features.
|
31 |
+
"""
|
32 |
# Numeric columns: log transformation
|
33 |
numeric_cols = df.select_dtypes(include=[np.number]).columns.to_list()
|
34 |
for col in numeric_cols:
|
|
|
61 |
|
62 |
@tool
def save_to_csv(df: pd.DataFrame, filename: str = "output.csv") -> str:
    """
    Writes the given DataFrame to a CSV file and reports where it was written.

    Args:
        df: The DataFrame to write out.
        filename: Destination path of the CSV file (default is "output.csv").

    Returns:
        The filename that was passed in, once the file has been written.
    """
    # The index is left out so the file holds only the DataFrame's columns.
    df.to_csv(path_or_buf=filename, index=False)
    return filename
|
76 |
|