Spaces:
Starting
Starting
from langchain_core.tools import tool | |
import pandas as pd | |
import PyPDF2 | |
import logging | |
import os | |
logger = logging.getLogger(__name__) | |
async def file_parser_tool(task_id: str, file_type: str) -> str:
    """Return the text content of the temporary file belonging to a task.

    The file is looked up relative to the current working directory under
    the name ``temp_<task_id>.<file_type>``.

    Args:
        task_id: Identifier used to build the file name.
        file_type: File extension; it also selects the parser. Supported
            values are ``"csv"``, ``"txt"`` and ``"pdf"``.

    Returns:
        The parsed content as a string: the ``DataFrame.to_string()``
        rendering for CSV, the UTF-8 decoded text for TXT, or the
        concatenated text of all pages for PDF. Failures are reported as a
        message string rather than raised: ``"File not found"``,
        ``"Unsupported file type: <file_type>"`` or ``"Error: <details>"``.
    """
    # NOTE(review): declared async, but every operation below is blocking
    # file I/O; nothing is awaited in this function.
    try:
        file_path = f"temp_{task_id}.{file_type}"
        if not os.path.exists(file_path):
            logger.warning("File not found: %s", file_path)
            return "File not found"

        if file_type == "csv":
            return pd.read_csv(file_path).to_string()

        if file_type == "txt":
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read()

        if file_type == "pdf":
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # A page without extractable text may yield None/"" (e.g.
                # image-only pages on some PyPDF2 versions); treat it as
                # empty so one such page does not fail the whole document
                # with a TypeError inside join().
                return "".join(
                    page.extract_text() or "" for page in reader.pages
                )

        return f"Unsupported file type: {file_type}"
    except Exception as e:
        # Top-level boundary for the tool: report the failure as a string to
        # the caller, but keep the traceback in the log for debugging.
        logger.exception("Error parsing file for task %s: %s", task_id, e)
        return f"Error: {e}"