import asyncio
import logging
import os

import pandas as pd
import PyPDF2
from langchain_core.tools import tool

logger = logging.getLogger(__name__)


def _read_file_contents(file_path: str, file_type: str) -> str:
    """Return the textual content of *file_path*, dispatching on *file_type*.

    This is the blocking half of ``file_parser_tool``; it is meant to be run
    in a worker thread so the event loop is not stalled by disk reads or by
    pandas/PyPDF2 parsing.

    Args:
        file_path: Path of the file to read.
        file_type: One of ``"csv"``, ``"txt"`` or ``"pdf"``.

    Returns:
        ``"File not found"`` when the path does not exist, the rendered file
        content for a supported type, or ``"Unsupported file type: ..."``
        otherwise.

    Raises:
        Exception: Whatever ``pandas``, ``open`` or ``PyPDF2`` raise while
            reading or parsing; the caller converts these into a message.
    """
    # The existence check comes first so a missing file is reported the same
    # way regardless of the requested type.
    if not os.path.exists(file_path):
        logger.warning(f"File not found: {file_path}")
        return "File not found"

    if file_type == "csv":
        return pd.read_csv(file_path).to_string()

    if file_type == "txt":
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    if file_type == "pdf":
        with open(file_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            # Defensive: treat a page that yields no text (None) as empty so
            # that str.join does not raise TypeError.
            return "".join(page.extract_text() or "" for page in reader.pages)

    return f"Unsupported file type: {file_type}"


# NOTE: the docstring below is deliberately left as a single short line.
# LangChain's @tool decorator uses the function docstring as the tool
# description, so rewording it would change what the model is told.
#
# Contract: the file is looked up as "temp_<task_id>.<file_type>" relative to
# the current working directory. The function never raises; every outcome
# (content, "File not found", "Unsupported file type: ...", "Error: ...") is
# returned as a string.
@tool
async def file_parser_tool(task_id: str, file_type: str) -> str:
    """Parse a file based on task_id and file_type"""
    file_path = f"temp_{task_id}.{file_type}"

    # Both arguments come from a tool call and are interpolated into a path.
    # Refuse anything that carries a directory component so the lookup cannot
    # leave the working directory (e.g. task_id="x/../../etc/passwd").
    if os.path.basename(file_path) != file_path:
        logger.warning("Rejected path with directory component: %r", file_path)
        return "File not found"

    try:
        # Disk reads and pandas/PyPDF2 parsing are blocking; run them in the
        # default executor so other coroutines keep making progress.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, _read_file_contents, file_path, file_type
        )
    except Exception as e:
        # Top-level tool boundary: report the failure as text instead of
        # raising, and keep the traceback in the log.
        logger.exception("Error parsing file for task %s: %s", task_id, e)
        return f"Error: {str(e)}"