from langchain_core.tools import tool
import pandas as pd
import PyPDF2
import logging
import os

logger = logging.getLogger(__name__)

@tool
async def file_parser_tool(task_id: str, file_type: str) -> str:
    """Parse a file based on task_id and file_type.

    Reads ``temp_{task_id}.{file_type}`` (a relative path, so it is resolved
    against the current working directory) and returns its content as text.

    Args:
        task_id: Identifier used to build the file name. Must not contain
            path separators.
        file_type: File extension that selects the parser: "csv", "txt"
            or "pdf".

    Returns:
        The file content as a string (CSV files are rendered with
        ``DataFrame.to_string``), or a short status message:
        "File not found", "Unsupported file type: ...", or "Error: ...".
        Failures are reported through the return value, never raised.
    """
    # NOTE(review): the reads below are blocking calls inside an async
    # function; consider offloading them to a worker thread if this tool
    # runs on a busy event loop.
    try:
        file_path = f"temp_{task_id}.{file_type}"

        # Both arguments are caller-supplied and end up in a filesystem path.
        # Refuse anything that introduces a directory component so the tool
        # can only ever touch "temp_*" files in the working directory.
        if os.path.basename(file_path) != file_path:
            logger.warning("Rejected unsafe file path for task %r", task_id)
            return "Error: invalid task_id or file_type"

        if not os.path.exists(file_path):
            logger.warning("File not found: %s", file_path)
            return "File not found"

        if file_type == "csv":
            df = pd.read_csv(file_path)
            return df.to_string()
        elif file_type == "txt":
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read()
        elif file_type == "pdf":
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # Guard against a falsy/None result for pages without
                # extractable text so one such page cannot break the join.
                return "".join(
                    page.extract_text() or "" for page in reader.pages
                )
        else:
            return f"Unsupported file type: {file_type}"
    except FileNotFoundError:
        # The file disappeared between the existence check and the read;
        # answer the same way as the explicit check above.
        logger.warning("File not found: temp_%s.%s", task_id, file_type)
        return "File not found"
    except Exception as e:
        # Tool boundary: keep the traceback in the log, but hand the caller
        # a plain string instead of propagating the exception.
        logger.exception("Error parsing file for task %s: %s", task_id, e)
        return f"Error: {str(e)}"