doctorecord / src /agents /pdf_agent.py
levalencia's picture
Add cost tracking functionality across various components, including Executor, Planner, and FieldMapperAgent. Integrate CostTracker to monitor LLM and document intelligence costs, enhancing logging for cost-related metrics and providing detailed cost breakdowns in the user interface.
966ffcd
"""Extract raw text from the uploaded PDF using PyMuPDF.
This keeps the implementation minimal for a POC while remaining easy to extend.
"""
from typing import Dict, Any, List
import fitz # PyMuPDF
from .base_agent import BaseAgent
class PDFAgent(BaseAgent):
    """Read the uploaded PDF, join all page text and store it under ``ctx['text']``."""

    def _extract_pages(self, pdf_bytes: bytes) -> List[str]:
        """Return the text of every page in the PDF, in document order.

        The document is opened exactly once and closed when the ``with``
        block exits, so the underlying PyMuPDF handle is not leaked.
        """
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:  # type: ignore[arg-type]
            return [page.get_text() for page in doc]

    def _extract_text(self, pdf_bytes: bytes) -> str:
        """Return the text of all pages joined with newlines."""
        return "\n".join(self._extract_pages(pdf_bytes))

    # -----------------------------------------------------
    def execute(self, ctx: Dict[str, Any]) -> str:  # noqa: D401
        """Extract text from ``ctx['pdf_file']`` and store it in ``ctx['text']``.

        Args:
            ctx: Shared context. Must contain ``'pdf_file'``, an object with a
                ``read()`` method returning the PDF bytes. May contain
                ``'cost_tracker'``, an object exposing ``add_di_pages(int)``.

        Returns:
            The concatenated text of all pages (also stored in ``ctx['text']``).

        Raises:
            ValueError: If ``'pdf_file'`` is missing from ``ctx`` or is ``None``.
        """
        pdf_file = ctx.get("pdf_file")
        if pdf_file is None:
            raise ValueError("PDFAgent expected 'pdf_file' in context but none provided.")

        pdf_bytes = pdf_file.read()

        # Parse the document a single time; the page list yields both the
        # joined text and the page count, so the PDF is never re-opened.
        pages = self._extract_pages(pdf_bytes)
        text = "\n".join(pages)
        ctx["text"] = text

        # Report the number of processed pages to the cost tracker, if any.
        cost_tracker = ctx.get("cost_tracker")
        if cost_tracker is not None:
            cost_tracker.add_di_pages(len(pages))

        return text