doctorecord / src /agents /pdf_agent.py
levalencia's picture
Add cost tracking functionality across various components, including Executor, Planner, and FieldMapperAgent. Integrate CostTracker to monitor LLM and document intelligence costs, enhancing logging for cost-related metrics and providing detailed cost breakdowns in the user interface.
966ffcd
raw
history blame
1.24 kB
"""Extract raw text from the uploaded PDF using PyMuPDF.
This keeps the implementation minimal for a POC while remaining easy to extend.
"""
from typing import Dict, Any, List
import fitz # PyMuPDF
from .base_agent import BaseAgent
class PDFAgent(BaseAgent):
    """Read the uploaded PDF and store its concatenated page text under ``ctx['text']``."""

    def _extract_pages(self, pdf_bytes: bytes) -> List[str]:
        """Return the text of every page in *pdf_bytes*, in document order.

        The document is opened from the in-memory bytes exactly once and is
        always closed again, even when extraction fails part-way through.
        """
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")  # type: ignore[arg-type]
        try:
            return [page.get_text() for page in doc]
        finally:
            # Release the native document handle; the original never closed it.
            doc.close()

    def _extract_text(self, pdf_bytes: bytes) -> str:
        """Return the text of all pages joined with newlines."""
        return "\n".join(self._extract_pages(pdf_bytes))

    # -----------------------------------------------------
    def execute(self, ctx: Dict[str, Any]):  # noqa: D401
        """Extract the PDF text into ``ctx['text']`` and report the page count.

        Args:
            ctx: Shared context. Must contain ``'pdf_file'``, an object whose
                ``read()`` returns the PDF bytes. May contain
                ``'cost_tracker'``, an object exposing ``add_di_pages(int)``.

        Returns:
            The extracted text (also stored under ``ctx['text']``).

        Raises:
            ValueError: If ``ctx`` has no ``'pdf_file'`` entry (or it is None).
        """
        pdf_file = ctx.get("pdf_file")
        if pdf_file is None:
            raise ValueError("PDFAgent expected 'pdf_file' in context but none provided.")

        pdf_bytes = pdf_file.read()

        # Parse the document a single time; the page list yields both the
        # text and the page count, so no second ``fitz.open`` is needed.
        pages = self._extract_pages(pdf_bytes)
        text = "\n".join(pages)
        ctx["text"] = text

        # Tolerate both a missing tracker and an explicit ``None`` placeholder.
        cost_tracker = ctx.get("cost_tracker")
        if cost_tracker is not None:
            cost_tracker.add_di_pages(len(pages))

        return text