Spaces:
Sleeping
Sleeping
Commit
·
966ffcd
1
Parent(s):
2eac01a
Add cost tracking functionality across various components, including Executor, Planner, and FieldMapperAgent. Integrate CostTracker to monitor LLM and document intelligence costs, enhancing logging for cost-related metrics and providing detailed cost breakdowns in the user interface.
Browse files- src/agents/__pycache__/field_mapper_agent.cpython-312.pyc +0 -0
- src/agents/__pycache__/pdf_agent.cpython-312.pyc +0 -0
- src/agents/__pycache__/table_agent.cpython-312.pyc +0 -0
- src/agents/field_mapper_agent.py +57 -3
- src/agents/pdf_agent.py +6 -0
- src/app.py +25 -2
- src/orchestrator/__pycache__/executor.cpython-312.pyc +0 -0
- src/orchestrator/__pycache__/planner.cpython-312.pyc +0 -0
- src/orchestrator/executor.py +12 -1
- src/orchestrator/planner.py +9 -2
- src/services/__pycache__/llm_client.cpython-312.pyc +0 -0
- src/services/cost_tracker.py +221 -0
- src/services/llm_client.py +29 -2
src/agents/__pycache__/field_mapper_agent.cpython-312.pyc
CHANGED
Binary files a/src/agents/__pycache__/field_mapper_agent.cpython-312.pyc and b/src/agents/__pycache__/field_mapper_agent.cpython-312.pyc differ
|
|
src/agents/__pycache__/pdf_agent.cpython-312.pyc
CHANGED
Binary files a/src/agents/__pycache__/pdf_agent.cpython-312.pyc and b/src/agents/__pycache__/pdf_agent.cpython-312.pyc differ
|
|
src/agents/__pycache__/table_agent.cpython-312.pyc
CHANGED
Binary files a/src/agents/__pycache__/table_agent.cpython-312.pyc and b/src/agents/__pycache__/table_agent.cpython-312.pyc differ
|
|
src/agents/field_mapper_agent.py
CHANGED
@@ -35,7 +35,25 @@ class FieldMapperAgent(BaseAgent):
|
|
35 |
try:
|
36 |
self.logger.info("Inferring document context...")
|
37 |
self.logger.debug(f"Using text preview: {text[:500]}...")
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
self.logger.info(f"Inferred context: {context}")
|
40 |
return context
|
41 |
except Exception as e:
|
@@ -142,7 +160,25 @@ class FieldMapperAgent(BaseAgent):
|
|
142 |
try:
|
143 |
self.logger.info(f"Calling LLM to extract value for field '{field}'")
|
144 |
self.logger.debug(f"Using prompt: {prompt}")
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
self.logger.debug(f"Raw LLM response: {value}")
|
147 |
|
148 |
if value and value.lower() not in ["none", "null", "n/a"]:
|
@@ -191,7 +227,25 @@ class FieldMapperAgent(BaseAgent):
|
|
191 |
|
192 |
try:
|
193 |
self.logger.info(f"Calling LLM to extract value for field '{field}' from page")
|
194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
self.logger.debug(f"Raw LLM response: {value}")
|
196 |
|
197 |
if value and value.lower() not in ["none", "null", "n/a"]:
|
|
|
35 |
try:
|
36 |
self.logger.info("Inferring document context...")
|
37 |
self.logger.debug(f"Using text preview: {text[:500]}...")
|
38 |
+
|
39 |
+
# Get cost tracker from context
|
40 |
+
cost_tracker = self.ctx.get("cost_tracker") if hasattr(self, 'ctx') else None
|
41 |
+
if cost_tracker:
|
42 |
+
self.logger.info("Cost tracker found in context")
|
43 |
+
else:
|
44 |
+
self.logger.warning("No cost tracker found in context")
|
45 |
+
|
46 |
+
context = self.llm.responses(
|
47 |
+
prompt, temperature=0.0,
|
48 |
+
ctx={"cost_tracker": cost_tracker} if cost_tracker else None,
|
49 |
+
description="Document Context Inference"
|
50 |
+
)
|
51 |
+
|
52 |
+
# Log cost tracking results if available
|
53 |
+
if cost_tracker:
|
54 |
+
self.logger.info(f"Context inference costs - Input tokens: {cost_tracker.llm_input_tokens}, Output tokens: {cost_tracker.llm_output_tokens}")
|
55 |
+
self.logger.info(f"Context inference cost: ${cost_tracker.calculate_current_file_costs()['openai']['total_cost']:.4f}")
|
56 |
+
|
57 |
self.logger.info(f"Inferred context: {context}")
|
58 |
return context
|
59 |
except Exception as e:
|
|
|
160 |
try:
|
161 |
self.logger.info(f"Calling LLM to extract value for field '{field}'")
|
162 |
self.logger.debug(f"Using prompt: {prompt}")
|
163 |
+
|
164 |
+
# Get cost tracker from context
|
165 |
+
cost_tracker = self.ctx.get("cost_tracker") if hasattr(self, 'ctx') else None
|
166 |
+
if cost_tracker:
|
167 |
+
self.logger.info("Cost tracker found in context")
|
168 |
+
else:
|
169 |
+
self.logger.warning("No cost tracker found in context")
|
170 |
+
|
171 |
+
value = self.llm.responses(
|
172 |
+
prompt, temperature=0.0,
|
173 |
+
ctx={"cost_tracker": cost_tracker} if cost_tracker else None,
|
174 |
+
description=f"Field Extraction - {field} (Search)"
|
175 |
+
)
|
176 |
+
|
177 |
+
# Log cost tracking results if available
|
178 |
+
if cost_tracker:
|
179 |
+
self.logger.info(f"Field extraction costs - Input tokens: {cost_tracker.llm_input_tokens}, Output tokens: {cost_tracker.llm_output_tokens}")
|
180 |
+
self.logger.info(f"Field extraction cost: ${cost_tracker.calculate_current_file_costs()['openai']['total_cost']:.4f}")
|
181 |
+
|
182 |
self.logger.debug(f"Raw LLM response: {value}")
|
183 |
|
184 |
if value and value.lower() not in ["none", "null", "n/a"]:
|
|
|
227 |
|
228 |
try:
|
229 |
self.logger.info(f"Calling LLM to extract value for field '{field}' from page")
|
230 |
+
|
231 |
+
# Get cost tracker from context
|
232 |
+
cost_tracker = self.ctx.get("cost_tracker") if hasattr(self, 'ctx') else None
|
233 |
+
if cost_tracker:
|
234 |
+
self.logger.info("Cost tracker found in context")
|
235 |
+
else:
|
236 |
+
self.logger.warning("No cost tracker found in context")
|
237 |
+
|
238 |
+
value = self.llm.responses(
|
239 |
+
prompt, temperature=0.0,
|
240 |
+
ctx={"cost_tracker": cost_tracker} if cost_tracker else None,
|
241 |
+
description=f"Field Extraction - {field} (Page)"
|
242 |
+
)
|
243 |
+
|
244 |
+
# Log cost tracking results if available
|
245 |
+
if cost_tracker:
|
246 |
+
self.logger.info(f"Page extraction costs - Input tokens: {cost_tracker.llm_input_tokens}, Output tokens: {cost_tracker.llm_output_tokens}")
|
247 |
+
self.logger.info(f"Page extraction cost: ${cost_tracker.calculate_current_file_costs()['openai']['total_cost']:.4f}")
|
248 |
+
|
249 |
self.logger.debug(f"Raw LLM response: {value}")
|
250 |
|
251 |
if value and value.lower() not in ["none", "null", "n/a"]:
|
src/agents/pdf_agent.py
CHANGED
@@ -25,4 +25,10 @@ class PDFAgent(BaseAgent):
|
|
25 |
pdf_bytes = pdf_file.read()
|
26 |
text = self._extract_text(pdf_bytes)
|
27 |
ctx["text"] = text
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
return text
|
|
|
25 |
pdf_bytes = pdf_file.read()
|
26 |
text = self._extract_text(pdf_bytes)
|
27 |
ctx["text"] = text
|
28 |
+
|
29 |
+
# After extracting pages
|
30 |
+
num_pages = len(fitz.open(stream=pdf_bytes, filetype="pdf")) # type: ignore[arg-type]
|
31 |
+
if "cost_tracker" in ctx:
|
32 |
+
ctx["cost_tracker"].add_di_pages(num_pages)
|
33 |
+
|
34 |
return text
|
src/app.py
CHANGED
@@ -13,6 +13,7 @@ from datetime import datetime
|
|
13 |
import io
|
14 |
import sys
|
15 |
from io import StringIO
|
|
|
16 |
|
17 |
# Create a custom stream handler to capture logs
|
18 |
class LogCaptureHandler(logging.StreamHandler):
|
@@ -317,7 +318,10 @@ else: # page == "Execution"
|
|
317 |
doc = fitz.open(stream=pdf_file.getvalue(), filetype="pdf") # type: ignore[arg-type]
|
318 |
preview = "\n".join(page.get_text() for page in doc[:10])[:20000] # first 2 pages, 2k chars
|
319 |
|
320 |
-
|
|
|
|
|
|
|
321 |
plan = planner.build_plan(
|
322 |
pdf_meta={"filename": pdf_file.name},
|
323 |
doc_preview=preview,
|
@@ -329,9 +333,28 @@ else: # page == "Execution"
|
|
329 |
st.markdown("---")
|
330 |
|
331 |
with st.spinner("Executing …"):
|
332 |
-
executor = Executor(settings=settings)
|
333 |
results, logs = executor.run(plan, pdf_file)
|
334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
# Add detailed logging about what executor returned
|
336 |
logger.info(f"Executor returned results of type: {type(results)}")
|
337 |
logger.info(f"Results content: {results}")
|
|
|
13 |
import io
|
14 |
import sys
|
15 |
from io import StringIO
|
16 |
+
from services.cost_tracker import CostTracker
|
17 |
|
18 |
# Create a custom stream handler to capture logs
|
19 |
class LogCaptureHandler(logging.StreamHandler):
|
|
|
318 |
doc = fitz.open(stream=pdf_file.getvalue(), filetype="pdf") # type: ignore[arg-type]
|
319 |
preview = "\n".join(page.get_text() for page in doc[:10])[:20000] # first 2 pages, 2k chars
|
320 |
|
321 |
+
# Create a cost tracker for this run
|
322 |
+
cost_tracker = CostTracker()
|
323 |
+
|
324 |
+
planner = Planner(cost_tracker=cost_tracker)
|
325 |
plan = planner.build_plan(
|
326 |
pdf_meta={"filename": pdf_file.name},
|
327 |
doc_preview=preview,
|
|
|
333 |
st.markdown("---")
|
334 |
|
335 |
with st.spinner("Executing …"):
|
336 |
+
executor = Executor(settings=settings, cost_tracker=cost_tracker)
|
337 |
results, logs = executor.run(plan, pdf_file)
|
338 |
|
339 |
+
# Get detailed costs
|
340 |
+
costs = executor.cost_tracker.calculate_current_file_costs()
|
341 |
+
model_cost = costs["openai"]["total_cost"]
|
342 |
+
di_cost = costs["document_intelligence"]["total_cost"]
|
343 |
+
|
344 |
+
# Display detailed costs table
|
345 |
+
st.subheader("Detailed Costs")
|
346 |
+
costs_df = executor.cost_tracker.get_detailed_costs_table()
|
347 |
+
st.dataframe(costs_df, use_container_width=True)
|
348 |
+
|
349 |
+
st.info(
|
350 |
+
f"LLM input tokens: {executor.cost_tracker.llm_input_tokens}, "
|
351 |
+
f"LLM output tokens: {executor.cost_tracker.llm_output_tokens}, "
|
352 |
+
f"DI pages: {executor.cost_tracker.di_pages}, "
|
353 |
+
f"Model cost: ${model_cost:.4f}, "
|
354 |
+
f"DI cost: ${di_cost:.4f}, "
|
355 |
+
f"Total cost: ${model_cost + di_cost:.4f}"
|
356 |
+
)
|
357 |
+
|
358 |
# Add detailed logging about what executor returned
|
359 |
logger.info(f"Executor returned results of type: {type(results)}")
|
360 |
logger.info(f"Results content: {results}")
|
src/orchestrator/__pycache__/executor.cpython-312.pyc
CHANGED
Binary files a/src/orchestrator/__pycache__/executor.cpython-312.pyc and b/src/orchestrator/__pycache__/executor.cpython-312.pyc differ
|
|
src/orchestrator/__pycache__/planner.cpython-312.pyc
CHANGED
Binary files a/src/orchestrator/__pycache__/planner.cpython-312.pyc and b/src/orchestrator/__pycache__/planner.cpython-312.pyc differ
|
|
src/orchestrator/executor.py
CHANGED
@@ -16,9 +16,12 @@ from agents.semantic_reasoner import SemanticReasonerAgent
|
|
16 |
from agents.confidence_scorer import ConfidenceScorer
|
17 |
from agents.query_generator import QueryGenerator
|
18 |
|
|
|
|
|
|
|
19 |
|
20 |
class Executor:
|
21 |
-
def __init__(self, settings):
|
22 |
# map name → instance
|
23 |
self.tools = {
|
24 |
"PDFAgent": PDFAgent(),
|
@@ -32,6 +35,7 @@ class Executor:
|
|
32 |
|
33 |
self.logs: List[Dict[str, Any]] = []
|
34 |
self.logger = logging.getLogger(__name__)
|
|
|
35 |
|
36 |
# ---------------------------------------------------------
|
37 |
def run(self, plan: Dict[str, Any], pdf_file) -> tuple[pd.DataFrame, List[Dict[str, Any]]]: # noqa: D401
|
@@ -47,6 +51,7 @@ class Executor:
|
|
47 |
"results": [],
|
48 |
"conf": 1.0,
|
49 |
"pdf_meta": plan.get("pdf_meta", {}), # Include the plan's metadata
|
|
|
50 |
}
|
51 |
|
52 |
try:
|
@@ -112,6 +117,12 @@ class Executor:
|
|
112 |
else:
|
113 |
df = pd.DataFrame()
|
114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
return df, self.logs
|
116 |
|
117 |
# ---------------------------------------------------------
|
|
|
16 |
from agents.confidence_scorer import ConfidenceScorer
|
17 |
from agents.query_generator import QueryGenerator
|
18 |
|
19 |
+
# Add import for CostTracker
|
20 |
+
from services.cost_tracker import CostTracker
|
21 |
+
|
22 |
|
23 |
class Executor:
|
24 |
+
def __init__(self, settings, cost_tracker=None):
|
25 |
# map name → instance
|
26 |
self.tools = {
|
27 |
"PDFAgent": PDFAgent(),
|
|
|
35 |
|
36 |
self.logs: List[Dict[str, Any]] = []
|
37 |
self.logger = logging.getLogger(__name__)
|
38 |
+
self.cost_tracker = cost_tracker or CostTracker()
|
39 |
|
40 |
# ---------------------------------------------------------
|
41 |
def run(self, plan: Dict[str, Any], pdf_file) -> tuple[pd.DataFrame, List[Dict[str, Any]]]: # noqa: D401
|
|
|
51 |
"results": [],
|
52 |
"conf": 1.0,
|
53 |
"pdf_meta": plan.get("pdf_meta", {}), # Include the plan's metadata
|
54 |
+
"cost_tracker": self.cost_tracker,
|
55 |
}
|
56 |
|
57 |
try:
|
|
|
117 |
else:
|
118 |
df = pd.DataFrame()
|
119 |
|
120 |
+
# At the end, log the costs
|
121 |
+
self.logger.info(f"Total LLM input tokens: {self.cost_tracker.llm_input_tokens}")
|
122 |
+
self.logger.info(f"Total LLM output tokens: {self.cost_tracker.llm_output_tokens}")
|
123 |
+
self.logger.info(f"Total DI pages: {self.cost_tracker.di_pages}")
|
124 |
+
self.logger.info(f"Total cost: ${self.cost_tracker.total_cost():.4f}")
|
125 |
+
|
126 |
return df, self.logs
|
127 |
|
128 |
# ---------------------------------------------------------
|
src/orchestrator/planner.py
CHANGED
@@ -11,6 +11,7 @@ import yaml
|
|
11 |
|
12 |
from services.llm_client import LLMClient
|
13 |
from config.settings import settings
|
|
|
14 |
|
15 |
|
16 |
_PROMPTS_FILE = Path(__file__).parent.parent / "config" / "prompts.yaml"
|
@@ -23,9 +24,10 @@ logger = logging.getLogger(__name__)
|
|
23 |
class Planner:
|
24 |
"""Generate a plan with the Responses API; fall back to a static template if parsing fails."""
|
25 |
|
26 |
-
def __init__(self) -> None:
|
27 |
self.prompt_template = self._load_prompt("planner")
|
28 |
self.llm = LLMClient(settings)
|
|
|
29 |
logger.info("Planner initialized with prompt template")
|
30 |
|
31 |
# --------------------------------------------------
|
@@ -53,7 +55,12 @@ class Planner:
|
|
53 |
|
54 |
try:
|
55 |
logger.info("Calling LLM to generate plan")
|
56 |
-
raw = self.llm.responses(
|
|
|
|
|
|
|
|
|
|
|
57 |
logger.debug(f"Raw LLM response: {raw}")
|
58 |
|
59 |
try:
|
|
|
11 |
|
12 |
from services.llm_client import LLMClient
|
13 |
from config.settings import settings
|
14 |
+
from services.cost_tracker import CostTracker
|
15 |
|
16 |
|
17 |
_PROMPTS_FILE = Path(__file__).parent.parent / "config" / "prompts.yaml"
|
|
|
24 |
class Planner:
|
25 |
"""Generate a plan with the Responses API; fall back to a static template if parsing fails."""
|
26 |
|
27 |
+
def __init__(self, cost_tracker=None) -> None:
|
28 |
self.prompt_template = self._load_prompt("planner")
|
29 |
self.llm = LLMClient(settings)
|
30 |
+
self.cost_tracker = cost_tracker or CostTracker()
|
31 |
logger.info("Planner initialized with prompt template")
|
32 |
|
33 |
# --------------------------------------------------
|
|
|
55 |
|
56 |
try:
|
57 |
logger.info("Calling LLM to generate plan")
|
58 |
+
raw = self.llm.responses(
|
59 |
+
prompt,
|
60 |
+
temperature=0.0,
|
61 |
+
ctx={"cost_tracker": self.cost_tracker},
|
62 |
+
description="Execution Plan Generation"
|
63 |
+
)
|
64 |
logger.debug(f"Raw LLM response: {raw}")
|
65 |
|
66 |
try:
|
src/services/__pycache__/llm_client.cpython-312.pyc
CHANGED
Binary files a/src/services/__pycache__/llm_client.cpython-312.pyc and b/src/services/__pycache__/llm_client.cpython-312.pyc differ
|
|
src/services/cost_tracker.py
ADDED
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Cost tracking service for Azure OpenAI and Document Intelligence."""
|
2 |
+
|
3 |
+
from dataclasses import dataclass, field
from typing import Dict, Optional, List
import logging
from datetime import datetime
import pandas as pd
|
8 |
+
|
9 |
+
logger = logging.getLogger(__name__)
|
10 |
+
|
11 |
+
@dataclass
class TokenCosts:
    """Token prices in USD per 1,000,000 tokens, keyed by model.

    The values are read as class attributes by ``LLMCall`` and as instance
    attributes by ``CostTracker``; both divide token counts by 1_000_000
    before multiplying by these rates.
    """

    # NOTE(review): rates are hard-coded; verify against the current
    # Azure OpenAI price list before relying on the reported costs.
    GPT41: float = 2.0  # USD per 1M input tokens for GPT-4.1
    GPT41_OUTPUT: float = 8.0  # USD per 1M output tokens for GPT-4.1
|
16 |
+
|
17 |
+
@dataclass
class DocumentIntelligenceCosts:
    """Document Intelligence prices in USD per 1,000 pages, by operation.

    ``CostTracker`` looks an operation up by its upper-cased name and falls
    back to ``READ`` when no attribute of that name exists.
    """

    READ: float = 1.50  # USD per 1,000 pages
    LAYOUT: float = 10.00  # USD per 1,000 pages
    CUSTOM: float = 30.00  # USD per 1,000 pages
|
23 |
+
|
24 |
+
@dataclass
class LLMCall:
    """Record of a single LLM call: a label plus its token usage.

    Costs are derived on demand from the token counts using the GPT-4.1
    rates in ``TokenCosts`` (USD per 1,000,000 tokens).
    """

    description: str  # human-readable label for the call
    input_tokens: int  # prompt-side token count
    output_tokens: int  # completion-side token count
    # default_factory stamps each record when it is created; a plain
    # ``datetime.now()`` default would be evaluated once, when the class is
    # defined, and shared by every record.
    timestamp: datetime = field(default_factory=datetime.now)

    @property
    def input_cost(self) -> float:
        """Return the USD cost of the input tokens."""
        return (self.input_tokens / 1_000_000) * TokenCosts.GPT41

    @property
    def output_cost(self) -> float:
        """Return the USD cost of the output tokens."""
        return (self.output_tokens / 1_000_000) * TokenCosts.GPT41_OUTPUT

    @property
    def total_cost(self) -> float:
        """Return the combined USD cost of input and output tokens."""
        return self.input_cost + self.output_cost
|
43 |
+
|
44 |
+
class CostTracker:
    """Tracks costs for Azure OpenAI and Document Intelligence usage.

    Usage is accumulated in two buckets with the same layout:
    ``current_file_costs`` (cleared by ``reset_current_file``) and
    ``total_costs`` (never cleared). The flat counters ``llm_input_tokens``,
    ``llm_output_tokens`` and ``di_pages`` are updated only by
    ``add_llm_tokens`` / ``add_di_pages`` and are not touched by
    ``reset_current_file``.
    """

    def __init__(self):
        self.token_costs = TokenCosts()
        self.di_costs = DocumentIntelligenceCosts()
        self.current_file_costs = self._empty_bucket()
        self.total_costs = self._empty_bucket()
        self.llm_input_tokens = 0
        self.llm_output_tokens = 0
        self.di_pages = 0
        self.llm_calls: List[LLMCall] = []  # Track individual LLM calls

    @staticmethod
    def _empty_bucket() -> Dict:
        """Return a fresh, zeroed usage bucket."""
        return {
            "input_tokens": 0,
            "output_tokens": 0,
            "di_pages": 0,
            "di_operations": {},  # operation name -> page count
        }

    def _record_di_pages(self, operation: str, pages: int) -> None:
        """Add ``pages`` for ``operation`` to both buckets.

        Each bucket is updated independently so that an operation missing
        from the current-file bucket (e.g. after ``reset_current_file``)
        does not zero the count already accumulated in ``total_costs``.
        """
        for bucket in (self.current_file_costs, self.total_costs):
            per_operation = bucket["di_operations"]
            per_operation[operation] = per_operation.get(operation, 0) + pages
            bucket["di_pages"] += pages

    def _summarize(self, bucket: Dict) -> Dict:
        """Build the nested cost report for one usage bucket."""
        input_cost = (bucket["input_tokens"] / 1_000_000) * self.token_costs.GPT41
        output_cost = (bucket["output_tokens"] / 1_000_000) * self.token_costs.GPT41_OUTPUT

        operations = {}
        di_total = 0
        for operation, pages in bucket["di_operations"].items():
            # Unknown operation names are billed at the READ rate.
            rate = getattr(self.di_costs, operation.upper(), self.di_costs.READ)
            cost = (pages / 1000) * rate
            operations[operation] = {"pages": pages, "cost": cost}
            di_total += cost

        return {
            "openai": {
                "input_tokens": bucket["input_tokens"],
                "output_tokens": bucket["output_tokens"],
                "input_cost": input_cost,
                "output_cost": output_cost,
                "total_cost": input_cost + output_cost,
            },
            "document_intelligence": {
                "total_pages": bucket["di_pages"],
                "operations": operations,
                "total_cost": di_total,
            },
        }

    def reset_current_file(self):
        """Reset costs for current file."""
        self.current_file_costs = self._empty_bucket()
        self.llm_calls = []  # Reset LLM calls for new file

    def add_tokens(self, input_tokens: int, output_tokens: int, model: str = "GPT41"):
        """Add tokens for current file and total.

        Unlike ``add_llm_tokens`` this does not append to ``llm_calls`` or
        update ``llm_input_tokens`` / ``llm_output_tokens``; ``model`` is
        only used in the log message.
        """
        for bucket in (self.current_file_costs, self.total_costs):
            bucket["input_tokens"] += input_tokens
            bucket["output_tokens"] += output_tokens

        logger.info(f"Added tokens - Input: {input_tokens}, Output: {output_tokens} for model {model}")

    def add_di_operation(self, operation: str, pages: int):
        """Add Document Intelligence operation costs."""
        self._record_di_pages(operation, pages)
        logger.info(f"Added DI operation - {operation}: {pages} pages")

    def calculate_current_file_costs(self) -> Dict:
        """Calculate costs for current file.

        Returns a dict with ``"openai"`` and ``"document_intelligence"``
        sections; the ``"openai"`` section additionally carries ``"calls"``,
        the attribute dict of every recorded ``LLMCall``.
        """
        costs = self._summarize(self.current_file_costs)
        # Include detailed call information
        costs["openai"]["calls"] = [call.__dict__ for call in self.llm_calls]
        return costs

    def get_total_costs(self) -> Dict:
        """Get total costs across all files.

        Same layout as ``calculate_current_file_costs`` minus ``"calls"``,
        computed from ``total_costs`` including per-operation Document
        Intelligence costs and both ``total_cost`` sums.
        """
        return self._summarize(self.total_costs)

    def add_llm_tokens(self, input_tokens, output_tokens, description: str = "LLM Call"):
        """Add tokens for an LLM call with a description."""
        self.llm_input_tokens += input_tokens
        self.llm_output_tokens += output_tokens
        # Also update the current file and total buckets
        for bucket in (self.current_file_costs, self.total_costs):
            bucket["input_tokens"] += input_tokens
            bucket["output_tokens"] += output_tokens
        # Add to LLM calls list
        self.llm_calls.append(LLMCall(
            description=description,
            input_tokens=input_tokens,
            output_tokens=output_tokens
        ))

    def add_di_pages(self, num_pages):
        """Add processed pages, recorded under the ``"LAYOUT"`` operation."""
        self.di_pages += num_pages
        # Add to LAYOUT operation by default since we're using layout analysis
        self._record_di_pages("LAYOUT", num_pages)

    def total_cost(self):
        """Calculate total cost using the same pricing as calculate_current_file_costs."""
        costs = self.calculate_current_file_costs()
        return costs["openai"]["total_cost"] + costs["document_intelligence"]["total_cost"]

    def get_detailed_costs_table(self) -> pd.DataFrame:
        """Return a DataFrame of detailed costs.

        One row per recorded LLM call followed by a ``TOTAL`` row; cost
        columns are strings formatted as ``$0.0000``. Returns an empty
        DataFrame when no calls have been recorded.
        """
        if not self.llm_calls:
            return pd.DataFrame()

        def as_row(label, tokens_in, tokens_out, cost_in, cost_out):
            return {
                'Description': label,
                'Input Tokens': tokens_in,
                'Output Tokens': tokens_out,
                'Input Cost': f"${cost_in:.4f}",
                'Output Cost': f"${cost_out:.4f}",
                'Total Cost': f"${cost_in + cost_out:.4f}"
            }

        rows = [
            as_row(call.description, call.input_tokens, call.output_tokens,
                   call.input_cost, call.output_cost)
            for call in self.llm_calls
        ]

        # Add total row
        rows.append(as_row(
            'TOTAL',
            sum(call.input_tokens for call in self.llm_calls),
            sum(call.output_tokens for call in self.llm_calls),
            sum(call.input_cost for call in self.llm_calls),
            sum(call.output_cost for call in self.llm_calls),
        ))

        df = pd.DataFrame(rows)

        # Set column order
        df = df[['Description', 'Input Tokens', 'Output Tokens', 'Input Cost', 'Output Cost', 'Total Cost']]

        return df
|
src/services/llm_client.py
CHANGED
@@ -35,12 +35,15 @@ class LLMClient:
|
|
35 |
logger.info(f"API Key length: {len(openai.api_key) if openai.api_key else 0}")
|
36 |
|
37 |
# --------------------------------------------------
|
38 |
-
def responses(self, prompt: str, tools: List[dict] | None = None, **kwargs: Any) -> str:
|
39 |
"""Call the Responses API and return the assistant content as string."""
|
40 |
logger = logging.getLogger(__name__)
|
41 |
logger.info(f"Making request with API version: {openai.api_version}")
|
42 |
logger.info(f"Request URL will be: {openai.api_base}/openai/responses?api-version={openai.api_version}")
|
43 |
|
|
|
|
|
|
|
44 |
resp = openai.responses.create(
|
45 |
input=prompt,
|
46 |
model=self._deployment,
|
@@ -49,7 +52,31 @@ class LLMClient:
|
|
49 |
)
|
50 |
# Log the raw response for debugging
|
51 |
logging.debug(f"LLM raw response: {resp}")
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
# Extract the text content from the response
|
54 |
if hasattr(resp, "output") and isinstance(resp.output, list):
|
55 |
# Handle list of ResponseOutputMessage objects
|
|
|
35 |
logger.info(f"API Key length: {len(openai.api_key) if openai.api_key else 0}")
|
36 |
|
37 |
# --------------------------------------------------
|
38 |
+
def responses(self, prompt: str, tools: List[dict] | None = None, description: str = "LLM Call", **kwargs: Any) -> str:
|
39 |
"""Call the Responses API and return the assistant content as string."""
|
40 |
logger = logging.getLogger(__name__)
|
41 |
logger.info(f"Making request with API version: {openai.api_version}")
|
42 |
logger.info(f"Request URL will be: {openai.api_base}/openai/responses?api-version={openai.api_version}")
|
43 |
|
44 |
+
# Remove ctx from kwargs before passing to openai
|
45 |
+
ctx = kwargs.pop("ctx", None)
|
46 |
+
|
47 |
resp = openai.responses.create(
|
48 |
input=prompt,
|
49 |
model=self._deployment,
|
|
|
52 |
)
|
53 |
# Log the raw response for debugging
|
54 |
logging.debug(f"LLM raw response: {resp}")
|
55 |
+
|
56 |
+
# --- Cost tracking: must be BEFORE any return! ---
|
57 |
+
logger.info(f"LLMClient.responses: ctx is {ctx}")
|
58 |
+
if ctx and "cost_tracker" in ctx:
|
59 |
+
logger.info(f"LLMClient.responses: cost_tracker is {ctx['cost_tracker']}")
|
60 |
+
usage = getattr(resp, "usage", None)
|
61 |
+
if usage:
|
62 |
+
logger.info(f"LLMClient.responses: usage is {usage}")
|
63 |
+
ctx["cost_tracker"].add_llm_tokens(
|
64 |
+
input_tokens=getattr(usage, "input_tokens", 0),
|
65 |
+
output_tokens=getattr(usage, "output_tokens", 0),
|
66 |
+
description=description
|
67 |
+
)
|
68 |
+
logger.info(f"LLMClient.responses: prompt: {prompt[:200]}...") # Log first 200 chars
|
69 |
+
logger.info(f"LLMClient.responses: resp: {str(resp)[:200]}...") # Log first 200 chars
|
70 |
+
if usage:
|
71 |
+
logger.info(f"LLMClient.responses: usage.input_tokens={getattr(usage, 'input_tokens', None)}, usage.output_tokens={getattr(usage, 'output_tokens', None)}, usage.total_tokens={getattr(usage, 'total_tokens', None)}")
|
72 |
+
else:
|
73 |
+
# Fallback: estimate tokens (very rough)
|
74 |
+
ctx["cost_tracker"].add_llm_tokens(
|
75 |
+
input_tokens=len(prompt.split()),
|
76 |
+
output_tokens=len(str(resp).split()),
|
77 |
+
description=description
|
78 |
+
)
|
79 |
+
|
80 |
# Extract the text content from the response
|
81 |
if hasattr(resp, "output") and isinstance(resp.output, list):
|
82 |
# Handle list of ResponseOutputMessage objects
|