levalencia committed · Commit 966ffcd · 1 Parent(s): 2eac01a

Add cost tracking across the Executor, Planner, and FieldMapperAgent. Integrate a CostTracker to monitor LLM and Document Intelligence costs, add logging for cost-related metrics, and surface detailed cost breakdowns in the user interface.
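The flow, in brief: the app creates one CostTracker per run, hands it to the Planner and Executor, and the Executor places it in the shared ctx so agents and the LLMClient can book token and page usage against it; the UI then reads the accumulated totals back. A minimal sketch of that accounting, using only the CostTracker API added in this commit and assuming src/ is on the import path; the token counts, page count, and field name are made-up example values:

from services.cost_tracker import CostTracker

tracker = CostTracker()

# LLMClient.responses() books each Responses API call against the tracker.
tracker.add_llm_tokens(input_tokens=1200, output_tokens=150, description="Execution Plan Generation")
tracker.add_llm_tokens(input_tokens=800, output_tokens=40, description="Field Extraction - invoice_number (Search)")

# PDFAgent reports page counts, recorded as Document Intelligence LAYOUT pages.
tracker.add_di_pages(3)

costs = tracker.calculate_current_file_costs()
print(costs["openai"]["total_cost"])                 # LLM cost for the current file
print(costs["document_intelligence"]["total_cost"])  # DI cost for the current file
print(tracker.total_cost())                          # combined total
print(tracker.get_detailed_costs_table())            # per-call table shown in the UI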

src/agents/__pycache__/field_mapper_agent.cpython-312.pyc CHANGED
Binary files a/src/agents/__pycache__/field_mapper_agent.cpython-312.pyc and b/src/agents/__pycache__/field_mapper_agent.cpython-312.pyc differ
 
src/agents/__pycache__/pdf_agent.cpython-312.pyc CHANGED
Binary files a/src/agents/__pycache__/pdf_agent.cpython-312.pyc and b/src/agents/__pycache__/pdf_agent.cpython-312.pyc differ
 
src/agents/__pycache__/table_agent.cpython-312.pyc CHANGED
Binary files a/src/agents/__pycache__/table_agent.cpython-312.pyc and b/src/agents/__pycache__/table_agent.cpython-312.pyc differ
 
src/agents/field_mapper_agent.py CHANGED
@@ -35,7 +35,25 @@ class FieldMapperAgent(BaseAgent):
         try:
             self.logger.info("Inferring document context...")
             self.logger.debug(f"Using text preview: {text[:500]}...")
-            context = self.llm.responses(prompt, temperature=0.0)
+
+            # Get cost tracker from context
+            cost_tracker = self.ctx.get("cost_tracker") if hasattr(self, 'ctx') else None
+            if cost_tracker:
+                self.logger.info("Cost tracker found in context")
+            else:
+                self.logger.warning("No cost tracker found in context")
+
+            context = self.llm.responses(
+                prompt, temperature=0.0,
+                ctx={"cost_tracker": cost_tracker} if cost_tracker else None,
+                description="Document Context Inference"
+            )
+
+            # Log cost tracking results if available
+            if cost_tracker:
+                self.logger.info(f"Context inference costs - Input tokens: {cost_tracker.llm_input_tokens}, Output tokens: {cost_tracker.llm_output_tokens}")
+                self.logger.info(f"Context inference cost: ${cost_tracker.calculate_current_file_costs()['openai']['total_cost']:.4f}")
+
             self.logger.info(f"Inferred context: {context}")
             return context
         except Exception as e:
@@ -142,7 +160,25 @@ class FieldMapperAgent(BaseAgent):
         try:
             self.logger.info(f"Calling LLM to extract value for field '{field}'")
             self.logger.debug(f"Using prompt: {prompt}")
-            value = self.llm.responses(prompt, temperature=0.0)
+
+            # Get cost tracker from context
+            cost_tracker = self.ctx.get("cost_tracker") if hasattr(self, 'ctx') else None
+            if cost_tracker:
+                self.logger.info("Cost tracker found in context")
+            else:
+                self.logger.warning("No cost tracker found in context")
+
+            value = self.llm.responses(
+                prompt, temperature=0.0,
+                ctx={"cost_tracker": cost_tracker} if cost_tracker else None,
+                description=f"Field Extraction - {field} (Search)"
+            )
+
+            # Log cost tracking results if available
+            if cost_tracker:
+                self.logger.info(f"Field extraction costs - Input tokens: {cost_tracker.llm_input_tokens}, Output tokens: {cost_tracker.llm_output_tokens}")
+                self.logger.info(f"Field extraction cost: ${cost_tracker.calculate_current_file_costs()['openai']['total_cost']:.4f}")
+
             self.logger.debug(f"Raw LLM response: {value}")
 
             if value and value.lower() not in ["none", "null", "n/a"]:
@@ -191,7 +227,25 @@ class FieldMapperAgent(BaseAgent):
 
         try:
             self.logger.info(f"Calling LLM to extract value for field '{field}' from page")
-            value = self.llm.responses(prompt, temperature=0.0)
+
+            # Get cost tracker from context
+            cost_tracker = self.ctx.get("cost_tracker") if hasattr(self, 'ctx') else None
+            if cost_tracker:
+                self.logger.info("Cost tracker found in context")
+            else:
+                self.logger.warning("No cost tracker found in context")
+
+            value = self.llm.responses(
+                prompt, temperature=0.0,
+                ctx={"cost_tracker": cost_tracker} if cost_tracker else None,
+                description=f"Field Extraction - {field} (Page)"
+            )
+
+            # Log cost tracking results if available
+            if cost_tracker:
+                self.logger.info(f"Page extraction costs - Input tokens: {cost_tracker.llm_input_tokens}, Output tokens: {cost_tracker.llm_output_tokens}")
+                self.logger.info(f"Page extraction cost: ${cost_tracker.calculate_current_file_costs()['openai']['total_cost']:.4f}")
+
             self.logger.debug(f"Raw LLM response: {value}")
 
             if value and value.lower() not in ["none", "null", "n/a"]:
src/agents/pdf_agent.py CHANGED
@@ -25,4 +25,10 @@ class PDFAgent(BaseAgent):
         pdf_bytes = pdf_file.read()
         text = self._extract_text(pdf_bytes)
         ctx["text"] = text
+
+        # After extracting pages
+        num_pages = len(fitz.open(stream=pdf_bytes, filetype="pdf"))  # type: ignore[arg-type]
+        if "cost_tracker" in ctx:
+            ctx["cost_tracker"].add_di_pages(num_pages)
+
         return text
src/app.py CHANGED
@@ -13,6 +13,7 @@ from datetime import datetime
 import io
 import sys
 from io import StringIO
+from services.cost_tracker import CostTracker
 
 # Create a custom stream handler to capture logs
 class LogCaptureHandler(logging.StreamHandler):
@@ -317,7 +318,10 @@ else: # page == "Execution"
         doc = fitz.open(stream=pdf_file.getvalue(), filetype="pdf")  # type: ignore[arg-type]
         preview = "\n".join(page.get_text() for page in doc[:10])[:20000]  # first 10 pages, 20k chars
 
-        planner = Planner()
+        # Create a cost tracker for this run
+        cost_tracker = CostTracker()
+
+        planner = Planner(cost_tracker=cost_tracker)
         plan = planner.build_plan(
             pdf_meta={"filename": pdf_file.name},
             doc_preview=preview,
@@ -329,9 +333,28 @@
         st.markdown("---")
 
         with st.spinner("Executing …"):
-            executor = Executor(settings=settings)
+            executor = Executor(settings=settings, cost_tracker=cost_tracker)
             results, logs = executor.run(plan, pdf_file)
 
+            # Get detailed costs
+            costs = executor.cost_tracker.calculate_current_file_costs()
+            model_cost = costs["openai"]["total_cost"]
+            di_cost = costs["document_intelligence"]["total_cost"]
+
+            # Display detailed costs table
+            st.subheader("Detailed Costs")
+            costs_df = executor.cost_tracker.get_detailed_costs_table()
+            st.dataframe(costs_df, use_container_width=True)
+
+            st.info(
+                f"LLM input tokens: {executor.cost_tracker.llm_input_tokens}, "
+                f"LLM output tokens: {executor.cost_tracker.llm_output_tokens}, "
+                f"DI pages: {executor.cost_tracker.di_pages}, "
+                f"Model cost: ${model_cost:.4f}, "
+                f"DI cost: ${di_cost:.4f}, "
+                f"Total cost: ${model_cost + di_cost:.4f}"
+            )
+
             # Add detailed logging about what executor returned
             logger.info(f"Executor returned results of type: {type(results)}")
             logger.info(f"Results content: {results}")
src/orchestrator/__pycache__/executor.cpython-312.pyc CHANGED
Binary files a/src/orchestrator/__pycache__/executor.cpython-312.pyc and b/src/orchestrator/__pycache__/executor.cpython-312.pyc differ
 
src/orchestrator/__pycache__/planner.cpython-312.pyc CHANGED
Binary files a/src/orchestrator/__pycache__/planner.cpython-312.pyc and b/src/orchestrator/__pycache__/planner.cpython-312.pyc differ
 
src/orchestrator/executor.py CHANGED
@@ -16,9 +16,12 @@ from agents.semantic_reasoner import SemanticReasonerAgent
 from agents.confidence_scorer import ConfidenceScorer
 from agents.query_generator import QueryGenerator
 
+# Add import for CostTracker
+from services.cost_tracker import CostTracker
+
 
 class Executor:
-    def __init__(self, settings):
+    def __init__(self, settings, cost_tracker=None):
         # map name → instance
         self.tools = {
             "PDFAgent": PDFAgent(),
@@ -32,6 +35,7 @@ class Executor:
 
         self.logs: List[Dict[str, Any]] = []
         self.logger = logging.getLogger(__name__)
+        self.cost_tracker = cost_tracker or CostTracker()
 
     # ---------------------------------------------------------
     def run(self, plan: Dict[str, Any], pdf_file) -> tuple[pd.DataFrame, List[Dict[str, Any]]]:  # noqa: D401
@@ -47,6 +51,7 @@
             "results": [],
             "conf": 1.0,
             "pdf_meta": plan.get("pdf_meta", {}),  # Include the plan's metadata
+            "cost_tracker": self.cost_tracker,
         }
 
         try:
@@ -112,6 +117,12 @@
         else:
             df = pd.DataFrame()
 
+        # At the end, log the costs
+        self.logger.info(f"Total LLM input tokens: {self.cost_tracker.llm_input_tokens}")
+        self.logger.info(f"Total LLM output tokens: {self.cost_tracker.llm_output_tokens}")
+        self.logger.info(f"Total DI pages: {self.cost_tracker.di_pages}")
+        self.logger.info(f"Total cost: ${self.cost_tracker.total_cost():.4f}")
+
         return df, self.logs
 
     # ---------------------------------------------------------
src/orchestrator/planner.py CHANGED
@@ -11,6 +11,7 @@ import yaml
 
 from services.llm_client import LLMClient
 from config.settings import settings
+from services.cost_tracker import CostTracker
 
 
 _PROMPTS_FILE = Path(__file__).parent.parent / "config" / "prompts.yaml"
@@ -23,9 +24,10 @@ logger = logging.getLogger(__name__)
 class Planner:
     """Generate a plan with the Responses API; fall back to a static template if parsing fails."""
 
-    def __init__(self) -> None:
+    def __init__(self, cost_tracker=None) -> None:
         self.prompt_template = self._load_prompt("planner")
         self.llm = LLMClient(settings)
+        self.cost_tracker = cost_tracker or CostTracker()
         logger.info("Planner initialized with prompt template")
 
     # --------------------------------------------------
@@ -53,7 +55,12 @@
 
         try:
             logger.info("Calling LLM to generate plan")
-            raw = self.llm.responses(prompt, temperature=0.0)
+            raw = self.llm.responses(
+                prompt,
+                temperature=0.0,
+                ctx={"cost_tracker": self.cost_tracker},
+                description="Execution Plan Generation"
+            )
             logger.debug(f"Raw LLM response: {raw}")
 
             try:
src/services/__pycache__/llm_client.cpython-312.pyc CHANGED
Binary files a/src/services/__pycache__/llm_client.cpython-312.pyc and b/src/services/__pycache__/llm_client.cpython-312.pyc differ
 
src/services/cost_tracker.py ADDED
@@ -0,0 +1,221 @@
+"""Cost tracking service for Azure OpenAI and Document Intelligence."""
+
+from dataclasses import dataclass, field
+from typing import Dict, Optional, List
+import logging
+from datetime import datetime
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class TokenCosts:
+    """Costs per 1M tokens for different models."""
+    GPT41: float = 2.0  # $2 per 1M input tokens for GPT-4.1
+    GPT41_OUTPUT: float = 8.0  # $8 per 1M output tokens for GPT-4.1
+
+@dataclass
+class DocumentIntelligenceCosts:
+    """Costs for Document Intelligence."""
+    READ: float = 1.5  # $1.5 per 1,000 pages
+    LAYOUT: float = 10.0  # $10 per 1,000 pages
+    CUSTOM: float = 30.0  # $30 per 1,000 pages
+
+@dataclass
+class LLMCall:
+    """Represents a single LLM call with its details."""
+    description: str
+    input_tokens: int
+    output_tokens: int
+    timestamp: datetime = field(default_factory=datetime.now)
+
+    @property
+    def input_cost(self) -> float:
+        return (self.input_tokens / 1_000_000) * TokenCosts.GPT41
+
+    @property
+    def output_cost(self) -> float:
+        return (self.output_tokens / 1_000_000) * TokenCosts.GPT41_OUTPUT
+
+    @property
+    def total_cost(self) -> float:
+        return self.input_cost + self.output_cost
+
+class CostTracker:
+    """Tracks costs for Azure OpenAI and Document Intelligence usage."""
+
+    def __init__(self):
+        self.token_costs = TokenCosts()
+        self.di_costs = DocumentIntelligenceCosts()
+        self.current_file_costs = {
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "di_pages": 0,
+            "di_operations": {}  # Track different DI operations
+        }
+        self.total_costs = {
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "di_pages": 0,
+            "di_operations": {}
+        }
+        self.llm_input_tokens = 0
+        self.llm_output_tokens = 0
+        self.di_pages = 0
+        self.llm_calls: List[LLMCall] = []  # Track individual LLM calls
+
+    def reset_current_file(self):
+        """Reset costs for current file."""
+        self.current_file_costs = {
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "di_pages": 0,
+            "di_operations": {}
+        }
+        self.llm_calls = []  # Reset LLM calls for new file
+
+    def add_tokens(self, input_tokens: int, output_tokens: int, model: str = "GPT41"):
+        """Add tokens for current file and total."""
+        self.current_file_costs["input_tokens"] += input_tokens
+        self.current_file_costs["output_tokens"] += output_tokens
+        self.total_costs["input_tokens"] += input_tokens
+        self.total_costs["output_tokens"] += output_tokens
+
+        logger.info(f"Added tokens - Input: {input_tokens}, Output: {output_tokens} for model {model}")
+
+    def add_di_operation(self, operation: str, pages: int):
+        """Add Document Intelligence operation costs."""
+        if operation not in self.current_file_costs["di_operations"]:
+            self.current_file_costs["di_operations"][operation] = 0
+            self.total_costs["di_operations"][operation] = 0
+
+        self.current_file_costs["di_operations"][operation] += pages
+        self.current_file_costs["di_pages"] += pages
+        self.total_costs["di_operations"][operation] += pages
+        self.total_costs["di_pages"] += pages
+
+        logger.info(f"Added DI operation - {operation}: {pages} pages")
+
+    def calculate_current_file_costs(self) -> Dict:
+        """Calculate costs for current file."""
+        costs = {
+            "openai": {
+                "input_tokens": self.current_file_costs["input_tokens"],
+                "output_tokens": self.current_file_costs["output_tokens"],
+                "input_cost": (self.current_file_costs["input_tokens"] / 1_000_000) * self.token_costs.GPT41,
+                "output_cost": (self.current_file_costs["output_tokens"] / 1_000_000) * self.token_costs.GPT41_OUTPUT,
+                "total_cost": 0,
+                "calls": [call.__dict__ for call in self.llm_calls]  # Include detailed call information
+            },
+            "document_intelligence": {
+                "total_pages": self.current_file_costs["di_pages"],
+                "operations": {},
+                "total_cost": 0
+            }
+        }
+
+        # Calculate OpenAI total cost
+        costs["openai"]["total_cost"] = costs["openai"]["input_cost"] + costs["openai"]["output_cost"]
+
+        # Calculate Document Intelligence costs
+        for operation, pages in self.current_file_costs["di_operations"].items():
+            cost = (pages / 1000) * getattr(self.di_costs, operation.upper(), self.di_costs.READ)
+            costs["document_intelligence"]["operations"][operation] = {
+                "pages": pages,
+                "cost": cost
+            }
+            costs["document_intelligence"]["total_cost"] += cost
+
+        return costs
+
+    def get_total_costs(self) -> Dict:
+        """Get total costs across all files."""
+        return {
+            "openai": {
+                "input_tokens": self.total_costs["input_tokens"],
+                "output_tokens": self.total_costs["output_tokens"],
+                "input_cost": (self.total_costs["input_tokens"] / 1_000_000) * self.token_costs.GPT41,
+                "output_cost": (self.total_costs["output_tokens"] / 1_000_000) * self.token_costs.GPT41_OUTPUT,
+                "total_cost": 0
+            },
+            "document_intelligence": {
+                "total_pages": self.total_costs["di_pages"],
+                "operations": {},
+                "total_cost": 0
+            }
+        }
+
+    def add_llm_tokens(self, input_tokens, output_tokens, description: str = "LLM Call"):
+        """Add tokens for an LLM call with a description."""
+        self.llm_input_tokens += input_tokens
+        self.llm_output_tokens += output_tokens
+        # Also update the current file costs
+        self.current_file_costs["input_tokens"] += input_tokens
+        self.current_file_costs["output_tokens"] += output_tokens
+        self.total_costs["input_tokens"] += input_tokens
+        self.total_costs["output_tokens"] += output_tokens
+        # Add to LLM calls list
+        self.llm_calls.append(LLMCall(
+            description=description,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens
+        ))
+
+    def add_di_pages(self, num_pages):
+        self.di_pages += num_pages
+        # Also update the current file costs
+        self.current_file_costs["di_pages"] += num_pages
+        self.total_costs["di_pages"] += num_pages
+        # Add to LAYOUT operation by default since we're using layout analysis
+        if "LAYOUT" not in self.current_file_costs["di_operations"]:
+            self.current_file_costs["di_operations"]["LAYOUT"] = 0
+            self.total_costs["di_operations"]["LAYOUT"] = 0
+        self.current_file_costs["di_operations"]["LAYOUT"] += num_pages
+        self.total_costs["di_operations"]["LAYOUT"] += num_pages
+
+    def total_cost(self):
+        """Calculate total cost using the same pricing as calculate_current_file_costs."""
+        costs = self.calculate_current_file_costs()
+        return costs["openai"]["total_cost"] + costs["document_intelligence"]["total_cost"]
+
+    def get_detailed_costs_table(self) -> pd.DataFrame:
+        """Return a DataFrame of detailed costs."""
+        if not self.llm_calls:
+            return pd.DataFrame()
+
+        # Create list of dictionaries for DataFrame
+        rows = []
+        for call in self.llm_calls:
+            rows.append({
+                'Description': call.description,
+                'Input Tokens': call.input_tokens,
+                'Output Tokens': call.output_tokens,
+                'Input Cost': f"${call.input_cost:.4f}",
+                'Output Cost': f"${call.output_cost:.4f}",
+                'Total Cost': f"${call.total_cost:.4f}"
+            })
+
+        # Calculate totals
+        total_input = sum(call.input_tokens for call in self.llm_calls)
+        total_output = sum(call.output_tokens for call in self.llm_calls)
+        total_input_cost = sum(call.input_cost for call in self.llm_calls)
+        total_output_cost = sum(call.output_cost for call in self.llm_calls)
+        total_cost = total_input_cost + total_output_cost
+
+        # Add total row
+        rows.append({
+            'Description': 'TOTAL',
+            'Input Tokens': total_input,
+            'Output Tokens': total_output,
+            'Input Cost': f"${total_input_cost:.4f}",
+            'Output Cost': f"${total_output_cost:.4f}",
+            'Total Cost': f"${total_cost:.4f}"
+        })
+
+        # Create DataFrame
+        df = pd.DataFrame(rows)
+
+        # Set column order
+        df = df[['Description', 'Input Tokens', 'Output Tokens', 'Input Cost', 'Output Cost', 'Total Cost']]
+
+        return df
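For reference, the arithmetic behind the rates above, as encoded in TokenCosts and DocumentIntelligenceCosts: OpenAI usage is billed per million tokens and Document Intelligence per thousand pages. A quick worked example with illustrative volumes, not measured data:

# Worked example of the pricing encoded in TokenCosts / DocumentIntelligenceCosts
# (made-up volumes for illustration only).
input_tokens, output_tokens, layout_pages = 50_000, 4_000, 12

openai_cost = (input_tokens / 1_000_000) * 2.0 + (output_tokens / 1_000_000) * 8.0  # $0.100 + $0.032
di_cost = (layout_pages / 1_000) * 10.0                                             # $0.120
print(f"${openai_cost + di_cost:.4f}")                                              # $0.2520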
src/services/llm_client.py CHANGED
@@ -35,12 +35,15 @@ class LLMClient:
         logger.info(f"API Key length: {len(openai.api_key) if openai.api_key else 0}")
 
     # --------------------------------------------------
-    def responses(self, prompt: str, tools: List[dict] | None = None, **kwargs: Any) -> str:
+    def responses(self, prompt: str, tools: List[dict] | None = None, description: str = "LLM Call", **kwargs: Any) -> str:
         """Call the Responses API and return the assistant content as string."""
         logger = logging.getLogger(__name__)
         logger.info(f"Making request with API version: {openai.api_version}")
         logger.info(f"Request URL will be: {openai.api_base}/openai/responses?api-version={openai.api_version}")
 
+        # Remove ctx from kwargs before passing to openai
+        ctx = kwargs.pop("ctx", None)
+
         resp = openai.responses.create(
             input=prompt,
             model=self._deployment,
@@ -49,7 +52,31 @@
         )
         # Log the raw response for debugging
         logging.debug(f"LLM raw response: {resp}")
-
+
+        # --- Cost tracking: must be BEFORE any return! ---
+        logger.info(f"LLMClient.responses: ctx is {ctx}")
+        if ctx and "cost_tracker" in ctx:
+            logger.info(f"LLMClient.responses: cost_tracker is {ctx['cost_tracker']}")
+            usage = getattr(resp, "usage", None)
+            if usage:
+                logger.info(f"LLMClient.responses: usage is {usage}")
+                ctx["cost_tracker"].add_llm_tokens(
+                    input_tokens=getattr(usage, "input_tokens", 0),
+                    output_tokens=getattr(usage, "output_tokens", 0),
+                    description=description
+                )
+            logger.info(f"LLMClient.responses: prompt: {prompt[:200]}...")  # Log first 200 chars
+            logger.info(f"LLMClient.responses: resp: {str(resp)[:200]}...")  # Log first 200 chars
+            if usage:
+                logger.info(f"LLMClient.responses: usage.input_tokens={getattr(usage, 'input_tokens', None)}, usage.output_tokens={getattr(usage, 'output_tokens', None)}, usage.total_tokens={getattr(usage, 'total_tokens', None)}")
+            else:
+                # Fallback: estimate tokens (very rough)
+                ctx["cost_tracker"].add_llm_tokens(
+                    input_tokens=len(prompt.split()),
+                    output_tokens=len(str(resp).split()),
+                    description=description
+                )
+
         # Extract the text content from the response
         if hasattr(resp, "output") and isinstance(resp.output, list):
             # Handle list of ResponseOutputMessage objects