bibibi12345 committed
Commit ebec74a · 1 Parent(s): 9f566ef

Files changed (2):
  1. app/api_helpers.py +27 -0
  2. app/openai_handler.py +31 -8
app/api_helpers.py CHANGED
@@ -91,6 +91,33 @@ class StreamingReasoningProcessor:
             self.inside_tag = False
 
         return processed_content, current_reasoning
+
+    def flush_remaining(self) -> tuple[str, str]:
+        """
+        Flush any remaining content in the buffer when the stream ends.
+
+        Returns:
+            A tuple of:
+            - remaining_content: Any content that was buffered but not yet output
+            - remaining_reasoning: Any incomplete reasoning if we were inside a tag
+        """
+        remaining_content = ""
+        remaining_reasoning = ""
+
+        if self.tag_buffer and not self.inside_tag:
+            # If we have buffered content and we're not inside a tag,
+            # it's safe to output all of it
+            remaining_content = self.tag_buffer
+            self.tag_buffer = ""
+        elif self.inside_tag:
+            # If we're inside a tag when the stream ends, we have an unclosed tag
+            # Return the partial content as regular content (including the opening tag)
+            remaining_content = f"<{self.tag_name}>{self.reasoning_buffer}{self.tag_buffer}"
+            self.reasoning_buffer = ""
+            self.tag_buffer = ""
+            self.inside_tag = False
+
+        return remaining_content, remaining_reasoning
 
 
 def process_streaming_content_with_reasoning_tags(
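For reference, a minimal sketch of how the new `flush_remaining` pairs with `process_chunk` once a stream ends. The tag name, chunk boundaries, and printed results below are hypothetical, and it assumes `process_chunk` returns a `(visible_content, extracted_reasoning)` pair per chunk, as the handler code suggests:

```python
from app.api_helpers import StreamingReasoningProcessor

# Hypothetical usage sketch: "think" is a made-up tag name (the real handler
# passes VERTEX_REASONING_TAG), and the chunks are contrived so the closing
# tag never arrives, exercising the unclosed-tag path.
processor = StreamingReasoningProcessor("think")

content_parts, reasoning_parts = [], []
for piece in ["Hello <think>step 1,", " step 2"]:  # stream ends mid-tag
    content, reasoning = processor.process_chunk(piece)
    content_parts.append(content)
    reasoning_parts.append(reasoning)

# The stream ended without "</think>", so flush_remaining returns the
# partial reasoning as regular content, opening tag included, and resets
# the processor's buffers.
remaining_content, remaining_reasoning = processor.flush_remaining()
content_parts.append(remaining_content)

print("".join(content_parts))  # e.g. "Hello <think>step 1, step 2"
print(remaining_reasoning)     # "" - unclosed reasoning comes back as content
```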
app/openai_handler.py CHANGED
@@ -120,8 +120,10 @@ class OpenAIDirectHandler:
 
             # Create processor for tag-based extraction across chunks
             reasoning_processor = StreamingReasoningProcessor(VERTEX_REASONING_TAG)
+            chunk_count = 0
 
             async for chunk in stream_response:
+                chunk_count += 1
                 try:
                     chunk_as_dict = chunk.model_dump(exclude_unset=True, exclude_none=True)
 
@@ -134,11 +136,15 @@
                             del delta['extra_content']
 
                     content = delta.get('content', '')
-                    print(content)
                     if content:
+                        print(f"DEBUG: Chunk {chunk_count} - Raw content: '{content}'")
                         # Use the processor to extract reasoning
                         processed_content, current_reasoning = reasoning_processor.process_chunk(content)
 
+                        # Debug logging for processing results
+                        if processed_content or current_reasoning:
+                            print(f"DEBUG: Chunk {chunk_count} - Processed content: '{processed_content}', Reasoning: '{current_reasoning[:50] + '...' if len(current_reasoning) > 50 else current_reasoning}'")
+
                         # Update delta with processed content
                         if current_reasoning:
                             delta['reasoning_content'] = current_reasoning
@@ -159,20 +165,37 @@
                         yield "data: [DONE]\n\n"
                         return
 
-            # Handle any remaining buffer content
-            if reasoning_processor.tag_buffer and not reasoning_processor.inside_tag:
-                # Output any remaining content
+            # Debug logging for buffer state and chunk count
+            print(f"DEBUG: Stream ended after {chunk_count} chunks. Buffer state - tag_buffer: '{reasoning_processor.tag_buffer}', "
+                  f"inside_tag: {reasoning_processor.inside_tag}, "
+                  f"reasoning_buffer: '{reasoning_processor.reasoning_buffer[:50] + '...' if reasoning_processor.reasoning_buffer else ''}'")
+
+            # Flush any remaining buffered content
+            remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
+
+            if remaining_content:
+                print(f"DEBUG: Flushing remaining content: '{remaining_content}'")
                 final_chunk = {
                     "id": f"chatcmpl-{int(time.time())}",
                     "object": "chat.completion.chunk",
                     "created": int(time.time()),
                     "model": request.model,
-                    "choices": [{"index": 0, "delta": {"content": reasoning_processor.tag_buffer}, "finish_reason": None}]
+                    "choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
                 }
                 yield f"data: {json.dumps(final_chunk)}\n\n"
-            elif reasoning_processor.inside_tag and reasoning_processor.reasoning_buffer:
-                # We were inside a tag but never found the closing tag
-                print(f"WARNING: Unclosed reasoning tag detected. Partial reasoning: {reasoning_processor.reasoning_buffer[:100]}...")
+
+            # Send a proper finish reason chunk
+            finish_chunk = {
+                "id": f"chatcmpl-{int(time.time())}",
+                "object": "chat.completion.chunk",
+                "created": int(time.time()),
+                "model": request.model,
+                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
+            }
+            yield f"data: {json.dumps(finish_chunk)}\n\n"
+
+            # Note: remaining_reasoning is not used here since incomplete reasoning
+            # is treated as regular content when tags are unclosed
 
             yield "data: [DONE]\n\n"
 
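For clarity, a self-contained sketch of the end-of-stream SSE frames the updated handler emits: a flushed content chunk (only when something was still buffered), an explicit `finish_reason` chunk, then the `[DONE]` sentinel. The model name and leftover text are placeholders:

```python
import json
import time

model = "gemini-2.5-pro"    # placeholder for request.model
remaining_content = "tail"  # placeholder for flush_remaining() output

def sse(payload: dict) -> str:
    # Each SSE frame is a JSON payload on a "data:" line followed by a blank line.
    return f"data: {json.dumps(payload)}\n\n"

frames = []
if remaining_content:
    frames.append(sse({
        "id": f"chatcmpl-{int(time.time())}",
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}],
    }))
frames.append(sse({
    "id": f"chatcmpl-{int(time.time())}",
    "object": "chat.completion.chunk",
    "created": int(time.time()),
    "model": model,
    "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
}))
frames.append("data: [DONE]\n\n")
print("".join(frames), end="")
```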