| """ | |
| Support for gpt model family | |
| """ | |
| from typing import ( | |
| TYPE_CHECKING, | |
| Any, | |
| AsyncIterator, | |
| Iterator, | |
| List, | |
| Optional, | |
| Union, | |
| cast, | |
| ) | |
| import httpx | |
| import litellm | |
| from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import ( | |
| _extract_reasoning_content, | |
| _handle_invalid_parallel_tool_calls, | |
| _should_convert_tool_call_to_json_mode, | |
| ) | |
| from litellm.litellm_core_utils.prompt_templates.common_utils import get_tool_call_names | |
| from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator | |
| from litellm.llms.base_llm.base_utils import BaseLLMModelInfo | |
| from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException | |
| from litellm.secret_managers.main import get_secret_str | |
| from litellm.types.llms.openai import ( | |
| AllMessageValues, | |
| ChatCompletionFileObject, | |
| ChatCompletionFileObjectFile, | |
| ChatCompletionImageObject, | |
| ChatCompletionImageUrlObject, | |
| OpenAIChatCompletionChoices, | |
| ) | |
| from litellm.types.utils import ( | |
| ChatCompletionMessageToolCall, | |
| Choices, | |
| Function, | |
| Message, | |
| ModelResponse, | |
| ModelResponseStream, | |
| ) | |
| from litellm.utils import convert_to_model_response_object | |
| from ..common_utils import OpenAIError | |
| if TYPE_CHECKING: | |
| from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj | |
| LiteLLMLoggingObj = _LiteLLMLoggingObj | |
| else: | |
| LiteLLMLoggingObj = Any | |


class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
    """
    Reference: https://platform.openai.com/docs/api-reference/chat/create

    The class `OpenAIGPTConfig` provides configuration for OpenAI's Chat API interface. Below are the parameters:

    - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.

    - `function_call` (string or object): This optional parameter controls how the model calls functions.

    - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.

    - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.

    - `max_tokens` (integer or null): This optional parameter sets the maximum number of tokens to generate in the chat completion.

    - `n` (integer or null): This optional parameter sets how many chat completion choices to generate for each input message.

    - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.

    - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.

    - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.

    - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
    """

    frequency_penalty: Optional[int] = None
    function_call: Optional[Union[str, dict]] = None
    functions: Optional[list] = None
    logit_bias: Optional[dict] = None
    max_tokens: Optional[int] = None
    n: Optional[int] = None
    presence_penalty: Optional[int] = None
    stop: Optional[Union[str, list]] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    response_format: Optional[dict] = None

    def __init__(
        self,
        frequency_penalty: Optional[int] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        response_format: Optional[dict] = None,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return super().get_config()

    def get_supported_openai_params(self, model: str) -> list:
        base_params = [
            "frequency_penalty",
            "logit_bias",
            "logprobs",
            "top_logprobs",
            "max_tokens",
            "max_completion_tokens",
            "modalities",
            "prediction",
            "n",
            "presence_penalty",
            "seed",
            "stop",
            "stream",
            "stream_options",
            "temperature",
            "top_p",
            "tools",
            "tool_choice",
            "function_call",
            "functions",
            "max_retries",
            "extra_headers",
            "parallel_tool_calls",
            "audio",
        ]  # works across all models
        model_specific_params = []
        if (
            model != "gpt-3.5-turbo-16k" and model != "gpt-4"
        ):  # gpt-4 does not support 'response_format'
            model_specific_params.append("response_format")
        if (
            model in litellm.open_ai_chat_completion_models
        ) or model in litellm.open_ai_text_completion_models:
            model_specific_params.append(
                "user"
            )  # 'user' is not a param supported by all openai-compatible endpoints - e.g. azure ai
        return base_params + model_specific_params

    def _map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        If any supported_openai_params are in non_default_params, add them to optional_params, so they are used in the API call.

        Args:
            non_default_params (dict): Non-default parameters to filter.
            optional_params (dict): Optional parameters to update.
            model (str): Model name for parameter support check.

        Returns:
            dict: Updated optional_params with supported non-default parameters.
        """
        supported_openai_params = self.get_supported_openai_params(model)
        for param, value in non_default_params.items():
            if param in supported_openai_params:
                optional_params[param] = value
        return optional_params

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        return self._map_openai_params(
            non_default_params=non_default_params,
            optional_params=optional_params,
            model=model,
            drop_params=drop_params,
        )

    def _transform_messages(
        self, messages: List[AllMessageValues], model: str
    ) -> List[AllMessageValues]:
        """OpenAI no longer supports image_url as a string, so we need to convert it to a dict"""
        for message in messages:
            message_content = message.get("content")
            if message_content and isinstance(message_content, list):
                for content_item in message_content:
                    litellm_specific_params = {"format"}
                    if content_item.get("type") == "image_url":
                        content_item = cast(ChatCompletionImageObject, content_item)
                        if isinstance(content_item["image_url"], str):
                            content_item["image_url"] = {
                                "url": content_item["image_url"],
                            }
                        elif isinstance(content_item["image_url"], dict):
                            new_image_url_obj = ChatCompletionImageUrlObject(
                                **{  # type: ignore
                                    k: v
                                    for k, v in content_item["image_url"].items()
                                    if k not in litellm_specific_params
                                }
                            )
                            content_item["image_url"] = new_image_url_obj
                    elif content_item.get("type") == "file":
                        content_item = cast(ChatCompletionFileObject, content_item)
                        file_obj = content_item["file"]
                        new_file_obj = ChatCompletionFileObjectFile(
                            **{  # type: ignore
                                k: v
                                for k, v in file_obj.items()
                                if k not in litellm_specific_params
                            }
                        )
                        content_item["file"] = new_file_obj
        return messages
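    # Illustrative example (assumed values, not from the original source): a content item like
    #   {"type": "image_url", "image_url": "https://example.com/cat.png"}
    # is rewritten by _transform_messages to
    #   {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}
    # and litellm-specific keys such as "format" are stripped from dict-valued image/file fields.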

    def transform_request(
        self,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """
        Transform the overall request to be sent to the API.

        Returns:
            dict: The transformed request. Sent as the body of the API call.
        """
        messages = self._transform_messages(messages=messages, model=model)
        return {
            "model": model,
            "messages": messages,
            **optional_params,
        }
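    # Illustrative request body produced by transform_request (hypothetical values):
    #   {"model": "gpt-4o", "messages": [...], "temperature": 0.2, "stream": True}
    # i.e. the transformed messages plus any mapped optional params, spread at the top level.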

    def _passed_in_tools(self, optional_params: dict) -> bool:
        return optional_params.get("tools", None) is not None

    def _check_and_fix_if_content_is_tool_call(
        self, content: str, optional_params: dict
    ) -> Optional[ChatCompletionMessageToolCall]:
        """
        Check whether the content is a serialized tool call and, if so, return it as a `ChatCompletionMessageToolCall`.
        """
        import json

        if not self._passed_in_tools(optional_params):
            return None
        tool_call_names = get_tool_call_names(optional_params.get("tools", []))
        try:
            json_content = json.loads(content)
            if (
                json_content.get("type") == "function"
                and json_content.get("name") in tool_call_names
            ):
                return ChatCompletionMessageToolCall(
                    function=Function(
                        name=json_content.get("name"),
                        arguments=json_content.get("arguments"),
                    )
                )
        except Exception:
            return None
        return None
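    # Illustrative example (assumed content, not from the original source): when tools were
    # passed in, an assistant message whose text content is the JSON string
    #   '{"type": "function", "name": "get_weather", "arguments": "{\"city\": \"Paris\"}"}'
    # is recognized as a call to "get_weather"; _transform_choices (below) then drops the text
    # content in favour of a proper tool_calls entry.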

    def _get_finish_reason(self, message: Message, received_finish_reason: str) -> str:
        if message.tool_calls is not None:
            return "tool_calls"
        else:
            return received_finish_reason

    def _transform_choices(
        self,
        choices: List[OpenAIChatCompletionChoices],
        json_mode: Optional[bool] = None,
        optional_params: Optional[dict] = None,
    ) -> List[Choices]:
        transformed_choices = []
        for choice in choices:
            ## HANDLE JSON MODE - anthropic returns a single function call
            tool_calls = choice["message"].get("tool_calls", None)
            new_tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
            message_content = choice["message"].get("content", None)
            if tool_calls is not None:
                _openai_tool_calls = []
                for _tc in tool_calls:
                    _openai_tc = ChatCompletionMessageToolCall(**_tc)  # type: ignore
                    _openai_tool_calls.append(_openai_tc)
                fixed_tool_calls = _handle_invalid_parallel_tool_calls(
                    _openai_tool_calls
                )
                if fixed_tool_calls is not None:
                    new_tool_calls = fixed_tool_calls
            elif (
                optional_params is not None
                and message_content
                and isinstance(message_content, str)
            ):
                new_tool_call = self._check_and_fix_if_content_is_tool_call(
                    message_content, optional_params
                )
                if new_tool_call is not None:
                    choice["message"]["content"] = None  # remove the content
                    new_tool_calls = [new_tool_call]

            translated_message: Optional[Message] = None
            finish_reason: Optional[str] = None
            if new_tool_calls and _should_convert_tool_call_to_json_mode(
                tool_calls=new_tool_calls,
                convert_tool_call_to_json_mode=json_mode,
            ):
                # to support response_format on claude models
                json_mode_content_str: Optional[str] = (
                    str(new_tool_calls[0]["function"].get("arguments", "")) or None
                )
                if json_mode_content_str is not None:
                    translated_message = Message(content=json_mode_content_str)
                    finish_reason = "stop"
            if translated_message is None:
                ## get the reasoning content
                (
                    reasoning_content,
                    content_str,
                ) = _extract_reasoning_content(cast(dict, choice["message"]))
                translated_message = Message(
                    role="assistant",
                    content=content_str,
                    reasoning_content=reasoning_content,
                    thinking_blocks=None,
                    tool_calls=new_tool_calls,
                )
            if finish_reason is None:
                finish_reason = choice["finish_reason"]
            translated_choice = Choices(
                finish_reason=finish_reason,
                index=choice["index"],
                message=translated_message,
                logprobs=None,
                enhancements=None,
            )
            translated_choice.finish_reason = self._get_finish_reason(
                translated_message, choice["finish_reason"]
            )
            transformed_choices.append(translated_choice)
        return transformed_choices

    def transform_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: ModelResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        """
        Transform the response from the API.

        Returns:
            ModelResponse: The transformed response.
        """
        ## LOGGING
        logging_obj.post_call(
            input=messages,
            api_key=api_key,
            original_response=raw_response.text,
            additional_args={"complete_input_dict": request_data},
        )

        ## RESPONSE OBJECT
        try:
            completion_response = raw_response.json()
        except Exception as e:
            response_headers = getattr(raw_response, "headers", None)
            raise OpenAIError(
                message="Unable to get json response - {}, Original Response: {}".format(
                    str(e), raw_response.text
                ),
                status_code=raw_response.status_code,
                headers=response_headers,
            )
        raw_response_headers = dict(raw_response.headers)

        final_response_obj = convert_to_model_response_object(
            response_object=completion_response,
            model_response_object=model_response,
            hidden_params={"headers": raw_response_headers},
            _response_headers=raw_response_headers,
        )
        return cast(ModelResponse, final_response_obj)

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        return OpenAIError(
            status_code=status_code,
            message=error_message,
            headers=cast(httpx.Headers, headers),
        )

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        Get the complete URL for the API call.

        Returns:
            str: The complete URL for the API call.
        """
        if api_base is None:
            api_base = "https://api.openai.com"
        endpoint = "chat/completions"

        # Remove trailing slash from api_base if present
        api_base = api_base.rstrip("/")

        # Check if the endpoint is already in the api_base
        if endpoint in api_base:
            return api_base

        return f"{api_base}/{endpoint}"

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        if api_key is not None:
            headers["Authorization"] = f"Bearer {api_key}"

        # Ensure Content-Type is set to application/json
        if "content-type" not in headers and "Content-Type" not in headers:
            headers["Content-Type"] = "application/json"
        return headers

    def get_models(
        self, api_key: Optional[str] = None, api_base: Optional[str] = None
    ) -> List[str]:
        """
        Calls OpenAI's `/v1/models` endpoint and returns the list of models.
        """
        if api_base is None:
            api_base = "https://api.openai.com"
        if api_key is None:
            api_key = get_secret_str("OPENAI_API_KEY")

        response = litellm.module_level_client.get(
            url=f"{api_base}/v1/models",
            headers={"Authorization": f"Bearer {api_key}"},
        )

        if response.status_code != 200:
            raise Exception(f"Failed to get models: {response.text}")

        models = response.json()["data"]
        return [model["id"] for model in models]

    @staticmethod
    def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
        return (
            api_key
            or litellm.api_key
            or litellm.openai_key
            or get_secret_str("OPENAI_API_KEY")
        )

    @staticmethod
    def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
        return (
            api_base
            or litellm.api_base
            or get_secret_str("OPENAI_BASE_URL")
            or get_secret_str("OPENAI_API_BASE")
            or "https://api.openai.com/v1"
        )

    @staticmethod
    def get_base_model(model: Optional[str] = None) -> Optional[str]:
        return model

    def get_model_response_iterator(
        self,
        streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
        sync_stream: bool,
        json_mode: Optional[bool] = False,
    ) -> Any:
        return OpenAIChatCompletionStreamingHandler(
            streaming_response=streaming_response,
            sync_stream=sync_stream,
            json_mode=json_mode,
        )


class OpenAIChatCompletionStreamingHandler(BaseModelResponseIterator):
    def chunk_parser(self, chunk: dict) -> ModelResponseStream:
        try:
            return ModelResponseStream(
                id=chunk["id"],
                object="chat.completion.chunk",
                created=chunk["created"],
                model=chunk["model"],
                choices=chunk["choices"],
            )
        except Exception as e:
            raise e
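

# --- Illustrative usage sketch (not part of the original module) ---
# A minimal, hedged example of how this config might be exercised directly. Inside litellm
# these methods are normally driven by the provider routing layer; the model name and
# parameter values below are assumptions for illustration only.
if __name__ == "__main__":
    config = OpenAIGPTConfig()

    # Filter user-supplied params down to those supported for the model.
    optional_params = config.map_openai_params(
        non_default_params={"temperature": 0.2, "not_a_real_param": "dropped"},
        optional_params={},
        model="gpt-4o",
        drop_params=True,
    )
    print(optional_params)  # expected: {"temperature": 0.2}

    # Build the request body and the endpoint URL it would be sent to.
    body = config.transform_request(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
        optional_params=optional_params,
        litellm_params={},
        headers={},
    )
    url = config.get_complete_url(
        api_base=None,
        api_key=None,
        model="gpt-4o",
        optional_params=optional_params,
        litellm_params={},
    )
    print(url)   # expected: https://api.openai.com/chat/completions
    print(body)  # expected: {"model": "gpt-4o", "messages": [...], "temperature": 0.2}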