from typing import List, Optional, Union

import litellm
from litellm.main import stream_chunk_builder
from litellm.responses.litellm_completion_transformation.transformation import (
    LiteLLMCompletionResponsesConfig,
)
from litellm.responses.streaming_iterator import ResponsesAPIStreamingIterator
from litellm.types.llms.openai import (
    OutputTextDeltaEvent,
    ResponseCompletedEvent,
    ResponseInputParam,
    ResponsesAPIOptionalRequestParams,
    ResponsesAPIStreamEvents,
    ResponsesAPIStreamingResponse,
)
from litellm.types.utils import Delta as ChatCompletionDelta
from litellm.types.utils import (
    ModelResponse,
    ModelResponseStream,
    StreamingChoices,
    TextCompletionResponse,
)

class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
    """
    Sync/async iterator that converts streaming chat completion chunks
    into Responses API streaming events.
    """

    def __init__(
        self,
        litellm_custom_stream_wrapper: litellm.CustomStreamWrapper,
        request_input: Union[str, ResponseInputParam],
        responses_api_request: ResponsesAPIOptionalRequestParams,
    ):
        self.litellm_custom_stream_wrapper: litellm.CustomStreamWrapper = (
            litellm_custom_stream_wrapper
        )
        self.request_input: Union[str, ResponseInputParam] = request_input
        self.responses_api_request: ResponsesAPIOptionalRequestParams = (
            responses_api_request
        )
        # Raw chat completion chunks are buffered so the full response can be
        # rebuilt once the stream is exhausted.
        self.collected_chat_completion_chunks: List[ModelResponseStream] = []
        self.finished: bool = False

    async def __anext__(
        self,
    ) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
        try:
            while True:
                if self.finished is True:
                    raise StopAsyncIteration
                # Get the next chunk from the stream
                try:
                    chunk = await self.litellm_custom_stream_wrapper.__anext__()
                    self.collected_chat_completion_chunks.append(chunk)
                    response_api_chunk = (
                        self._transform_chat_completion_chunk_to_response_api_chunk(
                            chunk
                        )
                    )
                    if response_api_chunk:
                        return response_api_chunk
                except StopAsyncIteration:
                    # Underlying stream is exhausted: emit the final
                    # response.completed event built from the buffered chunks.
                    self.finished = True
                    response_completed_event = self._emit_response_completed_event()
                    if response_completed_event:
                        return response_completed_event
                    else:
                        raise StopAsyncIteration
        except Exception as e:
            # Handle HTTP errors
            self.finished = True
            raise e

    def __iter__(self):
        return self

    def __next__(
        self,
    ) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
        try:
            while True:
                if self.finished is True:
                    raise StopIteration
                # Get the next chunk from the stream
                try:
                    chunk = self.litellm_custom_stream_wrapper.__next__()
                    self.collected_chat_completion_chunks.append(chunk)
                    response_api_chunk = (
                        self._transform_chat_completion_chunk_to_response_api_chunk(
                            chunk
                        )
                    )
                    if response_api_chunk:
                        return response_api_chunk
                except StopIteration:
                    # Underlying stream is exhausted: emit the final
                    # response.completed event built from the buffered chunks.
                    self.finished = True
                    response_completed_event = self._emit_response_completed_event()
                    if response_completed_event:
                        return response_completed_event
                    else:
                        raise StopIteration
        except Exception as e:
            # Handle HTTP errors
            self.finished = True
            raise e

    def _transform_chat_completion_chunk_to_response_api_chunk(
        self, chunk: ModelResponseStream
    ) -> Optional[ResponsesAPIStreamingResponse]:
        """
        Transform a chat completion chunk into a Responses API chunk.

        This currently only emits the OutputTextDeltaEvent, which is what most
        tools consuming the Responses API stream rely on.
        """
        return OutputTextDeltaEvent(
            type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
            item_id=chunk.id,
            output_index=0,
            content_index=0,
            delta=self._get_delta_string_from_streaming_choices(chunk.choices),
        )
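
    # Illustrative mapping only (the chunk shape below is an assumption about
    # a typical provider response, not taken from this module):
    #   ModelResponseStream(id="chunk-1",
    #       choices=[StreamingChoices(delta=ChatCompletionDelta(content="Hi"))])
    #   -> OutputTextDeltaEvent(type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
    #          item_id="chunk-1", output_index=0, content_index=0, delta="Hi")
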
    def _get_delta_string_from_streaming_choices(
        self, choices: List[StreamingChoices]
    ) -> str:
        """
        Get the delta string from the streaming choices.

        For now this collects only the first choice's delta string; it is
        unclear how users expect litellm to translate multiple choices per
        chunk into Responses API output.
        """
        choice = choices[0]
        chat_completion_delta: ChatCompletionDelta = choice.delta
        return chat_completion_delta.content or ""

    def _emit_response_completed_event(self) -> Optional[ResponseCompletedEvent]:
        # Rebuild the complete chat completion response from the buffered
        # chunks, then convert it into a Responses API response object.
        litellm_model_response: Optional[
            Union[ModelResponse, TextCompletionResponse]
        ] = stream_chunk_builder(chunks=self.collected_chat_completion_chunks)
        if litellm_model_response and isinstance(
            litellm_model_response, ModelResponse
        ):
            return ResponseCompletedEvent(
                type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
                response=LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
                    request_input=self.request_input,
                    chat_completion_response=litellm_model_response,
                    responses_api_request=self.responses_api_request,
                ),
            )
        else:
            return None
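

# --- Usage sketch (illustrative only, not part of the module) ---
# A minimal sketch of how this iterator might be driven, assuming a
# CustomStreamWrapper obtained from a streaming chat completion call.
# The model name, input text, and empty request params below are
# placeholders for illustration:
#
#     import asyncio
#     import litellm
#
#     async def main():
#         stream = await litellm.acompletion(
#             model="gpt-4o-mini",
#             messages=[{"role": "user", "content": "Hello"}],
#             stream=True,
#         )
#         iterator = LiteLLMCompletionStreamingIterator(
#             litellm_custom_stream_wrapper=stream,
#             request_input="Hello",
#             responses_api_request={},
#         )
#         async for event in iterator:
#             if event.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA:
#                 print(event.delta, end="")
#
#     asyncio.run(main())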