import os
import sys

import pytest

sys.path.insert(0, os.path.abspath("../.."))  # Adds the parent directory to the system path

from base_llm_unit_tests import BaseLLMChatTest
from litellm.llms.vertex_ai.context_caching.transformation import (
    separate_cached_messages,
)

import litellm
from litellm import completion
from litellm.utils import supports_url_context


class TestGoogleAIStudioGemini(BaseLLMChatTest):
    def get_base_completion_call_args(self) -> dict:
        return {"model": "gemini/gemini-2.0-flash"}

    def get_base_completion_call_args_with_reasoning_model(self) -> dict:
        return {"model": "gemini/gemini-2.5-flash-preview-04-17"}

    def test_tool_call_no_arguments(self, tool_call_no_arguments):
        """Test that a tool call with no arguments is translated correctly.

        Relevant issue: https://github.com/BerriAI/litellm/issues/6833
        """
        from litellm.litellm_core_utils.prompt_templates.factory import (
            convert_to_gemini_tool_call_invoke,
        )

        result = convert_to_gemini_tool_call_invoke(tool_call_no_arguments)
        print(result)

    def test_url_context(self):
        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
        litellm.model_cost = litellm.get_model_cost_map(url="")
        litellm._turn_on_debug()

        base_completion_call_args = self.get_base_completion_call_args()
        if not supports_url_context(base_completion_call_args["model"], None):
            pytest.skip("Model does not support url context")

        response = self.completion_function(
            **base_completion_call_args,
            messages=[
                {
                    "role": "user",
                    "content": "Summarize the content of this URL: https://en.wikipedia.org/wiki/Artificial_intelligence",
                }
            ],
            tools=[{"urlContext": {}}],
        )

        assert response is not None
        assert (
            response.model_extra["vertex_ai_url_context_metadata"] is not None
        ), "URL context metadata should be present"
        print(f"response={response}")
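
# A brief note on the next test: `separate_cached_messages` splits a message list
# on the Anthropic-style `cache_control: {"type": "ephemeral"}` marker so the
# marked turns can be routed to Gemini context caching. A minimal sketch of the
# call shape, following the fixture below:
#
#   cached, non_cached = separate_cached_messages(messages)
#   # cached     -> turns carrying a cache_control entry
#   # non_cached -> the remaining turns, in their original order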
{ "role": "user", "content": [ { "type": "text", "text": "What are the key terms and conditions in this agreement?", "cache_control": {"type": "ephemeral"}, } ], }, ] cached_messages, non_cached_messages = separate_cached_messages(messages) print(cached_messages) print(non_cached_messages) assert len(cached_messages) > 0, "Cached messages should be present" assert len(non_cached_messages) > 0, "Non-cached messages should be present" def test_gemini_image_generation(): # litellm._turn_on_debug() response = completion( model="gemini/gemini-2.0-flash-exp-image-generation", messages=[{"role": "user", "content": "Generate an image of a cat"}], modalities=["image", "text"], ) assert response.choices[0].message.content is not None def test_gemini_thinking(): litellm._turn_on_debug() from litellm.types.utils import Message, CallTypes from litellm.utils import return_raw_request import json messages = [ {"role": "user", "content": "Explain the concept of Occam's Razor and provide a simple, everyday example"} ] reasoning_content = "I'm thinking about Occam's Razor." assistant_message = Message(content='Okay, let\'s break down Occam\'s Razor.', reasoning_content=reasoning_content, role='assistant', tool_calls=None, function_call=None, provider_specific_fields=None) messages.append(assistant_message) raw_request = return_raw_request( endpoint=CallTypes.completion, kwargs={ "model": "gemini/gemini-2.5-flash-preview-04-17", "messages": messages, } ) assert reasoning_content in json.dumps(raw_request) response = completion( model="gemini/gemini-2.5-flash-preview-04-17", messages=messages, # make sure call works ) print(response.choices[0].message) assert response.choices[0].message.content is not None def test_gemini_thinking_budget_0(): litellm._turn_on_debug() from litellm.types.utils import Message, CallTypes from litellm.utils import return_raw_request import json raw_request = return_raw_request( endpoint=CallTypes.completion, kwargs={ "model": "gemini/gemini-2.5-flash-preview-04-17", "messages": [{"role": "user", "content": "Explain the concept of Occam's Razor and provide a simple, everyday example"}], "thinking": {"type": "enabled", "budget_tokens": 0} } ) print(raw_request) assert "0" in json.dumps(raw_request["raw_request_body"]) def test_gemini_finish_reason(): import os from litellm import completion litellm._turn_on_debug() response = completion(model="gemini/gemini-1.5-pro", messages=[{"role": "user", "content": "give me 3 random words"}], max_tokens=2) print(response) assert response.choices[0].finish_reason is not None assert response.choices[0].finish_reason == "length" def test_gemini_url_context(): from litellm import completion litellm._turn_on_debug() url = "https://ai.google.dev/gemini-api/docs/models" prompt = f""" Summarize this document: {url} """ response = completion( model="gemini/gemini-2.0-flash", messages=[{"role": "user", "content": prompt}], tools=[{"urlContext": {}}], ) print(response) message = response.choices[0].message.content assert message is not None url_context_metadata = response.model_extra['vertex_ai_url_context_metadata'] assert url_context_metadata is not None urlMetadata = url_context_metadata[0]['urlMetadata'][0] assert urlMetadata['retrievedUrl'] == url assert urlMetadata['urlRetrievalStatus'] == 'URL_RETRIEVAL_STATUS_SUCCESS' def test_gemini_with_grounding(): from litellm import completion, Usage, stream_chunk_builder litellm._turn_on_debug() litellm.set_verbose = True tools = [{"googleSearch": {}}] # response = 

def test_gemini_with_grounding():
    from litellm import Usage, stream_chunk_builder

    litellm._turn_on_debug()
    litellm.set_verbose = True
    tools = [{"googleSearch": {}}]

    # Non-streaming variant, kept for reference:
    # response = completion(
    #     model="gemini/gemini-2.0-flash",
    #     messages=[{"role": "user", "content": "What is the capital of France?"}],
    #     tools=tools,
    # )
    # print(response)
    # usage: Usage = response.usage
    # assert usage.prompt_tokens_details.web_search_requests is not None
    # assert usage.prompt_tokens_details.web_search_requests > 0

    ## Check streaming
    response = completion(
        model="gemini/gemini-2.0-flash",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        tools=tools,
        stream=True,
        stream_options={"include_usage": True},
    )

    chunks = []
    for chunk in response:
        chunks.append(chunk)
    print(f"chunks before stream_chunk_builder: {chunks}")
    assert len(chunks) > 0

    complete_response = stream_chunk_builder(chunks)
    print(complete_response)
    assert complete_response is not None

    usage: Usage = complete_response.usage
    assert usage.prompt_tokens_details.web_search_requests is not None
    assert usage.prompt_tokens_details.web_search_requests > 0


def test_gemini_with_empty_function_call_arguments():
    litellm._turn_on_debug()
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                # Intentionally empty: the request should still succeed when a
                # tool declares no parameter schema.
                "parameters": "",
            },
        }
    ]
    response = completion(
        model="gemini/gemini-2.0-flash",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        tools=tools,
    )
    print(response)
    assert response.choices[0].message.content is not None
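
# A hypothetical sketch of the Gemini function declaration the last test is
# expected to produce, assuming litellm drops the empty `parameters` string
# rather than forwarding `"parameters": ""` upstream (shape shown for
# illustration only, not taken from the litellm source):
#
#   {"tools": [{"functionDeclarations": [{"name": "get_current_weather"}]}]}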