"""Smoke-test script for an OpenAI-compatible chat completions endpoint.

Sends one non-streaming and one streaming chat completion request and prints
the results to stdout.  The endpoint and API key can be overridden through the
``TEST_API_BASE_URL`` and ``TEST_API_KEY`` environment variables.
"""

import os

from openai import OpenAI

# Default endpoint; update this (or set TEST_API_BASE_URL) if you're not
# running the server locally.
DEFAULT_BASE_URL = "http://localhost:8000/v1"

# The original script notes the key can be any string because it is not
# validated, so a placeholder is used instead of committing a
# credential-looking value to the source.
DEFAULT_API_KEY = "not-validated"

# Set up the client
client = OpenAI(
    base_url=os.environ.get("TEST_API_BASE_URL", DEFAULT_BASE_URL),
    api_key=os.environ.get("TEST_API_KEY", DEFAULT_API_KEY),
)


def test_non_streaming() -> None:
    """Request a single non-streaming completion and print the full response object."""
    print("Testing non-streaming response:")
    completion = client.chat.completions.create(
        model="claude-3.5-sonnet",
        messages=[
            {"role": "system", "content": "."},
            {"role": "user", "content": "how many 'r' are there in strawberry"},
        ],
    )
    # Print the whole response object (not just the message text) so every
    # returned field can be inspected.
    print(completion)


def test_streaming() -> None:
    """Request a streaming completion and print content deltas as they arrive."""
    print("\nTesting streaming response:")
    completion = client.chat.completions.create(
        model="o1-preview",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "how many 'r' are there in strawberry"},
        ],
        stream=True,
    )
    for chunk in completion:
        # Some chunks carry no choices at all (e.g. a usage-only chunk);
        # indexing choices[0] on those would raise IndexError.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text is not None:
            # Flush on each piece so the output appears incrementally.
            print(delta_text, end="", flush=True)
    print()


def main() -> None:
    """Run the non-streaming test, a separator line, then the streaming test."""
    test_non_streaming()
    print("-" * 50)
    test_streaming()


if __name__ == "__main__":
    main()