# models/sambanova.py
import httpx
import json
from typing import AsyncGenerator
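# Async helpers for SambaNova's chat completions API: run_model_stream yields
# response chunks as they arrive, run_model collects the full response.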


async def run_model_stream(api_key: str, model: str, prompt: str) -> AsyncGenerator[str, None]:
"""
Run the SambaNova model with streaming response.
Args:
api_key: The API key to use for this request
model: The model name to use
prompt: The user's input prompt
Yields:
str: Chunks of the generated response
"""
try:
# Configure HTTP client with appropriate headers and SSL settings
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
"Accept": "text/event-stream"
}
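        # Read timeout is raised to 300 s so slow token streams are not cut off;
        # connect/write/pool operations keep the 60 s limit set below.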
async with httpx.AsyncClient(
base_url="https://api.sambanova.ai/v1",
headers=headers,
verify=True,
timeout=httpx.Timeout(60.0, read=300.0)
) as client:
# Make streaming request
async with client.stream(
"POST",
"/chat/completions",
json={
"model": model, # Use model name directly from MODELS.csv
"messages": [
{
"role": "user",
"content": prompt
}
],
"stream": True,
"temperature": 0.7,
"max_tokens": 2048
}
) as response:
response.raise_for_status()
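                # The endpoint streams Server-Sent Events: each payload line is
                # prefixed with "data: " and the stream ends with a "[DONE]" sentinel.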
async for line in response.aiter_lines():
line = line.strip()
if not line:
continue
if line.startswith("data: "):
data = line[6:].strip()
if data == "[DONE]":
break
try:
chunk_data = json.loads(data)
if chunk_data.get("choices") and chunk_data["choices"][0].get("delta"):
content = chunk_data["choices"][0]["delta"].get("content")
if content:
yield content
                        except (json.JSONDecodeError, KeyError, IndexError) as e:
                            # Skip malformed chunks instead of aborting the stream
                            print(f"Error parsing chunk: {e}")
                            continue
    except httpx.HTTPError as e:
        raise Exception(f"HTTP error with SambaNova API: {e}") from e
    except Exception as e:
        raise Exception(f"Error with SambaNova API: {e}") from e


async def run_model(api_key: str, model: str, prompt: str) -> str:
"""
Run the SambaNova model with the provided API key and prompt (non-streaming).
Args:
api_key: The API key to use for this request
model: The model name to use
prompt: The user's input prompt
Returns:
str: The generated response
"""
response_text = ""
async for chunk in run_model_stream(api_key, model, prompt):
response_text += chunk
return response_text
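

# A minimal usage sketch (illustrative, not part of the original module),
# assuming the file is run as a script: the API key, model name, and prompt
# below are placeholders, not values taken from this repository.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        api_key = "YOUR_SAMBANOVA_API_KEY"    # placeholder
        model = "Meta-Llama-3.1-8B-Instruct"  # hypothetical model name
        prompt = "Say hello in one sentence."

        # Stream chunks as they are generated
        async for chunk in run_model_stream(api_key, model, prompt):
            print(chunk, end="", flush=True)
        print()

        # Or collect the whole response at once
        print(await run_model(api_key, model, prompt))

    asyncio.run(_demo())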