# src/llms/ollama_llm.py
import requests
from typing import Optional, List

from .base_llm import BaseLLM


class OllamaLanguageModel(BaseLLM):
    def __init__(
        self,
        base_url: str = 'http://localhost:11434',
        model: str = 'llama2'
    ):
        """
        Initialize Ollama Language Model

        Args:
            base_url (str): Base URL for Ollama API
            model (str): Name of the Ollama model to use
        """
        self.base_url = base_url
        self.model = model

    def generate(
        self,
        prompt: str,
        max_tokens: Optional[int] = 150,
        temperature: float = 0.7,
        **kwargs
    ) -> str:
        """
        Generate a response using the Ollama API

        Args:
            prompt (str): Input prompt
            max_tokens (Optional[int]): Maximum tokens to generate
            temperature (float): Sampling temperature

        Returns:
            str: Generated response
        """
        # /api/generate with "stream": False returns one JSON object;
        # "num_predict" is Ollama's option for capping generated tokens.
        response = requests.post(
            f"{self.base_url}/api/generate",
            json={
                "model": self.model,
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": temperature,
                    "num_predict": max_tokens
                }
            }
        )
        response.raise_for_status()
        return response.json().get('response', '').strip()

    def tokenize(self, text: str) -> List[str]:
        """
        Tokenize text

        Args:
            text (str): Input text to tokenize

        Returns:
            List[str]: List of tokens
        """
        # Naive whitespace tokenization; this is an approximation and will
        # not match the model's actual tokenizer.
        return text.split()

    def count_tokens(self, text: str) -> int:
        """
        Count tokens in the text

        Args:
            text (str): Input text to count tokens

        Returns:
            int: Number of tokens
        """
        return len(self.tokenize(text))
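

# --- Hypothetical sketch of the BaseLLM interface ---
# The base class imported above (src/llms/base_llm.py) is not included in
# this snippet. A minimal sketch of the interface OllamaLanguageModel
# appears to implement might look like the following; the names and
# signatures here are assumptions inferred from the subclass, not the
# actual source.
from abc import ABC, abstractmethod
from typing import List, Optional


class BaseLLM(ABC):
    """Abstract interface that concrete language-model backends implement."""

    @abstractmethod
    def generate(
        self,
        prompt: str,
        max_tokens: Optional[int] = 150,
        temperature: float = 0.7,
        **kwargs
    ) -> str:
        """Return the model's completion for `prompt`."""

    @abstractmethod
    def tokenize(self, text: str) -> List[str]:
        """Split `text` into tokens."""

    @abstractmethod
    def count_tokens(self, text: str) -> int:
        """Return the number of tokens in `text`."""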
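

# --- Hypothetical usage example ---
# A minimal sketch, assuming an Ollama server is running at
# http://localhost:11434 and the model has already been pulled locally
# (e.g. `ollama pull llama2`). The import path follows the file layout
# shown above and is an assumption about the surrounding project.
from src.llms.ollama_llm import OllamaLanguageModel

llm = OllamaLanguageModel(base_url="http://localhost:11434", model="llama2")

prompt = "Explain retrieval-augmented generation in one sentence."
# count_tokens splits on whitespace, so this is only a rough estimate
print(f"Approximate prompt tokens: {llm.count_tokens(prompt)}")
print(llm.generate(prompt, max_tokens=100, temperature=0.2))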