import os
from typing import Optional

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

load_dotenv()

# Gemma's end-of-sequence marker; any generated text after it is discarded.
# NOTE(review): restored from a corrupted empty-string sentinel — confirm the
# exact token against the deployed model's tokenizer.
_EOS_TOKEN = "<eos>"

# Fallbacks used when `config` is None or missing a key.
_DEFAULT_MAX_OUTPUT_TOKENS = 512
_DEFAULT_TEMPERATURE = 0.7


def gemma_predict(combined_information: str, model_name: str, config: Optional[dict]) -> str:
    """Stream a text completion from a Gemma model on the HF Inference API.

    Parameters
    ----------
    combined_information : str
        The full prompt (e.g. question plus retrieved context) to send.
    model_name : str
        Hugging Face model id (or Inference Endpoint URL) to query.
    config : Optional[dict]
        Generation settings; recognised keys are ``max_output_tokens`` and
        ``temperature``.  Missing keys — or ``config=None`` — fall back to
        module-level defaults instead of raising.

    Returns
    -------
    str
        The generated text, truncated at the first ``<eos>`` marker if one
        appears in the stream.

    Raises
    ------
    KeyError
        If the ``HF_TOKEN`` environment variable is not set.
    """
    hf_token = os.environ["HF_TOKEN"]
    client = InferenceClient(model_name, token=hf_token)

    cfg = config or {}  # tolerate config=None despite the Optional annotation
    stream = client.text_generation(
        prompt=combined_information,
        details=True,
        stream=True,
        max_new_tokens=cfg.get("max_output_tokens", _DEFAULT_MAX_OUTPUT_TOKENS),
        temperature=cfg.get("temperature", _DEFAULT_TEMPERATURE),
        return_full_text=False,
    )

    # Collect streamed token texts and join once — avoids quadratic `+=`.
    output = "".join(response.token.text for response in stream)

    # BUG FIX: the original tested `if "" in output` (always true) and then
    # called output.split("") — str.split raises ValueError on an empty
    # separator, so every call crashed after generation finished.  The
    # intended behaviour is to truncate at the end-of-sequence token.
    if _EOS_TOKEN in output:
        output = output.split(_EOS_TOKEN, 1)[0]
    return output