import os

from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.chat_models.huggingface import ChatHuggingFace
from langchain_community.llms import HuggingFaceEndpoint

from auditqa.process_chunks import getconfig

load_dotenv()

model_config = getconfig("model_params.cfg")

# API credentials are read from the environment (.env is loaded above)
NVIDIA_SERVER = os.environ["NVIDIA_SERVERLESS"]
HF_token = os.environ["LLAMA_3_1"]
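
# For reference, an illustrative model_params.cfg (an assumption, not part of
# this repo's file: only the three keys read in this module are shown, and the
# values are placeholders):
#
#     [reader]
#     NVIDIA_ENDPOINT = https://<nvidia-serverless-base-url>/v1
#     DEDICATED_ENDPOINT = https://<your-endpoint>.endpoints.huggingface.cloud
#     MAX_TOKENS = 512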


def nvidia_client():
    """Return an InferenceClient for the NVIDIA serverless endpoint."""
    client = InferenceClient(
        base_url=model_config.get('reader', 'NVIDIA_ENDPOINT'),
        api_key=NVIDIA_SERVER)
    print("getting nvidia client")
    return client
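
# Illustrative usage sketch (an addition, not in the original file). The client
# returned above is a huggingface_hub InferenceClient, whose chat_completion
# method takes OpenAI-style messages; the prompt and token limit are assumptions:
#
#     client = nvidia_client()
#     response = client.chat_completion(
#         messages=[{"role": "user", "content": "Summarise the audit findings."}],
#         max_tokens=256,
#     )
#     print(response.choices[0].message.content)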


def dedicated_endpoint():
    """Return the dedicated HF Inference Endpoint wrapped in ChatHuggingFace."""
    # Set up the streaming callback handler
    callback = StreamingStdOutCallbackHandler()

    # Initialize the HuggingFaceEndpoint with streaming enabled
    llm_qa = HuggingFaceEndpoint(
        endpoint_url=model_config.get('reader', 'DEDICATED_ENDPOINT'),
        max_new_tokens=int(model_config.get('reader', 'MAX_TOKENS')),
        repetition_penalty=1.03,
        timeout=70,
        huggingfacehub_api_token=HF_token,
        streaming=True,  # Enable streaming for real-time token generation
        callbacks=[callback],  # Add the streaming callback handler
    )

    # Create a ChatHuggingFace instance with the streaming-enabled endpoint
    chat_model = ChatHuggingFace(llm=llm_qa)
    print("getting dedicated endpoint wrapped in ChatHuggingFace")
    return chat_model
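

# Minimal smoke test (an addition, not part of the original module). ChatHuggingFace
# is a LangChain chat model, so .invoke() accepts a list of messages; the prompt text
# is an assumption, and streamed tokens are echoed by the callback configured above.
if __name__ == "__main__":
    from langchain_core.messages import HumanMessage

    chat = dedicated_endpoint()
    reply = chat.invoke([HumanMessage(content="What can this audit QA assistant do?")])
    print(reply.content)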