Spaces:
Sleeping
Sleeping
implemented option to use inference endpoints, implemented parameter selection, updated UI, cleaned up return formats of models
fa554aa
# external imports | |
from transformers import pipeline | |
from huggingface_hub import InferenceClient | |
# local imports | |
import config | |
class Phi3_Mini_4k_Instruct:
    """Generate chat replies with the model named by ``config.LLM_MODEL``.

    Generation can run through a local ``transformers`` text-generation
    pipeline or through the Hugging Face Inference API; both paths return the
    reply text.
    """

    def __init__(self):
        # The local pipeline is built lazily on the first local generation and
        # then reused, so the model is not reloaded on every request.
        self.local_pipeline = None
        # Model id the cached pipeline was built for (used to detect changes).
        self._local_pipeline_model = None

    def generate_text(self, messages, use_local_llm):
        """Return the generated reply for *messages*.

        Args:
            messages: Conversation passed unchanged to the selected backend
                (presumably a list of ``{"role": ..., "content": ...}`` dicts;
                verify against the caller).
            use_local_llm: Truthy to run the local pipeline, falsy to call the
                Inference API.

        Returns:
            Whatever the selected backend method returns (the reply text).
        """
        if use_local_llm:
            return self.generate_text_local_pipeline(messages)
        return self.generate_text_api(messages)

    def _get_local_pipeline(self):
        """Return the cached local pipeline, building it when needed."""
        # getattr keeps this safe for instances whose __init__ was bypassed
        # (e.g. subclasses that do not call super().__init__()).
        cached = getattr(self, "local_pipeline", None)
        cached_model = getattr(self, "_local_pipeline_model", None)
        if cached is None or cached_model != config.LLM_MODEL:
            cached = pipeline(
                "text-generation",
                model=config.LLM_MODEL,
                trust_remote_code=True,
            )
            self.local_pipeline = cached
            self._local_pipeline_model = config.LLM_MODEL
        return cached

    def generate_text_local_pipeline(self, messages):
        """Run *messages* through the local text-generation pipeline.

        The pipeline is created once and kept on ``self.local_pipeline``; it
        is rebuilt only if ``config.LLM_MODEL`` no longer matches the model it
        was created for.

        Returns:
            The ``content`` of the last message in the ``generated_text`` of
            the last pipeline output.
        """
        local_pipeline = self._get_local_pipeline()

        # Re-applied on every call so that changes made to the config values
        # between requests still reach the model configuration.
        # NOTE(review): whether generate() honours generation settings placed
        # on model.config depends on the transformers version - confirm, or
        # pass them as call-time keyword arguments instead.
        model_config = local_pipeline.model.config
        model_config.max_length = config.LLM_MAX_LENGTH
        model_config.max_new_tokens = config.LLM_MAX_NEW_TOKENS
        model_config.temperature = config.LLM_TEMPERATURE
        model_config.top_p = config.LLM_TOP_P

        outputs = local_pipeline(messages)
        return outputs[-1]['generated_text'][-1]['content']

    def generate_text_api(self, messages):
        """Run *messages* through the Hugging Face Inference API.

        Returns:
            The message content of the first choice in the chat completion.
        """
        client = InferenceClient(config.LLM_MODEL, token=config.HF_API_TOKEN)
        completion = client.chat_completion(
            messages,
            max_tokens=config.LLM_MAX_NEW_TOKENS,
            temperature=config.LLM_TEMPERATURE,
            top_p=config.LLM_TOP_P,
        )
        return completion.choices[0].message.content