# Spaces: Sleeping
from typing import Any, List, Mapping, Optional | |
from langchain.llms.base import LLM | |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
class CustomLLM(LLM):
    """LangChain LLM backed by a Hugging Face ``text-generation`` pipeline.

    The pipeline is built once at construction time from an already-loaded
    causal language model and its tokenizer, and samples with
    ``top_p=0.95``, ``top_k=50`` and ``temperature=0.7``.

    Attributes are declared at class level because the LangChain ``LLM``
    base class is a pydantic model: assigning an attribute that has not been
    declared as a field raises an error.
    """

    # The transformers pipeline used for generation (set in ``__init__``).
    pipeline: Any = None
    # Name reported by ``_identifying_params``.
    model_name: str = "custom"
    # Upper bound on the number of tokens generated per call.
    max_new_tokens: int = 525

    def __init__(
        self,
        model: AutoModelForCausalLM,
        tokenizer: AutoTokenizer,
        **kwargs: Any,
    ):
        """Create the text-generation pipeline.

        Args:
            model: Loaded causal language model used for generation.
            tokenizer: Tokenizer matching ``model``.
            **kwargs: Optional field overrides forwarded to the base class
                (for example ``max_new_tokens`` or ``model_name``).
        """
        # Default the reported name to the model's identifier when available,
        # falling back to its class name (``name_or_path`` may be empty).
        kwargs.setdefault(
            "model_name",
            getattr(model, "name_or_path", None) or type(model).__name__,
        )
        # The base class must be initialised before any field is assigned.
        super().__init__(**kwargs)
        # Inside a method, ``pipeline`` resolves to the module-level
        # ``transformers.pipeline`` factory, not to the class field above.
        self.pipeline = pipeline(
            model=model,
            tokenizer=tokenizer,
            task="text-generation",
            # device=0,  # GPU device number
            # max_length=512,
            do_sample=True,
            top_p=0.95,
            top_k=50,
            temperature=0.7,
        )

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Text to continue.
            stop: Optional stop sequences; the completion is cut just before
                the earliest occurrence of any of them.
            run_manager: Accepted for compatibility with LangChain versions
                that pass a callback manager; not used here.
            **kwargs: Accepted for compatibility; not used here.

        Returns:
            Only the newly generated text, without the prompt.
        """
        # ``return_full_text=False`` makes the pipeline drop the prompt
        # itself, which is more reliable than slicing by ``len(prompt)``
        # when decoding does not reproduce the prompt character-for-character.
        outputs = self.pipeline(
            prompt,
            max_new_tokens=self.max_new_tokens,
            return_full_text=False,
        )
        completion = outputs[0]["generated_text"]

        if stop:
            # Skip empty stop strings: ``str.find("")`` is always 0 and
            # would wipe out the whole completion.
            positions = [completion.find(token) for token in stop if token]
            cut_points = [pos for pos in positions if pos != -1]
            if cut_points:
                completion = completion[: min(cut_points)]
        return completion

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Return the parameters that identify this LLM instance."""
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self) -> str:
        """Return the type tag of this LLM."""
        return "custom"