from vllm import LLM, SamplingParams
import gradio as gr
import os
from huggingface_hub import login
class TextCompletion:
    """Thin wrapper pairing a text-generation model with fixed sampling settings."""

    def __init__(self, model, sampling_params):
        """Store the model and the sampling parameters used for every call."""
        self.model = model
        self.sampling_params = sampling_params

    def generate(self, prompt: str):
        """Run the model on *prompt* and return the first candidate's text."""
        results = self.model.generate(prompt, self.sampling_params)
        first_request = results[0]
        return first_request.outputs[0].text
if __name__ == "__main__":
HF_TOKEN = os.getenv('HF_TOKEN')
login(token=HF_TOKEN)
model = LLM(
model="mep296/llama-3-8b-entigraph-quality",
tokenizer="meta-llama/Meta-Llama-3-8B",
device="cuda"
)
tokenizer = model.get_tokenizer()
sampling_params = SamplingParams(
temperature=0.1,
max_tokens=500,
stop=[tokenizer.eos_token, "## Example 7", "##"]
)
def text_completion_fn(prompt):
text_completer = TextCompletion(model, sampling_params)
return text_completer.generate(prompt)
demo = gr.Interface(fn=text_completion_fn, inputs="textbox", outputs="textbox")
demo.launch()
|