import gradio as gr
from vllm import LLM, SamplingParams
# Initialize the engine once at startup; constructing the vLLM LLM inside
# the request handler would reload the model weights on every call.
llm = LLM(model="igor-im/flux_prompt_expander",
          tokenizer="igor-im/flux_prompt_expander",
          gpu_memory_utilization=0.95)


def run_gguf_inference(prompt):
    # Chat template with <|user|>/<|assistant|> markers expected by the model.
    PROMPT_TEMPLATE = "<|user|>\n{prompt}</s>\n<|assistant|>\n"  # noqa: E501
    prompt = PROMPT_TEMPLATE.format(prompt=prompt)
    # Greedy decoding, capped at 128 new tokens.
    sampling_params = SamplingParams(temperature=0, max_tokens=128)
    outputs = llm.generate(prompt, sampling_params)
    generated_text = outputs[0].outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
    # Return the text so Gradio can display it in the output textbox.
    return generated_text
interface = gr.Interface(fn=run_gguf_inference, inputs="textbox", outputs="textbox")
interface.launch()
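
Once the app is running, the handler can also be called programmatically
instead of through the web UI. A minimal sketch using gradio_client; the
Space id here is an assumption and should be replaced with the actual
"user/space-name" of the deployed Space:

from gradio_client import Client

# Hypothetical Space id -- substitute the real "user/space-name".
client = Client("igor-im/flux_prompt_expander")
# A single gr.Interface exposes its handler at the "/predict" endpoint.
result = client.predict("a cat sleeping on a windowsill", api_name="/predict")
print(result)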