import gradio as gr
import spaces
from vllm import LLM, SamplingParams

# Load the model once at startup; sampling parameters are shared across requests.
llm = LLM(model="meta-llama/Llama-2-7B-Chat-hf")
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)


@spaces.GPU
def pipe(text: str):
    # generate() takes a list of prompts and returns one RequestOutput per prompt.
    outputs = llm.generate([text], sampling_params)
    # Return the generated text of the first (and only) completion.
    return outputs[0].outputs[0].text


if __name__ == "__main__":
    interface = gr.Interface(
        pipe,
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Response"),
        title="Text Completion",
    )
    interface.launch()