Spaces:
Running
Running
import spaces | |
import gradio as gr | |
import torch | |
# Use a pipeline as a high-level helper
from transformers import pipeline | |
_MODEL_ID = "NousResearch/Hermes-3-Llama-3.1-8B"

# Lazily-built text-generation pipeline, shared by every request so the model
# weights are loaded once instead of on each call.
_pipe = None


def _get_pipe():
    """Return the shared text-generation pipeline, building it on first use.

    The pipeline is placed on CUDA device 0 when CUDA is available and on the
    CPU (``device=-1``) otherwise, so a CPU-only host no longer fails at
    construction time.
    """
    global _pipe
    if _pipe is None:
        print("RUNNING PIPE")
        device = 0 if torch.cuda.is_available() else -1
        _pipe = pipeline(
            "text-generation",
            model=_MODEL_ID,
            max_new_tokens=200,
            device=device,
        )
        print("PIPE DONE")
    return _pipe


def _log_cuda_devices():
    """Print how many CUDA devices are visible and the name of each one."""
    if not torch.cuda.is_available():
        print("CUDA is not available.")
        return
    num_devices = torch.cuda.device_count()
    print(f"Number of CUDA devices: {num_devices}")
    for i in range(num_devices):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")


# NOTE(review): `spaces` is imported by this file but not used in the visible
# code. If this app targets ZeroGPU hardware, the inference entry point is
# presumably meant to be decorated with `@spaces.GPU` - confirm before deploying.
def llama3_1_8B(question):
    """Answer ``question`` with the Hermes-3 Llama-3.1-8B chat model.

    Args:
        question: The user's prompt; sent as a single ``user`` chat message.

    Returns:
        ``str(responses)``, i.e. the string form of whatever the
        text-generation pipeline returns for the message list.
    """
    pipe = _get_pipe()
    messages = [
        {"role": "user", "content": question},
    ]
    _log_cuda_devices()
    print("GATHERING RESPONSES")
    responses = pipe(messages)
    return str(responses)
def greet(name):
    """Return an enthusiastic greeting for ``name``."""
    greeting = "Hello " + name
    return greeting + "!!???"
# Wrap the model call in a text-in / text-out Gradio UI and start serving it.
demo = gr.Interface(
    fn=llama3_1_8B,
    inputs="text",
    outputs="text",
)
demo.launch()