# Hugging Face Space: AhmadT198/zeroGPUDemo1 (status when captured: Sleeping)
# File size: 964 Bytes
# Commit hashes shown in the page's blame gutter: 522fd1b, 8d5b6b3, 698bcdd,
# 11a4987, b7899e7, dde3493, 8d58216, 0a6540b, 5293d29, 3f7015b

import spaces
import gradio as gr
import torch


# Use a pipeline as a high-level helper
from transformers import pipeline




@spaces.GPU(duration=120)
def llama3_1_8B(question: str) -> str:
    """Answer ``question`` with the Hermes-3 Llama-3.1-8B chat model.

    Builds a ``text-generation`` pipeline on CUDA device 0, sends the
    question as a single user turn, logs the visible CUDA devices, and
    returns the model's reply.

    Args:
        question: Text of the user message.

    Returns:
        The ``content`` of the last message in the generated conversation.
        If the pipeline output does not have the expected chat shape, the
        raw output converted with ``str()`` is returned instead.
    """
    print("RUNNING PIPE")
    # NOTE(review): the pipeline (and therefore the model) is constructed on
    # every call; consider building it once if request latency matters.
    pipe = pipeline(
        "text-generation",
        model="NousResearch/Hermes-3-Llama-3.1-8B",
        max_new_tokens=200,
        device=0,
    )
    print("PIPE DONE")

    messages = [{"role": "user", "content": question}]

    # Diagnostic logging only: report which CUDA devices are visible here.
    if torch.cuda.is_available():
        num_devices = torch.cuda.device_count()
        print(f"Number of CUDA devices: {num_devices}")
        for i in range(num_devices):
            print(f"Device {i}: {torch.cuda.get_device_name(i)}")
    else:
        print("CUDA is not available.")

    print("GATHERING RESPONSES")
    responses = pipe(messages)

    # For chat-style input the pipeline returns
    # [{"generated_text": [<input messages...>, {"role": "assistant", ...}]}];
    # show only the assistant's text rather than the repr of that structure.
    try:
        reply = responses[0]["generated_text"][-1]["content"]
    except (IndexError, KeyError, TypeError):
        # Unexpected output shape: fall back to the raw representation.
        return str(responses)
    return reply if isinstance(reply, str) else str(responses)

def greet(name):
    """Return a greeting of the form ``Hello <name>!!???``.

    Not referenced by the Gradio interface defined in this file.
    """
    greeting = "Hello " + name
    return greeting + "!!???"

# Single text box in, single text box out, backed by the chat model function.
demo = gr.Interface(
    fn=llama3_1_8B,
    inputs="text",
    outputs="text",
)

# Start the Gradio app.
demo.launch()