Trying LLM again
app.py CHANGED
@@ -6,14 +6,13 @@ import torch
 
 # Use a pipeline as a high-level helper
 from transformers import pipeline
-# pipe = pipeline("text-generation", model="NousResearch/Hermes-3-Llama-3.1-8B", max_new_tokens=200, device=0)
 
-
+
+@spaces.GPU(duration=120)
 def llama3_1_8B(question):
     messages = [
         {"role": "user", "content": question},
     ]
-    # responses = pipe(messages)
     if torch.cuda.is_available():
         num_devices = torch.cuda.device_count()
         print(f"Number of CUDA devices: {num_devices}")
@@ -23,9 +22,13 @@ def llama3_1_8B(question):
 
     else:
         print("CUDA is not available.")
+    print("RUNNING PIPE")
+    pipe = pipeline("text-generation", model="NousResearch/Hermes-3-Llama-3.1-8B", max_new_tokens=200, device=0)
+
+    print("GATHERING RESPONSES")
+    responses = pipe(messages)
 
-
-    return "Hi"
+    return str(responses)
 
 def greet(name):
     return "Hello " + name + "!!???"