Spaces:
Runtime error
Runtime error
api deployment
Browse files- app.py +3 -15
- requirements.txt +1 -3
app.py
CHANGED
@@ -1,21 +1,9 @@
|
|
1 |
import gradio as gr
|
2 |
-
import torch
|
3 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
-
|
5 |
-
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", low_cpu_mem_usage=True)
|
6 |
-
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
7 |
-
model.to(device).eval()
|
8 |
-
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
|
9 |
|
10 |
def predict(msg, chat_history):
|
11 |
-
|
12 |
-
|
13 |
-
output = model.generate(
|
14 |
-
input_ids,
|
15 |
-
max_length=27,
|
16 |
-
)
|
17 |
-
output = tokenizer.decode(output[0])
|
18 |
-
chat_history.append((msg, output))
|
19 |
return "", chat_history
|
20 |
|
21 |
with gr.Blocks() as demo:
|
|
|
1 |
import gradio as gr
|
2 |
+
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
def predict(msg, chat_history, api_url="http://13.82.101.149:80/predict", timeout=30):
    """Send the user's message to the remote inference API and record the reply.

    Parameters:
        msg: the user's input string from the Gradio textbox.
        chat_history: list of (user_message, bot_reply) tuples; mutated in place.
        api_url: inference endpoint; defaults to the deployed API host so
            existing Gradio callers (which pass only msg and chat_history)
            are unaffected.
        timeout: seconds to wait for the API before requests raises a
            Timeout, instead of hanging the worker forever (requests has
            no default timeout).

    Returns:
        A ("", chat_history) tuple: the empty string clears the input
        textbox and the updated history re-renders the chatbot component.
    """
    # NOTE(review): the raw response body is used verbatim as the bot reply —
    # presumably the API returns plain text; confirm against the server.
    ret = requests.post(url=api_url, json={"msg": msg}, timeout=timeout)
    chat_history.append((msg, ret.text))
    return "", chat_history
|
8 |
|
9 |
with gr.Blocks() as demo:
|
requirements.txt
CHANGED
@@ -1,3 +1 @@
|
|
1 |
-
|
2 |
-
torch
|
3 |
-
accelerate
|
|
|
1 |
+
requests
|
|
|
|