Update app.py
app.py CHANGED
@@ -80,6 +80,11 @@ model = accelerator.prepare(model)
 # model = load_checkpoint_and_dispatch(model, model_id, device_map=device_map, no_split_module_classes=["GPTJBlock"])
 # model.half()
 
+import json
+
+def str_to_json(str_obj):
+    json_obj = json.loads(str_obj)
+    return json_obj
 
 
 @spaces.GPU(duration=60)
@@ -92,15 +97,12 @@ def respond(
     top_p,
 ):
     messages = []
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+    json_obj = str_to_json(message)
+    print(json_obj)
+
+    messages= json_obj
 
-    messages.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(accelerator.device) #.to('cuda')
-
     input_ids2 = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, return_tensors="pt") #.to('cuda')
 
     input_str= str(input_ids2)
@@ -109,9 +111,9 @@ def respond(
     # with autocast():
     gen_tokens = model.generate(
         input_ids,
-        max_new_tokens=
+        max_new_tokens=max_tokens,
         # do_sample=True,
-        temperature=
+        temperature=temperature,
     )
 
     gen_text = tokenizer.decode(gen_tokens[0])
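In effect, respond no longer rebuilds messages from the Gradio history tuples: the caller now sends the whole conversation as a JSON-encoded list of role/content dicts in message, which str_to_json parses and hands straight to the chat template, and model.generate now reads max_new_tokens and temperature from the function's max_tokens and temperature arguments. A minimal sketch of the payload a client would now construct (the payload name is illustrative, not part of the Space):

    import json

    # The whole conversation, serialized as the JSON string that
    # respond() passes through str_to_json() before templating.
    payload = json.dumps([
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi there. How can I help?"},
        {"role": "user", "content": "Summarize our chat so far."},
    ])

    # Inside respond(), this round-trips back into a list of dicts:
    messages = json.loads(payload)
    assert messages[0]["role"] == "user"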
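Note that str_to_json will raise json.JSONDecodeError if message is not valid JSON (for example, a plain prompt typed into the chat box). A guarded variant is sketched below purely as a possible hardening; it is not part of this commit:

    import json

    def str_to_json(str_obj):
        # Parse the incoming string; if it is not valid JSON, fall back
        # to treating it as a single user turn. (Hypothetical hardening,
        # not part of this commit.)
        try:
            return json.loads(str_obj)
        except json.JSONDecodeError:
            return [{"role": "user", "content": str_obj}]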