yingbei committed on
Commit
53537bd
·
verified ·
1 Parent(s): 4ddae95

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -20
README.md CHANGED
@@ -200,28 +200,24 @@ def run_model(messages, functions):
200
  ## Format messages in Rubra's format
201
  formatted_msgs = preprocess_input(msgs=messages, tools=functions)
202
 
203
- input_ids = tokenizer.apply_chat_template(
204
  formatted_msgs,
205
- add_generation_prompt=True,
206
- return_tensors="pt"
207
- ).to(model.device)
 
208
 
209
- terminators = [
210
- tokenizer.eos_token_id,
211
- tokenizer.convert_tokens_to_ids("<|eot_id|>")
 
 
 
212
  ]
213
 
214
- outputs = model.generate(
215
- input_ids,
216
- max_new_tokens=1000,
217
- eos_token_id=terminators,
218
- do_sample=True,
219
- temperature=0.1,
220
- top_p=0.9,
221
- )
222
- response = outputs[0][input_ids.shape[-1]:]
223
- raw_output = tokenizer.decode(response, skip_special_tokens=True)
224
- return raw_output
225
 
226
  raw_output = run_model(messages, functions)
227
  # Check if there's a function call
@@ -245,9 +241,10 @@ if function_call:
245
  messages.append({"role": "assistant", "tool_calls": function_call})
246
  # append the result of the tool call in openai format, in this case, the value of add 6 to 4 is 10.
247
  messages.append({'role': 'tool', 'tool_call_id': function_call[0]["id"], 'name': function_call[0]["function"]["name"], 'content': '10'})
248
- raw_output = run_model(messages, functions)
249
  # Check if there's a function call
250
- function_call = postprocess_output(raw_output)
 
251
  if function_call:
252
  print(function_call)
253
  else:
 
200
  ## Format messages in Rubra's format
201
  formatted_msgs = preprocess_input(msgs=messages, tools=functions)
202
 
203
+ text = tokenizer.apply_chat_template(
204
  formatted_msgs,
205
+ tokenize=False,
206
+ add_generation_prompt=True
207
+ )
208
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
209
 
210
+ generated_ids = model.generate(
211
+ model_inputs.input_ids,
212
+ max_new_tokens=512
213
+ )
214
+ generated_ids = [
215
+ output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
216
  ]
217
 
218
+ response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
219
+ return response
220
+
 
 
 
 
 
 
 
 
221
 
222
  raw_output = run_model(messages, functions)
223
  # Check if there's a function call
 
241
  messages.append({"role": "assistant", "tool_calls": function_call})
242
  # append the result of the tool call in openai format, in this case, the value of add 6 to 4 is 10.
243
  messages.append({'role': 'tool', 'tool_call_id': function_call[0]["id"], 'name': function_call[0]["function"]["name"], 'content': '10'})
244
+ raw_output1 = run_model(messages, functions)
245
  # Check if there's a function call
246
+
247
+ function_call = postprocess_output(raw_output1)
248
  if function_call:
249
  print(function_call)
250
  else: