import time
import json

import requests

from modules.config import client, azure_ml_endpoint_url, headers
from modules.prompt import generate_prompt
from modules.search import search

def respond(
    message: str,
    history: list[tuple[str, str]],
    model: str,
):
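    """Answer a chat message with retrieval-grounded generation.

    Builds a grounded prompt from search results over the user's question,
    sends it either to an Azure OpenAI chat model or to a Phi model behind
    an Azure ML endpoint, and yields the answer character by character to
    simulate streaming.
    """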
print("Model: ", model) |
|
messages = [{"role": "user", "content": msg[0]} if msg[0] else {"role": "assistant", "content": msg[1]} for msg in history] |
|
messages.append({"role": "user", "content": message}) |
|
|
|
response = "" |
|
|
|
highlighted_content = search(message) |
|
GROUNDED_PROMPT = generate_prompt(message, highlighted_content) |
|
print("Modelo = ", model) |
|
if model == "gpt-4" or model == "gpt-35-turbo": |
|
print("Utilizando gpt model ", model) |
|
completion = client.chat.completions.create( |
|
model=model, |
|
messages=[ |
|
{ |
|
"role": "user", |
|
"content": GROUNDED_PROMPT, |
|
}, |
|
], |
|
) |
|
token = completion.choices[0].message.content |
|
    else:
        print("Using Phi model", model)
        # The Azure ML endpoint expects an OpenAI-style chat completion
        # request body and returns a response of the same shape.
        data = {
            "messages": [
                {"role": "user", "content": GROUNDED_PROMPT}
            ],
            "temperature": 0.8,
            "top_p": 0.1,
            "max_tokens": 2048,
        }

        payload = json.dumps(data)

        response_api = requests.post(azure_ml_endpoint_url, headers=headers, data=payload)
        response_json = response_api.json()
        content = response_json["choices"][0]["message"]["content"]
        token = content.strip()
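        # Suggestion (not in the original code): calling
        # response_api.raise_for_status() before parsing would make HTTP
        # errors fail fast instead of surfacing as a KeyError on the JSON body.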
    response += token

displayed_response = "" |
|
for char in response: |
|
displayed_response += char |
|
time.sleep(0.01) |
|
yield displayed_response |
|
|
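
# Usage sketch (an assumption, not part of this module): respond() matches the
# (message, history, *additional_inputs) signature that Gradio's ChatInterface
# passes to its fn, with "model" supplied as an extra input. The "phi-3" entry
# below is a hypothetical name for the non-GPT branch.
#
# import gradio as gr
#
# demo = gr.ChatInterface(
#     fn=respond,
#     additional_inputs=[
#         gr.Dropdown(["gpt-4", "gpt-35-turbo", "phi-3"], label="Model"),
#     ],
# )
# demo.launch()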