# Source: Hugging Face Space by pedropauletti — commit 09c1b8e ("Update modules/respond.py")
from modules.prompt import generate_prompt
from modules.config import client, azure_ml_endpoint_url, headers
from modules.search import search
import time
import requests
import json
def respond(
    message,
    history: list[tuple[str, str]],
    model
):
    """Stream a retrieval-grounded answer to *message*, one character at a time.

    The latest user message is used to retrieve supporting content via
    ``search``; that content is folded into a grounded prompt and sent to
    either an OpenAI-style chat model (``gpt-4`` / ``gpt-35-turbo`` via
    ``client``) or, for any other model name, a Phi model behind an
    Azure ML endpoint (``azure_ml_endpoint_url``).

    Args:
        message: The user's latest message.
        history: Prior (user, assistant) turns, Gradio-style.
        model: Model identifier selecting the backend.

    Yields:
        Progressively longer prefixes of the answer (typing effect),
        one character added per yield.

    Raises:
        requests.HTTPError: If the Azure ML endpoint returns an error status.
        requests.Timeout: If the endpoint does not respond within 60 s.
    """
    print("Model: ", model)

    # NOTE(review): `history` is accepted but not forwarded to either backend —
    # only the grounded prompt for the latest message is sent. The original
    # code built a `messages` list from history and never used it; confirm
    # whether multi-turn context should actually be included.
    highlighted_content = search(message)
    grounded_prompt = generate_prompt(message, highlighted_content)

    if model in ("gpt-4", "gpt-35-turbo"):
        print("Using GPT model ", model)
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": grounded_prompt,
                },
            ],
        )
        token = completion.choices[0].message.content
    else:  # Phi model served on an Azure ML endpoint
        print("Using Phi model ", model)
        payload = {
            "messages": [
                {"role": "user", "content": grounded_prompt}
            ],
            "temperature": 0.8,
            "top_p": 0.1,
            "max_tokens": 2048
        }
        # `json=` serializes and sets Content-Type in one step; the timeout
        # keeps a dead endpoint from hanging this generator forever.
        response_api = requests.post(
            azure_ml_endpoint_url, headers=headers, json=payload, timeout=60
        )
        # Fail loudly on an HTTP error instead of raising a confusing
        # KeyError when the error body lacks 'choices'.
        response_api.raise_for_status()
        response_json = response_api.json()
        content = response_json['choices'][0]['message']['content']
        token = content.strip()

    response = token

    # Stream the text gradually (typing effect): yield an ever-growing prefix.
    displayed_response = ""
    for char in response:
        displayed_response += char
        time.sleep(0.01)  # delay between characters for the typing effect
        yield displayed_response