luanpoppe
feat: tentando adicionar ragas
c625f4c
raw
history blame
11.5 kB
from rest_framework.views import APIView
from adrf.views import APIView as AsyncAPIView
import tempfile, os
from rest_framework.response import Response
from _utils.resumo_completo_cursor import (
get_llm_summary_answer_by_cursor_complete,
test_ragas,
)
from _utils.resumo_simples_cursor import get_llm_summary_answer_by_cursor
from _utils.utils import DEFAULT_SYSTEM_PROMPT
from .serializer import (
RagasFromTextSerializer,
ResumoCursorCompeltoSerializer,
ResumoPDFSerializer,
ResumoCursorSerializer,
RagasSerializer,
)
from _utils.main import get_llm_answer_summary, get_llm_answer_summary_with_embedding
from setup.environment import default_model
from rest_framework.parsers import MultiPartParser
from drf_spectacular.utils import extend_schema
class ResumoView(APIView):
    """Endpoint that forwards uploaded files to ``get_llm_answer_summary``.

    Accepts multipart form data validated by ``ResumoPDFSerializer``.
    """

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoPDFSerializer,
    )
    def post(self, request):
        """Copy each uploaded file to a temporary ``.pdf`` and request a summary.

        Args:
            request: DRF request whose ``data`` is validated by
                ``ResumoPDFSerializer``.

        Returns:
            ``Response`` with the helper's result under the ``"resposta"`` key.

        Raises:
            Whatever ``serializer.is_valid(raise_exception=True)`` raises on
            invalid input, plus any error propagated by the LLM helper.
        """
        serializer = ResumoPDFSerializer(data=request.data)
        # With raise_exception=True an invalid payload raises instead of
        # returning False, so no conditional is needed around the happy path.
        serializer.is_valid(raise_exception=True)

        data = serializer.validated_data
        model = data.get("model", default_model)
        print("serializer.validated_data: ", serializer.validated_data)

        pdf_paths = []
        try:
            for uploaded in data["files"]:
                print("file: ", uploaded)
                uploaded.seek(0)
                # delete=False: the file must outlive this ``with`` so the
                # helper can open it by path; it is removed in ``finally``.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Register the path first so a failure while writing
                    # still gets cleaned up.
                    pdf_paths.append(temp_file.name)
                    for chunk in uploaded.chunks():
                        temp_file.write(chunk)

            system_prompt = data.get("system_prompt", DEFAULT_SYSTEM_PROMPT)
            resposta_llm = get_llm_answer_summary(
                system_prompt,
                data["user_message"],
                pdf_paths,
                model=model,
                isIterativeRefinement=data["iterative_refinement"],
            )
        finally:
            # Always remove the temporary copies, even when the helper raises.
            for path in pdf_paths:
                os.remove(path)

        return Response({"resposta": resposta_llm})
class ResumoEmbeddingView(APIView):
    """Endpoint that forwards uploaded files to
    ``get_llm_answer_summary_with_embedding``.

    Accepts multipart form data validated by ``ResumoPDFSerializer``.
    """

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoPDFSerializer,
    )
    def post(self, request):
        """Copy each uploaded file to a temporary ``.pdf`` and request a summary.

        Args:
            request: DRF request whose ``data`` is validated by
                ``ResumoPDFSerializer``.

        Returns:
            ``Response`` with the helper's result under the ``"resposta"`` key.

        Raises:
            Whatever ``serializer.is_valid(raise_exception=True)`` raises on
            invalid input, plus any error propagated by the LLM helper.
        """
        serializer = ResumoPDFSerializer(data=request.data)
        # raise_exception=True raises on invalid input, so there is no
        # "invalid" branch to handle here.
        serializer.is_valid(raise_exception=True)

        data = serializer.validated_data
        model = data.get("model", default_model)
        print("serializer.validated_data: ", serializer.validated_data)

        pdf_paths = []
        try:
            for uploaded in data["files"]:
                uploaded.seek(0)
                # delete=False: the helper opens the file by path after this
                # ``with`` closes it; removal happens in ``finally``.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Track the path before writing so partial files are
                    # cleaned up too.
                    pdf_paths.append(temp_file.name)
                    for chunk in uploaded.chunks():
                        temp_file.write(chunk)
            print("listaPDFs: ", pdf_paths)

            system_prompt = data.get("system_prompt", DEFAULT_SYSTEM_PROMPT)
            resposta_llm = get_llm_answer_summary_with_embedding(
                system_prompt,
                data["user_message"],
                pdf_paths,
                model=model,
                isIterativeRefinement=data["iterative_refinement"],
            )
        finally:
            # Always remove the temporary copies, even when the helper raises.
            for path in pdf_paths:
                os.remove(path)

        return Response({"resposta": resposta_llm})
class ResumoSimplesCursorView(APIView):
    """Endpoint that forwards uploaded files to
    ``get_llm_summary_answer_by_cursor``.

    Accepts multipart form data validated by ``ResumoCursorSerializer``.
    """

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoCursorSerializer,
    )
    def post(self, request):
        """Copy each uploaded file to a temporary ``.pdf`` and call the helper.

        Args:
            request: DRF request whose ``data`` is validated by
                ``ResumoCursorSerializer``.

        Returns:
            ``Response`` with the helper's result under the ``"resposta"`` key.

        Raises:
            Whatever ``serializer.is_valid(raise_exception=True)`` raises on
            invalid input, plus any error propagated by the helper.
        """
        serializer = ResumoCursorSerializer(data=request.data)
        # raise_exception=True raises on invalid input; no branch required.
        serializer.is_valid(raise_exception=True)

        data = serializer.validated_data
        print("\nserializer.validated_data: ", serializer.validated_data)

        pdf_paths = []
        try:
            for uploaded in data["files"]:
                uploaded.seek(0)
                # delete=False: the helper reads the file by path later;
                # the copy is removed in ``finally``.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Track the path before writing so partial files are
                    # cleaned up too.
                    pdf_paths.append(temp_file.name)
                    for chunk in uploaded.chunks():
                        temp_file.write(chunk)
            print("listaPDFs: ", pdf_paths)

            # The whole validated payload is handed to the helper.
            resposta_llm = get_llm_summary_answer_by_cursor(data, pdf_paths)
        finally:
            # Always remove the temporary copies, even when the helper raises.
            for path in pdf_paths:
                os.remove(path)

        return Response({"resposta": resposta_llm})
class ResumoSimplesCursorCompletoView(AsyncAPIView):
    """Async endpoint that awaits ``get_llm_summary_answer_by_cursor_complete``.

    Accepts multipart form data validated by
    ``ResumoCursorCompeltoSerializer``.
    """

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoCursorCompeltoSerializer,
    )
    async def post(self, request):
        """Copy each uploaded file to a temporary ``.pdf`` and await the helper.

        Args:
            request: DRF request whose ``data`` is validated by
                ``ResumoCursorCompeltoSerializer``.

        Returns:
            ``Response`` with the awaited result under the ``"resposta"`` key.

        Raises:
            Whatever ``serializer.is_valid(raise_exception=True)`` raises on
            invalid input, plus any error propagated by the helper.
        """
        serializer = ResumoCursorCompeltoSerializer(data=request.data)
        # raise_exception=True raises on invalid input; no branch required.
        serializer.is_valid(raise_exception=True)

        print("\n\n\n")
        print("serializer.validated_data: ", serializer.validated_data)
        print("\n\n\n")

        data = serializer.validated_data
        print("\nserializer.validated_data: ", serializer.validated_data)

        pdf_paths = []
        try:
            for uploaded in data["files"]:
                uploaded.seek(0)
                # delete=False: the helper reads the file by path later;
                # the copy is removed in ``finally``.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Track the path before writing so partial files are
                    # cleaned up too.
                    pdf_paths.append(temp_file.name)
                    for chunk in uploaded.chunks():
                        temp_file.write(chunk)
            print("listaPDFs: ", pdf_paths)

            final = await get_llm_summary_answer_by_cursor_complete(
                data, pdf_paths
            )
            print("\n\n\n")
            print("final: ", final)
        finally:
            # Always remove the temporary copies, even when the helper raises.
            for path in pdf_paths:
                os.remove(path)

        return Response({"resposta": final})
class RagasView(APIView):
    """Endpoint that passes uploaded files to ``test_ragas``.

    Accepts multipart form data validated by ``RagasSerializer``.
    """

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=RagasSerializer,
    )
    def post(self, request):
        """Copy each uploaded file to a temporary ``.pdf`` and run ``test_ragas``.

        Args:
            request: DRF request whose ``data`` is validated by
                ``RagasSerializer``.

        Returns:
            ``Response`` with the result of ``test_ragas`` under ``"msg"``.

        Raises:
            Whatever ``serializer.is_valid(raise_exception=True)`` raises on
            invalid input, plus any error propagated by ``test_ragas``.
        """
        serializer = RagasSerializer(data=request.data)
        print("\n\n\n")
        print("\n\n\n")
        print("serializer.data: ", serializer)

        # raise_exception=True raises on invalid input; no branch required.
        serializer.is_valid(raise_exception=True)

        pdf_paths = []
        try:
            for uploaded in serializer.validated_data["files"]:
                uploaded.seek(0)
                # delete=False: ``test_ragas`` receives paths, so the files
                # must persist after this ``with``; removed in ``finally``.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Track the path before writing so partial files are
                    # cleaned up too.
                    pdf_paths.append(temp_file.name)
                    for chunk in uploaded.chunks():
                        temp_file.write(chunk)

            # The serializer itself (not just validated_data) is what the
            # helper is given.
            result = test_ragas(serializer, pdf_paths)
        finally:
            # Always remove the temporary copies, even when the helper raises.
            for path in pdf_paths:
                os.remove(path)

        return Response({"msg": result})
class RagasFromTextView(APIView):
    """Endpoint that evaluates LLM answers with the ragas ``faithfulness`` metric.

    Questions and reference texts are read from a CSV test set (columns
    ``user_input`` and ``reference``), each question is answered through
    ``get_llm_summary_answer_by_cursor_complete`` and the collected answers
    are scored with ``ragas.evaluate``.
    """

    # CSV test set location. Kept as a class attribute so a subclass or test
    # can point it elsewhere; the default is the original hard-coded path.
    testset_csv_path = "D:/repositorios/projetos-pessoais/projeto-y-backend-hugginf-face-teste-01/vella-backend/_utils/files/ragas_testset.csv"

    def post(self, request):
        """Run the evaluation and return the metrics table as text.

        Args:
            request: DRF request whose ``data`` is validated by
                ``RagasFromTextSerializer``.

        Returns:
            ``Response`` whose ``"resposta"`` key holds the evaluation result
            rendered via ``results.to_pandas().to_string()``.

        Raises:
            Whatever ``serializer.is_valid(raise_exception=True)`` raises on
            invalid input, plus any error from reading the CSV, the LLM
            helper, or ragas.
        """
        serializer = RagasFromTextSerializer(data=request.data)
        # raise_exception=True raises on invalid input; no branch required.
        serializer.is_valid(raise_exception=True)
        payload = serializer.validated_data

        # Heavy / optional dependencies stay function-local, as before, so
        # they are only imported when this endpoint is actually used.
        import asyncio

        import pandas as pd
        from datasets import Dataset
        from ragas import evaluate
        from ragas.metrics import faithfulness

        df_pandas = pd.read_csv(self.testset_csv_path)

        # One built-in sample pair is always evaluated ahead of the CSV rows.
        user_inputs = [
            "What is the capital of France?",
            *df_pandas["user_input"],
        ]
        references = [
            "Paris is the capital of France. It is a major European city known for its culture.",
            *df_pandas["reference"],
        ]
        print("data: ", references)

        async def _answer_all():
            """Await the LLM helper once per (question, reference) pair, in order."""
            answers = []
            for question, contexto in zip(user_inputs, references):
                # The helper receives the validated payload with the current
                # question substituted as the user message.
                payload["user_message"] = question
                answers.append(
                    await get_llm_summary_answer_by_cursor_complete(
                        payload, contexto=contexto
                    )
                )
            return answers

        # The helper is awaited elsewhere in this module, so calling it from
        # this synchronous view without driving an event loop would leave an
        # un-awaited object that cannot be subscripted below.
        respostas = asyncio.run(_answer_all())

        data = {
            "user_input": user_inputs,
            "response": [resposta["texto_completo"] for resposta in respostas],
            "retrieved_contexts": [
                [item["source"]["text"] for item in resposta["resultado"]]
                for resposta in respostas
            ],
        }

        # Convert the collected columns to a Hugging Face Dataset.
        dataset = Dataset.from_dict(data)

        # Only faithfulness is evaluated for now.
        metrics = [faithfulness]
        results = evaluate(dataset, metrics)

        return Response({"resposta": results.to_pandas().to_string()})