"""API views for PDF summarisation and RAGAS evaluation.

Each upload-based view validates the request with its serializer, spools the
uploaded files to temporary ``.pdf`` files on disk, hands the resulting paths
to the matching helper in ``_utils`` and returns the helper's result as JSON.
"""

import asyncio
import inspect
import logging
import os
import tempfile
from contextlib import contextmanager, suppress

from adrf.views import APIView as AsyncAPIView
from drf_spectacular.utils import extend_schema
from rest_framework.parsers import MultiPartParser
from rest_framework.response import Response
from rest_framework.views import APIView

from _utils.main import get_llm_answer_summary, get_llm_answer_summary_with_embedding
from _utils.resumo_completo_cursor import (
    get_llm_summary_answer_by_cursor_complete,
    test_ragas,
)
from _utils.resumo_simples_cursor import get_llm_summary_answer_by_cursor
from _utils.utils import DEFAULT_SYSTEM_PROMPT
from setup.environment import default_model

from .serializer import (
    RagasFromTextSerializer,
    RagasSerializer,
    ResumoCursorCompeltoSerializer,
    ResumoCursorSerializer,
    ResumoPDFSerializer,
)

logger = logging.getLogger(__name__)

# CSV holding the RAGAS test set ("user_input" and "reference" columns are
# read). The default is the path the code has always used; set the
# RAGAS_TESTSET_CSV environment variable to point somewhere else without
# editing the source.
RAGAS_TESTSET_CSV = os.environ.get(
    "RAGAS_TESTSET_CSV",
    "D:/repositorios/projetos-pessoais/projeto-y-backend-hugginf-face-teste-01/vella-backend/_utils/files/ragas_testset.csv",
)


@contextmanager
def _uploads_as_temp_pdfs(uploaded_files):
    """Spool uploaded files to temporary ``.pdf`` files and yield their paths.

    The temporary files are always deleted when the ``with`` block exits,
    including when the body (or the spooling itself) raises, so a failing
    downstream call no longer leaves files behind in the temp directory.

    Args:
        uploaded_files: Iterable of upload objects exposing ``seek()`` and
            ``chunks()`` (the items of ``validated_data["files"]``).

    Yields:
        list[str]: Paths of the temporary files, in upload order.
    """
    paths = []
    try:
        for uploaded in uploaded_files:
            # Rewind in case the upload was already read during validation.
            uploaded.seek(0)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                # Register the path before writing so a failed write is
                # still cleaned up by the ``finally`` below.
                paths.append(tmp.name)
                for chunk in uploaded.chunks():
                    tmp.write(chunk)
        yield paths
    finally:
        for path in paths:
            # Best-effort cleanup: a file that is already gone is not an error.
            with suppress(FileNotFoundError):
                os.remove(path)


def _resolve(value):
    """Return ``value``, running it to completion first if it is awaitable.

    Lets a synchronous view consume a helper that may be a coroutine
    function. Must not be called from a thread with a running event loop
    (``asyncio.run`` would raise ``RuntimeError``).
    """
    if not inspect.isawaitable(value):
        return value

    async def _await():
        return await value

    return asyncio.run(_await())


class ResumoView(APIView):
    """Summarise uploaded PDFs with ``get_llm_answer_summary``."""

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoPDFSerializer,
    )
    def post(self, request):
        """Validate the upload, summarise it and return ``{"resposta": ...}``."""
        serializer = ResumoPDFSerializer(data=request.data)
        # raise_exception=True: either the data is valid or an exception is
        # raised, so no ``if`` guard is needed.
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data
        logger.debug("serializer.validated_data: %s", data)

        model = data.get("model", default_model)
        system_prompt = data.get("system_prompt", DEFAULT_SYSTEM_PROMPT)

        with _uploads_as_temp_pdfs(data["files"]) as pdf_paths:
            resposta_llm = get_llm_answer_summary(
                system_prompt,
                data["user_message"],
                pdf_paths,
                model=model,
                isIterativeRefinement=data["iterative_refinement"],
            )

        return Response({"resposta": resposta_llm})


class ResumoEmbeddingView(APIView):
    """Summarise uploaded PDFs with ``get_llm_answer_summary_with_embedding``."""

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoPDFSerializer,
    )
    def post(self, request):
        """Validate the upload, summarise it and return ``{"resposta": ...}``."""
        serializer = ResumoPDFSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data
        logger.debug("serializer.validated_data: %s", data)

        model = data.get("model", default_model)
        system_prompt = data.get("system_prompt", DEFAULT_SYSTEM_PROMPT)

        with _uploads_as_temp_pdfs(data["files"]) as pdf_paths:
            logger.debug("listaPDFs: %s", pdf_paths)
            resposta_llm = get_llm_answer_summary_with_embedding(
                system_prompt,
                data["user_message"],
                pdf_paths,
                model=model,
                isIterativeRefinement=data["iterative_refinement"],
            )

        return Response({"resposta": resposta_llm})


class ResumoSimplesCursorView(APIView):
    """Summarise uploaded PDFs with ``get_llm_summary_answer_by_cursor``."""

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoCursorSerializer,
    )
    def post(self, request):
        """Validate the upload, summarise it and return ``{"resposta": ...}``."""
        serializer = ResumoCursorSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data
        logger.debug("serializer.validated_data: %s", data)

        with _uploads_as_temp_pdfs(data["files"]) as pdf_paths:
            logger.debug("listaPDFs: %s", pdf_paths)
            resposta_llm = get_llm_summary_answer_by_cursor(data, pdf_paths)

        return Response({"resposta": resposta_llm})


class ResumoSimplesCursorCompletoView(AsyncAPIView):
    """Async view around ``get_llm_summary_answer_by_cursor_complete``."""

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoCursorCompeltoSerializer,
    )
    async def post(self, request):
        """Validate the upload, await the summary and return ``{"resposta": ...}``."""
        serializer = ResumoCursorCompeltoSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data
        logger.debug("serializer.validated_data: %s", data)

        with _uploads_as_temp_pdfs(data["files"]) as pdf_paths:
            logger.debug("listaPDFs: %s", pdf_paths)
            final = await get_llm_summary_answer_by_cursor_complete(data, pdf_paths)

        logger.debug("final: %s", final)
        return Response({"resposta": final})


class RagasView(APIView):
    """Run ``test_ragas`` against uploaded PDFs."""

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=RagasSerializer,
    )
    def post(self, request):
        """Validate the upload, run ``test_ragas`` and return ``{"msg": ...}``."""
        serializer = RagasSerializer(data=request.data)
        logger.debug("serializer.data: %s", serializer)
        serializer.is_valid(raise_exception=True)

        with _uploads_as_temp_pdfs(serializer.validated_data["files"]) as pdf_paths:
            # test_ragas receives the serializer itself, not validated_data.
            result = test_ragas(serializer, pdf_paths)

        return Response({"msg": result})


class RagasFromTextView(APIView):
    """Evaluate generated answers with RAGAS using a CSV test set."""

    def post(self, request):
        """Answer every test-set question and score the answers with RAGAS.

        Reads the ``user_input`` and ``reference`` columns of
        ``RAGAS_TESTSET_CSV``, prepends one built-in sample question, asks
        ``get_llm_summary_answer_by_cursor_complete`` for an answer to each
        question (passing the matching reference as ``contexto``) and
        evaluates the collected answers with the ``faithfulness`` metric.

        Returns:
            Response: ``{"resposta": <results table rendered as a string>}``.
        """
        serializer = RagasFromTextSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        # Imported lazily, as in the original, so these packages are only
        # loaded when this endpoint is actually called.
        import pandas as pd
        from datasets import Dataset
        from ragas import evaluate
        from ragas.metrics import faithfulness

        testset = pd.read_csv(RAGAS_TESTSET_CSV)

        questions = ["What is the capital of France?", *testset["user_input"]]
        references = [
            "Paris is the capital of France. It is a major European city known for its culture.",
            *testset["reference"],
        ]
        logger.debug("data: %s", references)

        payload = serializer.validated_data
        responses = []
        retrieved_contexts = []
        for question, reference in zip(questions, references):
            payload["user_message"] = question
            # The helper is awaited in ResumoSimplesCursorCompletoView, so it
            # presumably returns an awaitable; this synchronous view has to
            # run it to completion before the result can be indexed.
            resposta_llm = _resolve(
                get_llm_summary_answer_by_cursor_complete(payload, contexto=reference)
            )
            responses.append(resposta_llm["texto_completo"])
            retrieved_contexts.append(
                [item["source"]["text"] for item in resposta_llm["resultado"]]
            )

        dataset = Dataset.from_dict(
            {
                "user_input": questions,
                "response": responses,
                "retrieved_contexts": retrieved_contexts,
            }
        )

        # Only faithfulness is evaluated for now.
        metrics = [faithfulness]
        results = evaluate(dataset, metrics)

        return Response({"resposta": results.to_pandas().to_string()})