File size: 11,457 Bytes
4e93adb
c625f4c
4e93adb
 
4cd3056
c625f4c
 
 
 
ca8a144
408e821
c625f4c
 
 
 
 
 
 
408e821
4e93adb
 
 
 
 
 
c625f4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e93adb
c625f4c
 
 
 
 
 
 
 
4e93adb
c625f4c
 
4e93adb
c625f4c
7dc6d22
 
 
c625f4c
 
 
 
 
 
 
 
 
 
 
 
7dc6d22
c625f4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7dc6d22
c625f4c
 
 
 
 
 
 
 
7dc6d22
c625f4c
 
 
 
1fd7b67
 
ca8a144
c625f4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca8a144
c625f4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca8a144
c625f4c
 
ca8a144
c625f4c
 
 
 
 
 
 
 
ca8a144
c625f4c
 
4e93adb
c625f4c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
from rest_framework.views import APIView
from adrf.views import APIView as AsyncAPIView
import tempfile, os
from rest_framework.response import Response

from _utils.resumo_completo_cursor import (
    get_llm_summary_answer_by_cursor_complete,
    test_ragas,
)
from _utils.resumo_simples_cursor import get_llm_summary_answer_by_cursor
from _utils.utils import DEFAULT_SYSTEM_PROMPT
from .serializer import (
    RagasFromTextSerializer,
    ResumoCursorCompeltoSerializer,
    ResumoPDFSerializer,
    ResumoCursorSerializer,
    RagasSerializer,
)
from _utils.main import get_llm_answer_summary, get_llm_answer_summary_with_embedding
from setup.environment import default_model
from rest_framework.parsers import MultiPartParser
from drf_spectacular.utils import extend_schema


class ResumoView(APIView):
    """Summarise uploaded files with ``get_llm_answer_summary``.

    Accepts a multipart POST whose payload is validated by
    ``ResumoPDFSerializer``.
    """

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoPDFSerializer,
    )
    def post(self, request):
        """Copy the uploads to temporary files, summarise them, and respond.

        Args:
            request: The incoming DRF request; ``request.data`` is handed to
                ``ResumoPDFSerializer``.

        Returns:
            Response: ``{"resposta": <value returned by get_llm_answer_summary>}``.

        Raises:
            rest_framework.exceptions.ValidationError: Raised by ``is_valid``
                when the payload does not validate.
        """
        serializer = ResumoPDFSerializer(data=request.data)
        # With raise_exception=True an invalid payload raises, so everything
        # below only runs for validated data.
        serializer.is_valid(raise_exception=True)

        data = serializer.validated_data
        model = data.get("model", default_model)
        print("serializer.validated_data: ", serializer.validated_data)

        pdf_paths = []
        try:
            for file in data["files"]:
                print("file: ", file)
                file.seek(0)
                # delete=False keeps the file on disk after the ``with`` block
                # so the summariser can open it by path.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Register the path before writing so that a failed write
                    # is still cleaned up in the ``finally`` below.
                    pdf_paths.append(temp_file.name)
                    for chunk in file.chunks():
                        temp_file.write(chunk)

            system_prompt = data.get("system_prompt", DEFAULT_SYSTEM_PROMPT)
            resposta_llm = get_llm_answer_summary(
                system_prompt,
                data["user_message"],
                pdf_paths,
                model=model,
                isIterativeRefinement=data["iterative_refinement"],
            )
        finally:
            # Always remove the temporary copies, including when the
            # summariser raises; previously they leaked on any error.
            for path in pdf_paths:
                os.remove(path)

        return Response({"resposta": resposta_llm})


class ResumoEmbeddingView(APIView):
    """Summarise uploaded files with ``get_llm_answer_summary_with_embedding``.

    Accepts a multipart POST whose payload is validated by
    ``ResumoPDFSerializer``.
    """

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoPDFSerializer,
    )
    def post(self, request):
        """Copy the uploads to temporary files, summarise them, and respond.

        Args:
            request: The incoming DRF request; ``request.data`` is handed to
                ``ResumoPDFSerializer``.

        Returns:
            Response: ``{"resposta": <value returned by
            get_llm_answer_summary_with_embedding>}``.

        Raises:
            rest_framework.exceptions.ValidationError: Raised by ``is_valid``
                when the payload does not validate.
        """
        serializer = ResumoPDFSerializer(data=request.data)
        # With raise_exception=True an invalid payload raises, so everything
        # below only runs for validated data.
        serializer.is_valid(raise_exception=True)

        data = serializer.validated_data
        model = data.get("model", default_model)
        print("serializer.validated_data: ", serializer.validated_data)

        pdf_paths = []
        try:
            for file in data["files"]:
                file.seek(0)
                # delete=False keeps the file on disk after the ``with`` block
                # so the summariser can open it by path.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Register the path before writing so that a failed write
                    # is still cleaned up in the ``finally`` below.
                    pdf_paths.append(temp_file.name)
                    for chunk in file.chunks():
                        temp_file.write(chunk)
            print("listaPDFs: ", pdf_paths)

            system_prompt = data.get("system_prompt", DEFAULT_SYSTEM_PROMPT)
            resposta_llm = get_llm_answer_summary_with_embedding(
                system_prompt,
                data["user_message"],
                pdf_paths,
                model=model,
                isIterativeRefinement=data["iterative_refinement"],
            )
        finally:
            # Always remove the temporary copies, including when the
            # summariser raises; previously they leaked on any error.
            for path in pdf_paths:
                os.remove(path)

        return Response({"resposta": resposta_llm})


class ResumoSimplesCursorView(APIView):
    """Summarise uploaded files with ``get_llm_summary_answer_by_cursor``.

    Accepts a multipart POST whose payload is validated by
    ``ResumoCursorSerializer``.
    """

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoCursorSerializer,
    )
    def post(self, request):
        """Copy the uploads to temporary files, summarise them, and respond.

        Args:
            request: The incoming DRF request; ``request.data`` is handed to
                ``ResumoCursorSerializer``.

        Returns:
            Response: ``{"resposta": <value returned by
            get_llm_summary_answer_by_cursor>}``.

        Raises:
            rest_framework.exceptions.ValidationError: Raised by ``is_valid``
                when the payload does not validate.
        """
        serializer = ResumoCursorSerializer(data=request.data)
        # With raise_exception=True an invalid payload raises, so everything
        # below only runs for validated data.
        serializer.is_valid(raise_exception=True)

        data = serializer.validated_data
        print("\nserializer.validated_data: ", serializer.validated_data)

        pdf_paths = []
        try:
            for file in data["files"]:
                file.seek(0)
                # delete=False keeps the file on disk after the ``with`` block
                # so the summariser can open it by path.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Register the path before writing so that a failed write
                    # is still cleaned up in the ``finally`` below.
                    pdf_paths.append(temp_file.name)
                    for chunk in file.chunks():
                        temp_file.write(chunk)
            print("listaPDFs: ", pdf_paths)

            # The helper receives the whole validated payload plus the paths.
            resposta_llm = get_llm_summary_answer_by_cursor(data, pdf_paths)
        finally:
            # Always remove the temporary copies, including when the
            # summariser raises; previously they leaked on any error.
            for path in pdf_paths:
                os.remove(path)

        return Response({"resposta": resposta_llm})


class ResumoSimplesCursorCompletoView(AsyncAPIView):
    """Summarise uploads with ``get_llm_summary_answer_by_cursor_complete``.

    Async view: accepts a multipart POST whose payload is validated by
    ``ResumoCursorCompeltoSerializer`` and awaits the summariser.
    """

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=ResumoCursorCompeltoSerializer,
    )
    async def post(self, request):
        """Copy the uploads to temporary files, await the summary, and respond.

        Args:
            request: The incoming request; ``request.data`` is handed to
                ``ResumoCursorCompeltoSerializer``.

        Returns:
            Response: ``{"resposta": <value returned by
            get_llm_summary_answer_by_cursor_complete>}``.

        Raises:
            rest_framework.exceptions.ValidationError: Raised by ``is_valid``
                when the payload does not validate.
        """
        serializer = ResumoCursorCompeltoSerializer(data=request.data)
        # With raise_exception=True an invalid payload raises, so everything
        # below only runs for validated data.
        serializer.is_valid(raise_exception=True)

        # Debug output, kept exactly as the original emitted it.
        print("\n\n\n")
        print("serializer.validated_data: ", serializer.validated_data)
        print("\n\n\n")
        data = serializer.validated_data
        print("\nserializer.validated_data: ", serializer.validated_data)

        pdf_paths = []
        try:
            for file in data["files"]:
                file.seek(0)
                # delete=False keeps the file on disk after the ``with`` block
                # so the summariser can open it by path.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Register the path before writing so that a failed write
                    # is still cleaned up in the ``finally`` below.
                    pdf_paths.append(temp_file.name)
                    for chunk in file.chunks():
                        temp_file.write(chunk)
            print("listaPDFs: ", pdf_paths)

            resposta_llm = await get_llm_summary_answer_by_cursor_complete(
                data, pdf_paths
            )

            print("\n\n\n")
            print("final: ", resposta_llm)
        finally:
            # Always remove the temporary copies, including when the awaited
            # call raises; previously they leaked on any error.
            for path in pdf_paths:
                os.remove(path)

        return Response({"resposta": resposta_llm})


class RagasView(APIView):
    """Run ``test_ragas`` against uploaded files.

    Accepts a multipart POST whose payload is validated by
    ``RagasSerializer``.
    """

    parser_classes = [MultiPartParser]

    @extend_schema(
        request=RagasSerializer,
    )
    def post(self, request):
        """Copy the uploads to temporary files, run ``test_ragas``, and respond.

        Args:
            request: The incoming DRF request; ``request.data`` is handed to
                ``RagasSerializer``.

        Returns:
            Response: ``{"msg": <value returned by test_ragas>}``.

        Raises:
            rest_framework.exceptions.ValidationError: Raised by ``is_valid``
                when the payload does not validate.
        """
        serializer = RagasSerializer(data=request.data)
        # Debug output, kept exactly as the original emitted it (this prints
        # the serializer object itself, before validation).
        print("\n\n\n")
        print("\n\n\n")
        print("serializer.data: ", serializer)

        # With raise_exception=True an invalid payload raises, so everything
        # below only runs for validated data.
        serializer.is_valid(raise_exception=True)

        pdf_paths = []
        try:
            for file in serializer.validated_data["files"]:
                file.seek(0)
                # delete=False keeps the file on disk after the ``with`` block
                # so ``test_ragas`` can open it by path.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Register the path before writing so that a failed write
                    # is still cleaned up in the ``finally`` below.
                    pdf_paths.append(temp_file.name)
                    for chunk in file.chunks():
                        temp_file.write(chunk)

            # ``test_ragas`` is given the serializer itself, not validated_data.
            result = test_ragas(serializer, pdf_paths)
        finally:
            # Always remove the temporary copies, including when the
            # evaluation raises; previously they leaked on any error.
            for path in pdf_paths:
                os.remove(path)

        return Response({"msg": result})


class RagasFromTextView(APIView):
    """Evaluate LLM answers with the RAGAS ``faithfulness`` metric.

    Questions and reference texts are read from a CSV test set; each question
    is answered through ``get_llm_summary_answer_by_cursor_complete`` and the
    collected answers are scored with ``ragas.evaluate``.
    """

    # NOTE(review): machine-specific absolute path, reproduced unchanged from
    # the original code. Exposed as a class attribute so it can be overridden;
    # it should presumably come from settings or be resolved relative to the
    # project instead.
    testset_csv_path = "D:/repositorios/projetos-pessoais/projeto-y-backend-hugginf-face-teste-01/vella-backend/_utils/files/ragas_testset.csv"

    def post(self, request):
        """Answer every test-set question and return the evaluation table.

        Args:
            request: The incoming DRF request; ``request.data`` is handed to
                ``RagasFromTextSerializer``.

        Returns:
            Response: ``{"resposta": <evaluation results rendered with
            DataFrame.to_string()>}``.

        Raises:
            rest_framework.exceptions.ValidationError: Raised by ``is_valid``
                when the payload does not validate.
        """
        serializer = RagasFromTextSerializer(data=request.data)
        # With raise_exception=True an invalid payload raises, so everything
        # below only runs for validated data.
        serializer.is_valid(raise_exception=True)

        # Imported inside the method, as the original code did.
        import asyncio

        import pandas as pd
        from datasets import Dataset
        from ragas import evaluate
        from ragas.metrics import faithfulness

        # NOTE(review): the original had a bare ``os.environ.get("OPENAI_API_KEY")``
        # whose result was discarded; it had no effect and was removed.
        # Presumably ragas reads the key from the environment itself - confirm.

        df_pandas = pd.read_csv(self.testset_csv_path)

        # A fixed sample question/reference pair is evaluated first, followed
        # by the rows of the CSV.
        user_inputs = [
            "What is the capital of France?",
            *df_pandas["user_input"],
        ]
        references = [
            "Paris is the capital of France. It is a major European city known for its culture.",
            *df_pandas["reference"],
        ]
        print("data: ", references)

        responses = []
        retrieved_contexts = []
        for question, reference in zip(user_inputs, references):
            # The helper reads the question from the validated payload.
            serializer.validated_data["user_message"] = question
            # The helper is awaited elsewhere in this module, so calling it
            # yields an awaitable; the original subscripted the un-awaited
            # result. Drive it to completion from this synchronous view.
            # NOTE(review): asyncio.run requires that no event loop is running
            # in the current thread - confirm for the deployment in use.
            resposta_llm = asyncio.run(
                get_llm_summary_answer_by_cursor_complete(
                    serializer.validated_data, contexto=reference
                )
            )
            responses.append(resposta_llm["texto_completo"])
            retrieved_contexts.append(
                [item["source"]["text"] for item in resposta_llm["resultado"]]
            )

        # Convert the collected columns to a Hugging Face Dataset.
        dataset = Dataset.from_dict(
            {
                "user_input": user_inputs,
                "response": responses,
                "retrieved_contexts": retrieved_contexts,
            }
        )

        # Only faithfulness is evaluated; further ragas metrics can be
        # appended to this list.
        metrics = [faithfulness]

        results = evaluate(dataset, metrics)

        return Response({"resposta": results.to_pandas().to_string()})