File size: 4,034 Bytes
756fca0
7e48ec4
1286e81
2ce5e93
12d3e1a
1286e81
967a079
 
 
 
a263183
756fca0
1286e81
 
 
2ce5e93
bdf043b
 
967a079
bdf043b
 
 
 
967a079
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bdf043b
 
756fca0
 
 
 
 
 
 
 
967a079
 
 
 
 
 
 
 
756fca0
 
cb23311
 
 
 
 
 
756fca0
 
 
 
 
 
 
967a079
 
 
9de7162
cb23311
 
 
095b5f1
 
 
7eb86f7
756fca0
 
 
 
 
967a079
 
 
9de7162
756fca0
967a079
756fca0
 
 
 
a263183
967a079
 
 
9cd1a8d
9de7162
756fca0
 
 
 
 
967a079
 
 
9de7162
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from dataclasses import dataclass, field
from typing import List, Optional, Union
from rest_framework import serializers
from _utils.gerar_documento_utils.prompts import (
    prompt_gerar_documento,
)
from gerar_documento.serializer_base import (
    GerarDocumentoParametros,
    GerarDocumentoParametrosData,
)
from setup.environment import default_model
from django.core.files.uploadedfile import UploadedFile

# Fallback question used as the default value of the ``user_message``
# serializer field when a request does not provide one.
user_message = "What are the main points of this document?"


class GerarDocumentoInitialSerializer(serializers.Serializer):
    """Base request serializer holding the options shared by its subclasses.

    ``files`` is the only mandatory field. The two stage prompts
    (``prompt_gerar_documento_etapa_2`` and ``prompt_gerar_documento_etapa_3``)
    are optional and declare no default; every other field is optional and
    declares the default shown below.
    """

    # --- input files -----------------------------------------------------
    files = serializers.ListField(
        child=serializers.FileField(),
        required=True,
    )

    # --- question and models ---------------------------------------------
    # ``default=user_message`` picks up the module-level string: the class
    # attribute with the same name is not bound yet when the right-hand side
    # is evaluated.
    user_message = serializers.CharField(
        required=False,
        default=user_message,
    )
    model = serializers.CharField(
        required=False,
        default=default_model,
    )
    hf_embedding = serializers.CharField(
        required=False,
        default="all-MiniLM-L6-v2",
    )

    # --- chunking --------------------------------------------------------
    chunk_size = serializers.IntegerField(
        required=False,
        default=3500,
    )
    chunk_overlap = serializers.IntegerField(
        required=False,
        default=800,
    )

    # --- prompts ---------------------------------------------------------
    # Same shadowing situation as ``user_message``: the default is the
    # imported ``prompt_gerar_documento`` object.
    prompt_gerar_documento = serializers.CharField(
        required=False,
        default=prompt_gerar_documento,
    )
    prompt_gerar_documento_etapa_2 = serializers.CharField(
        required=False,
    )
    prompt_gerar_documento_etapa_3 = serializers.CharField(
        required=False,
    )

    # --- auxiliary models and feature switches ---------------------------
    model_cohere_rerank = serializers.CharField(
        required=False,
        default="rerank-english-v2.0",
    )
    claude_context_model = serializers.CharField(
        required=False,
        default="claude-3-haiku-20240307",
    )
    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
    llm_ultimas_requests = serializers.CharField(
        required=False,
        default="gemini-2.0-flash",
    )


@dataclass
class GerarDocumentoInitialSerializerData:
    """Typed container whose fields mirror ``GerarDocumentoInitialSerializer``.

    Only ``files`` has no default. Note that ``user_message`` and
    ``prompt_gerar_documento`` default to the empty string here, whereas the
    serializer declares non-empty defaults for them.
    """

    # Required: no default value.
    files: List[dict]

    # Question and models.
    user_message: str = ""
    model: str = default_model
    hf_embedding: str = "all-MiniLM-L6-v2"

    # Chunking.
    chunk_size: int = 3500
    chunk_overlap: int = 800

    # Prompts; the stage 2 and stage 3 prompts may be absent.
    prompt_gerar_documento: str = ""
    prompt_gerar_documento_etapa_2: Optional[str] = None
    prompt_gerar_documento_etapa_3: Optional[str] = None

    # Auxiliary models and feature switches.
    model_cohere_rerank: str = "rerank-english-v2.0"
    claude_context_model: str = "claude-3-haiku-20240307"
    should_have_contextual_chunks: bool = False
    should_use_llama_parse: bool = False
    llm_ultimas_requests: str = "gemini-2.0-flash"


class FileInfoSerializer(serializers.Serializer):
    """Validate the description of one file: identifier, type and link."""

    # Both text fields accept at most 255 characters.
    unique_id = serializers.CharField(
        max_length=255,
    )
    tipo_arquivo = serializers.CharField(
        max_length=255,
    )
    # Validated as a URL rather than as free text.
    link_arquivo = serializers.URLField()


@dataclass
class FileInfoSerializerData:
    """Typed container with the same three fields as ``FileInfoSerializer``.

    All fields are required and positional in declaration order.
    """

    unique_id: str  # identifier of the file
    tipo_arquivo: str  # file type
    link_arquivo: str  # link to the file


class GerarDocumentoSerializer(
    GerarDocumentoInitialSerializer, GerarDocumentoParametros
):
    """Request serializer that adds document identifiers to the base options.

    ``files`` is re-declared as a plain string field, replacing the list of
    uploaded files declared on ``GerarDocumentoInitialSerializer``.
    """

    files = serializers.CharField(required=True)

    # This value is used inside the URL of the request sent to Bubble.
    bubble_editor_version = serializers.CharField(
        required=False,
        default="version-test",
    )

    # Mandatory identifiers.
    doc_id = serializers.CharField(required=True)
    form_response_id = serializers.CharField(required=True)
    version = serializers.CharField(required=True)

    def get_obj(self):
        """Return the validated payload as a ``GerarDocumentoSerializerData``."""
        validated = self.validated_data
        return GerarDocumentoSerializerData(**validated)  # type: ignore


@dataclass
class GerarDocumentoSerializerData(
    GerarDocumentoParametrosData, GerarDocumentoInitialSerializerData
):
    """Typed container produced by ``GerarDocumentoSerializer.get_obj``.

    Extends the two parent dataclasses with the Bubble editor version and the
    document identifiers, and re-types ``files`` as a string.
    """

    # Still required: re-declared without a default value.
    files: str

    bubble_editor_version: str = "version-test"

    # Identifiers default to the empty string here, although the serializer
    # marks them as required.
    doc_id: str = ""
    form_response_id: str = ""
    version: str = ""


class GerarDocumentoComPDFProprioSerializer(
    GerarDocumentoInitialSerializer, GerarDocumentoParametros
):
    """Variant of the base serializer whose ``files`` is an untyped list.

    ``files`` is re-declared as a ``ListField`` without an explicit ``child``,
    replacing the list of ``FileField`` items declared on the base serializer.
    """

    files = serializers.ListField(required=True)

    def get_obj(self):
        """Return the validated payload as a ``GerarDocumentoSerializerData``.

        NOTE(review): this builds ``GerarDocumentoSerializerData`` rather than
        ``GerarDocumentoComPDFProprioSerializerData``. That looks like a
        copy-paste leftover, but callers may rely on the extra attributes of
        the former - confirm before changing.
        """
        validated = self.validated_data
        return GerarDocumentoSerializerData(**validated)  # type: ignore


@dataclass
class GerarDocumentoComPDFProprioSerializerData(
    GerarDocumentoParametrosData, GerarDocumentoInitialSerializerData
):
    """Typed container that re-types ``files`` as a list of file descriptions.

    Apart from ``files`` it adds nothing to the two parent dataclasses. It is
    not instantiated in the visible part of this file.
    """

    # Still required: re-declared without a default value.
    files: List[FileInfoSerializerData]