File size: 3,663 Bytes
1286e81
837e770
1286e81
12d3e1a
cb23311
1286e81
 
 
 
 
cb23311
 
 
 
 
 
7eb86f7
1286e81
cb23311
 
 
 
 
 
4a04d77
 
 
12d3e1a
 
1286e81
 
55f46c1
1286e81
 
 
78209bc
55f46c1
1286e81
 
 
55f46c1
1286e81
 
 
 
23087eb
b374298
8f3dc39
dc376b6
 
 
7eb86f7
 
 
 
4a04d77
 
 
7eb86f7
 
 
 
55f46c1
7eb86f7
 
 
78209bc
55f46c1
7eb86f7
 
 
55f46c1
7eb86f7
 
 
 
 
b374298
e70ffc1
3736ce1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from rest_framework import serializers
from _antigos.resumos.serializer import ResumoCursorSerializer
from _utils.gerar_relatorio_modelo_usuario.prompts import (
    prompt_gerar_documento,
    prompt_auxiliar_padrao,
)

# Default user prompt sent to the LLM when the caller does not supply one;
# used as the default for the `user_message` fields of the serializers below.
user_message = "What are the main points of this document?"


class FileInfoSerializer(serializers.Serializer):
    """One input file referenced by a document-generation request."""

    # Opaque identifier for the file — presumably assigned upstream; TODO confirm
    unique_id = serializers.CharField(max_length=255)
    # File type (free-form string; the accepted vocabulary is not visible here)
    tipo_arquivo = serializers.CharField(max_length=255)
    # URL from which the file's content can be fetched
    link_arquivo = serializers.URLField()


class GerarDocumentoSerializer(ResumoCursorSerializer):
    """Request payload for generating a document from a list of external files.

    Extends ``ResumoCursorSerializer`` with the file references, Bubble
    integration settings, and the retrieval/reranking/LLM hyperparameters
    used by the generation pipeline.
    """

    # Declared as None to drop the `system_prompt` field inherited from the
    # parent serializer (DRF removes fields declared as None on subclasses).
    system_prompt = None

    # Files to process; each entry must match FileInfoSerializer.
    files = serializers.ListField(child=FileInfoSerializer(), required=True)
    bubble_editor_version = serializers.CharField(
        required=False, default="version-test"
    )  # Value used inside the URL of the request made to Bubble

    prompt_gerar_documento = serializers.CharField(
        required=False, default=prompt_gerar_documento
    )
    user_message = serializers.CharField(required=False, default=user_message)
    # Hybrid retrieval settings: chunk count plus the relative weights of
    # embedding similarity vs. BM25 (defaults sum to 1.0).
    num_chunks_retrieval = serializers.IntegerField(default=20)
    embedding_weight = serializers.FloatField(default=0.5)
    bm25_weight = serializers.FloatField(default=0.5)
    context_window = serializers.IntegerField(default=3)
    chunk_overlap = serializers.IntegerField(default=800)
    # Reranking settings: top-k kept, Cohere rerank model, and how many
    # initial candidates to fetch before reranking.
    num_k_rerank = serializers.IntegerField(default=20)
    model_cohere_rerank = serializers.CharField(
        required=False, default="rerank-english-v2.0"
    )
    more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
    # Model used when building contextual chunks — NOTE(review): presumably
    # only relevant when `should_have_contextual_chunks` is true; confirm.
    claude_context_model = serializers.CharField(
        required=False, default="claude-3-haiku-20240307"
    )
    gpt_temperature = serializers.FloatField(default=0)
    # Optional here (contrast: GerarDocumentoComPDFProprioSerializer defaults it to 11).
    id_modelo_do_usuario = serializers.IntegerField(required=False)
    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
    # Model name for the final LLM requests of the pipeline.
    llm_ultimas_requests = serializers.CharField(
        required=False, default="gemini-2.0-flash"
    )


class GerarDocumentoComPDFProprioSerializer(ResumoCursorSerializer):
    """Request payload for generating a document from a user-supplied PDF.

    Mirrors ``GerarDocumentoSerializer`` but without the external ``files``
    and Bubble fields, and with different defaults: ``id_modelo_do_usuario``
    defaults to 11 and ``llm_ultimas_requests`` to ``"gpt-4o-mini"``.
    """

    # Declared as None to drop the `system_prompt` field inherited from the
    # parent serializer (DRF removes fields declared as None on subclasses).
    system_prompt = None
    prompt_gerar_documento = serializers.CharField(
        required=False, default=prompt_gerar_documento
    )
    user_message = serializers.CharField(required=False, default=user_message)
    # Hybrid retrieval settings: chunk count plus the relative weights of
    # embedding similarity vs. BM25 (defaults sum to 1.0).
    num_chunks_retrieval = serializers.IntegerField(default=20)
    embedding_weight = serializers.FloatField(default=0.5)
    bm25_weight = serializers.FloatField(default=0.5)
    context_window = serializers.IntegerField(default=3)
    chunk_overlap = serializers.IntegerField(default=800)
    # Reranking settings: top-k kept, Cohere rerank model, and how many
    # initial candidates to fetch before reranking.
    num_k_rerank = serializers.IntegerField(default=20)
    model_cohere_rerank = serializers.CharField(
        required=False, default="rerank-english-v2.0"
    )
    more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
    claude_context_model = serializers.CharField(
        required=False, default="claude-3-haiku-20240307"
    )
    gpt_temperature = serializers.FloatField(default=0)
    # Hard-coded default user model id — NOTE(review): magic number; confirm
    # which model record 11 refers to.
    id_modelo_do_usuario = serializers.IntegerField(required=False, default=11)
    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
    # Model name for the final LLM requests of the pipeline.
    llm_ultimas_requests = serializers.CharField(required=False, default="gpt-4o-mini")