Spaces:
Running
Running
from rest_framework import serializers | |
from setup.environment import default_model | |
# from _utils.utils import DEFAULT_SYSTEM_PROMPT | |
# Default value of ``ResumoCursorSerializer.system_prompt``.
# The text keeps a literal ``{context}`` placeholder; substituting it is left
# to whoever consumes the prompt (not visible in this module).
prompt_template = (
    "\n"
    "Based on the following context, provide multiple key points from the document.\n"
    "For each point, create a new paragraph.\n"
    "Each paragraph should be a complete, self-contained insight.\n"
    "Context: {context}\n"
    "Key points:\n"
)
class ResumoPDFSerializer(serializers.Serializer):
    """Validate the input of a PDF summary request.

    Only ``files`` is mandatory. ``system_prompt`` and ``model`` are optional
    and declare no default; ``user_message`` and ``iterative_refinement`` are
    optional and fall back to ``""`` and ``False`` respectively.
    """

    # One or more uploaded files.
    files = serializers.ListField(
        required=True,
        child=serializers.FileField(),
    )

    # Optional prompt / model overrides (no default declared here).
    system_prompt = serializers.CharField(required=False)
    user_message = serializers.CharField(default="", required=False)
    model = serializers.CharField(required=False)

    # Opt-in flag; how it is interpreted is decided by the consumer of the
    # validated data, which is not visible in this module.
    iterative_refinement = serializers.BooleanField(
        default=False,
        required=False,
    )
class ResumoCursorSerializer(serializers.Serializer):
    """Validate a summary request that also carries embedding/chunking options.

    ``files`` is the only required input. Every other field is optional and
    falls back to the default declared on it.
    """

    # One or more uploaded files.
    files = serializers.ListField(
        required=True,
        child=serializers.FileField(),
    )

    # Prompt and model selection; defaults come from the module-level
    # ``prompt_template`` and the imported ``default_model``.
    system_prompt = serializers.CharField(
        default=prompt_template,
        required=False,
    )
    user_message = serializers.CharField(default="", required=False)
    model = serializers.CharField(default=default_model, required=False)

    # Embedding model identifier and chunking parameters.
    # NOTE(review): units of chunk_size / chunk_overlap (characters vs tokens)
    # are not visible here - confirm against the code that consumes them.
    hf_embedding = serializers.CharField(
        default="all-MiniLM-L6-v2",
        required=False,
    )
    chunk_size = serializers.IntegerField(default=1000, required=False)
    chunk_overlap = serializers.IntegerField(default=200, required=False)
# system_prompt_relatorio = """ | |
# Based on the following context, provide multiple key points from the document. | |
# For each point, create a new paragraph. | |
# Each paragraph should be a complete, self-contained insight. | |
# Include any relevant context provided. | |
# Context: {context} | |
# Key points: | |
# """ | |
# Default value of ``ResumoCursorCompeltoSerializer.prompt_relatorio``.
# Instructs the model to write a fixed-format, Portuguese summary report of a
# criminal case. The only substitution placeholder is ``{context}``; square
# bracket tokens such as ``[nome_do_reu]`` are part of the prompt text itself.
# The worked example at the end is wrapped in a properly closed ``` fence so
# that every fenced block in the prompt is balanced.
system_prompt_relatorio = """
You are a language model specialized in producing concise and well-structured legal case summaries in Portuguese. You will receive a variable `context`, which contains information about a legal case. Your task is to read the `context` carefully and produce a summary report in Portuguese, following the specific format provided below. Do not include any additional comments or reasoning steps in your final answer.
**Instructions**:
1. **Chain of Thought**: Before producing your final answer, you must think through and plan your summary silently, without showing this reasoning in the final output. The final answer must only contain the required formatted report and nothing else.
2. **Reading the Context**: Extract the following information from `context`:
- The name of the defendant (réu).
- The crime they have been accused of (nome_do_crime).
- The applicable article and subsection of the Penal Code (artigo_e_inciso_do_crime).
- The date the accusation was accepted (data_do_recebimento).
- The ID of the decision document (id_do_documento).
3. **Prescriptive Details**: If no other interruptive or suspensive causes of prescription are mentioned, confirm that there are none.
4. **Formatting**: Your final answer must strictly follow the format below, in Portuguese, and replace the placeholders with the appropriate information:
```
<formato>
Trata-se de Ação Penal em que o Ministério Público denunciou [nome_do_reu], pela prática do [nome_do_crime] [artigo_e_inciso_do_crime], do Código Penal.
A denúncia foi recebida em [data_do_recebimento], conforme Decisão [id_do_documento].
Não há outras causas interruptivas ou suspensivas da prescrição.
</formato>
```
5. **Completeness**: If any piece of required information is missing in the `context`, note that explicitly in the final answer within the format.
**Reminder**:
- Do not include your chain of thought in the final output.
- Do not add extra information or commentary beyond the specified format.
- The final answer must be in Portuguese.
---
**Contextual Information (provided separately):**
{context}
---
**Example with a given context**:
- Input:
`context` = "Em 10/03/2021, o Ministério Público denunciou João da Silva, imputando-lhe o crime de furto qualificado, previsto no art. 155, §4º, inciso II, do Código Penal. A denúncia foi recebida em 12/03/2021, conforme Decisão nº 20210312-01. Não há menção a qualquer causa interruptiva ou suspensiva da prescrição."
- Expected final answer:
```
<formato>
Trata-se de Ação Penal em que o Ministério Público denunciou João da Silva, pela prática do furto qualificado (art. 155, §4º, inciso II do Código Penal).
A denúncia foi recebida em 12/03/2021, conforme Decisão 20210312-01.
Não há outras causas interruptivas ou suspensivas da prescrição.
</formato>
```
"""

# Default value of ``ResumoCursorCompeltoSerializer.user_message``.
user_message = "What are the main points of this document?"
# system_prompt_modelo = """ | |
# Based on the following context, provide multiple key points from the document. | |
# For each point, create a new paragraph. | |
# Each paragraph should be a complete, self-contained insight. | |
# Include any relevant context provided. | |
# Context: {context} | |
# Modelo do usuário: {modelo_usuario} | |
# Key points: | |
# """ | |
# Default value of ``ResumoCursorCompeltoSerializer.prompt_modelo``.
# Asks the model for one Portuguese sentence shaped after a user-supplied
# template. Two substitution placeholders appear in the text:
#   {context}            - the legal report the sentence is derived from
#   {modelo_do_usuario}  - the user's template to imitate
system_prompt_modelo = """
You are a large language model that must produce a single final sentence in **Portuguese**. To do this, you will follow a private chain of thought and then produce a final answer. The final answer must follow the formatting and stylistic conventions shown in the user-provided model `user's template`. The information to be included in the final sentence is derived from the `context` (a report describing a legal case).
**Contextual Information (provided separately):**
{context}
**User Model (provided separately):**
{modelo_do_usuario}
**Instructions:**
1. **Goal:** Produce one single final sentence in Portuguese that matches the structure, format, and style given by `user's template`.
2. **Chain of Thought (private to the assistant and not to be shown in the final answer):**
- Carefully review the `context` which is a legal report of a case.
- Identify:
- The defendant’s name.
- The crime’s name, its article, and any subsection (inciso).
- The date of receipt of the complaint (data do recebimento da denúncia).
- The document ID.
- Ensure these elements are correctly incorporated into the final sentence.
- Check compliance with the formatting style indicated by `user's template`.
- Compose the sentence following the structure from the user model.
- Use reflection: Before finalizing the answer, reassess if all required information is included, if the format matches the user model, and if the sentence is written correctly in Portuguese.
3. **Reflection Technique (private):**
After composing the sentence, but before presenting it as the final answer, reflect if:
- All required details from the `context` are accurately included.
- The sentence format strictly matches the pattern of `user's template`.
- The sentence is grammatically correct in Portuguese.
4. **Final Answer:**
- After completing the chain of thought and ensuring correctness through reflection, present only the final sentence in Portuguese.
- Do not show the chain of thought or the reflection step. Only the final formatted sentence should be visible to the user.
"""
class ResumoCursorCompeltoSerializer(ResumoCursorSerializer):
    """Extend ``ResumoCursorSerializer`` with two prompts and tuning options.

    The inherited ``system_prompt`` field is removed and replaced by
    ``prompt_relatorio`` and ``prompt_modelo``. ``id_modelo_do_usuario`` is the
    only field added here that the client must always send; everything else
    declares a default.
    """

    # NOTE(review): "Compelto" looks like a misspelling of "Completo". The name
    # is kept unchanged because other classes in this module subclass it.

    # Setting an inherited field name to None removes that field from the
    # subclass (DRF serializer-inheritance convention).
    system_prompt = None

    # Prompt overrides; defaults are the module-level prompt constants.
    prompt_relatorio = serializers.CharField(
        default=system_prompt_relatorio,
        required=False,
    )
    prompt_modelo = serializers.CharField(
        default=system_prompt_modelo,
        required=False,
    )
    # The right-hand ``user_message`` resolves to the module-level constant,
    # since the class attribute of the same name is not bound yet.
    user_message = serializers.CharField(default=user_message, required=False)

    # Retrieval / reranking / generation tuning values. Their exact meaning is
    # defined by the code that consumes the validated data (not visible here).
    num_chunks_retrieval = serializers.IntegerField(default=5)
    embedding_weight = serializers.FloatField(default=0.5)
    bm25_weight = serializers.FloatField(default=0.5)
    context_window = serializers.IntegerField(default=3)
    chunk_overlap = serializers.IntegerField(default=200)
    num_k_rerank = serializers.IntegerField(default=5)
    model_cohere_rerank = serializers.CharField(
        default="rerank-english-v2.0",
        required=False,
    )
    more_initial_chunks_for_reranking = serializers.IntegerField(default=20)
    claude_context_model = serializers.CharField(
        default="claude-3-haiku-20240307",
        required=False,
    )
    gpt_temperature = serializers.FloatField(default=0)

    # Mandatory integer identifier of the user's template/model record.
    id_modelo_do_usuario = serializers.IntegerField(required=True)
class RagasSerializer(ResumoCursorCompeltoSerializer):
    """Variant of ``ResumoCursorCompeltoSerializer`` with an optional template id.

    ``files`` and ``hf_embedding`` are re-declared with the same arguments the
    parent chain already uses; ``id_modelo_do_usuario`` becomes optional.
    NOTE(review): presumably used for RAGAS evaluation runs (inferred from the
    class name only) - confirm against the view that uses it.
    """

    files = serializers.ListField(
        required=True,
        child=serializers.FileField(),
    )
    # Relaxed from required=True on the parent to optional here.
    id_modelo_do_usuario = serializers.IntegerField(required=False)
    hf_embedding = serializers.CharField(
        default="all-MiniLM-L6-v2",
        required=False,
    )
class RagasFromTextSerializer(ResumoCursorCompeltoSerializer):
    """Text-only variant: no file upload, but ``user_message`` is mandatory.

    The inherited ``files`` field is removed, ``id_modelo_do_usuario`` becomes
    optional with a default of 9, and an optional ``context_provided`` string
    is accepted.
    """

    # Setting an inherited field name to None removes that field from the
    # subclass (DRF serializer-inheritance convention).
    files = None

    # NOTE(review): the meaning of the default id 9 is not visible in this
    # module - confirm which record it refers to.
    id_modelo_do_usuario = serializers.IntegerField(default=9, required=False)

    # Unlike the parent (optional with a default), the message must be sent.
    user_message = serializers.CharField(required=True)

    # Optional caller-supplied context text; no default declared.
    context_provided = serializers.CharField(required=False)