# NOTE: repository-page residue captured with the file (not part of the module):
#   "muryshev's picture" / "init" / "57cf043"
import os
import re
from logging import Logger
from typing import List, Union
from openai import OpenAI
from common.configuration import FilterChunks, LLMConfiguration, SummaryChunks
from components.nmd.aggregate_answers import preprocessed_chunks
class LLMChunkSearch:
    """Answer a user query from retrieved chunks using an OpenAI chat model.

    When the configuration has no ``base_url`` (no client is created), or when
    every request attempt fails, the answer is built locally by rendering the
    first retrieved document or person record as plain text.
    """

    # How many times the chat completion request is tried before falling back.
    MAX_ATTEMPTS = 5
    # Returned by the local fallback when there is nothing to render.
    EMPTY_DATABASE_MESSAGE = 'БАЗА ДАННЫХ ПУСТА'

    def __init__(self, config: LLMConfiguration, prompt: str, logger: Logger):
        """Store the settings and create the OpenAI client when a URL is set.

        Args:
            config: LLM settings. This class reads ``base_url``,
                ``api_key_env``, ``model``, ``temperature``, ``top_p``,
                ``frequency_penalty``, ``presence_penalty`` and ``seed``.
            prompt: Default prompt template, stored as ``self.prompt``.
            logger: Logger used for progress and error messages.
        """
        self.config = config
        self.logger = logger
        self.prompt = prompt
        # Regex patterns kept as instance attributes; nothing in this class
        # reads them, so they are presumably consumed elsewhere.
        self.pattern = r'\d+'
        self.pattern_list = [
            r'\[\d+\]',
            r'Ответ: [1-9]',
            r'Ответ [1-9]',
            r'Ответ[1-9]',
            r'Ответ:[1-9]',
            r'Ответ: \[\d+\]',
        ]
        # Without a base URL there is no remote model; ``llm_chunk_search``
        # then answers from the local fallback only.
        if self.config.base_url is not None:
            self.client = OpenAI(
                base_url=self.config.base_url,
                api_key=os.getenv(self.config.api_key_env),
            )
        else:
            self.client = None

    def llm_chunk_search(
        self, query: str, answer_chunks: SummaryChunks, prompt: str
    ) -> tuple:
        """Ask the model to answer ``query`` using the retrieved chunks.

        Args:
            query: User query.
            answer_chunks: Retrieved chunks to process.
            prompt: Prompt template; formatted with ``query`` and ``answer``
                (the preprocessed chunk text) before being sent.

        Returns:
            A 4-tuple ``(text_chunks, answer, prompt_used, token_count)``.
            Without a client, ``answer`` is the local fallback text,
            ``prompt_used`` is the unformatted template and the count is 0.
            If all attempts fail, ``answer`` is the local fallback text,
            ``prompt_used`` is the formatted prompt and the count is 0.
        """
        text_chunks = preprocessed_chunks(
            answer_chunks, self.config.base_url, self.logger
        )
        self.logger.info('Searching LLM Chunks')

        if self.client is None:
            return (
                text_chunks,
                self.__postprocessing_answer_llm(answer_chunks),
                prompt,
                0,
            )

        llm_prompt = prompt.format(query=query, answer=text_chunks)
        for attempt in range(1, self.MAX_ATTEMPTS + 1):
            try:
                response = self.client.chat.completions.create(
                    model=self.config.model,
                    messages=[
                        # Send the formatted prompt: the raw template carries
                        # neither the query nor the retrieved chunks.
                        {"role": "system", "content": llm_prompt},
                        {"role": "user", "content": query},
                    ],
                    temperature=self.config.temperature,
                    top_p=self.config.top_p,
                    frequency_penalty=self.config.frequency_penalty,
                    presence_penalty=self.config.presence_penalty,
                    seed=self.config.seed,
                )
                answer_llm = response.choices[0].message.content
                count_tokens = response.usage.total_tokens
                self.logger.info(f'Answer LLM {answer_llm}')
                return (
                    text_chunks,
                    self._trim_answer(answer_llm),
                    llm_prompt,
                    count_tokens,
                )
            except Exception as e:
                # Best-effort retry: any failure (request or response
                # handling) is logged and the request is attempted again.
                self.logger.error(f"Attempt {attempt} failed: {str(e)}")

        self.logger.error("All attempts failed")
        return (
            text_chunks,
            self.__postprocessing_answer_llm(answer_chunks),
            llm_prompt,
            0,
        )

    @staticmethod
    def _trim_answer(answer_llm: str) -> str:
        """Cut the model reply down to the part between the answer markers.

        Text up to and including the first ``%%`` is dropped; then text after
        the first ``Конец ответа`` is dropped (the marker itself is kept).
        A reply without a marker is left untouched on that side.
        """
        _, start_marker, tail = answer_llm.partition('%%')
        if start_marker:
            answer_llm = tail
        end_marker = 'Конец ответа'
        end = answer_llm.find(end_marker)
        if end != -1:
            answer_llm = answer_llm[:end + len(end_marker)]
        return answer_llm

    @staticmethod
    def __postprocessing_answer_llm(answer_chunks: Union[SummaryChunks, List]) -> str:
        """Render the first retrieved record as plain text (no model involved).

        Args:
            answer_chunks: Either a ``SummaryChunks`` (its ``doc_chunks`` are
                preferred; ``people_search`` is used when ``doc_chunks`` is
                ``None``) or a list whose first item is a ``FilterChunks``
                document or a person record.

        Returns:
            Formatted text for the first record, or the "database is empty"
            message when there is no record to render.
        """
        if isinstance(answer_chunks, SummaryChunks):
            doc_chunks = answer_chunks.doc_chunks
            # The None check must come before len(): len(None) raises and
            # would make the people branch unreachable.
            if doc_chunks is None:
                people = answer_chunks.people_search
                if not people:
                    return LLMChunkSearch.EMPTY_DATABASE_MESSAGE
                return LLMChunkSearch._format_person(people[0], from_summary=True)
            if len(doc_chunks) == 0:
                # TODO: test this path and replace the text with a proper
                # notification about the database (original note is truncated).
                return LLMChunkSearch.EMPTY_DATABASE_MESSAGE
            return LLMChunkSearch._format_document(
                doc_chunks[0], filename_fallback=True
            )

        if not answer_chunks:
            # Nothing retrieved: report it instead of failing on [0].
            return LLMChunkSearch.EMPTY_DATABASE_MESSAGE
        first = answer_chunks[0]
        if isinstance(first, FilterChunks):
            return LLMChunkSearch._format_document(first, filename_fallback=False)
        return LLMChunkSearch._format_person(first, from_summary=False)

    @staticmethod
    def _format_document(doc, *, filename_fallback: bool) -> str:
        """Render one document: header, optional title line, then chunk texts.

        Args:
            doc: Object exposing ``title``, ``chunks`` and, when
                ``filename_fallback`` is set, ``filename``.
            filename_fallback: When the title is ``'unknown'``, print the file
                name in its place (True) or omit the title line (False).
        """
        parts = ['Документ: [1]\n']
        if doc.title != 'unknown':
            parts.append(f'Название документа: {doc.title}\n')
        elif filename_fallback:
            parts.append(f'Название документа: {doc.filename}\n')
        for chunk in doc.chunks:
            # A chunk with extra info is rendered from that info alone.
            if len(chunk.other_info):
                parts.extend(f'{info}' for info in chunk.other_info)
            else:
                parts.append(f'{chunk.text_answer}')
        # NOTE(review): the source indentation was lost; the blank line is
        # assumed to close the whole document, not each chunk - confirm.
        parts.append('\n\n')
        return ''.join(parts)

    @staticmethod
    def _format_person(doc, *, from_summary: bool) -> str:
        """Render one person record: positions, processes, curated companies, groups.

        Args:
            doc: Object exposing ``person_name``, ``organizatinal_structure``,
                ``business_processes``, ``business_curator`` and ``groups``
                (each collection may be ``None``).
            from_summary: Selects the layout used for ``SummaryChunks`` people
                (document-style header, each structure wrapped in
                parentheses, ``'undefined'`` positions skipped) versus the
                plain-list layout (short header, position always printed).
        """
        if from_summary:
            parts = [
                f'Название документа: Информация о сотруднике {doc.person_name}\n'
            ]
        else:
            parts = [f'Информация о сотруднике {doc.person_name}\n']

        if doc.organizatinal_structure is not None:
            for unit in doc.organizatinal_structure:
                if from_summary:
                    parts.append('(')
                if not from_summary or unit.position != 'undefined':
                    parts.append(f'Должность: {unit.position}\n')
                if unit.leads is not None:
                    parts.append('Руководит следующими сотрудниками:\n')
                    parts.extend(
                        f'{lead.person}\n'
                        for lead in unit.leads
                        if lead.person != "undefined"
                    )
                # NOTE(review): assumed to sit beside the ``leads`` check,
                # not inside it (source indentation was lost) - confirm.
                if unit.subordinates.person_name != "undefined":
                    parts.append(
                        f'Руководителем {doc.person_name} является '
                        f'{unit.subordinates.person_name}\n'
                    )
                if from_summary:
                    parts.append(')')

        if doc.business_processes is not None:
            # Plural header ends the line; the singular one keeps the single
            # process name on the same line.
            if len(doc.business_processes) >= 2:
                parts.append('Отвечает за Бизнес процессы:\n')
            else:
                parts.append('Отвечает за Бизнес процесс: ')
            parts.extend(
                f'{process.processes_name}\n' for process in doc.business_processes
            )

        if doc.business_curator is not None:
            parts.append('Является Бизнес-куратором (РОКС НН):\n')
            parts.extend(f'{curator.company_name}' for curator in doc.business_curator)

        if doc.groups is not None:
            if len(doc.groups) >= 2:
                parts.append('Входит в состав групп:\n')
            else:
                parts.append('Входит в состав группы:\n')
            for group in doc.groups:
                # Plural "Члены" is shown in the singular for one person;
                # replace() is a no-op when the word is absent.
                position = group.position_in_group.replace("Члены", "Член")
                parts.append(
                    f'{group.group_name}. Должность внутри группы: {position}\n'
                )

        # Record terminator: a backslash followed by a blank line.
        parts.append('\\\n\n')
        return ''.join(parts)