import os
import random

import requests
import torch
from bs4 import BeautifulSoup
from datasets import Dataset
from peft import PeftConfig, PeftModel
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# os.environ["CUDA_VISIBLE_DEVICES"] = "0"

generation_config = GenerationConfig(temperature=0.8, top_p=0.75, top_k=40)
device = 'cuda'

# Shared state between calls: the fragment used for the last generated question
# and the FAISS-indexed dataset of text fragments.
shared = {
    'answer_context': None,
    'embeddings_dataset': None
}


def get_nearest_examples(question: str, k: int):
    """Return the k text fragments whose embeddings are closest to the question."""
    print(['get_nearest_examples', 'start'])
    question_embedding = get_embeddings(question)
    embeddings_dataset = shared['embeddings_dataset']
    scores, samples = embeddings_dataset.get_nearest_examples(
        "embeddings", question_embedding, k)
    print(['get_nearest_examples', 'scores and samples'])
    for i in range(len(scores)):
        print([scores[i], samples["text"][i]])
    print(['get_nearest_examples', 'end'])
    return samples["text"]


def get_embeddings(text):
    """Embed a text fragment with the sentence-embedding model (CLS token)."""
    print(['get_embeddings', 'start'])
    encoded_input = emb_tokenizer(
        text, padding=True, truncation=True, return_tensors="pt")
    encoded_input = {k: v.to(device) for k, v in encoded_input.items()}
    model_output = emb_model(**encoded_input)
    model_output = model_output.last_hidden_state[:, 0]
    emb_item = model_output.detach().cpu().numpy()[0]
    print(['get_embeddings', 'end'])
    return emb_item


def build_faiss_index(text):
    """Split the text into fragments, embed each one, and build a FAISS index over them."""
    print(['build_faiss_index', 'start'])
    text_list = split_text(text)
    dataset = Dataset.from_dict({
        "text": text_list,
        "embeddings": [get_embeddings(item) for item in text_list],
    })
    dataset.add_faiss_index(column="embeddings")
    shared['embeddings_dataset'] = dataset
    print(['build_faiss_index', 'end'])


def extract_text(url: str):
    """Download a web page and keep only the text of its <p> elements."""
    print(['extract_text', 'start'])
    if url is None or url.strip() == '':
        return ''
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    text = '\n\n'.join(map(lambda p: p.text, soup.find_all('p')))
    print(['extract_text', 'end'])
    return text


def split_text(text: str):
    """Split text into non-empty, stripped lines."""
    lines = text.split('\n')
    lines = [line.strip() for line in lines if line.strip()]
    return lines


def summarize_text(text: str):
    """Summarize the text with the instruction-tuned model.

    Prompt (Spanish): "Write a summary of the following text."
    """
    print(['summarize_text', 'start'])
    input_text = f'Instruction: Elabora un resumen del siguiente texto.\nInput: {text}\nOutput: '
    batch = tokenizer(input_text, return_tensors='pt')
    batch = batch.to(device)
    print(['summarize_text', 'generating'])
    with torch.cuda.amp.autocast():
        output_tokens = model.generate(
            **batch, max_new_tokens=512, generation_config=generation_config)
    output = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    output = output.replace(input_text, '')
    print(['summarize_text', 'end'])
    return output


def generate_question(text: str):
    """Generate a question about a random fragment of the text.

    Prompt (Spanish): "Given the following text, generate a question whose answer
    can be found in it."
    """
    print(['generate_question', 'start'])
    # Get a random section of the whole text to generate a question
    fragments = split_text(text)
    rnd_text = random.choice(fragments)
    shared['answer_context'] = rnd_text
    input_text = f'Instruction: Dado el siguiente texto quiero que generes una pregunta cuya respuesta se encuentre en él.\nInput: {rnd_text}\nOutput: '
    batch = tokenizer(input_text, return_tensors='pt')
    batch = batch.to(device)
    print(['generate_question', 'generating'])
    with torch.cuda.amp.autocast():
        output_tokens = model.generate(
            **batch, max_new_tokens=256, generation_config=generation_config)
    output = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    output = output.replace(input_text, '')
    print(['generate_question', 'end'])
    return output


def get_answer_context():
    return shared['answer_context']
def answer_question(full_text: str, question: str):
    """Answer a question using the k fragments of the text closest to it as context.

    Prompt (Spanish): "I am going to give you a text about which I want you to
    answer a question."
    """
    print(['answer_question', 'start'])
    if shared['embeddings_dataset'] is None:
        build_faiss_index(full_text)
    top_k_samples = get_nearest_examples(question, k=5)
    context = '\n'.join(top_k_samples)
    input_text = f"""Instruction: Te voy a proporcionar un texto del cual deseo que me respondas una pregunta. El texto es el siguiente: `{context}`\nInput: {question}\nOutput: """
    batch = tokenizer(input_text, return_tensors='pt')
    batch = batch.to(device)
    print(['answer_question', 'generating'])
    with torch.cuda.amp.autocast():
        output_tokens = model.generate(
            **batch, max_new_tokens=256, generation_config=generation_config)
    output = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    output = output.replace(input_text, '')
    print(['answer_question', 'end'])
    return output


def load_model(peft_model_id):
    """Load the 8-bit base model, its tokenizer, and the LoRA adapter."""
    print(['load_model', 'start'])
    config = PeftConfig.from_pretrained(peft_model_id)
    print(['load_model', 'loading model'])
    model = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        return_dict=True,
        load_in_8bit=True,
        device_map='auto')
    print(['load_model', 'loading tokenizer'])
    tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    model = PeftModel.from_pretrained(model, peft_model_id)
    model.config.use_cache = True
    print(['load_model', 'end'])
    return model, tokenizer


def load_embeddings_model():
    """Load the sentence-embedding model used for retrieval."""
    print(['load_embeddings_model', 'start'])
    model_ckpt = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
    print(['load_embeddings_model', 'loading tokenizer'])
    tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
    print(['load_embeddings_model', 'loading model'])
    model = AutoModel.from_pretrained(model_ckpt)
    model = model.to(device)
    print(['load_embeddings_model', 'end'])
    return model, tokenizer


model, tokenizer = load_model(
    "hackathon-somos-nlp-2023/opt-6.7b-lora-sag-t3000-v300-v2")
emb_model, emb_tokenizer = load_embeddings_model()
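

# Minimal usage sketch (illustrative, not part of the original script): wires the
# helpers together end to end. The URL below is a placeholder assumption; replace it
# with the article you actually want to process before running.
if __name__ == '__main__':
    article_url = 'https://example.com/articulo'  # hypothetical URL, replace before running
    full_text = extract_text(article_url)
    print(summarize_text(full_text))              # summary of the whole article
    question = generate_question(full_text)       # question about a random fragment
    print(question)
    print(get_answer_context())                   # fragment the question was drawn from
    print(answer_question(full_text, question))   # retrieval-augmented answer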