learning-assistance / functions.py
milyiyo's picture
Add dummy functions for missing actions
a035375
raw
history blame
2.49 kB
import os
import requests
import torch
from bs4 import BeautifulSoup
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Decoding parameters handed to ``model.generate`` by ``summarize_text``.
# NOTE(review): temperature / top_p / top_k are sampling controls, but
# ``do_sample`` is not set here -- confirm whether sampling was intended.
generation_config = GenerationConfig(temperature=0.8, top_p=0.75, top_k=40)
def extract_text(url: str, timeout: float = 30.0) -> str:
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch. ``None``, an empty string or a
            whitespace-only string short-circuits to an empty result
            without any network access.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without one, the request can block indefinitely when the
            remote host never answers.

    Returns:
        The text of every ``<p>`` element in document order, joined with
        blank lines; ``''`` when no URL was supplied.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    print(['extract_text', 'start'])
    if url is None or url.strip() == '':
        return ''
    # Bound the wait so a stalled server cannot hang the caller forever.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")
    text = '\n\n'.join(p.text for p in soup.find_all('p'))
    print(['extract_text', 'end'])
    return text
def summarize_text(text: str):
    """Produce a summary of ``text`` using the module-level model.

    The text is wrapped in an instruction prompt, tokenized with the
    module-level ``tokenizer`` and moved to CUDA. Up to 512 new tokens are
    generated under CUDA autocast with the module-level
    ``generation_config``. The first returned sequence is decoded with
    special tokens skipped, and every occurrence of the prompt string is
    removed from the decoded text.

    Args:
        text: The passage to summarize.

    Returns:
        The decoded model output with the prompt string removed.
    """
    print(['summarize_text', 'start'])
    prompt = f'<s>Instruction: Elabora un resume del siguiente texto.\nInput: {text}\nOutput: '
    encoded = tokenizer(prompt, return_tensors='pt').to('cuda')

    print(['summarize_text', 'generating'])
    with torch.cuda.amp.autocast():
        generated = model.generate(
            **encoded,
            max_new_tokens=512,
            generation_config=generation_config,
        )

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # NOTE(review): stripping relies on the decoded text containing the
    # prompt verbatim (including the literal '<s>') -- confirm this holds
    # with skip_special_tokens=True for this tokenizer.
    summary = decoded.replace(prompt, '')
    print(['summarize_text', 'end'])
    return summary
def generate_question(text: str):
    """Return a fixed placeholder question.

    Dummy implementation: ``text`` is accepted but not used, and the
    returned string is hard-coded.
    """
    placeholder = 'Pregunta de ejemplo.'
    return placeholder
def get_answer_context():
    """Return a fixed placeholder answer context.

    Dummy implementation: takes no arguments and returns a hard-coded
    string.
    """
    placeholder = 'Aquí está la respuesta.'
    return placeholder
def answer_question(question: str):
    """Return a fixed placeholder answer.

    Dummy implementation: ``question`` is accepted but not used, and the
    returned string is hard-coded.
    """
    placeholder = 'Esta es la respuesta a su pregunta.'
    return placeholder
def load_model(peft_model_id):
    """Load the base causal LM, its tokenizer, and the PEFT adapter.

    The base model name is read from the adapter's ``PeftConfig``. The base
    model is loaded with ``load_in_8bit=True`` and ``device_map='auto'``,
    then wrapped with the adapter identified by ``peft_model_id``.

    Args:
        peft_model_id: Identifier passed to ``PeftConfig.from_pretrained``
            and ``PeftModel.from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple: the adapter-wrapped model with
        ``config.use_cache`` set to ``True``, and the tokenizer loaded from
        the base model name.
    """
    print(['load_model', 'start'])
    base_name = PeftConfig.from_pretrained(peft_model_id).base_model_name_or_path

    print(['load_model', 'loading model'])
    base_model = AutoModelForCausalLM.from_pretrained(
        base_name,
        return_dict=True,
        load_in_8bit=True,
        device_map='auto',
    )

    print(['load_model', 'loading tokenizer'])
    base_tokenizer = AutoTokenizer.from_pretrained(base_name)

    # Attach the adapter weights on top of the 8-bit base model.
    adapted_model = PeftModel.from_pretrained(base_model, peft_model_id)
    adapted_model.config.use_cache = True

    print(['load_model', 'end'])
    return adapted_model, base_tokenizer
# Load the adapter once at import time; ``summarize_text`` reads the
# resulting module-level ``model`` and ``tokenizer``.
model, tokenizer = load_model(
    peft_model_id="hackathon-somos-nlp-2023/opt-6.7b-lora-sag-t3000-v300-v2",
)