import requests
from bs4 import BeautifulSoup
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Sampling settings shared by every generate() call.
generation_config = GenerationConfig(temperature=0.8, top_p=0.75, top_k=40)


def extract_text(url: str) -> str:
    """Fetch a web page and return the concatenated text of its <p> tags."""
    print(['extract_text', 'start'])
    if url is None or url.strip() == '':
        return ''
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    text = '\n\n'.join(p.text for p in soup.find_all('p'))
    print(['extract_text', 'end'])
    return text


def summarize_text(text: str) -> str:
    """Summarize `text` with the LoRA-tuned model, using its Spanish instruction prompt."""
    print(['summarize_text', 'start'])
    # The model was instruction-tuned in Spanish; the prompt means
    # "Write a summary of the following text."
    input_text = f'Instruction: Elabora un resumen del siguiente texto.\nInput: {text}\nOutput: '
    # Move the input tensors to the model's device: the model is dispatched
    # with device_map='auto', so inputs left on the CPU would fail.
    batch = tokenizer(input_text, return_tensors='pt').to(model.device)
    print(['summarize_text', 'generating'])
    with torch.cuda.amp.autocast():
        output_tokens = model.generate(**batch, max_new_tokens=256,
                                       generation_config=generation_config)
    # Note: the decoded string still contains the prompt; strip it downstream
    # if only the generated summary is needed.
    output = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    print(['summarize_text', 'end'])
    return output


def load_model(peft_model_id):
    """Load the 8-bit base model, attach the LoRA adapter, and return it with its tokenizer."""
    print(['load_model', 'start'])
    config = PeftConfig.from_pretrained(peft_model_id)
    print(['load_model', 'loading model'])
    model = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        return_dict=True,
        load_in_8bit=True,   # requires bitsandbytes
        device_map='auto')
    print(['load_model', 'loading tokenizer'])
    tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    # Wrap the quantized base model with the LoRA adapter weights.
    model = PeftModel.from_pretrained(model, peft_model_id)
    model.config.use_cache = True
    print(['load_model', 'end'])
    return model, tokenizer


model, tokenizer = load_model('milyiyo/opt-6.7b-lora-sag-t3000-v300-v2')
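
# --- Usage sketch (illustrative, not part of the original script) -----------
# A minimal end-to-end example, assuming the model above finished loading.
# The URL below is a placeholder, not from the original source; any page
# whose article body lives in <p> tags should work.
if __name__ == '__main__':
    article = extract_text('https://example.com/some-article')  # hypothetical URL
    if article:
        summary = summarize_text(article)
        # generate() echoes the prompt, so keep only the text after 'Output: '.
        print(summary.split('Output: ', 1)[-1])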