# NOTE: lines of HTML-extraction residue from the hosting page (commit hashes,
# line-number gutter, "Runtime error" banners) were removed here — they were
# never part of the Python source.
import transformers
from transformers import (
# Text2TextGenerationPipeline,
AutoModelForSeq2SeqLM as alwm,
# TokenClassificationPipeline,
# AutoModelForTokenClassification,
AutoModelForQuestionAnswering as amqa,
AutoTokenizer as att,
# BertTokenizer,
AlbertTokenizer,
# BertForQuestionAnswering,
# AlbertForQuestionAnswering,
# T5Config,
# T5ForConditionalGeneration,
T5TokenizerFast,
PreTrainedTokenizer,
PreTrainedModel,
ElectraTokenizer as et,
# ElectraForQuestionAnswering
)
import torch
import sentencepiece
import string
import numpy as np
from transformers import pipeline
# from transformers.pipelines import pipeline
import pickle
import streamlit as st
# sq_tokenizer = att.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
# sq_model = alwm.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
# text= "The abolition of feudal privileges by the National Constituent Assembly on 4 August 1789 and the Declaration \\nof the Rights of Man and of the Citizen (La Déclaration des Droits de l'Homme et du Citoyen), drafted by Lafayette \\nwith the help of Thomas Jefferson and adopted on 26 August, paved the way to a Constitutional Monarchy \\n(4 September 1791 – 21 September 1792). Despite these dramatic changes, life at the court continued, while the situation \\nin Paris was becoming critical because of bread shortages in September. On 5 October 1789, a crowd from Paris descended upon Versailles \\nand forced the royal family to move to the Tuileries Palace in Paris, where they lived under a form of house arrest under \\nthe watch of Lafayette's Garde Nationale, while the Comte de Provence and his wife were allowed to reside in the \\nPetit Luxembourg, where they remained until they went into exile on 20 June 1791."
# hftokenizer = pickle.load(open('models/hftokenizer.sav', 'rb'))
# hfmodel = pickle.load(open('models/hfmodel.sav', 'rb'))
def load_model():
    """Load the question-generation and QA models plus their tokenizers.

    Returns:
        tuple: (hf_model, hf_tokenizer, qa_tokenizer, qa_model) — the
        pickled seq2seq question-generation model with its T5 tokenizer,
        and the pickled extractive-QA model with its ALBERT tokenizer.

    NOTE(review): unpickling model files is only safe for trusted
    artifacts shipped with the app — never load user-supplied pickles.
    """
    # Use context managers so the pickle file handles are closed promptly;
    # the original bare open(...) calls leaked the handles.
    with open('hfmodel.sav', 'rb') as f:
        hfm = pickle.load(f)
    hft = T5TokenizerFast.from_pretrained("t5-base")
    with open('model.sav', 'rb') as f:
        model = pickle.load(f)
    tok = AlbertTokenizer.from_pretrained("ahotrod/albert_xxlargev1_squad2_512")
    return hfm, hft, tok, model
# Load all models/tokenizers once at import time so they are reused
# across calls (and across Streamlit reruns of this script).
hfmodel, hftokenizer, tokenizer, model = load_model()
def run_model(input_string, **generator_args):
    """Generate questions from *input_string* with the seq2seq model.

    Args:
        input_string: Context text to generate questions from.
        **generator_args: Optional overrides for the generation defaults
            (max_length, num_beams, ...), forwarded to ``generate``.

    Returns:
        list[list[str]]: For each decoded sequence, the question strings
        obtained by splitting on the "<sep>" marker.
    """
    # Defaults first so caller-supplied **generator_args can override
    # them; the original code rebuilt the dict and silently discarded
    # every override passed in.
    generator_args = {
        "max_length": 256,
        "num_beams": 4,
        "length_penalty": 1.5,
        "no_repeat_ngram_size": 3,
        "early_stopping": True,
        **generator_args,
    }
    input_string = "generate questions: " + input_string + " </s>"
    input_ids = hftokenizer.encode(input_string, return_tensors="pt")
    res = hfmodel.generate(input_ids, **generator_args)
    output = hftokenizer.batch_decode(res, skip_special_tokens=True)
    return [item.split("<sep>") for item in output]
# al_tokenizer = att.from_pretrained("deepset/electra-base-squad2")
# al_model = amqa.from_pretrained("deepset/electra-base-squad2")
# al_model = pickle.load(open('models/al_model.sav', 'rb'))
# al_tokenizer = pickle.load(open('models/al_tokenizer.sav', 'rb'))
def QA(question, context):
    """Answer *question* from *context* with the extractive QA model.

    Args:
        question: Question string.
        context: Passage the answer should be extracted from.

    Returns:
        str: A formatted "Q. ... Ans. ..." string, or a fallback message
        when no answer span could be decoded.
    """
    inputs = tokenizer(question, context, return_tensors="pt")
    # Inference only — no gradients needed.
    with torch.no_grad():
        output = model(**inputs)
    # Most probable start/end token positions of the answer span.
    start = torch.argmax(output.start_logits)
    end = torch.argmax(output.end_logits)
    # Decode the predicted span back to text and capitalize the words.
    predict_answer_tokens = inputs.input_ids[0, start : end + 1]
    answer = tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)
    answer = string.capwords(answer)
    # Empty/whitespace decode means the model found no valid span
    # (e.g. end < start, or the span was only special tokens).
    if not answer.strip():
        return f"Possible question : {question}\n Answer could not be generated accurately."
    return f"Q. {question} \n Ans. {answer}"
# QA("What was the first C program","The first prgram written in C was Hello World")
def gen_question(inputs):
    """Thin wrapper: return the generated questions for *inputs*."""
    return run_model(inputs)
# string_query = "Hello World"
# gen_question(f"answer: {string_query} context: The first C program said {string_query} "). #The format of the query to generate questions
def tokenize(inputs):
    """Batch-encode *inputs* for the T5 model.

    Args:
        inputs: A list of strings to encode as one batch.

    Returns:
        BatchEncoding: PyTorch tensors, truncated/padded to 512 tokens.
    """
    # padding="max_length" replaces the long-deprecated (and removed in
    # recent transformers) pad_to_max_length=True flag.
    return hftokenizer.batch_encode_plus(
        inputs,
        max_length=512,
        add_special_tokens=True,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )
def read_file(filepath_name):
    """Read the file at *filepath_name* and return its contents with
    newlines flattened to single spaces (one continuous context string).
    """
    # Bug fix: the original called open(text, ...) — `text` is undefined
    # here, so every call raised NameError. Open the parameter instead.
    with open(filepath_name, "r") as infile:
        contents = infile.read()
    return contents.replace("\n", " ")
def create_string_for_generator(context):
    """Generate questions for *context* and return them as a list,
    splitting the first decoded string on '? ' boundaries."""
    generated = gen_question(context)
    first_sequence = generated[0][0]
    return first_sequence.split('? ')
def creator(context):
    """Build the list of formatted Q/A strings for *context*: generate
    questions, then answer each one against the same passage."""
    pairs = []
    for question in create_string_for_generator(context):
        answered = QA(question, context)
        # Skip empty results defensively.
        if not answered:
            continue
        pairs.append(answered)
    return pairs
# creator(""""Hello, World!" program by Brian Kernighan (1978).
# A "Hello, World!" program is generally a computer program that ignores any input and outputs or displays a message similar to "Hello, World!". A small piece of code in most general-purpose programming languages, this program is used to illustrate a language's basic syntax. "Hello, World!" programs are often the first a student learns to write in a given language,[1] and they can also be used as a sanity check to ensure computer software intended to compile or run source code is correctly installed, and that its operator understands how to use it.
# """)