Update app.py
app.py
CHANGED
@@ -1,8 +1,7 @@
 import PyPDF2
 import gradio as gr
 import json
-from transformers import pipeline
-from datasets import DatasetDict, Dataset
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 
 # Function to extract text from the PDF
 def extract_text_from_pdf(pdf_file):
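Only the signature of extract_text_from_pdf is touched by this hunk; its body sits outside the diff apart from the two lines visible in the next hunk. For orientation, here is a minimal sketch of a PyPDF2 page loop consistent with the visible `text += page.extract_text()` fragment, assuming PyPDF2 3.x (PdfReader); it is not the Space's actual code.

import PyPDF2

def extract_text_from_pdf(pdf_file):
    # Walk every page of the uploaded PDF and concatenate whatever text
    # PyPDF2 can recover; extract_text() may return an empty string for
    # image-only pages, so guard before appending.
    reader = PyPDF2.PdfReader(pdf_file)
    text = ""
    for page in reader.pages:
        page_text = page.extract_text()
        if page_text:
            text += page_text
    return text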
@@ -12,11 +11,17 @@ def extract_text_from_pdf(pdf_file):
         text += page.extract_text()
     return text
 
-# Function to generate questions and answers using
+# Function to generate questions and answers using a Hugging Face model
 def generate_qa_pairs(text):
-
-
-
+    tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qg-hl")
+    model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qg-hl")
+
+    input_text = "highlight: " + text
+    input_ids = tokenizer.encode(input_text, return_tensors="pt")
+    outputs = model.generate(input_ids)
+    questions = tokenizer.decode(outputs[0])
+
+    return questions
 
 # Function to convert the QA pairs to SQuAD format
 def convert_to_squad_format(qas, context):
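Two caveats about the new generate_qa_pairs body above: the full PDF text is encoded in a single call, while these T5 checkpoints are normally capped at 512 input tokens, and tokenizer.decode(outputs[0]) without skip_special_tokens=True leaves <pad> and </s> markers in the returned string. The sketch below, which is not the Space's code, shows one way to chunk the input and clean the decoded questions; the chunk size and generation parameters are assumptions. It may also be worth checking the model card, since the *-qg-hl checkpoints are generally fed the answer span wrapped in <hl> tokens rather than a bare "highlight:" prefix.

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

def generate_questions_chunked(text, chunk_size=400):
    # Hypothetical variant: split the document into token-sized chunks so each
    # generate() call stays under the 512-token input limit, then decode the
    # question for each chunk without special tokens.
    tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qg-hl")
    model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qg-hl")

    token_ids = tokenizer.encode(text, add_special_tokens=False)
    questions = []
    for start in range(0, len(token_ids), chunk_size):
        chunk = tokenizer.decode(token_ids[start:start + chunk_size])
        input_ids = tokenizer.encode(
            "highlight: " + chunk,
            return_tensors="pt",
            truncation=True,
            max_length=512,
        )
        outputs = model.generate(input_ids, max_length=64, num_beams=4)
        questions.append(tokenizer.decode(outputs[0], skip_special_tokens=True))
    return questions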
@@ -61,4 +66,4 @@ with gr.Blocks() as demo:
 
     process_button.click(fn=process_pdf, inputs=[pdf_file, file_name], outputs=download_link)
 
-demo.launch()
+demo.launch()
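convert_to_squad_format is referenced above but its body is not part of this commit. For orientation only, here is a minimal sketch of the SQuAD-style JSON layout such a converter usually produces; the title, the id scheme, and the empty answers list are assumptions, the last one because generate_qa_pairs now returns questions without answer spans.

def convert_to_squad_format(qas, context):
    # Hypothetical sketch of a SQuAD-style payload: one article, one paragraph
    # holding the extracted PDF text, and one entry per generated question.
    return {
        "version": "1.1",
        "data": [
            {
                "title": "document",
                "paragraphs": [
                    {
                        "context": context,
                        "qas": [
                            {
                                "id": str(i),
                                "question": question,
                                "answers": [],  # no answer spans are produced upstream
                            }
                            for i, question in enumerate(qas)
                        ],
                    }
                ],
            }
        ],
    }

process_pdf presumably serializes this structure with json.dump into the file exposed through download_link.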
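The Gradio layout itself is outside the diff; only the click wiring and demo.launch() are visible. A plausible reconstruction of the surrounding Blocks UI is sketched below, assuming process_pdf writes the SQuAD JSON to disk and returns its path so a gr.File output can serve it; the component labels and the process_pdf body are assumptions, not the Space's code.

import json
import gradio as gr

def process_pdf(pdf_file, file_name):
    # Hypothetical glue: extract text, generate questions, convert to SQuAD
    # format, and write the JSON file whose path Gradio offers for download.
    text = extract_text_from_pdf(pdf_file)
    questions = generate_qa_pairs(text)
    squad = convert_to_squad_format(questions, context=text)
    output_path = f"{file_name or 'dataset'}.json"
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(squad, f, ensure_ascii=False, indent=2)
    return output_path

with gr.Blocks() as demo:
    pdf_file = gr.File(label="PDF file")
    file_name = gr.Textbox(label="Output file name")
    process_button = gr.Button("Process")
    download_link = gr.File(label="Download SQuAD JSON")

    process_button.click(fn=process_pdf, inputs=[pdf_file, file_name], outputs=download_link)

demo.launch()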