igoracmorais committed on
Commit
e5c536d
·
verified ·
1 Parent(s): ad45712

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -7
app.py CHANGED
@@ -1,8 +1,7 @@
1
  import PyPDF2
2
  import gradio as gr
3
  import json
4
- from transformers import pipeline
5
- from datasets import DatasetDict, Dataset
6
 
7
  # Função para extrair texto do PDF
8
  def extract_text_from_pdf(pdf_file):
@@ -12,11 +11,17 @@ def extract_text_from_pdf(pdf_file):
12
  text += page.extract_text()
13
  return text
14
 
15
- # Função para gerar perguntas e respostas usando o pipeline da Hugging Face
16
  def generate_qa_pairs(text):
17
- qa_pipeline = pipeline("question-generation")
18
- qas = qa_pipeline(text)
19
- return qas
 
 
 
 
 
 
20
 
21
  # Função para converter os pares de QA no formato SQuAD
22
  def convert_to_squad_format(qas, context):
@@ -61,4 +66,4 @@ with gr.Blocks() as demo:
61
 
62
  process_button.click(fn=process_pdf, inputs=[pdf_file, file_name], outputs=download_link)
63
 
64
- demo.launch()
 
1
  import PyPDF2
2
  import gradio as gr
3
  import json
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 
5
 
6
  # Função para extrair texto do PDF
7
  def extract_text_from_pdf(pdf_file):
 
11
  text += page.extract_text()
12
  return text
13
 
14
+ # Função para gerar perguntas e respostas usando um modelo da Hugging Face
15
  def generate_qa_pairs(text):
16
+ tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qg-hl")
17
+ model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qg-hl")
18
+
19
+ input_text = "highlight: " + text
20
+ input_ids = tokenizer.encode(input_text, return_tensors="pt")
21
+ outputs = model.generate(input_ids)
22
+ questions = tokenizer.decode(outputs[0])
23
+
24
+ return questions
25
 
26
  # Função para converter os pares de QA no formato SQuAD
27
  def convert_to_squad_format(qas, context):
 
66
 
67
  process_button.click(fn=process_pdf, inputs=[pdf_file, file_name], outputs=download_link)
68
 
69
+ demo.launch()