# Required imports
import gradio as gr
import fitz # PyMuPDF
from transformers import BartTokenizer, BartForConditionalGeneration, pipeline
import scipy.io.wavfile
import numpy as np
# Initialize the BART summarization model and the Bark text-to-speech pipeline
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
synthesiser = pipeline("text-to-speech", "suno/bark")
# Function to extract the abstract from the first page of a PDF
def extract_abstract(pdf_file):
    # gr.File(type="binary") passes the uploaded file as raw bytes
    doc = fitz.open(stream=pdf_file, filetype="pdf")
    first_page = doc[0].get_text()
    doc.close()
    # The abstract is assumed to sit between the "Abstract" and "Introduction" headings
    start_idx = first_page.lower().find("abstract")
    end_idx = first_page.lower().find("introduction")
    if start_idx != -1 and end_idx != -1:
        return first_page[start_idx:end_idx].strip()
    else:
        return "Abstract not found or 'Introduction' not found on the first page."
# Function to process text (summarize and convert to speech)
def process_text(pdf_file):
    abstract_text = extract_abstract(pdf_file)
    # Generate a short summary with BART (beam search)
    inputs = tokenizer([abstract_text], max_length=1024, return_tensors='pt', truncation=True)
    summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=40, min_length=10,
                                 length_penalty=2.0, early_stopping=True, no_repeat_ngram_size=2)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    # Convert the summary to speech with Bark
    speech = synthesiser(summary, forward_params={"do_sample": True})
    # Scale the float waveform to 16-bit PCM for WAV output
    audio_data = speech["audio"].squeeze()
    normalized_audio_data = np.int16(audio_data / np.max(np.abs(audio_data)) * 32767)
    # Save audio to a temporary file
    output_file = "temp_output.wav"
    scipy.io.wavfile.write(output_file, rate=speech["sampling_rate"], data=normalized_audio_data)
    return summary, output_file
# Gradio interface
iface = gr.Interface(
    fn=process_text,
    inputs=gr.File(label="Upload PDF", type="binary"),
    outputs=["text", "audio"],
    title="Summarization and Text-to-Speech",
    description="Upload a PDF to extract and summarize its abstract, and convert the summary to speech."
)
if __name__ == "__main__":
    iface.launch()
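
# Optional local test sketch (not part of the Space): "paper.pdf" is a
# hypothetical file name, and the call assumes the gr.File(type="binary")
# input above, i.e. process_text receives raw PDF bytes rather than a path.
#
#   with open("paper.pdf", "rb") as f:
#       summary, wav_path = process_text(f.read())
#   print(summary)   # short BART summary of the abstract
#   print(wav_path)  # "temp_output.wav" containing the Bark audio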