# Required imports
import gradio as gr
import fitz # PyMuPDF
from transformers import BartTokenizer, BartForConditionalGeneration, pipeline
import scipy.io.wavfile
import numpy as np
# Initialize tokenizers and models
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
synthesiser = pipeline("text-to-speech", "suno/bark")
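# Note: bart-large-cnn accepts at most 1024 input tokens (longer abstracts are truncated
# below), and the Bark text-to-speech pipeline returns a dict holding the generated "audio"
# array together with its "sampling_rate", which process_text() relies on when writing the WAV.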
# Function to extract abstract from PDF
def extract_abstract(pdf_bytes):
    # Open the PDF from raw bytes with fitz (PyMuPDF) and read the first page's text
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    first_page = doc[0].get_text()
    start_idx = first_page.lower().find("abstract")
    end_idx = first_page.lower().find("introduction")
    if start_idx != -1 and end_idx != -1:
        return first_page[start_idx:end_idx].strip()
    else:
        return "Abstract or Introduction section not found on the first page."
# Function to process text (summarize and convert to speech)
def process_text(uploaded_file):
    # gr.File may hand the function a temp-file path (a str/NamedString) or a file
    # wrapper exposing .name, depending on the Gradio version; read the bytes either way
    try:
        file_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
        with open(file_path, "rb") as f:
            pdf_bytes = f.read()
    except (AttributeError, OSError):
        print("Error reading file content from uploaded_file")
        return "File content could not be retrieved", None
    # Extract abstract from PDF
    abstract_text = extract_abstract(pdf_bytes)
    # Generate summary
    inputs = tokenizer([abstract_text], max_length=1024, return_tensors='pt', truncation=True)
    summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=40, min_length=10, length_penalty=2.0, early_stopping=True, no_repeat_ngram_size=2)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    # Convert summary to speech
    speech = synthesiser(summary, forward_params={"do_sample": True})
    audio_data = speech["audio"].squeeze()
    normalized_audio_data = np.int16(audio_data / np.max(np.abs(audio_data)) * 32767)
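    # The int16 rescaling above maps the float waveform (roughly [-1, 1]) onto the full
    # 16-bit range, which scipy.io.wavfile.write stores as standard 16-bit PCM audio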
    # Save audio to temporary file
    output_file = "temp_output.wav"
    scipy.io.wavfile.write(output_file, rate=speech["sampling_rate"], data=normalized_audio_data)
    return summary, output_file
# Gradio Interface
iface = gr.Interface(
    fn=process_text,
    inputs=gr.components.File(label="Upload PDF"),
    outputs=["text", "audio"],
    title="Summarization and Text-to-Speech",
    description="Upload a PDF to extract its abstract, summarize it, and convert the summary to speech."
)
if __name__ == "__main__":
    iface.launch()
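# When run locally, launch() serves the interface on http://127.0.0.1:7860 by default;
# on a Hugging Face Space, the same call is picked up automatically when app.py starts.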