Spaces:
Sleeping
Sleeping
File size: 2,467 Bytes
9b3a68b 8ce4bd9 cc7fa79 8ce4bd9 9c73e01 8ce4bd9 9c73e01 8ce4bd9 9c73e01 0cdfeaa 520e96b 55e862e 8ce4bd9 8328d05 8ce4bd9 9c73e01 0cdfeaa badf9d9 0cdfeaa 9c73e01 8ce4bd9 9c73e01 8ce4bd9 9c73e01 8ce4bd9 9c73e01 8ce4bd9 b303ff0 8ce4bd9 9c73e01 8ce4bd9 81398e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
#Required imports
import gradio as gr
import fitz # PyMuPDF
from transformers import BartTokenizer, BartForConditionalGeneration, pipeline
import scipy.io.wavfile
import numpy as np
# Load every pretrained component once, at import time, so that each request
# handled by the app reuses the same objects instead of reloading weights.

# BART tokenizer/model pair used to generate the summary.
tokenizer = BartTokenizer.from_pretrained(
    pretrained_model_name_or_path="facebook/bart-large-cnn",
)
model = BartForConditionalGeneration.from_pretrained(
    pretrained_model_name_or_path="facebook/bart-large-cnn",
)

# Bark text-to-speech pipeline used to voice the summary.
synthesiser = pipeline(task="text-to-speech", model="suno/bark")
# Function to extract abstract from PDF
def extract_abstract(pdf_bytes):
    """Return the abstract found on the first page of a PDF.

    The abstract is taken to be the text that starts at the first
    case-insensitive occurrence of "abstract" and ends just before the next
    occurrence of "introduction" that follows it on the first page.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        The stripped abstract text (the "abstract" heading itself is
        included), or an explanatory message when the document has no pages
        or either marker is missing from the first page.
    """
    not_found = "Abstract not found or 'Introduction' not found in the first page."

    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        # Indexing page 0 of a page-less document would raise.
        if len(doc) == 0:
            return not_found
        first_page = doc[0].get_text()
    finally:
        # Release the document even if text extraction fails.
        doc.close()

    lowered = first_page.lower()
    start_idx = lowered.find("abstract")
    if start_idx == -1:
        return not_found

    # Look for "introduction" only *after* the abstract heading; an earlier
    # hit (e.g. in the paper title) would otherwise yield an empty slice.
    end_idx = lowered.find("introduction", start_idx + len("abstract"))
    if end_idx == -1:
        return not_found

    return first_page[start_idx:end_idx].strip()
# Helpers and entry point to process text (summarize and convert to speech)
def _read_pdf_bytes(uploaded_file):
    """Return the uploaded PDF's raw bytes, or None when they are unavailable.

    Accepts the shapes an upload may arrive in: a dict carrying the bytes
    under "content", raw bytes, a filesystem path string, or an object whose
    ``name`` attribute is such a path (e.g. a temporary-file wrapper).
    NOTE(review): which of these Gradio actually passes depends on the
    installed Gradio version and the File component's ``type`` — confirm.
    """
    if uploaded_file is None:
        return None
    if isinstance(uploaded_file, (bytes, bytearray)):
        return bytes(uploaded_file)
    if isinstance(uploaded_file, dict):
        return uploaded_file.get("content")

    if isinstance(uploaded_file, str):
        path = uploaded_file
    else:
        path = getattr(uploaded_file, "name", None)
    # Only accept real path strings; an int ``name`` would be treated by
    # open() as a file descriptor.
    if not isinstance(path, str):
        return None
    try:
        with open(path, "rb") as handle:
            return handle.read()
    except OSError:
        return None


def _to_int16_pcm(audio_data):
    """Peak-normalize a float waveform and convert it to 16-bit PCM samples."""
    samples = np.asarray(audio_data)
    peak = np.max(np.abs(samples)) if samples.size else 0
    if peak == 0:
        # Silent (or empty) audio: avoid dividing by zero, which would
        # produce NaNs and garbage after the integer cast.
        return np.zeros(samples.shape, dtype=np.int16)
    return np.int16(samples / peak * 32767)


def process_text(uploaded_file):
    """Summarize the abstract of an uploaded PDF and synthesize it as speech.

    Args:
        uploaded_file: The upload as delivered by the Gradio File input
            (see ``_read_pdf_bytes`` for the accepted shapes).

    Returns:
        A ``(summary, audio_path)`` tuple, where ``audio_path`` is the path
        of a WAV file containing the spoken summary. When the upload cannot
        be read, returns ``("File content could not be retrieved", None)``.
    """
    import tempfile

    pdf_bytes = _read_pdf_bytes(uploaded_file)
    if not pdf_bytes:
        return "File content could not be retrieved", None

    # NOTE(review): when no abstract is found, extract_abstract returns an
    # explanatory message, which is then summarized and spoken like any
    # other text — consider short-circuiting that case.
    abstract_text = extract_abstract(pdf_bytes)

    # Generate summary
    inputs = tokenizer([abstract_text], max_length=1024, return_tensors='pt', truncation=True)
    summary_ids = model.generate(
        inputs['input_ids'],
        num_beams=4,
        max_length=40,
        min_length=10,
        length_penalty=2.0,
        early_stopping=True,
        no_repeat_ngram_size=2,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Convert summary to speech
    speech = synthesiser(summary, forward_params={"do_sample": True})
    normalized_audio_data = _to_int16_pcm(speech["audio"].squeeze())

    # Write to a unique temporary file so concurrent requests do not
    # overwrite each other's audio. The file is intentionally kept
    # (delete=False) because the caller needs the path after we return.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name
    scipy.io.wavfile.write(output_file, rate=speech["sampling_rate"], data=normalized_audio_data)
    return summary, output_file
# Gradio UI: a single PDF upload in; the summary text and its spoken audio out.
iface = gr.Interface(
    title="Summarization and Text-to-Speech",
    description="Upload a PDF to extract, summarize its abstract, and convert to speech.",
    fn=process_text,
    inputs=gr.components.File(label="Upload PDF"),
    outputs=[
        "text",
        "audio",
    ],
)
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script.
    iface.launch()