Spaces:

amendolajine
/

OPIT

Running

App Files Files Community

OPIT / app.py

amendolajine

Update app.py

0cdfeaa over 1 year ago

raw

history blame

2.27 kB

	# Required imports
	import gradio as gr
	import fitz # PyMuPDF
	from transformers import BartTokenizer, BartForConditionalGeneration, pipeline
	import scipy.io.wavfile
	import numpy as np

	# Initialize tokenizers and models
	# Each checkpoint is named once so the tokenizer and the model are always
	# loaded from the same identifier.
	_SUMMARIZER_CHECKPOINT = "facebook/bart-large-cnn"
	_SPEECH_CHECKPOINT = "suno/bark"

	# BART tokenizer/model pair used by process_text to summarize the abstract.
	tokenizer = BartTokenizer.from_pretrained(_SUMMARIZER_CHECKPOINT)
	model = BartForConditionalGeneration.from_pretrained(_SUMMARIZER_CHECKPOINT)

	# Text-to-speech pipeline used by process_text to voice the summary.
	synthesiser = pipeline(task="text-to-speech", model=_SPEECH_CHECKPOINT)

	# Function to extract abstract from PDF
	def extract_abstract(pdf_bytes):
	    """Return the abstract found on the first page of a PDF.

	    The text of the first page is searched case-insensitively for the word
	    "abstract" and then for the first "introduction" that follows it. The
	    text between the two markers (beginning at "abstract") is returned.

	    Args:
	        pdf_bytes: Raw content of the PDF, passed to ``fitz.open`` as a stream.

	    Returns:
	        The stripped abstract text, or a fixed explanatory message when the
	        document has no pages or either marker cannot be found.
	    """
	    not_found = "Abstract not found or 'Introduction' not found in the first page."

	    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
	    try:
	        # Guard against page-less documents before indexing the first page.
	        if len(doc) == 0:
	            return not_found
	        first_page = doc[0].get_text()
	    finally:
	        # Always release the document, even if text extraction raises.
	        doc.close()

	    lowered = first_page.lower()
	    start_idx = lowered.find("abstract")
	    if start_idx == -1:
	        return not_found

	    # Only look for "introduction" after the abstract heading; a match that
	    # precedes it would otherwise produce an empty slice.
	    end_idx = lowered.find("introduction", start_idx + len("abstract"))
	    if end_idx == -1:
	        return not_found

	    return first_page[start_idx:end_idx].strip()

	# Function to process text (summarize and convert to speech)
	def process_text(uploaded_file):
	    """Summarize the abstract of an uploaded PDF and render it as speech.

	    Args:
	        uploaded_file: The value delivered by the file-upload input. A dict
	            with a ``"data"`` entry, raw bytes, a filesystem path, or an
	            object exposing the path as ``.name`` are all accepted.

	    Returns:
	        A ``(summary, wav_path)`` tuple: the generated summary text and the
	        path of the WAV file containing the spoken summary.

	    Raises:
	        ValueError: If no file was uploaded.
	    """
	    pdf_bytes = _read_pdf_bytes(uploaded_file)
	    abstract_text = extract_abstract(pdf_bytes)

	    # Generate summary
	    inputs = tokenizer(
	        [abstract_text], max_length=1024, return_tensors='pt', truncation=True
	    )
	    summary_ids = model.generate(
	        inputs['input_ids'],
	        num_beams=4,
	        max_length=40,
	        min_length=10,
	        length_penalty=2.0,
	        early_stopping=True,
	        no_repeat_ngram_size=2,
	    )
	    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

	    # Convert summary to speech
	    speech = synthesiser(summary, forward_params={"do_sample": True})
	    normalized_audio_data = _to_int16_pcm(speech["audio"].squeeze())

	    # Save audio to a file the audio output can serve.
	    # NOTE(review): the fixed name is shared by all requests, so overlapping
	    # requests would overwrite each other's audio.
	    output_file = "temp_output.wav"
	    scipy.io.wavfile.write(
	        output_file, rate=speech["sampling_rate"], data=normalized_audio_data
	    )

	    return summary, output_file


	def _read_pdf_bytes(uploaded_file):
	    """Return the PDF content for whichever form the upload arrives in."""
	    if uploaded_file is None:
	        raise ValueError("No PDF file was uploaded.")
	    if isinstance(uploaded_file, dict):
	        # Original behaviour: payload dict carrying the content under "data".
	        return uploaded_file["data"]
	    if isinstance(uploaded_file, (bytes, bytearray)):
	        return bytes(uploaded_file)
	    # Otherwise treat it as a path, or a temp-file-like object with ``.name``.
	    path = getattr(uploaded_file, "name", uploaded_file)
	    with open(path, "rb") as handle:
	        return handle.read()


	def _to_int16_pcm(audio_data):
	    """Scale a waveform to the full int16 range; silent input stays silent."""
	    audio_data = np.asarray(audio_data)
	    # Empty or all-zero audio has no peak to normalise by; dividing by zero
	    # would fill the output with NaNs.
	    peak = np.max(np.abs(audio_data)) if audio_data.size else 0
	    if peak == 0:
	        return np.zeros(audio_data.shape, dtype=np.int16)
	    return np.int16(audio_data / peak * 32767)

	# Gradio Interface
	# Input widget: a single file upload that receives the PDF.
	_pdf_upload = gr.components.File(label="Upload PDF")

	# Output widgets, in the order process_text returns its values:
	# the summary text first, then the audio file.
	_result_widgets = ["text", "audio"]

	iface = gr.Interface(
	    fn=process_text,
	    inputs=_pdf_upload,
	    outputs=_result_widgets,
	    title="Summarization and Text-to-Speech",
	    description="Upload a PDF to extract, summarize its abstract, and convert to speech.",
	)

	# Start the web UI only when the file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()