Spaces:

amendolajine
/

OPIT

Running

App Files Files Community

OPIT / app.py

amendolajine

Update app.py

7e93398 over 1 year ago

raw

history blame

3.15 kB

	import logging
	import gradio as gr
	import fitz # PyMuPDF
	from transformers import BartTokenizer, BartForConditionalGeneration, pipeline
	import scipy.io.wavfile
	import numpy as np

	# Logging setup: timestamped records at DEBUG level and above.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.DEBUG,
	)

	# The summarization tokenizer and model are loaded from the same checkpoint,
	# once, when the module is imported.
	_SUMMARIZER_CHECKPOINT = 'facebook/bart-large-cnn'
	tokenizer = BartTokenizer.from_pretrained(_SUMMARIZER_CHECKPOINT)
	model = BartForConditionalGeneration.from_pretrained(_SUMMARIZER_CHECKPOINT)

	# Text-to-speech pipeline used to voice the generated summary.
	synthesiser = pipeline(task="text-to-speech", model="suno/bark")

	def extract_abstract(pdf_bytes):
	    """Return the abstract found on the first page of a PDF.

	    The first page's text is searched case-insensitively for the word
	    "abstract"; the result runs from that word up to (not including) the
	    next occurrence of "introduction" that follows it.

	    Args:
	        pdf_bytes: Raw PDF content, handed to ``fitz.open`` as a stream.

	    Returns:
	        The stripped abstract text. If either marker is missing, or the PDF
	        cannot be read, a human-readable message string is returned instead;
	        exceptions are logged rather than propagated.
	    """
	    not_found = "Abstract not found or 'Introduction' not found in the first page."
	    try:
	        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
	        try:
	            first_page = doc[0].get_text()
	        finally:
	            # Release the document even when the page lookup fails.
	            doc.close()

	        lowered = first_page.lower()
	        start_idx = lowered.find("abstract")
	        if start_idx == -1:
	            return not_found

	        # Search only after the "abstract" marker: an earlier "introduction"
	        # (e.g. in the paper's title) would otherwise yield an empty slice.
	        end_idx = lowered.find("introduction", start_idx + len("abstract"))
	        if end_idx == -1:
	            return not_found

	        return first_page[start_idx:end_idx].strip()
	    except Exception as e:
	        logging.error(f"Error extracting abstract: {e}")
	        return "Error in abstract extraction"

	# Messages extract_abstract() returns in place of abstract text when it fails.
	_ABSTRACT_FAILURE_MESSAGES = (
	    "Abstract not found or 'Introduction' not found in the first page.",
	    "Error in abstract extraction",
	)


	def _read_pdf_bytes(uploaded_file):
	    """Return the raw content behind an upload, or None if the shape is unsupported."""
	    import os  # local import: the module does not import os at file level

	    # Legacy payload shape: a dict carrying the content under 'data'.
	    if isinstance(uploaded_file, dict):
	        return uploaded_file.get('data')

	    if isinstance(uploaded_file, (bytes, bytearray)):
	        return bytes(uploaded_file)

	    if isinstance(uploaded_file, (str, os.PathLike)):
	        path = uploaded_file
	    else:
	        # Temp-file style objects expose the on-disk path as `.name`.
	        path = getattr(uploaded_file, "name", None)
	        if not isinstance(path, str):
	            return None

	    with open(path, "rb") as fh:
	        return fh.read()


	def process_text(uploaded_file):
	    """Summarize the abstract of an uploaded PDF and synthesize it as speech.

	    Args:
	        uploaded_file: The value Gradio passes for the File input. Accepted
	            shapes are a filesystem path, raw bytes, an object whose
	            ``name`` attribute is a path, or a dict with a ``'data'`` entry.
	            (Which shape arrives depends on the Gradio version and the File
	            component's ``type`` setting.)

	    Returns:
	        A ``(text, audio_path)`` tuple. On success ``text`` is the summary
	        and ``audio_path`` is the WAV file that was written. On any failure
	        ``text`` is a message describing the problem and ``audio_path`` is
	        ``None``.
	    """
	    # Log only the type: the payload itself may be the entire PDF.
	    logging.debug("Uploaded file type: %s", type(uploaded_file))

	    try:
	        pdf_bytes = _read_pdf_bytes(uploaded_file)
	    except (OSError, ValueError) as e:
	        logging.error("Could not read uploaded file: %s", e)
	        pdf_bytes = None

	    if pdf_bytes is None:
	        logging.warning("Uploaded file is not in the expected format")
	        return "File content could not be retrieved", None

	    try:
	        abstract_text = extract_abstract(pdf_bytes)
	        logging.info(f"Extracted abstract: {abstract_text[:100]}...")  # Log first 100 chars of abstract
	    except Exception as e:
	        logging.error(f"Error in abstract extraction: {e}")
	        return "Error in processing PDF", None

	    # extract_abstract reports problems as message strings; show them to the
	    # user as-is instead of summarizing and voicing an error message.
	    if abstract_text in _ABSTRACT_FAILURE_MESSAGES:
	        return abstract_text, None
	    if not abstract_text:
	        return "Error in processing PDF", None

	    try:
	        inputs = tokenizer([abstract_text], max_length=1024, return_tensors='pt', truncation=True)
	        summary_ids = model.generate(
	            inputs['input_ids'],
	            num_beams=4,
	            max_length=40,
	            min_length=10,
	            length_penalty=2.0,
	            early_stopping=True,
	            no_repeat_ngram_size=2,
	        )
	        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

	        speech = synthesiser(summary, forward_params={"do_sample": True})
	        audio_data = speech["audio"].squeeze()

	        # Scale to the full int16 range; a silent clip has peak 0 and must
	        # not be divided by it.
	        peak = np.max(np.abs(audio_data))
	        if peak > 0:
	            audio_data = audio_data / peak * 32767
	        normalized_audio_data = np.int16(audio_data)

	        # NOTE(review): fixed filename, so concurrent requests overwrite each
	        # other's audio - confirm whether per-request files are needed.
	        output_file = "temp_output.wav"
	        scipy.io.wavfile.write(output_file, rate=speech["sampling_rate"], data=normalized_audio_data)

	        return summary, output_file
	    except Exception as e:
	        logging.error(f"Error in summary generation or TTS conversion: {e}")
	        return "Error in summary or speech generation", None

	# Gradio UI: a single PDF upload in; summary text and an audio clip out.
	iface = gr.Interface(
	    title="Summarization and Text-to-Speech",
	    description="Upload a PDF to extract, summarize its abstract, and convert to speech.",
	    fn=process_text,
	    inputs=gr.File(label="Upload PDF"),
	    outputs=["text", "audio"],
	)

	# Launch the app only when this file is executed directly.
	if __name__ == "__main__":
	    iface.launch()