Spaces:
Sleeping
Sleeping
File size: 3,520 Bytes
3abff09 8ce4bd9 cc7fa79 8ce4bd9 3abff09 9c73e01 8ce4bd9 9c73e01 8ce4bd9 0cdfeaa 3abff09 0cdfeaa 7e5f9d1 7e93398 7e5f9d1 7e93398 7e5f9d1 674f46b 3abff09 badf9d9 3abff09 1842832 3abff09 8ce4bd9 1842832 3abff09 8ce4bd9 1842832 3abff09 8ce4bd9 3abff09 8ce4bd9 b303ff0 8ce4bd9 9c73e01 8ce4bd9 81398e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import logging
import tempfile

import fitz # PyMuPDF
import gradio as gr
import numpy as np
import scipy.io.wavfile
from transformers import BartTokenizer, BartForConditionalGeneration, pipeline
# Initialize logging
# Root logger emits everything from DEBUG upwards, each record prefixed with
# a timestamp and its level name.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
# Initialize tokenizers and models
# These are created once at import time and shared by every call to
# process_text below.
# Tokenizer and model from the 'facebook/bart-large-cnn' checkpoint; used to
# generate the summary.
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
# Text-to-speech pipeline backed by the 'suno/bark' checkpoint; used to turn
# the summary into audio.
synthesiser = pipeline("text-to-speech", "suno/bark")
def extract_abstract(pdf_bytes):
    """Extract the abstract from the first page of a PDF.

    The abstract is taken to be the text that starts at the first
    occurrence of "abstract" and ends just before the next occurrence of
    "introduction" (both matched case-insensitively) on the first page.

    Args:
        pdf_bytes: Raw bytes of a PDF document.

    Returns:
        The abstract text (including the "abstract" heading itself) with
        surrounding whitespace stripped. If either marker is missing, or
        the PDF cannot be read, a human-readable message is returned
        instead. This function never raises.
    """
    try:
        # The context manager closes the document even when text
        # extraction fails, so the underlying buffer is not leaked.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            first_page = doc[0].get_text()

        lowered = first_page.lower()
        start_idx = lowered.find("abstract")
        if start_idx == -1:
            end_idx = -1
        else:
            # Look for "introduction" only after the abstract heading; an
            # earlier occurrence (e.g. in the paper title) would otherwise
            # produce an empty slice.
            end_idx = lowered.find("introduction", start_idx)

        if start_idx != -1 and end_idx != -1:
            return first_page[start_idx:end_idx].strip()
        return "Abstract not found or 'Introduction' not found in the first page."
    except Exception as e:
        # Boundary handler: callers receive a message instead of an exception.
        logging.error(f"Error extracting abstract: {e}")
        return "Error in abstract extraction"
def _to_int16_pcm(audio_data):
    """Peak-normalize a waveform and convert it to 16-bit PCM samples."""
    samples = np.asarray(audio_data)
    peak = np.max(np.abs(samples)) if samples.size else 0
    if peak == 0:
        # Silent or empty audio: dividing by the peak would produce NaNs.
        return np.zeros(samples.shape, dtype=np.int16)
    return np.int16(samples / peak * 32767)


def process_text(uploaded_file):
    """Summarize the abstract of an uploaded PDF and convert it to speech.

    Args:
        uploaded_file: The upload handed over by the Gradio File component:
            either a filesystem path, or an object exposing the path as
            its ``name`` attribute.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the generated
        summary and ``audio_path`` is the path of a WAV file containing the
        spoken summary. On any failure ``text`` is an error message and
        ``audio_path`` is ``None``.
    """
    logging.debug(f"Uploaded file type: {type(uploaded_file)}")
    logging.debug(f"Uploaded file path: {uploaded_file}")

    # NOTE(review): depending on the Gradio version/configuration the File
    # component passes a path string or a temp-file wrapper; resolve both to
    # a path. Path-like objects are used as they are.
    if isinstance(uploaded_file, (str, bytes)) or hasattr(uploaded_file, "__fspath__"):
        pdf_path = uploaded_file
    else:
        pdf_path = getattr(uploaded_file, "name", uploaded_file)

    # Read PDF file from the path
    try:
        with open(pdf_path, "rb") as file:
            pdf_bytes = file.read()
    except Exception as e:
        logging.error(f"Error reading file from path: {e}")
        return "Error reading PDF file", None

    try:
        abstract_text = extract_abstract(pdf_bytes)
        logging.info(f"Extracted abstract: {abstract_text[:100]}...")  # Log first 100 chars of abstract
    except Exception as e:
        logging.error(f"Error in abstract extraction: {e}")
        return "Error in processing PDF", None

    # extract_abstract reports failures through these messages instead of
    # raising; summarizing and voicing them would be meaningless, so surface
    # them to the user directly.
    extraction_failures = (
        "Abstract not found or 'Introduction' not found in the first page.",
        "Error in abstract extraction",
    )
    if not abstract_text or abstract_text in extraction_failures:
        return abstract_text or "Error in processing PDF", None

    try:
        # Prepare inputs for the model
        inputs = tokenizer(
            [abstract_text],
            max_length=1024,
            return_tensors='pt',
            truncation=True,
            padding="max_length",
        )
        # Generate summary with beam search
        summary_ids = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],  # Include attention mask
            pad_token_id=model.config.pad_token_id,  # Include pad token id
            num_beams=4,
            max_length=40,
            min_length=10,
            length_penalty=2.0,
            early_stopping=True,
            no_repeat_ngram_size=2,
        )
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        # Convert summary to speech
        speech = synthesiser(summary, forward_params={"do_sample": True})
        pcm = _to_int16_pcm(speech["audio"].squeeze())

        # Use a unique temporary file per request so that concurrent
        # requests cannot overwrite each other's audio. The handle is closed
        # before writing so the path can be reopened on every platform.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_file = tmp.name
        scipy.io.wavfile.write(output_file, rate=speech["sampling_rate"], data=pcm)
        return summary, output_file
    except Exception as e:
        logging.error(f"Error in summary generation or TTS conversion: {e}")
        return "Error in summary or speech generation", None
# Gradio UI: a single file-upload input wired to process_text. The two
# values it returns are shown as a text box and an audio player.
iface = gr.Interface(
    fn=process_text,
    inputs=gr.components.File(label="Upload PDF"),
    outputs=["text", "audio"],
    title="Summarization and Text-to-Speech",
    description="Upload a PDF to extract, summarize its abstract, and convert to speech."
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()