# Source captured from a Hugging Face Space (status at capture time: Running).
# File size: 4,198 bytes.
# https://huggingface.co/spaces/amendolajine/OPIT
# Here are the imports
import logging
import gradio as gr
import fitz # PyMuPDF
from transformers import BartTokenizer, BartForConditionalGeneration, pipeline
import scipy.io.wavfile
import numpy as np
# Here is the code
# Configure root logging: DEBUG level, timestamped "time - level - message" records.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# Load the summarization tokenizer/model and the text-to-speech pipeline once,
# at import time, so every request reuses the same objects.
_SUMMARIZER_CHECKPOINT = 'facebook/bart-large-cnn'
tokenizer = BartTokenizer.from_pretrained(_SUMMARIZER_CHECKPOINT)
model = BartForConditionalGeneration.from_pretrained(_SUMMARIZER_CHECKPOINT)
synthesiser = pipeline(task="text-to-speech", model="suno/bark")
def extract_abstract(pdf_bytes):
    """Return the abstract section found on the first page of a PDF.

    The text of the first page is searched case-insensitively for the word
    "abstract"; the result runs from that word up to (not including) the
    first occurrence of "introduction" that follows it.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        The stripped abstract text (beginning with the "abstract" heading
        itself), a fixed "not found" message when either marker is missing,
        or a fixed error message when the PDF cannot be opened or read.
        Errors are logged and reported through the return value rather than
        raised.
    """
    heading = "abstract"
    try:
        # Open the document as a context manager so it is closed even when
        # page access or text extraction fails.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            first_page = doc[0].get_text()

        # NOTE: indices found in the lowercased copy are applied to the
        # original text; this assumes lowercasing does not change its length.
        lowered = first_page.lower()
        start_idx = lowered.find(heading)
        if start_idx == -1:
            return "Abstract not found or 'Introduction' not found in the first page."

        # Only accept an "introduction" located after the abstract heading;
        # an earlier occurrence (title, running header) would otherwise give
        # end < start and an empty slice.
        end_idx = lowered.find("introduction", start_idx + len(heading))
        if end_idx == -1:
            return "Abstract not found or 'Introduction' not found in the first page."

        return first_page[start_idx:end_idx].strip()
    except Exception as e:
        logging.error(f"Error extracting abstract: {e}")
        return "Error in abstract extraction"
# Messages extract_abstract returns instead of raising. They describe a
# failure, so they must not be summarized or spoken as if they were an abstract.
_EXTRACTION_FAILURE_MESSAGES = (
    "Abstract not found or 'Introduction' not found in the first page.",
    "Error in abstract extraction",
)


def _join_hyphenated(tokens):
    """Glue tokens split by a trailing line-break hyphen ("summa-", "rization")."""
    joined = []
    pending = None  # prefix of a word whose remainder is in the next token
    for token in tokens:
        if pending is not None:
            token = pending + token
            pending = None
        if token.endswith('-'):
            pending = token[:-1]
        else:
            joined.append(token)
    if pending is not None:
        # A trailing hyphen on the very last token has nothing to join with.
        joined.append(pending + '-')
    return joined


def _clean_summary(summary):
    """Rewrite a model summary as a single flowing sentence for speech synthesis."""
    tokens = _join_hyphenated(summary.split())
    last = len(tokens) - 1

    # Replace each sentence-final period (except the one closing the summary)
    # with "and". Only a trailing period counts, so decimals such as "3.5"
    # are left intact.
    linked = [
        token[:-1] + ' and' if i != last and token.endswith('.') else token
        for i, token in enumerate(tokens)
    ]

    # Lowercase the first letter of every word except "and" (as the original
    # cleaning rule does), THEN capitalize the first character so the leading
    # capital is not undone by the lowercasing pass.
    words = [
        w if w.lower() == 'and' else w[0].lower() + w[1:]
        for w in ' '.join(linked).split()
    ]
    text = ' '.join(words)
    return text[:1].upper() + text[1:]


def process_text(uploaded_file):
    """Summarize the abstract of an uploaded PDF and synthesize it as speech.

    Args:
        uploaded_file: Path of the PDF file; it is passed directly to
            ``open`` (presumably the path supplied by the Gradio File
            component -- confirm against the Gradio version in use).

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the cleaned
        summary and ``audio_path`` is "temp_output.wav". On any failure
        ``text`` is a short error message and ``audio_path`` is ``None``.
    """
    logging.debug(f"Uploaded file type: {type(uploaded_file)}")
    logging.debug(f"Uploaded file content: {uploaded_file}")

    try:
        with open(uploaded_file, "rb") as file:
            pdf_bytes = file.read()
    except Exception as e:
        logging.error(f"Error reading file from path: {e}")
        return "Error reading PDF file", None

    try:
        abstract_text = extract_abstract(pdf_bytes)
        logging.info(f"Extracted abstract: {abstract_text[:200]}...")
    except Exception as e:
        logging.error(f"Error in abstract extraction: {e}")
        return "Error in processing PDF", None

    # extract_abstract reports failures through its return value; surface the
    # message to the user instead of summarizing and vocalizing it.
    if abstract_text in _EXTRACTION_FAILURE_MESSAGES:
        return abstract_text, None

    try:
        inputs = tokenizer(
            [abstract_text],
            max_length=1024,
            return_tensors='pt',
            truncation=True,
            padding="max_length",
        )
        summary_ids = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            pad_token_id=model.config.pad_token_id,
            num_beams=4,
            max_length=45,
            min_length=10,
            length_penalty=2.0,
            early_stopping=True,
            no_repeat_ngram_size=2,
        )
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        final_summary = _clean_summary(summary)
        if not final_summary:
            # Nothing to speak; handled by the except clause below.
            raise ValueError("model produced an empty summary")

        speech = synthesiser(final_summary, forward_params={"do_sample": True})
        audio_data = speech["audio"].squeeze()

        # Peak-normalize to the int16 range; skip the division for silent
        # audio, where the peak is 0 and dividing would produce NaNs.
        peak = np.max(np.abs(audio_data))
        if peak > 0:
            audio_data = audio_data / peak
        normalized_audio_data = np.int16(audio_data * 32767)

        output_file = "temp_output.wav"
        scipy.io.wavfile.write(output_file, rate=speech["sampling_rate"], data=normalized_audio_data)
        return final_summary, output_file
    except Exception as e:
        logging.error(f"Error in summary generation or TTS conversion: {e}")
        return "Error in summary or speech generation", None
# Gradio UI: one PDF upload in; the summary text and the synthesized audio out.
iface = gr.Interface(
    fn=process_text,
    inputs=gr.components.File(label="Upload a research PDF containing an abstract"),
    outputs=["text", "audio"],
    title="Summarize an abstract and vocalize it",
    description="Upload a research paper in PDF format to extract, summarize its abstract, and convert the summarization to speech. If the upload doesn't work on the first try, refresh the page (CTRL+F5) and try again.",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()