# Gradio demo: extract a PDF's abstract, summarize it with BART, and read the summary aloud with Bark.
import logging
import gradio as gr
import fitz  # PyMuPDF
from transformers import BartTokenizer, BartForConditionalGeneration, pipeline
import scipy.io.wavfile
import numpy as np

# Configure the root logger: DEBUG level, "timestamp - level - message" format.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

# Load the models once at import time so every call to process_text reuses them:
# - tokenizer / model: the facebook/bart-large-cnn checkpoint, used to summarize the abstract.
# - synthesiser: a transformers text-to-speech pipeline backed by suno/bark.
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
synthesiser = pipeline("text-to-speech", "suno/bark")

def extract_abstract(pdf_bytes):
    """Return the abstract found on the first page of a PDF.

    The abstract is taken to be the text starting at the first
    case-insensitive occurrence of "abstract" and ending just before the
    next occurrence of "introduction" on the first page.

    Args:
        pdf_bytes: Raw bytes of a PDF document.

    Returns:
        The stripped abstract text. If either marker is missing, or the PDF
        cannot be opened or read, a human-readable message is returned
        instead (read errors are also logged).
    """
    try:
        # The context manager closes the document even if reading the page fails.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            first_page = doc[0].get_text()
    except Exception as e:
        logging.error(f"Error extracting abstract: {e}")
        return "Error in abstract extraction"

    not_found = "Abstract not found or 'Introduction' not found in the first page."
    lowered = first_page.lower()

    start_idx = lowered.find("abstract")
    if start_idx == -1:
        return not_found

    # Look for the end marker only *after* the abstract heading; an earlier
    # "introduction" (e.g. in the title) would otherwise produce an empty slice.
    end_idx = lowered.find("introduction", start_idx + len("abstract"))
    if end_idx == -1:
        return not_found

    return first_page[start_idx:end_idx].strip()

# Messages extract_abstract() returns in place of an abstract when it fails.
# They must not be summarized or spoken as if they were paper content.
_ABSTRACT_FAILURE_MESSAGES = (
    "Abstract not found or 'Introduction' not found in the first page.",
    "Error in abstract extraction",
)


def _resolve_upload_path(uploaded_file):
    """Return something open() accepts for the value handed over by the upload widget."""
    if isinstance(uploaded_file, (str, bytes)) or hasattr(uploaded_file, "__fspath__"):
        return uploaded_file
    # Temp-file style objects expose their location as `.name`; anything else
    # is passed through unchanged so open() reports the problem.
    return getattr(uploaded_file, "name", uploaded_file)


def process_text(uploaded_file):
    """Summarize the abstract of an uploaded PDF and convert the summary to speech.

    Args:
        uploaded_file: Path of the uploaded PDF, or a file-like object whose
            ``name`` attribute holds that path.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the generated
        summary and ``audio_path`` is the WAV file containing the spoken
        summary. On any failure ``text`` is a short message describing what
        went wrong and ``audio_path`` is ``None``.
    """
    logging.debug("Uploaded file type: %s", type(uploaded_file))
    logging.debug("Uploaded file path: %s", uploaded_file)

    # Read PDF file from the path
    try:
        with open(_resolve_upload_path(uploaded_file), "rb") as file:
            pdf_bytes = file.read()
    except Exception as e:
        logging.error(f"Error reading file from path: {e}")
        return "Error reading PDF file", None

    try:
        abstract_text = extract_abstract(pdf_bytes)
        logging.info(f"Extracted abstract: {abstract_text[:100]}...")  # Log first 100 chars of abstract
    except Exception as e:
        logging.error(f"Error in abstract extraction: {e}")
        return "Error in processing PDF", None

    # Stop here when no abstract was obtained, instead of summarizing and
    # speaking an error message (or an empty string).
    if not abstract_text:
        return _ABSTRACT_FAILURE_MESSAGES[0], None
    if abstract_text in _ABSTRACT_FAILURE_MESSAGES:
        logging.warning(f"No abstract available: {abstract_text}")
        return abstract_text, None

    try:
        # Prepare inputs for the model
        inputs = tokenizer([abstract_text], max_length=1024, return_tensors='pt', truncation=True, padding="max_length")

        # Generate summary
        summary_ids = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],  # Mask out the padding tokens
            pad_token_id=model.config.pad_token_id,
            num_beams=4,
            max_length=40,
            min_length=10,
            length_penalty=2.0,
            early_stopping=True,
            no_repeat_ngram_size=2
        )
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        # Convert summary to speech
        speech = synthesiser(summary, forward_params={"do_sample": True})
        audio_data = speech["audio"].squeeze()

        # Scale to the full int16 range; all-zero audio would otherwise
        # divide by zero and produce NaNs.
        peak = np.max(np.abs(audio_data))
        if peak > 0:
            normalized_audio_data = np.int16(audio_data / peak * 32767)
        else:
            normalized_audio_data = np.zeros_like(audio_data, dtype=np.int16)

        # NOTE: fixed filename in the working directory; each call overwrites
        # the file written by the previous one.
        output_file = "temp_output.wav"
        scipy.io.wavfile.write(output_file, rate=speech["sampling_rate"], data=normalized_audio_data)

        return summary, output_file
    except Exception as e:
        logging.error(f"Error in summary generation or TTS conversion: {e}")
        return "Error in summary or speech generation", None

# Gradio UI: one PDF upload input wired to process_text, whose two return
# values fill the text output (summary or error message) and the audio output
# (path of the generated WAV file, or None on failure).
iface = gr.Interface(
    fn=process_text,
    inputs=gr.components.File(label="Upload PDF"),
    outputs=["text", "audio"],
    title="Summarization and Text-to-Speech",
    description="Upload a PDF to extract, summarize its abstract, and convert to speech."
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()