eevaw committed on
Commit
f1569c8
·
verified ·
1 Parent(s): c06752d

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -75
app.py DELETED
@@ -1,75 +0,0 @@
1
# Dependencies must be installed from a shell before running this script
# (a bare `pip install ...` line is not valid Python):
#   pip install gradio PyMuPDF

import fitz  # PyMuPDF
import gradio as gr
from transformers import MT5ForConditionalGeneration, T5Tokenizer

# Load the fine-tuned tokenizer and model once at start-up so every request
# reuses the same instances.
model_name = "fine-tuned-mt5"
new_tokenizer = T5Tokenizer.from_pretrained(model_name, clean_up_tokenization_spaces=True)
new_model = MT5ForConditionalGeneration.from_pretrained(model_name)
11
-
12
- # Function to extract text from PDF using PyMuPDF
13
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Path of the PDF to read, or a file-like upload object that
            exposes the path of its on-disk copy as ``.name``. ``None``
            (nothing uploaded) yields an empty string.

    Returns:
        The text of all pages joined in page order, or ``""`` when no file
        was given.
    """
    if pdf_file is None:
        return ""

    # NOTE(review): some Gradio versions appear to pass gr.File uploads as a
    # temp-file wrapper instead of a path string -- confirm against the
    # installed version. Checking for ``read`` keeps pathlib.Path objects
    # (which also have ``.name``) untouched.
    if hasattr(pdf_file, "read") and hasattr(pdf_file, "name"):
        pdf_file = pdf_file.name

    with fitz.open(pdf_file) as doc:
        # join() avoids the quadratic cost of repeated string concatenation.
        return "".join(page.get_text() for page in doc)
20
-
21
- # Summarization function
22
def summarize_pdf(pdf_file, max_summary_length):
    """Summarize the text of an uploaded PDF with the fine-tuned model.

    Args:
        pdf_file: The uploaded PDF as handed over by the UI, or ``None`` when
            nothing was uploaded.
        max_summary_length: Upper bound passed to generation as ``max_length``;
            coerced to ``int`` because UI sliders may deliver floats.

    Returns:
        The summary, cut after its last full stop so it does not end
        mid-sentence. A short explanatory message is returned instead when
        there is no file, no extractable text, or no usable summary. If
        generation or decoding raises, the exception text is returned.
    """
    if pdf_file is None:
        return "Please upload a PDF file."

    # Extract text from the PDF
    input_text = extract_text_from_pdf(pdf_file)
    if not input_text.strip():
        # Image-only PDFs yield no text; do not feed an empty prompt to the model.
        return "No text could be extracted from the PDF."

    # NOTE(review): the input is encoded without truncation, so very long
    # documents are passed to the model in full -- confirm this is intended.
    tokenized_input = new_tokenizer.encode(input_text, return_tensors='pt')

    try:
        # Generate the summary
        summary_ids = new_model.generate(
            tokenized_input,
            max_length=int(max_summary_length),
            min_length=30,
            num_beams=15,
            repetition_penalty=5.0,
            no_repeat_ngram_size=2,
        )

        # Decode the generated summary
        summary = new_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    except Exception as e:
        return str(e)  # Return the error message for debugging

    # Drop any leftover sentinel tokens; split/join also normalizes whitespace.
    cleaned_summary = ' '.join(
        token for token in summary.split() if not token.startswith('<extra_id_')
    )

    # Ensure the summary ends with a complete sentence: keep everything up to
    # and including the last period, if there is one.
    last_period_index = cleaned_summary.rfind('.')
    if last_period_index != -1:
        cleaned_summary = cleaned_summary[:last_period_index + 1]

    return cleaned_summary if cleaned_summary else "No valid summary generated."
60
-
61
- # Define the Gradio interface
62
# Define the Gradio interface: a PDF upload plus a slider for the maximum
# summary length, wired to summarize_pdf, with the result shown in a textbox.
interface = gr.Interface(
    fn=summarize_pdf,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Slider(50, 300, step=10, label="Max summary length"),
    ],
    outputs="textbox",  # A textbox for the output summary
    title="PDF Text Summarizer",
    description="Upload a PDF file to summarize its content.",
)

# Launch only when executed as a script, so importing this module does not
# start a server as a side effect.
if __name__ == "__main__":
    # Launch the interface with debug mode enabled
    interface.launch(debug=True)