Alioth86 committed on
Commit
13ddd14
·
1 Parent(s): da55be4

Add application file

Browse files
Files changed (1) hide show
  1. app.py +22 -32
app.py CHANGED
@@ -147,45 +147,35 @@ def extract_abstract(text_per_pagy):
147
  return abstract_text
148
 
149
 
150
def main_function(uploaded_file):
    """Summarise the abstract of an uploaded PDF and read the summary aloud.

    Args:
        uploaded_file: A readable file-like object holding the PDF bytes, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(summary, audio_file_path)`` tuple. When ``uploaded_file`` is
        ``None`` the tuple is ``("No file loaded", None)``.
    """
    # Guard: nothing was uploaded.
    if uploaded_file is None:
        return "No file loaded", None

    # Persist the upload to a temporary PDF so it can be processed by path.
    # delete=False keeps the file after the handle closes; it is removed
    # explicitly in the ``finally`` clause below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as pdf_handle:
        pdf_handle.write(uploaded_file.read())
        pdf_location = pdf_handle.name

    try:
        # Read and process the PDF through its temporary path.
        pages = read_pdf(pdf_location)

        # Replace every page entry, in place, with its cleaned text, then
        # pull the abstract out of the cleaned pages.
        # NOTE(review): assumes each entry's first element is an iterable of
        # strings - confirm against read_pdf.
        for page_key in pages:
            pages[page_key] = clean_text(' '.join(pages[page_key][0]))
        abstract = extract_abstract(pages)

        # Summarise the abstract.
        summarize = pipeline("summarization", model="pszemraj/long-t5-tglobal-base-sci-simplify-elife")
        summary_outputs = summarize(abstract, max_length=50, min_length=30, do_sample=False)
        summary = summary_outputs[0]['summary_text']

        # Generate speech from the summary.
        tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
        xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
        voice = torch.tensor(xvectors[7306]["xvector"]).unsqueeze(0)
        spoken = tts(summary, forward_params={"speaker_embeddings": voice})

        # Save the audio to a file.
        audio_file_path = "summary.wav"
        sf.write(audio_file_path, spoken["audio"], samplerate=spoken["sampling_rate"])
    finally:
        # Always delete the temporary PDF, even when processing fails.
        os.remove(pdf_location)

    # Hand back both the text summary and the path of the audio file.
    return summary, audio_file_path
190
 
191
 
 
147
  return abstract_text
148
 
149
 
150
# Heavy objects (model pipelines, speaker embedding) built once and reused
# across calls instead of being reloaded for every uploaded file.
_MODEL_CACHE = {}


def _cached(key, factory):
    """Return the object cached under *key*, building it with *factory* on first use."""
    if key not in _MODEL_CACHE:
        _MODEL_CACHE[key] = factory()
    return _MODEL_CACHE[key]


def main_function(uploaded_filepath):
    """Summarise the abstract of a PDF and synthesise the summary as speech.

    Args:
        uploaded_filepath: Path of the uploaded PDF, or ``None`` when no file
            was uploaded.

    Returns:
        A ``(summary, audio_file_path)`` tuple. When there is nothing to
        process the second element is ``None`` and the first is a short
        message: ``"No file loaded"`` if ``uploaded_filepath`` is ``None``,
        ``"No abstract found"`` if no abstract text could be extracted.
    """
    # Guard: nothing was uploaded.
    if uploaded_filepath is None:
        return "No file loaded", None

    # Read and process the file.
    text_per_pagy = read_pdf(uploaded_filepath)

    # Replace every page entry, in place, with its cleaned text, then pull
    # the abstract out of the cleaned pages.
    # NOTE(review): assumes each entry's first element is an iterable of
    # strings - confirm against read_pdf.
    for key, value in text_per_pagy.items():
        text_per_pagy[key] = clean_text(' '.join(value[0]))
    abstract_text = extract_abstract(text_per_pagy)

    # Do not feed an empty abstract to the summarizer; report it in the same
    # shape as the "no file" case instead.
    if not abstract_text:
        return "No abstract found", None

    # Summarise the abstract (pipeline is loaded once and then reused).
    summarizer = _cached(
        "summarizer",
        lambda: pipeline("summarization", model="pszemraj/long-t5-tglobal-base-sci-simplify-elife"),
    )
    summary = summarizer(abstract_text, max_length=50, min_length=30, do_sample=False)[0]['summary_text']

    # Generate the audio from the summary text. Both the TTS pipeline and the
    # speaker embedding are identical for every call, so they are cached too.
    synthesiser = _cached(
        "synthesiser",
        lambda: pipeline("text-to-speech", model="microsoft/speecht5_tts"),
    )
    speaker_embedding = _cached(
        "speaker_embedding",
        lambda: torch.tensor(
            load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")[7306]["xvector"]
        ).unsqueeze(0),
    )
    speech = synthesiser(summary, forward_params={"speaker_embeddings": speaker_embedding})

    # Save the audio to a file in the working directory.
    audio_file_path = "summary.wav"
    sf.write(audio_file_path, speech["audio"], samplerate=speech["sampling_rate"])

    # The function returns the two pieces the caller needs.
    return summary, audio_file_path
180
 
181