Spaces:
Runtime error
Runtime error
Add application file
Browse files
app.py
CHANGED
@@ -147,45 +147,35 @@ def extract_abstract(text_per_pagy):
|
|
147 |
return abstract_text
|
148 |
|
149 |
|
150 |
-
def main_function(
|
151 |
-
#
|
152 |
-
if
|
153 |
return "No file loaded", None
|
154 |
|
155 |
-
#
|
156 |
-
|
157 |
-
temp_pdf.write(uploaded_file.read())
|
158 |
-
temp_pdf_path = temp_pdf.name
|
159 |
|
160 |
-
#
|
161 |
-
|
162 |
-
|
|
|
|
|
163 |
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
text_per_pagy[key] = cleaned_text
|
168 |
-
abstract_text = extract_abstract(text_per_pagy)
|
169 |
|
170 |
-
|
171 |
-
|
172 |
-
|
|
|
|
|
173 |
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
|
178 |
-
speech = synthesiser(summary, forward_params={"speaker_embeddings": speaker_embedding})
|
179 |
|
180 |
-
|
181 |
-
audio_file_path = "summary.wav"
|
182 |
-
sf.write(audio_file_path, speech["audio"], samplerate=speech["sampling_rate"])
|
183 |
-
|
184 |
-
finally:
|
185 |
-
# Elimina il file temporaneo
|
186 |
-
os.remove(temp_pdf_path)
|
187 |
-
|
188 |
-
# Restituisci testo e audio
|
189 |
return summary, audio_file_path
|
190 |
|
191 |
|
|
|
147 |
return abstract_text
|
148 |
|
149 |
|
150 |
+
def main_function(uploaded_filepath):
    """Summarise the abstract of an uploaded PDF and render it as speech.

    Args:
        uploaded_filepath: Path of the uploaded PDF, or ``None`` when no
            file was provided.

    Returns:
        A ``(text, audio_path)`` tuple. ``text`` is the generated summary
        and ``audio_path`` the WAV file holding the spoken summary. When no
        file was provided, or no abstract text could be extracted,
        ``text`` is a short status message and ``audio_path`` is ``None``.
    """
    # Guard clause: nothing to do without an uploaded file.
    if uploaded_filepath is None:
        return "No file loaded", None

    # Read the PDF and collapse each entry down to one cleaned string.
    # NOTE(review): assumes read_pdf returns a dict whose values hold the
    # text fragments at index 0 -- confirm against read_pdf.
    text_per_pagy = {
        key: clean_text(' '.join(value[0]))
        for key, value in read_pdf(uploaded_filepath).items()
    }
    abstract_text = extract_abstract(text_per_pagy)

    # Do not feed an empty abstract to the summariser; report it the same
    # way a missing upload is reported.
    if not abstract_text:
        return "No abstract found", None

    models = _get_models()

    # Summarise the abstract.
    summary = models["summarizer"](
        abstract_text, max_length=50, min_length=30, do_sample=False
    )[0]['summary_text']

    # Turn the summary into speech using the cached speaker embedding.
    speech = models["synthesiser"](
        summary,
        forward_params={"speaker_embeddings": models["speaker_embedding"]},
    )

    # Save the audio to disk.
    # NOTE(review): the file name is fixed, so concurrent requests would
    # overwrite each other's audio -- confirm whether that matters here.
    audio_file_path = "summary.wav"
    sf.write(audio_file_path, speech["audio"], samplerate=speech["sampling_rate"])

    # The function returns the two pieces the caller needs.
    return summary, audio_file_path


# Heavy resources shared by every call to main_function; filled on first use.
_MODEL_CACHE = {}


def _get_models():
    """Return the summariser, synthesiser and speaker embedding, loading them once."""
    if not _MODEL_CACHE:
        summarizer = pipeline(
            "summarization",
            model="pszemraj/long-t5-tglobal-base-sci-simplify-elife",
        )
        synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
        embeddings_dataset = load_dataset(
            "Matthijs/cmu-arctic-xvectors", split="validation"
        )
        speaker_embedding = torch.tensor(
            embeddings_dataset[7306]["xvector"]
        ).unsqueeze(0)
        # Populate in one step so a failed load never leaves a partial cache.
        _MODEL_CACHE.update(
            summarizer=summarizer,
            synthesiser=synthesiser,
            speaker_embedding=speaker_embedding,
        )
    return _MODEL_CACHE
|
180 |
|
181 |
|