Spaces:
Sleeping
Sleeping
Remove \n characters and replace them with a single space in web extraction
Browse files
app.py
CHANGED
@@ -182,7 +182,7 @@ def analyze_pdf_novelty(patent_background, pdf_url):
|
|
182 |
return {"error": "PDF has no pages"}
|
183 |
|
184 |
first_page = pdf_document.load_page(0)
|
185 |
-
text = first_page.get_text()
|
186 |
|
187 |
# Return the extracted text for frontend analysis with OpenAI
|
188 |
# We're not doing the analysis here as it will be done in the frontend
|
|
|
182 |
return {"error": "PDF has no pages"}
|
183 |
|
184 |
first_page = pdf_document.load_page(0)
|
185 |
+
text = re.sub(r'\n+', ' ', first_page.get_text())
|
186 |
|
187 |
# Return the extracted text for frontend analysis with OpenAI
|
188 |
# We're not doing the analysis here as it will be done in the frontend
|