YchKhan committed on
Commit
01c9f58
·
verified ·
1 Parent(s): 00667d4

Replace each run of \n characters with a single space in web extraction

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -182,7 +182,7 @@ def analyze_pdf_novelty(patent_background, pdf_url):
182
  return {"error": "PDF has no pages"}
183
 
184
  first_page = pdf_document.load_page(0)
185
- text = first_page.get_text()
186
 
187
  # Return the extracted text for frontend analysis with OpenAI
188
  # We're not doing the analysis here as it will be done in the frontend
 
182
  return {"error": "PDF has no pages"}
183
 
184
  first_page = pdf_document.load_page(0)
185
+ text = re.sub(r'\n+', ' ', first_page.get_text())
186
 
187
  # Return the extracted text for frontend analysis with OpenAI
188
  # We're not doing the analysis here as it will be done in the frontend