raannakasturi committed on
Commit
c7c6bac
·
verified ·
1 Parent(s): b8aa173

Update extract_text.py

Browse files
Files changed (1) hide show
  1. extract_text.py +1 -3
extract_text.py CHANGED
@@ -31,9 +31,7 @@ def extract_text_from_pdf(url, id):
31
  relevant_text = all_text[start_index:end_index]
32
  else:
33
  relevant_text = all_text
34
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
35
- text_list = text_splitter.split_text(relevant_text)
36
- research_paper_text = "".join(text_list)
37
  except Exception as e:
38
  print(f"Error processing PDF: {e}")
39
  research_paper_text = ""
 
31
  relevant_text = all_text[start_index:end_index]
32
  else:
33
  relevant_text = all_text
34
+ research_paper_text = relevant_text
 
 
35
  except Exception as e:
36
  print(f"Error processing PDF: {e}")
37
  research_paper_text = ""