manasvinid committed on
Commit
c7bbd93
·
verified ·
1 Parent(s): b5bf8cc

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +46 -0
functions.py CHANGED
@@ -16,6 +16,8 @@ from sentence_transformers import SentenceTransformer
16
  from qdrant_client import QdrantClient
17
  from qdrant_client.http.models import VectorParams, Distance, Record, Filter
18
  from random import uniform
 
 
19
 
20
 
21
  def setup_nltk_resources():
@@ -383,4 +385,48 @@ class QdrantInterface:
383
 
384
 
385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
 
 
16
  from qdrant_client import QdrantClient
17
  from qdrant_client.http.models import VectorParams, Distance, Record, Filter
18
  from random import uniform
19
+ import PyPDF2
20
+
21
 
22
 
23
  def setup_nltk_resources():
 
385
 
386
 
387
 
388
def main():
    """Run the Streamlit "PDF to CSV Converter" page.

    Renders a PDF upload widget. Once a file has been uploaded, the text of
    every page is extracted with PyPDF2, converted to CSV through
    ``convert_to_csv``, shown on the page, and offered as a
    ``converted_data.csv`` download. Returns ``None``; does nothing beyond
    drawing the title and uploader while no file is present.
    """
    st.title("PDF to CSV Converter")

    # File uploader widget
    uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
    if uploaded_file is None:
        # Nothing uploaded yet: leave only the title and uploader on screen.
        return

    # Read PDF file.
    # PdfReader / .pages / extract_text() are the supported API; the older
    # PdfFileReader / numPages / getPage / extractText names were removed in
    # PyPDF2 3.0 and raise an error there.
    pdf_reader = PyPDF2.PdfReader(uploaded_file)

    # Extract text from each page. ``or ""`` guards against a falsy result
    # so the join below never sees a non-string.
    page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

    # Join pages with a newline so the last line of one page is not fused
    # with the first line of the next into a single CSV row.
    text = "\n".join(page_texts)

    # Convert text to CSV
    csv_data = convert_to_csv(text)

    # Display or download CSV
    st.subheader("Converted CSV Data")
    st.write(csv_data)

    # Download link for CSV file
    st.download_button(
        label="Download CSV",
        data=csv_data,
        file_name="converted_data.csv",
        mime="text/csv",
    )
419
+
420
def convert_to_csv(text):
    """Return *text* as CSV with one row per line under a ``Text`` column.

    The input is split on ``"\\n"``; each resulting piece becomes one row of
    a single-column DataFrame, which is serialised without the index.

    Args:
        text: The string to convert.

    Returns:
        The CSV content as a string, starting with the ``Text`` header row.
    """
    rows = text.split("\n")
    return pd.DataFrame(data=rows, columns=["Text"]).to_csv(index=False)
429
+
430
+
431
+
432