Spaces:
Sleeping
Sleeping
Update functions.py
Browse files- functions.py +46 -0
functions.py
CHANGED
@@ -16,6 +16,8 @@ from sentence_transformers import SentenceTransformer
|
|
16 |
from qdrant_client import QdrantClient
|
17 |
from qdrant_client.http.models import VectorParams, Distance, Record, Filter
|
18 |
from random import uniform
|
|
|
|
|
19 |
|
20 |
|
21 |
def setup_nltk_resources():
|
@@ -383,4 +385,48 @@ class QdrantInterface:
|
|
383 |
|
384 |
|
385 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
386 |
|
|
|
16 |
from qdrant_client import QdrantClient
|
17 |
from qdrant_client.http.models import VectorParams, Distance, Record, Filter
|
18 |
from random import uniform
|
19 |
+
import PyPDF2
|
20 |
+
|
21 |
|
22 |
|
23 |
def setup_nltk_resources():
|
|
|
385 |
|
386 |
|
387 |
|
388 |
+
def main():
    """Render the "PDF to CSV Converter" Streamlit page.

    Shows a PDF upload widget. Once a file has been uploaded, the text of
    every page is extracted and concatenated, handed to ``convert_to_csv``,
    and the resulting CSV is displayed and offered as a download named
    ``converted_data.csv``.
    """
    st.title("PDF to CSV Converter")

    # File uploader widget
    uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

    # Nothing to convert until the user has picked a file.
    if uploaded_file is None:
        return

    # Read PDF file. PdfReader / .pages / extract_text() replace the legacy
    # PdfFileReader / numPages / getPage / extractText names, which PyPDF2 3.x
    # no longer supports.
    pdf_reader = PyPDF2.PdfReader(uploaded_file)

    # Extract text from each page; join once instead of growing a string
    # page by page. `or ""` guards against a page yielding no text.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Convert text to CSV
    csv_data = convert_to_csv(text)

    # Display or download CSV
    st.subheader("Converted CSV Data")
    st.write(csv_data)

    # Download link for CSV file
    st.download_button(
        label="Download CSV",
        data=csv_data,
        file_name="converted_data.csv",
        mime="text/csv",
    )
|
419 |
+
|
420 |
+
def convert_to_csv(text):
    """Return *text* as CSV with one row per line under a "Text" column.

    The input is split on newline characters; each piece becomes one row of
    a single-column DataFrame, which is serialized without the index column.
    """
    frame = pd.DataFrame({"Text": text.split("\n")})
    return frame.to_csv(index=False)
|
429 |
+
|
430 |
+
|
431 |
+
|
432 |
|