Spaces:
Sleeping
Sleeping
File size: 1,660 Bytes
819bacd fe7c659 819bacd 714be4e 819bacd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import io
import os
from fastapi import UploadFile
import pdfplumber
from app.db_local_storage.files_db import TEXT_FILES_DIRECTORY
from app.db_local_storage.documents_db import documents_text
class ExtractTextFeature:
    """Extract text from uploaded PDF files and record it in ``documents_text``."""

    # NOTE(review): the file-persistence helpers below are disabled. They are
    # kept commented out exactly as found; delete them or re-enable them once
    # the intended storage strategy is confirmed.
    #
    # @staticmethod
    # def ensure_directory_exists(folder_path: str) -> None:
    #     """Ensure that the directory exists."""
    #     if not os.path.exists(folder_path):
    #         os.makedirs(folder_path)
    #
    # @staticmethod
    # async def saveFile(content, filename, directory: str) -> str:
    #     """Save the uploaded file to the specified directory."""
    #     file_path = os.path.join(directory, filename)
    #     with open(file_path, "w") as file:
    #         file.write(content)
    #     return file_path
    #
    # @staticmethod
    # async def save_text_from_pdf(file: UploadFile, text) -> str:
    #     ExtractTextFeature.ensure_directory_exists(TEXT_FILES_DIRECTORY)
    #     await ExtractTextFeature.saveFile(text, file.filename, TEXT_FILES_DIRECTORY)
    #     return text

    @staticmethod
    async def extract_text_from_pdf(file: UploadFile) -> str:
        """Extract the text of an uploaded PDF and register it in ``documents_text``.

        The upload is read fully into memory, every page's text is
        concatenated in page order, and a record with the keys ``id``,
        ``filename`` and ``text`` is appended to ``documents_text``.

        Args:
            file: The uploaded PDF.

        Returns:
            The concatenated text of all pages. NOTE: if an entry with the
            same filename is already present in ``documents_text``, the dict
            ``{"message": "Document already exists"}`` is returned instead,
            despite the ``str`` annotation; callers must handle both shapes.
        """
        content = await file.read()

        # Reject duplicates before the (comparatively expensive) PDF parse.
        if any(doc["filename"] == file.filename for doc in documents_text):
            return {"message": "Document already exists"}

        with pdfplumber.open(io.BytesIO(content)) as pdf:
            # extract_text() may yield None for a page without extractable
            # text (e.g. a scanned image) on some pdfplumber versions;
            # treat that as an empty string instead of raising TypeError.
            text = "".join(page.extract_text() or "" for page in pdf.pages)

        documents_text.append(
            {
                "id": len(documents_text) + 1,
                "filename": file.filename,
                "text": text,
            }
        )
        return text
|