File size: 1,660 Bytes
819bacd
 
 
 
 
 
fe7c659
 
819bacd
 
 
 
714be4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
819bacd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import io
import os

from fastapi import UploadFile
import pdfplumber

from app.db_local_storage.files_db import TEXT_FILES_DIRECTORY
from app.db_local_storage.documents_db import documents_text


class ExtractTextFeature:
    """Extract the text of uploaded PDFs and record it in ``documents_text``."""

    # NOTE: the file-saving helpers below are commented out and unused.

    # @staticmethod
    # def ensure_directory_exists(folder_path: str) -> None:
    #     """Ensure that the directory exists."""
    #     if not os.path.exists(folder_path):
    #         os.makedirs(folder_path)

    # @staticmethod
    # async def saveFile(content, filename, directory: str) -> str:
    #     """Save the uploaded file to the specified directory."""
    #     file_path = os.path.join(directory, filename)
    #     with open(file_path, "w") as file:
    #         file.write(content)
    #     return file_path

    # @staticmethod
    # async def save_text_from_pdf(file: UploadFile, text) -> str:

    #     ExtractTextFeature.ensure_directory_exists(TEXT_FILES_DIRECTORY)
    #     await ExtractTextFeature.saveFile(text, file.filename, TEXT_FILES_DIRECTORY)

    #     return text

    @staticmethod
    async def extract_text_from_pdf(file: UploadFile) -> str:
        """Read an uploaded PDF, extract its text and store it in ``documents_text``.

        The text of every page is concatenated in page order with no
        separator added between pages.

        Args:
            file: The uploaded PDF; its full content is read into memory.

        Returns:
            The extracted text when the document is new. If an entry with the
            same filename is already present in ``documents_text``, nothing is
            stored and the dict ``{"message": "Document already exists"}`` is
            returned instead (note that this differs from the ``str``
            annotation).
        """
        content = await file.read()
        with pdfplumber.open(io.BytesIO(content)) as pdf:
            # extract_text() can return None for a page without extractable
            # text in some pdfplumber versions; treat that as an empty string
            # so one blank or scanned page does not raise a TypeError.
            text = "".join(page.extract_text() or "" for page in pdf.pages)

        already_stored = any(
            document["filename"] == file.filename for document in documents_text
        )
        if already_stored:
            return {"message": "Document already exists"}

        documents_text.append(
            {
                # Ids are assigned sequentially from the current list length.
                "id": len(documents_text) + 1,
                "filename": file.filename,
                "text": text,
            }
        )

        return text