File size: 1,030 Bytes
5c0bf38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import PyPDF2
from PyPDF2 import PdfReader
import docx
import os
import logging
from textwrap import dedent
import gradio as gr

def extract_text_from_file(file):
    """Return the plain text contained in an uploaded PDF, DOCX, or TXT file.

    The file type is chosen from the extension of the upload's name
    (case-insensitive). Failures are reported as human-readable strings
    rather than raised, so the result can be shown to the user directly.

    Args:
        file: The upload. Either a file-like object exposing ``name`` (and
            ``read()`` for TXT input) or a filesystem path.
            NOTE(review): presumably this is whatever the Gradio file
            component hands over; confirm against the caller.

    Returns:
        The extracted text with leading/trailing whitespace stripped, or one
        of these messages:

        * ``"No file uploaded!"`` when ``file`` is ``None``;
        * an "Unsupported file type!" message for any other extension;
        * ``"Error reading file: ..."`` when parsing or decoding fails.
    """
    if file is None:
        return "No file uploaded!"

    # Uploads may be file objects carrying ``.name`` or bare path strings.
    file_name = getattr(file, "name", file)
    # splitext only looks at the final path component, so dotted directory
    # names and extension-less files are not mistaken for a known type.
    file_type = os.path.splitext(str(file_name))[1].lower().lstrip(".")

    try:
        if file_type == "pdf":
            reader = PdfReader(file)
            # Guard against pages that yield no text (e.g. image-only pages)
            # so one empty page cannot abort the whole extraction.
            text = "".join(page.extract_text() or "" for page in reader.pages)
        elif file_type == "docx":
            doc = docx.Document(file)
            text = "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)
        elif file_type == "txt":
            if hasattr(file, "read"):
                raw = file.read()
            else:
                # Only a path was supplied: read the bytes ourselves.
                with open(file, "rb") as handle:
                    raw = handle.read()
            # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM,
            # which str.strip() would otherwise leave in the output. Text-mode
            # file objects already return str and need no decoding.
            text = raw.decode("utf-8-sig") if isinstance(raw, bytes) else raw
        else:
            return "Unsupported file type! Please upload a PDF, DOCX, or TXT file."
    except Exception as e:
        # Deliberately broad: every failure is surfaced to the user as a
        # message instead of propagating out of this function.
        return f"Error reading file: {str(e)}"

    return text.strip()