File size: 2,305 Bytes
ceaad05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24ccd6f
ceaad05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5d7ba2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
import torch
from transformers import pipeline
import PyPDF2
from docx import Document
import io

# FastAPI application object; the title and description below are passed to
# FastAPI as the API's metadata.
app = FastAPI(
    title="Text Summarization API",
    description="API for summarizing text and documents using Falcon's text summarization model"
)

# Configure CORS: accept cross-origin requests from any origin, with any
# method and any headers, and with credentials allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# combination the FastAPI CORS documentation advises against — confirm
# whether credentialed cross-origin requests are actually needed here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Build the summarization pipeline once, at import time, so every request
# reuses the same loaded model. Run on CUDA when torch reports it available,
# otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

summarization_pipe = pipeline(
    "summarization",
    model="Falconsai/text_summarization",
    device=device,
)

def extract_text_from_pdf(file_bytes: bytes) -> str:
    """Extract the text of every page of an in-memory PDF.

    Args:
        file_bytes: Raw bytes of the PDF file.

    Returns:
        The text of all pages that yielded any text, in page order and
        separated by newlines. An empty string when no page yields text.
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    # Skip pages that yield no text (empty or None) so they neither break the
    # join nor make an all-empty document look non-empty. Separate the rest
    # with a newline so the last word of one page is not fused with the first
    # word of the next.
    return "\n".join(filter(None, page_texts))

def extract_text_from_docx(file_bytes):
    """Return the text of an in-memory DOCX document.

    The text of each paragraph is emitted in document order, and every
    paragraph (including the last) is followed by a newline.
    """
    document = Document(io.BytesIO(file_bytes))
    return "".join(para.text + "\n" for para in document.paragraphs)

@app.post("/summarize/text")
async def summarize_text(text: str = Form(...)):
    """
    Summarize text input

    The text is read from the ``text`` form field. Returns
    ``{"summary": ...}`` on success, or ``{"error": ...}`` when the submitted
    text is empty or contains only whitespace.
    """
    # Whitespace-only input has nothing to summarize; reject it here instead
    # of sending it to the model.
    if not text.strip():
        return {"error": "Please provide text to summarize"}

    results = summarization_pipe(text)
    # The pipeline result is indexed as a list; the first entry holds the
    # summary of the single input under 'summary_text'.
    return {"summary": results[0]['summary_text']}

@app.post("/summarize/file")
async def summarize_file(file: UploadFile = File(...)):
    """
    Summarize text from a PDF or DOCX file

    The format is chosen from the uploaded file's name (case-insensitive
    ``.pdf`` or ``.docx`` suffix). Returns ``{"summary": ...}`` on success and
    ``{"error": ...}`` when the format is unsupported, when no text could be
    extracted, or when extraction or summarization raises.
    """
    contents = await file.read()
    # The filename can be missing; fall back to an empty name so the upload is
    # reported as an unsupported format instead of failing on None.lower().
    file_name = (file.filename or "").lower()

    if file_name.endswith('.pdf'):
        extract_text = extract_text_from_pdf
    elif file_name.endswith('.docx'):
        extract_text = extract_text_from_docx
    else:
        return {"error": "Unsupported file format. Please upload a PDF or DOCX file."}

    try:
        text = extract_text(contents)

        # Treat whitespace-only output (e.g. a document of empty paragraphs)
        # the same as no text at all.
        if not text or not text.strip():
            return {"error": "Could not extract text from the file"}

        summary = summarization_pipe(text)
        return {"summary": summary[0]['summary_text']}

    except Exception as e:
        # Endpoint boundary: report any parsing or model failure to the
        # client in the same {"error": ...} shape as the other failures.
        return {"error": f"Error processing file: {str(e)}"}
# Script entry point: serve the app with uvicorn when this file is run
# directly rather than imported.
if __name__ == "__main__":
    # Imported here so uvicorn is only required when running as a script.
    import uvicorn
    # Host 0.0.0.0 listens on all network interfaces; the server uses port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)