Spaces:
Sleeping
Sleeping
File size: 2,305 Bytes
ceaad05 24ccd6f ceaad05 a5d7ba2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
import torch
from transformers import pipeline
import PyPDF2
from docx import Document
import io
# Application object; the title and description are passed to FastAPI as
# metadata for the API.
app = FastAPI(
    title="Text Summarization API",
    description="API for summarizing text and documents using Falcon's text summarization model",
)
# Configure CORS: every origin, HTTP method and request header is accepted,
# and credentials are allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Initialize the summarization pipeline once at import time so that both
# endpoints share it. Use CUDA when torch reports it is available, else CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

summarization_pipe = pipeline(
    "summarization",
    model="Falconsai/text_summarization",
    device=device,
)
def extract_text_from_pdf(file_bytes):
    """Extract the text of every page of a PDF document.

    Args:
        file_bytes: Raw bytes of the PDF file.

    Returns:
        The extracted text of all pages, concatenated in page order with no
        separator inserted between pages.
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
    # Defensive: treat a page that yields None (nothing extractable) as empty
    # text so the concatenation cannot fail with a TypeError.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def extract_text_from_docx(file_bytes):
    """Return the text of a DOCX document, one line per paragraph.

    The bytes are wrapped in an in-memory stream and opened with ``Document``.
    Each paragraph's text is followed by a newline, so the result ends with a
    newline whenever the document has at least one paragraph.
    """
    document = Document(io.BytesIO(file_bytes))
    lines = [para.text + "\n" for para in document.paragraphs]
    return "".join(lines)
@app.post("/summarize/text")
async def summarize_text(text: str = Form(...)):
    """Summarize text submitted as a form field.

    Args:
        text: The text to summarize, sent as the ``text`` form field.

    Returns:
        ``{"summary": ...}`` holding the ``summary_text`` of the first
        pipeline result, or ``{"error": ...}`` when the submitted text is
        empty or contains only whitespace.
    """
    # Whitespace-only input has nothing to summarize; reject it the same way
    # as empty input instead of sending it to the model.
    if not text or not text.strip():
        return {"error": "Please provide text to summarize"}
    summary = summarization_pipe(text)
    return {"summary": summary[0]['summary_text']}
@app.post("/summarize/file")
async def summarize_file(file: UploadFile = File(...)):
    """Summarize the text contained in an uploaded PDF or DOCX file.

    The file type is chosen from the (case-insensitive) filename extension.

    Args:
        file: The uploaded file.

    Returns:
        ``{"summary": ...}`` holding the ``summary_text`` of the first
        pipeline result, or ``{"error": ...}`` when the format is unsupported,
        no text could be extracted, or processing raised an exception.
    """
    contents = await file.read()
    # Guard against a missing filename: fall back to "" so the request ends in
    # the "unsupported format" response instead of an AttributeError.
    file_name = (file.filename or "").lower()
    try:
        if file_name.endswith('.pdf'):
            text = extract_text_from_pdf(contents)
        elif file_name.endswith('.docx'):
            text = extract_text_from_docx(contents)
        else:
            return {"error": "Unsupported file format. Please upload a PDF or DOCX file."}
        # extract_text_from_docx appends a newline per paragraph, so a document
        # of empty paragraphs yields whitespace-only text; treat that as
        # "nothing extracted" too.
        if not text or not text.strip():
            return {"error": "Could not extract text from the file"}
        summary = summarization_pipe(text)
        return {"summary": summary[0]['summary_text']}
    except Exception as e:
        # Endpoint boundary: report extraction/model failures in the same
        # error payload shape as the other failure cases.
        return {"error": f"Error processing file: {str(e)}"}
# Script entry point: when this file is executed directly, serve the app with
# uvicorn on all interfaces at port 7860.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)