ajibs75 committed on
Commit
ceaad05
·
verified ·
1 Parent(s): 6a0f4aa

Create api.py

Browse files
Files changed (1) hide show
  1. api.py +79 -0
api.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, Form
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ import torch
4
+ from transformers import pipeline
5
+ import PyPDF2
6
+ from docx import Document
7
+ import io
8
+
9
# Application object that the route decorators below attach to.
app = FastAPI(
    title="Text Summarization API",
    description="API for summarizing text and documents using Falcon's text summarization model"
)

# Configure CORS.
# Any origin, method and header is accepted. Credentials are disabled because
# the FastAPI CORS documentation states that wildcard origins/methods/headers
# may not be combined with allow_credentials=True; nothing in this module
# reads cookies or authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the summarization pipeline once at import time so every request
# reuses the same loaded model; run on the GPU when torch reports one.
device = "cuda" if torch.cuda.is_available() else "cpu"
summarization_pipe = pipeline("summarization", model="Falconsai/text_summarization", device=device)
26
+
27
def extract_text_from_pdf(file_bytes):
    """Extract the text of every page of an in-memory PDF.

    Args:
        file_bytes: Raw bytes of the PDF file.

    Returns:
        The text of all pages that produced any text, joined with newlines,
        or an empty string when no page produced text.
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    # Drop pages with a falsy result (empty string or None) so a text-free
    # document still yields "" for the caller's emptiness check, and join
    # with a newline so the last word of one page does not fuse with the
    # first word of the next.
    return "\n".join(chunk for chunk in page_texts if chunk)
33
+
34
def extract_text_from_docx(file_bytes):
    """Return the text of a DOCX file held in memory.

    Args:
        file_bytes: Raw bytes of the DOCX file.

    Returns:
        The text of each paragraph in ``doc.paragraphs``, every one followed
        by a newline, concatenated in document order.
    """
    document = Document(io.BytesIO(file_bytes))
    return "".join(f"{para.text}\n" for para in document.paragraphs)
40
+
41
@app.post("/summarize/text")
async def summarize_text(text: str = Form(...)):
    """Summarize text submitted as form data.

    Args:
        text: The text to summarize, sent as the ``text`` form field.

    Returns:
        ``{"summary": ...}`` holding the ``summary_text`` of the first
        pipeline result, or ``{"error": ...}`` when the input is empty or
        contains only whitespace.
    """
    # Whitespace-only input is truthy but has nothing to summarize, so reject
    # it here instead of handing it to the model.
    if not text or not text.strip():
        return {"error": "Please provide text to summarize"}

    # NOTE(review): the pipeline is called directly inside this async handler;
    # if the call blocks, other requests wait on it - confirm this is acceptable.
    summary = summarization_pipe(text)
    return {"summary": summary[0]['summary_text']}
51
+
52
@app.post("/summarize/file")
async def summarize_file(file: UploadFile = File(...)):
    """Summarize the text of an uploaded PDF or DOCX file.

    The format is chosen from the (case-insensitive) file name suffix.

    Args:
        file: The uploaded file, sent as the ``file`` multipart field.

    Returns:
        ``{"summary": ...}`` holding the ``summary_text`` of the first
        pipeline result, or ``{"error": ...}`` when the format is
        unsupported, no text could be extracted, or processing raised.
    """
    contents = await file.read()
    # A missing file name is treated like an unsupported format rather than
    # failing on ``None.lower()`` before the error handling below is reached.
    file_name = (file.filename or "").lower()

    try:
        if file_name.endswith('.pdf'):
            text = extract_text_from_pdf(contents)
        elif file_name.endswith('.docx'):
            text = extract_text_from_docx(contents)
        else:
            return {"error": "Unsupported file format. Please upload a PDF or DOCX file."}

        # Extraction can yield only whitespace (for example a DOCX made of
        # empty paragraphs); that is not summarizable text either.
        if not text or not text.strip():
            return {"error": "Could not extract text from the file"}

        # NOTE(review): the pipeline is called directly inside this async
        # handler; if the call blocks, other requests wait on it - confirm.
        summary = summarization_pipe(text)
        return {"summary": summary[0]['summary_text']}

    except Exception as e:
        # Endpoint boundary: report parsing/model failures in the response
        # body instead of letting them surface as an unhandled server error.
        return {"error": f"Error processing file: {str(e)}"}
76
+
77
if __name__ == "__main__":
    # Executed as a script: serve the app with uvicorn on all interfaces.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )