Arafath10 committed on
Commit
9c62372
1 Parent(s): 41655a4

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +23 -23
main.py CHANGED
@@ -1,8 +1,3 @@
1
- from fastapi import FastAPI, HTTPException
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from scraper import Scraper
4
-
5
-
6
  try: from pip._internal.operations import freeze
7
  except ImportError: # pip < 10.0
8
  from pip.operations import freeze
@@ -10,7 +5,12 @@ except ImportError: # pip < 10.0
10
  pkgs = freeze.freeze()
11
  for pkg in pkgs: print(pkg)
12
 
 
 
 
 
13
  app = FastAPI()
 
14
  app.add_middleware(
15
  CORSMiddleware,
16
  allow_origins=["*"],
@@ -18,22 +18,22 @@ app.add_middleware(
18
  allow_methods=["*"],
19
  allow_headers=["*"],
20
  )
21
- import time
22
-
23
@app.get("/get_scraped_data")
async def get_data(url: str):
    """Scrape ``url`` and return the result of ``Scraper.scrape``.

    The time spent awaiting the scraper is printed to stdout.

    Args:
        url: Value passed straight through to ``Scraper.scrape``.

    Returns:
        Whatever ``Scraper.scrape(url)`` returns, unmodified.

    Raises:
        Any exception raised by ``Scraper.scrape``; nothing is caught or
        translated here.
    """
    # perf_counter() is monotonic and intended for measuring intervals;
    # time.time() can jump if the system clock is adjusted mid-request.
    start_time = time.perf_counter()
    data = await Scraper.scrape(url)
    elapsed_time = time.perf_counter() - start_time

    print(f"Time taken for the process: {elapsed_time:.2f} seconds")
    return data
39
 
 
 
 
 
 
 
1
  try: from pip._internal.operations import freeze
2
  except ImportError: # pip < 10.0
3
  from pip.operations import freeze
 
5
# Print every entry produced by pip's freeze operation, one per line.
# This runs at import time, so the listing is emitted on every startup.
pkgs = freeze.freeze()
for pkg in pkgs:
    print(pkg)
7
 
8
+ from fastapi import FastAPI, HTTPException, File, UploadFile
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from PyPDF2 import PdfReader
11
+
12
  app = FastAPI()
13
+
14
  app.add_middleware(
15
  CORSMiddleware,
16
  allow_origins=["*"],
 
18
  allow_methods=["*"],
19
  allow_headers=["*"],
20
  )
21
+
22
def _extract_pdf_text(stream):
    """Return the stripped, concatenated text of every page read from ``stream``."""
    reader = PdfReader(stream)
    # Join once instead of growing a string with ``+=`` per page. The
    # ``or ""`` keeps a page that yields no text (falsy result) from
    # breaking the concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages).strip()


@app.post("/get_ocr_data/")
async def get_data(pdf: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and return it as JSON.

    Despite the route name, no OCR is performed here: the text comes from
    PyPDF2's ``extract_text`` on each page.
    NOTE(review): image-only (scanned) PDFs will presumably yield empty
    text -- confirm whether real OCR is expected.

    Args:
        pdf: The uploaded file (multipart form field ``pdf``).

    Returns:
        ``{"text": <all page text concatenated, whitespace-stripped>}``.

    Raises:
        HTTPException: status 500 with the underlying error message if
            reading or parsing the upload fails for any reason.
    """
    # Imported locally so this block stays self-contained and does not
    # depend on the file-level import list.
    from fastapi.concurrency import run_in_threadpool

    try:
        # PyPDF2 parsing is synchronous; running it in the worker thread
        # pool keeps it from stalling the event loop for other requests.
        text = await run_in_threadpool(_extract_pdf_text, pdf.file)
    except Exception as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(
            status_code=500, detail=f"Error processing PDF: {str(e)}"
        ) from e

    return {"text": text}
38
+
39