Spaces:

SuperCanvasAI
/

xyz

Sleeping

akshayp committed on Apr 27

Commit

eff991c

1 Parent(s): 02c8ce6

Add application file

Files changed (3) hide show

Dockerfile ADDED Viewed

+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.11
+# Install system packages while the build is still running as root: once
+# USER switches to the unprivileged account, apt can no longer write to the
+# package database and the install step fails.
+# poppler-utils and tesseract-ocr are the native tools used for PDF
+# rendering and OCR.
+RUN apt-get update \
+    && apt-get install -y poppler-utils tesseract-ocr \
+    && rm -rf /var/lib/apt/lists/*
+# Run everything else as a non-root user with UID 1000.
+RUN useradd -m -u 1000 user
+USER user
+# pip installs console scripts (e.g. uvicorn) into ~/.local/bin for this user.
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+# Copy requirements first so the dependency layer is cached across code changes.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+# Serve the FastAPI app on port 7860.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

+from fastapi import FastAPI, Query, HTTPException
+from extractous import Extractor, TesseractOcrConfig
+app = FastAPI()
+@app.get("/")
+def accepts_pdf_link(link: str = Query(..., description="The URL to pdf file")):
+    if not link.startswith(("http://", "https://")):
+        raise HTTPException(status_code=400, detail="Invalid URL format")
+    extractor = Extractor().set_ocr_config(TesseractOcrConfig())
+    extractor = extractor.set_xml_output(False)
+    content, metadata  = extractor.extract_url_to_string(link)
+    return {"received_link": link, "content": content}

requirements.txt ADDED Viewed

+# Imported by app.py (web framework).
+fastapi
+# Server started by the Dockerfile CMD.
+uvicorn[standard]
+# NOTE(review): google-genai and vecs are not imported by app.py in this commit; confirm they are needed.
+google-genai
+vecs
+# Imported by app.py (text extraction / OCR).
+extractous
+# NOTE(review): the remaining packages are not imported by app.py in this commit; confirm they are needed.
+beautifulsoup4
+markdownify
+pdf2image
+firebase-admin