e83418445 committed on
Commit
1b10823
·
1 Parent(s): 1e948a0

Agregar estructura de proyecto

Browse files
Files changed (7) hide show
  1. Dockerfile +28 -0
  2. README.md +0 -12
  3. app.py +28 -0
  4. frontend/Index.svelte +77 -0
  5. frontend/package.json +15 -0
  6. frontend/vite.config.js +10 -0
  7. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
# Image for the PDF text-extraction app: Python backend plus a Node-built frontend.
FROM python:3.9-slim

# System tools for PDF/OCR processing, plus Node.js and npm for the frontend build.
# The apt package index is deleted in the same layer so it does not bloat the image.
RUN apt-get update && apt-get install -y \
    ghostscript \
    tesseract-ocr \
    poppler-utils \
    libxml2 \
    unpaper \
    nodejs \
    npm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies before copying the sources so this layer stays
# cached when only application code changes.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Build the frontend bundle.
COPY frontend/ ./frontend/
WORKDIR /app/frontend
RUN npm install && npm run build

WORKDIR /app
COPY . .

# Gradio listens on 127.0.0.1 by default, which cannot be reached from outside
# the container; bind to all interfaces on the exposed port instead.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

EXPOSE 7860

CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +0,0 @@
1
- ---
2
- title: PDF OCR
3
- emoji: 📉
4
- colorFrom: pink
5
- colorTo: pink
6
- sdk: gradio
7
- sdk_version: 5.7.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import os
4
+ from PyPDF2 import PdfReader
5
+ import tempfile
6
+
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_path: Path (or other source accepted by ``PdfReader``) of the PDF.

    Returns:
        The page texts joined with newlines, with leading and trailing
        whitespace stripped. Pages that yield no text contribute an empty line.
    """
    reader = PdfReader(pdf_path)
    # `or ""` guards against a page whose extraction yields None, which would
    # otherwise make str.join raise TypeError.
    page_texts = [page.extract_text() or "" for page in reader.pages]
    # Join with a newline so the last word of one page does not run into the
    # first word of the next.
    return "\n".join(page_texts).strip()
11
+
def process_pdf(file):
    """Return the text extracted from an uploaded PDF.

    Args:
        file: The value delivered by the ``gr.File`` input. Accepted forms are
            ``None`` (nothing uploaded), a filesystem path (``str`` or
            ``os.PathLike``), a readable binary file-like object, or an object
            exposing the path through a ``name`` attribute.

    Returns:
        The extracted text, or an empty string when no file was provided.
    """
    if file is None:
        return ""

    # A path can be handed to the extractor directly; no temporary copy needed.
    if isinstance(file, (str, os.PathLike)):
        return extract_text_from_pdf(os.fspath(file))

    if not hasattr(file, "read"):
        # Wrapper object that only exposes the uploaded file's path.
        return extract_text_from_pdf(file.name)

    # File-like object: spool it to disk so it can be read by path, and always
    # remove the copy afterwards (delete=False would otherwise leak it).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(file.read())
        temp_pdf_path = temp_pdf.name
    try:
        return extract_text_from_pdf(temp_pdf_path)
    finally:
        os.remove(temp_pdf_path)
19
+
# Gradio UI: a PDF upload field, a textbox for the extracted text, and a button
# that runs process_pdf on the uploaded file.
with gr.Blocks() as demo:
    gr.Markdown("# Procesador de PDFs en Hugging Face Space")
    pdf_file = gr.File(label="Carga tu PDF", file_types=[".pdf"])
    text_output = gr.Textbox(label="Texto Extraído", lines=10)
    process_button = gr.Button("Procesar PDF")

    process_button.click(process_pdf, inputs=[pdf_file], outputs=[text_output])

# Bind to all interfaces on port 7860: Gradio's default host is 127.0.0.1,
# which is not reachable from outside a container.
demo.launch(server_name="0.0.0.0", server_port=7860)
frontend/Index.svelte ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
<script lang="ts">
  import pdfjsLib from "pdfjs-dist";

  // Fetch the worker from the same pdfjs-dist release as the API bundled here.
  // An unversioned CDN URL resolves to the latest release, which pdf.js rejects
  // when its version differs from the API version.
  pdfjsLib.GlobalWorkerOptions.workerSrc = `https://cdn.jsdelivr.net/npm/pdfjs-dist@${pdfjsLib.version}/build/pdf.worker.min.js`;

  let pdfDoc;
  let numPages = 1;
  let currentPage = 1;
  let canvasRef;
  // In-flight pdf.js render task, kept so it can be cancelled before the canvas is reused.
  let renderTask;
  // Incremented on every load request so a slow, superseded load can be discarded.
  let loadToken = 0;

  /** Component input; the PDF is fetched from `value.url`. */
  export let value;

  /**
   * Load the document at `value.url`, reset to page 1 and render it.
   * Does nothing when no URL is available.
   */
  async function loadPDF(value) {
    const url = value?.url;
    if (!url) {
      return;
    }

    const token = ++loadToken;
    try {
      const loadedDoc = await pdfjsLib.getDocument(url).promise;
      if (token !== loadToken) {
        // A newer load started while this one was pending; drop this result.
        return;
      }
      pdfDoc = loadedDoc;
      numPages = loadedDoc.numPages;
      currentPage = 1;
      await renderPage();
    } catch (err) {
      console.error("Failed to load PDF", err);
    }
  }

  /** Draw `currentPage` of the loaded document onto the canvas at 1.5x scale. */
  async function renderPage() {
    if (!pdfDoc || !canvasRef) {
      return;
    }

    const pageNumber = currentPage;
    let task;
    try {
      const page = await pdfDoc.getPage(pageNumber);
      if (pageNumber !== currentPage) {
        // The user navigated again while the page was loading.
        return;
      }

      // pdf.js refuses two concurrent renders on one canvas; cancel the old one first.
      renderTask?.cancel();

      const viewport = page.getViewport({ scale: 1.5 });
      const canvasContext = canvasRef.getContext("2d");
      canvasRef.width = viewport.width;
      canvasRef.height = viewport.height;

      task = page.render({ canvasContext, viewport });
      renderTask = task;
      await task.promise;
    } catch (err) {
      // A cancelled render is expected when navigating quickly; anything else is a real error.
      if (err?.name !== "RenderingCancelledException") {
        console.error("Failed to render PDF page", err);
      }
    } finally {
      if (task && renderTask === task) {
        renderTask = undefined;
      }
    }
  }

  /** Advance one page, stopping at the last page. */
  function nextPage() {
    if (currentPage < numPages) {
      currentPage++;
      renderPage();
    }
  }

  /** Go back one page, stopping at the first page. */
  function prevPage() {
    if (currentPage > 1) {
      currentPage--;
      renderPage();
    }
  }

  // Reload whenever the `value` prop changes to something truthy.
  $: if (value) {
    loadPDF(value);
  }
</script>

<div>
  <div class="pdf-viewer">
    <canvas bind:this={canvasRef}></canvas>
  </div>
  <div class="controls">
    <button on:click={prevPage} disabled={currentPage === 1}>⬅️ Anterior</button>
    <span>{currentPage} / {numPages}</span>
    <button on:click={nextPage} disabled={currentPage === numPages}>Siguiente ➡️</button>
  </div>
</div>

<style>
  .pdf-viewer {
    display: flex;
    justify-content: center;
    align-items: center;
    height: 500px;
  }
  .controls {
    display: flex;
    justify-content: center;
    gap: 10px;
    margin-top: 10px;
  }
</style>
frontend/package.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "name": "frontend",
4
+ "version": "1.0.0",
5
+ "scripts": {
6
+ "dev": "vite",
7
+ "build": "vite build"
8
+ },
9
+ "dependencies": {
10
+ "pdfjs-dist": "^3.11.174"
11
+ },
12
+ "devDependencies": {
13
+ "vite": "^4.0.0"
14
+ }
15
+ }
frontend/vite.config.js ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import { defineConfig } from "vite";
3
+
// Build options: write the bundle to ../dist (relative to the project root)
// and empty that directory before each build.
const build = {
  outDir: "../dist",
  emptyOutDir: true,
};

/** Vite configuration with the project root set to "./". */
export default defineConfig({ root: "./", build });
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+
2
+ gradio
3
+ PyPDF2