Fix for catching Gradio FileData objects when they are passed from API calls as strings
Files changed:
- __pycache__/asl_gloss.cpython-311.pyc +0 -0
- __pycache__/document_parsing.cpython-311.pyc +0 -0
- app.py +17 -9
- asl_gloss.py +0 -1
- document_parsing.py +55 -1
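For context on the app.py change below: when a caller reaches the Space through the API instead of the UI, a file input can arrive as the string form of a gradio.FileData payload rather than a dict. A rough sketch of such a string and the one field the handler ultimately needs from it; the key names follow the gradio.FileData model but the payload itself is illustrative, not taken from this repository:

import json

# Illustrative stringified FileData payload; keys such as "orig_name" and
# "meta" are assumptions based on the gradio.FileData model, not this repo.
raw = (
    '{"path": "/tmp/gradio/abc123/report.pdf", '
    '"orig_name": "report.pdf", '
    '"meta": {"_type": "gradio.FileData"}}'
)

# The same heuristic used by determine_input_type() in the diff below:
if raw.startswith('{') and 'gradio.FileData' in raw:
    file_data = json.loads(raw)
    print(file_data['path'])  # /tmp/gradio/abc123/report.pdf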
__pycache__/asl_gloss.cpython-311.pyc
CHANGED
Binary files a/__pycache__/asl_gloss.cpython-311.pyc and b/__pycache__/asl_gloss.cpython-311.pyc differ
__pycache__/document_parsing.cpython-311.pyc
CHANGED
Binary files a/__pycache__/document_parsing.cpython-311.pyc and b/__pycache__/document_parsing.cpython-311.pyc differ
app.py
CHANGED
@@ -199,7 +199,8 @@ def cleanup_temp_video(file_path):
 def determine_input_type(input_data):
     """
     Determine the type of input data and return a standardized format.
-    Returns: (input_type, processed_data) where input_type is 'text',
+    Returns: (input_type, processed_data) where input_type is 'text',
+    'file_path', or 'file_object'
     """
     if isinstance(input_data, str):
         # Check if it's a file path (contains file extension)
@@ -209,11 +210,20 @@ def determine_input_type(input_data):
         elif input_data.startswith('{') and 'gradio.FileData' in input_data:
             try:
                 import ast
-
-
+                import json
+                # Try to parse as JSON first
+                try:
+                    file_data = json.loads(input_data)
+                except json.JSONDecodeError:
+                    # Fall back to ast.literal_eval for safer parsing
+                    file_data = ast.literal_eval(input_data)
+
                 if isinstance(file_data, dict) and 'path' in file_data:
+                    print(f"Parsed FileData: {file_data}")
                     return 'file_path', file_data['path']
-            except (ValueError, SyntaxError):
+            except (ValueError, SyntaxError, json.JSONDecodeError) as e:
+                print(f"Error parsing FileData string: {e}")
+                print(f"Input data: {input_data}")
                 pass
         else:
             return 'text', input_data.strip()
@@ -255,9 +265,7 @@ def process_input(input_data):
 async def parse_vectorize_and_search_unified(input_data):
     """
     Unified function that handles both text and file inputs
-    """
-    print(f"Input type: {type(input_data)}")
-
+    """
     # Process the input to get gloss
     gloss = process_input(input_data)
     if not gloss:
@@ -356,7 +364,6 @@ def predict_unified(input_data):
             "message": "Please provide text or upload a document"
         }, None

-    print("Input", input_data, type(input_data))
    # Use the unified processing function
    result = parse_vectorize_and_search_unified_sync(input_data)

@@ -444,7 +451,8 @@ def predict(text, file):
            "message": "Please provide either text or upload a file"
        }, None

-    print("Input", input_data)
+    print("Input to the prediction function", input_data)
+    print("Input type:", type(input))
    # Process using the unified function
    return predict_unified(input_data)

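The core of this commit is the json-first, ast.literal_eval-fallback parsing added to determine_input_type(). A minimal standalone sketch of that pattern, with a hypothetical helper name and sample payloads that are not taken from app.py:

import ast
import json

def extract_filedata_path(raw):
    """Return the 'path' from a stringified FileData payload, or None.

    Sketch of the json-first / ast.literal_eval-fallback logic added to
    determine_input_type(); the helper name and inputs are illustrative.
    """
    if not (raw.startswith('{') and 'gradio.FileData' in raw):
        return None
    try:
        try:
            file_data = json.loads(raw)        # double-quoted JSON payloads
        except json.JSONDecodeError:
            file_data = ast.literal_eval(raw)  # Python dict repr with single quotes
    except (ValueError, SyntaxError):
        return None
    if isinstance(file_data, dict) and 'path' in file_data:
        return file_data['path']
    return None

# Both serializations resolve to the same path:
print(extract_filedata_path('{"path": "/tmp/x.pdf", "meta": {"_type": "gradio.FileData"}}'))
print(extract_filedata_path("{'path': '/tmp/x.pdf', 'meta': {'_type': 'gradio.FileData'}}"))

Trying json.loads first covers payloads serialized with double quotes, while ast.literal_eval handles the single-quoted form that str(dict) produces. (One small oddity in the committed predict() hunk: the new debug line prints type(input), the builtin, rather than type(input_data).)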
asl_gloss.py
CHANGED
@@ -10,7 +10,6 @@ that preserves the spatial and grammatical structure of ASL.
 import os
 import sys
 import argparse
-import json
 from typing import Optional, Dict, Any
 from pathlib import Path

document_parsing.py
CHANGED
@@ -66,7 +66,61 @@ class DocumentParser:
             '.doc': 'application/msword',
             '.epub': 'application/epub+zip'
         }
-
+
+        mime_type = extension_map.get(extension, 'unknown')
+
+        # If no extension or unknown extension, try to detect by content
+        if mime_type == 'unknown':
+            mime_type = self._detect_mime_by_content(file_path)
+
+        return mime_type
+
+    def _detect_mime_by_content(self, file_path: Union[str, Path]) -> str:
+        """
+        Detect MIME type by reading file content.
+
+        Args:
+            file_path: Path to the file
+
+        Returns:
+            MIME type string
+        """
+        try:
+            with open(file_path, 'rb') as f:
+                # Read first 1024 bytes to detect file type
+                header = f.read(1024)
+
+            # PDF detection
+            if header.startswith(b'%PDF'):
+                return 'application/pdf'
+
+            # ZIP-based formats (DOCX, EPUB)
+            if header.startswith(b'PK\x03\x04'):
+                # Check if it's EPUB by looking for mimetype file
+                try:
+                    import zipfile
+                    with zipfile.ZipFile(file_path, 'r') as zf:
+                        if 'mimetype' in zf.namelist():
+                            with zf.open('mimetype') as mf:
+                                mimetype = mf.read().decode('utf-8').strip()
+                                if mimetype == 'application/epub+zip':
+                                    return 'application/epub+zip'
+                    # If not EPUB, assume DOCX
+                    return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
+                except:
+                    pass
+
+            # Plain text detection (try to decode as UTF-8)
+            try:
+                header.decode('utf-8')
+                return 'text/plain'
+            except UnicodeDecodeError:
+                pass
+
+        except Exception as e:
+            logger.warning(f"Error detecting MIME type by content: {e}")
+
+        return 'unknown'

     def extract_text(self, file_path: Union[str, Path]) -> Optional[str]:
         """