refactor process_input function for clearer file type detection and data processing
app.py (changed)
@@ -162,63 +162,42 @@ def cleanup_temp_video(file_path):
         print(f"Error cleaning up file: {e}")
 
 
-def process_input(input_data):
-    """
+def determine_input_type(input_data):
+    """
+    Determine the type of input data and return a standardized format.
+    Returns: (input_type, processed_data) where input_type is 'text', 'file_path', or 'file_object'
+    """
     if isinstance(input_data, str):
         # Check if it's a file path (contains file extension)
         if any(ext in input_data.lower() for ext in ['.pdf', '.txt', '.docx', '.doc', '.epub']):
-
-            try:
-                print(f"Processing file path: {input_data}")
-                from document_parsing import DocumentParser
-                parser = DocumentParser()
-                extracted_text = parser.extract_text(input_data)
-                if extracted_text:
-                    print(f"Extracted {len(extracted_text)} characters from file")
-                    # Convert the extracted text to ASL gloss
-                    gloss = asl_converter.asl_converter.convert_text(extracted_text)
-                    print(f"Converted gloss: {gloss[:100]}...")
-                    return gloss
-                else:
-                    print("No text extracted from file")
-                    return None
-            except Exception as e:
-                print(f"Error processing file path: {e}")
-                return None
+            return 'file_path', input_data
         else:
-
-            return input_data.strip()
+            return 'text', input_data.strip()
     elif isinstance(input_data, dict) and 'path' in input_data:
         # This is a gradio.FileData object from API calls
-        try:
-            print(f"Processing API file: {input_data['path']}")
-
-            # Read the file content from the blob path
-            with open(input_data['path'], 'rb') as f:
-                file_content = f.read()
-
-            # Check if it's a text file or binary document
-            if input_data.get('mime_type') == 'text/plain' or input_data['path'].endswith('.txt'):
-                # Text file - decode as text
-                text_content = file_content.decode('utf-8')
-                print(f"Extracted {len(text_content)} characters from text file")
-                # Convert text to ASL gloss
-                gloss = asl_converter.asl_converter.convert_text(text_content)
-            else:
-                # Binary document - use document converter
-                gloss = asl_converter.convert_document(input_data['path'])
-
-            print(f"Converted gloss: {gloss[:100]}...")  # Show first 100 chars
-            return gloss
-        except Exception as e:
-            print(f"Error processing API file: {e}")
-            return None
+        return 'file_path', input_data['path']
     elif hasattr(input_data, 'name'):
-        #
+        # This is a regular file object
+        return 'file_path', input_data.name
+    else:
+        return 'unknown', None
+
+
+def process_input(input_data):
+    """
+    Extract text content from various input types.
+    Returns the text content ready for ASL conversion.
+    """
+    input_type, processed_data = determine_input_type(input_data)
+
+    if input_type == 'text':
+        return processed_data
+    elif input_type == 'file_path':
         try:
-            print(f"Processing file: {
-
-
+            print(f"Processing file: {processed_data}")
+            # Use document converter for all file types
+            gloss = asl_converter.convert_document(processed_data)
+            print(f"Converted gloss: {gloss[:100]}...")
             return gloss
         except Exception as e:
             print(f"Error processing file: {e}")
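
For reference, the new helper can be exercised on its own. Below is a minimal standalone sketch: determine_input_type is copied verbatim from the hunk above, while the sample inputs (the file names and the FileData-style dict) are invented purely for illustration.

from types import SimpleNamespace


def determine_input_type(input_data):
    """
    Determine the type of input data and return a standardized format.
    Returns: (input_type, processed_data) where input_type is 'text', 'file_path', or 'file_object'
    """
    if isinstance(input_data, str):
        # Check if it's a file path (contains file extension)
        if any(ext in input_data.lower() for ext in ['.pdf', '.txt', '.docx', '.doc', '.epub']):
            return 'file_path', input_data
        else:
            return 'text', input_data.strip()
    elif isinstance(input_data, dict) and 'path' in input_data:
        # This is a gradio.FileData object from API calls
        return 'file_path', input_data['path']
    elif hasattr(input_data, 'name'):
        # This is a regular file object
        return 'file_path', input_data.name
    else:
        return 'unknown', None


# Illustrative inputs only; the paths and names below are invented.
print(determine_input_type("  hello world  "))                    # ('text', 'hello world')
print(determine_input_type("docs/story.pdf"))                     # ('file_path', 'docs/story.pdf')
print(determine_input_type({'path': '/tmp/gradio/blob.txt'}))     # ('file_path', '/tmp/gradio/blob.txt')
print(determine_input_type(SimpleNamespace(name='upload.docx')))  # ('file_path', 'upload.docx')
print(determine_input_type(12345))                                # ('unknown', None)

Note that the docstring mentions a 'file_object' type, but every file-like branch currently normalizes to 'file_path', so 'file_object' is never actually returned.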
@@ -228,6 +207,7 @@ def process_input(input_data):
         return None
 
 
+
 async def parse_vectorize_and_search_unified(input_data):
     """
     Unified function that handles both text and file inputs
@@ -411,21 +391,8 @@ def predict(text, file):
         # Use text input
         input_data = text.strip()
     elif file is not None:
-        #
-
-            # This is a gradio.FileData object from API calls
-            print(f"Processing API file: {file}")
-            input_data = file
-        elif hasattr(file, 'name'):
-            # This is a regular file object
-            print(f"Processing regular file: {file.name}")
-            input_data = file
-        else:
-            print(f"Unknown file type: {type(file)}")
-            return {
-                "status": "error",
-                "message": "Unsupported file format"
-            }, None
+        # Use file input - let the centralized processor handle the type
+        input_data = file
     else:
         # No input provided
         return {
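
The predict() change follows directly from the centralized detection: any non-text input is handed to process_input unchanged. The rough sketch below continues the one above (it reuses determine_input_type from that sketch); asl_converter is stubbed because that module is not part of this diff, and the trailing return None is an assumption based on the later context line in process_input.

from types import SimpleNamespace

# Stand-in for the real converter module, which this diff does not show.
asl_converter = SimpleNamespace(convert_document=lambda path: f"GLOSS FOR {path}")


def process_input(input_data):
    """Extract text content from various input types (as in the hunk above)."""
    input_type, processed_data = determine_input_type(input_data)
    if input_type == 'text':
        return processed_data
    elif input_type == 'file_path':
        try:
            print(f"Processing file: {processed_data}")
            gloss = asl_converter.convert_document(processed_data)
            return gloss
        except Exception as e:
            print(f"Error processing file: {e}")
    return None  # assumed fallback; the second hunk shows process_input ending in return None


# What predict() now does for any uploaded file, whether it arrives as a
# FileData-style dict from an API call or as a regular file object:
for file in ({'path': '/tmp/gradio/blob.pdf'}, SimpleNamespace(name='upload.docx')):
    input_data = file                 # no per-type branching in predict() anymore
    print(process_input(input_data))  # type detection happens in determine_input_type

One behavioral difference worth noting: the old branch returned an "Unsupported file format" error directly from predict(), whereas unrecognized inputs are now passed through to process_input and classified there as 'unknown'.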