Spaces:

billyxx
/

Sprouts_Assignment

Sleeping

App Files Files Community

billyxx committed 18 days ago

Commit

31aa939

verified ·

1 Parent(s): 272e246

Upload app.py

Browse files

Files changed (1) hide show

app.py +41 -40

app.py CHANGED Viewed

@@ -12,59 +12,60 @@ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 def process_resumes(job_description, uploaded_files):
     if not job_description.strip():
         return "Please provide a job description.", None
     resume_texts = []
     for uploaded_file in uploaded_files:
-        # If the uploaded_file is a file-like object (has 'read' method)
-        if hasattr(uploaded_file, "read"):
-            # Reset file pointer just in case
             uploaded_file.seek(0)
-            content = uploaded_file.read()
-            # Get filename attribute, fallback if not available
-            filename = getattr(uploaded_file, "name", "unknown")
-            # Save the file to disk if you want or just process in-memory
-            # For example, save to UPLOAD_FOLDER
-            filepath = os.path.join(UPLOAD_FOLDER, os.path.basename(filename))
-            with open(filepath, "wb") as f:
-                f.write(content)
-        else:
-            # uploaded_file is probably a NamedString (str-like)
-            # Gradio provides the filename differently in this case,
-            # so you might have to assign a default or get from UI
-            content = uploaded_file
-            filepath = None
-            filename = "unknown"
-        # Process content depending on extension
-        if filename.endswith(".txt") or (filepath and filepath.endswith(".txt")):
-            text = content.decode("utf-8") if isinstance(content, bytes) else content
-        elif filename.endswith(".pdf") or (filepath and filepath.endswith(".pdf")):
-            # If saved to file, open from file
-            if filepath:
-                import pdfplumber
-                with pdfplumber.open(filepath) as pdf:
-                    pages = [page.extract_text() for page in pdf.pages if page.extract_text()]
-                    text = "\n".join(pages)
-            else:
-                # No file saved, cannot process PDF bytes easily here
-                return "Please upload PDF files via file upload.", None
-        elif filename.endswith(".docx") or (filepath and filepath.endswith(".docx")):
-            if filepath:
-                from docx import Document
-                doc = Document(filepath)
-                text = "\n".join([p.text for p in doc.paragraphs])
             else:
-                return "Please upload DOCX files via file upload.", None
         else:
             return f"Unsupported file format: {filename}", None
         resume_texts.append((filename, text))
-    # Now call rank_resumes etc.
     results = rank_resumes(job_description, resume_texts)
-    # Generate summaries
     for candidate in results:
         candidate["summary"] = summarize_resume_flan(candidate["text"], job_description)

 def process_resumes(job_description, uploaded_files):
     if not job_description.strip():
         return "Please provide a job description.", None
+    if not uploaded_files:
+        return "Please upload at least one resume file.", None
     resume_texts = []
     for uploaded_file in uploaded_files:
+        filename = getattr(uploaded_file, "name", None)
+        if filename is None:
+            return "One of the uploaded files is missing a filename. Please upload files, not text.", None
+        # Reset file pointer
+        if hasattr(uploaded_file, "seek"):
             uploaded_file.seek(0)
+        # Process based on extension
+        ext = filename.lower().split(".")[-1]
+        if ext == "txt":
+            # Read text directly
+            if hasattr(uploaded_file, "read"):
+                content = uploaded_file.read()
+                # bytes? decode
+                text = content.decode("utf-8") if isinstance(content, bytes) else content
             else:
+                return f"Unexpected content for {filename}", None
+        elif ext == "pdf":
+            # Save temporarily to disk to use pdfplumber (which needs a file path)
+            temp_path = os.path.join(UPLOAD_FOLDER, filename)
+            with open(temp_path, "wb") as f:
+                f.write(uploaded_file.read())
+            import pdfplumber
+            with pdfplumber.open(temp_path) as pdf:
+                pages = [page.extract_text() for page in pdf.pages if page.extract_text()]
+                text = "\n".join(pages)
+        elif ext == "docx":
+            # Save temporarily to disk for python-docx
+            temp_path = os.path.join(UPLOAD_FOLDER, filename)
+            with open(temp_path, "wb") as f:
+                f.write(uploaded_file.read())
+            from docx import Document
+            doc = Document(temp_path)
+            text = "\n".join([p.text for p in doc.paragraphs])
         else:
             return f"Unsupported file format: {filename}", None
         resume_texts.append((filename, text))
+    # Rank resumes and generate summaries
     results = rank_resumes(job_description, resume_texts)
     for candidate in results:
         candidate["summary"] = summarize_resume_flan(candidate["text"], job_description)