Spaces:

rushankg
/

intratalent-v2

Sleeping

App Files Files Community

rushankg committed on Oct 26, 2024

Commit

ea97af8

verified ·

1 Parent(s): 6e7206a

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -23

app.py CHANGED Viewed

@@ -3,9 +3,24 @@ import os
 import tempfile
 from pathlib import Path
 import time
-from typing import List, Dict
 import pandas as pd
 from streamlit.runtime.uploaded_file_manager import UploadedFile
 # Initialize Streamlit app
 st.set_page_config(
@@ -14,15 +29,90 @@ st.set_page_config(
     layout="wide"
 )
-def save_uploaded_file(uploaded_file) -> str:
-    """Save uploaded file to temporary directory and return path."""
     try:
-        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
-            tmp_file.write(uploaded_file.getvalue())
-            return tmp_file.name
     except Exception as e:
-        st.error(f"Error saving file: {e}")
-        return None
 def process_resumes(uploaded_files: List[UploadedFile]) -> Dict:
     """Process multiple resumes and return results."""
@@ -34,21 +124,16 @@ def process_resumes(uploaded_files: List[UploadedFile]) -> Dict:
             st.warning(f"Skipping {file.name}: Not a PDF file")
             continue
-        temp_path = save_uploaded_file(file)
-        if temp_path:
-            try:
-                name, projects = parse_resume(temp_path)
-                results[file.name] = {
-                    "name": name,
-                    "projects": projects
-                }
-                # Update progress
-                progress_bar.progress((idx + 1) / len(uploaded_files))
-            except Exception as e:
-                st.error(f"Error processing {file.name}: {e}")
-            finally:
-                # Clean up temporary file
-                os.unlink(temp_path)
     return results

 import tempfile
 from pathlib import Path
 import time
+from typing import List, Dict, Tuple
 import pandas as pd
 from streamlit.runtime.uploaded_file_manager import UploadedFile
+from anthropic import Anthropic
+import pymongo
+from dotenv import load_dotenv
+# Load environment variables (MONGO_URI and ANTHROPIC_API_KEY are read below) via python-dotenv
+load_dotenv()
+# Initialize MongoDB client at import time; os.getenv returns None if MONGO_URI is unset -- NOTE(review): confirm the intended fallback
+MONGO_URI = os.getenv('MONGO_URI')
+mongo_client = pymongo.MongoClient(MONGO_URI)
+db = mongo_client['intratalent']
+resume_collection = db['resumes']
+# Initialize Anthropic client; the API key is read from the ANTHROPIC_API_KEY environment variable
+anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
 # Initialize Streamlit app
 st.set_page_config(
     layout="wide"
 )
+def extract_info_with_claude(resume_content: bytes) -> str:
+    """Extract information from resume using Claude."""
+    # Create a temporary file to store the resume content
+    with tempfile.NamedTemporaryFile(mode='w+b', suffix='.pdf', delete=False) as temp_file:
+        temp_file.write(resume_content)
+        temp_file_path = temp_file.name
+    prompt = """
+    Extract the following information from the given resume:
+    1. Full Name
+    2. List of all experiences with their descriptions (copy exactly from resume)
+    Please format the output as follows:
+    Name: [Full Name]
+    Projects:
+    1. [Project Name]: [Project Description]
+    2. [Project Name]: [Project Description]
+    ...
+    Extract all experiences, including projects, leadership, work experience, research, etc.
+    """
     try:
+        message = anthropic.messages.create(
+            model="claude-3-haiku-20240307",
+            max_tokens=4096,
+            system="You are a helpful assistant that extracts information from resumes.",
+            messages=[{
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": prompt
+                    },
+                    {
+                        "type": "file",
+                        "source": temp_file_path
+                    }
+                ]
+            }]
+        )
+        extracted_info = message.content[0].text
+    except Exception as e:
+        extracted_info = f"An error occurred: {e}"
+    finally:
+        # Clean up the temporary file
+        os.unlink(temp_file_path)
+    return extracted_info
+def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
+    """Parse a resume file and return name and projects."""
+    try:
+        resume_content = uploaded_file.getvalue()
+        extracted_info = extract_info_with_claude(resume_content)
+        # Parse the extracted information
+        lines = extracted_info.split('\n')
+        name = lines[0].split(': ')[1] if len(lines) > 0 and ': ' in lines[0] else "Unknown"
+        projects = []
+        project_started = False
+        for line in lines:
+            if line.strip() == "Projects:":
+                project_started = True
+                continue
+            if project_started and line.strip():
+                project_parts = line.split(': ', 1)
+                if len(project_parts) == 2:
+                    project_name = project_parts[0].split('. ', 1)[-1]  # Remove the number
+                    project_description = project_parts[1]
+                    projects.append({"name": project_name, "description": project_description})
+        # Store in MongoDB
+        resume_data = {
+            "name": name,
+            "projects": projects,
+            "full_content": resume_content.decode('utf-8', errors='ignore')
+        }
+        resume_collection.insert_one(resume_data)
+        return name, projects
     except Exception as e:
+        st.error(f"Error processing resume: {e}")
+        return "Unknown", []
 def process_resumes(uploaded_files: List[UploadedFile]) -> Dict:
     """Process multiple resumes and return results."""
             st.warning(f"Skipping {file.name}: Not a PDF file")
             continue
+        try:
+            name, projects = parse_resume(file)
+            results[file.name] = {
+                "name": name,
+                "projects": projects
+            }
+            # Update progress
+            progress_bar.progress((idx + 1) / len(uploaded_files))
+        except Exception as e:
+            st.error(f"Error processing {file.name}: {e}")
     return results