stage 1 for ARS
- Ars/ai_functions.py +210 -0
- Ars/controllers.py +18 -53
- Ars/embedDoc.py +109 -0
- Ars/objects.py +138 -3
- Ars/repositories.py +2 -2
- Ars/routes.py +15 -0
Ars/ai_functions.py
CHANGED
@@ -0,0 +1,210 @@
+from pdfminer.high_level import extract_text_to_fp
+from io import BytesIO
+from objects import ai, ResumeData, AutomationRiskResult, RealWorldQuestion, SkillDepthResult, AICollabReadiness, BreakDownByDomainUpdate, FlaggedRiskAreasUpdate, BoostSuggestionsUpdate
+from embedDoc import search_pinecone_text
+from fastapi import UploadFile
+
+
+def extract_text_from_bytes(pdf_bytes: bytes) -> str:
+    """Extract plain text from an in-memory PDF using pdfminer."""
+    output_string = BytesIO()
+    with BytesIO(pdf_bytes) as input_stream:
+        extract_text_to_fp(input_stream, output_string)
+    return output_string.getvalue().decode()
+
+
+async def resume_analysis(upload_file: UploadFile) -> ResumeData:
+    contents = await upload_file.read()
+    resume = extract_text_from_bytes(pdf_bytes=contents)
+    if resume:
+        prompt = f"""
+        You are an AI resume parser. Read the resume below and extract the following structured information, where present:
+
+        1. Work experience details: job titles and descriptions (max 3).
+        2. List of technical skills: technologies, tools, platforms, etc. (max 4).
+        3. Soft and transferable skills:
+           - Leadership and collaboration
+           - Critical thinking and problem-solving
+           - Communication skills
+           - Cross-functional/interdisciplinary experience
+           - Initiative and adaptability
+        4. Career path details:
+           - Current or intended role/field
+           - Industry and sector context
+           - Career trajectory trends
+        5. Project experience (if any): project titles, descriptions, role (max 3).
+        6. Evidence of upskilling (optional): certifications, courses, learning projects, hackathons.
+
+        Return the extracted data as a JSON object that matches the provided schema.
+
+        Here is the resume:
+
+        {resume}
+        """
+        result = ai.chat(prompt=prompt, output_schema=ResumeData)
+        return result
+
+
+def calculate_automation_risk(resume: ResumeData):
+    def generate_real_world_data(resume: ResumeData):
+        # Ask the model for a semantic-search query, then run it against the
+        # future-of-jobs report data stored in Pinecone.
+        prompt = f"Generate a search query from the resume details below. I want to semantically search text containing data from future-of-jobs reports. Here is the resume data: {resume}"
+        result = ai.chat(prompt=prompt, output_schema=RealWorldQuestion)
+        search_result = search_pinecone_text(query_text=result.question)
+        return search_result
+
+    real_world_data = generate_real_world_data(resume=resume)
+    prompt = f"""
+    You are an Automation Risk Calculator. Read the resume data below and estimate how easily the user's skills and job titles could be automated. Real-world data is also provided; it may or may not help with your estimation.
+
+    Here is the resume:
+
+    ({resume})
+
+    Here is the data:
+
+    ({real_world_data})
+
+    Return just a number, MIN 0, MAX 100. Automation risk is inverted (lower risk = higher score).
+    """
+    result = ai.chat(prompt=prompt, output_schema=AutomationRiskResult)
+    return result
+
+
+def calculate_skill_depth(resume: ResumeData):
+    prompt = f"""
+    You are a Skill Depth Calculator. Read the resume data below and score it based on the number of years per skill, the seniority of roles, and the presence of certifications.
+
+    Here is the resume:
+
+    ({resume})
+
+    Return just a number, MIN 0, MAX 100.
+    """
+    result = ai.chat(prompt=prompt, output_schema=SkillDepthResult)
+    return result
+
+
+def calculate_Ai_collab_readiness(resume: ResumeData):
+    prompt = f"""
+    You are an AI Collaboration Readiness Calculator. Read the resume data below and score how ready the user is to use AI, not just compete with it, based on:
+    - Mention of AI tools (ChatGPT, Copilot, etc.)
+    - Stated openness to automation
+    - How they currently use AI tools in their work or studies
+
+    Here is the resume:
+
+    ({resume})
+
+    Return just a number, MIN 0, MAX 100.
+    """
+    # Parse into the AICollabReadiness schema (the draft reused SkillDepthResult here).
+    result = ai.chat(prompt=prompt, output_schema=AICollabReadiness)
+    return result
+
+
+def generate_domain_breakdown(resume: ResumeData):
+    prompt = f"""
+    Your one task is to generate a domain breakdown. Read the resume data below and classify skills/domains using a taxonomy (e.g., "Software Development", "Marketing", "Data Science"):
+    - **Breakdown by Domain EXAMPLE**:
+      - Technical: 65
+      - Creative: 80
+      - Strategy: 75
+      - Collaboration: 60
+
+    Here is the resume:
+
+    ({resume})
+
+    Return proper values: numbers, MIN 0, MAX 100.
+    """
+    result = ai.chat(prompt=prompt, output_schema=BreakDownByDomainUpdate)
+    return result
+
+
+def generate_flagged_risk_areas(resume: ResumeData):
+    prompt = f"""
+    You are a Resume Evaluation AI. Your task is to analyze the provided resume and identify potential risk areas based on the content. Focus on:
+
+    1. Missing recent certifications
+    2. Lack of AI tool usage
+    3. Overreliance on automatable tasks (e.g., data entry, spreadsheets)
+
+    For each risk area, return a clear explanation (1–2 sentences MAX).
+
+    **Example Output Format**:
+    - "risk_areas: Your spreadsheet skills are highly automatable."
+    - "risk_areas: Your portfolio lacks AI-integrated work."
+
+    Here is the resume to evaluate:
+
+    ({resume})
+    """
+    result = ai.chat(prompt=prompt, output_schema=FlaggedRiskAreasUpdate)
+    return result
+
+
+def generate_boost_suggestion(resume: ResumeData):
+    prompt = f"""
+    You are a Resume Enhancement AI. Based on the resume provided below, generate clear, AI-assisted improvement suggestions to help the user strengthen weak areas.
+
+    Focus on identifying skill or experience gaps and suggest practical next steps, such as:
+    - Learning relevant AI tools
+    - Enrolling in advanced courses
+    - Expanding project depth
+    - Gaining certifications
+
+    **Examples of Suggestions**:
+    - "Consider learning ChatGPT to enhance your coding workflow."
+    - "Deepen your skill in Python with an advanced online course."
+    - "Add recent certifications to show ongoing learning."
+    - "Incorporate AI tools into your portfolio projects."
+
+    **Rules**:
+    - Be specific and relevant to the resume.
+    - Each suggestion should be 1–2 sentences.
+    - Return 2–4 actionable suggestions.
+
+    Here is the resume to analyze:
+
+    ({resume})
+    """
+    result = ai.chat(prompt=prompt, output_schema=BoostSuggestionsUpdate)
+    return result
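
A quick way to sanity-check the parser end to end is a small local script. This is a minimal sketch, not part of the commit: the file name is a placeholder and it assumes a configured GEMINI_API_KEY.

# Hypothetical smoke test for resume_analysis (not in the commit).
import asyncio
from io import BytesIO
from fastapi import UploadFile
from ai_functions import resume_analysis

async def main():
    with open("sample.pdf", "rb") as f:  # placeholder PDF
        upload = UploadFile(file=BytesIO(f.read()), filename="sample.pdf")
    data = await resume_analysis(upload)
    print(data.listOfExplicitTechnicalSkills)

asyncio.run(main())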
Ars/controllers.py
CHANGED
@@ -1,53 +1,18 @@
-# Step 2: Extract skills, domains, and experience info
-extracted_data = extract_resume_insights(resume_text)
-
-# Step 3: Score sub-components
-automation_risk = calculate_automation_risk(extracted_data)
-skill_depth = calculate_skill_depth(extracted_data)
-ai_collab_readiness = calculate_ai_collab_readiness(extracted_data)
-
-# Normalize sub-scores to 0–100
-automation_risk = normalize_score(1 - automation_risk)
-skill_depth = normalize_score(skill_depth)
-ai_collab_readiness = normalize_score(ai_collab_readiness)
-
-# Step 4: Final score
-resilience_score = (automation_risk * 0.5 +
-                    skill_depth * 0.3 +
-                    ai_collab_readiness * 0.2)
-
-# Step 5: Breakdown, flags, suggestions
-breakdown = generate_domain_breakdown(extracted_data)
-risk_flags = identify_risk_flags(extracted_data)
-suggestions = generate_boost_suggestions(risk_flags, extracted_data)
-
-# Step 6: Package and cache
-result = {
-    "resilience_score": round(resilience_score, 2),
-    "breakdown_by_domain": breakdown,
-    "flagged_risk_areas": risk_flags,
-    "boost_suggestions": suggestions,
-}
-r.setex(name=resume_text, value=result, time=3600)
-db_controller.save_analysis_result(resume_text, result)
-
-return result
+from core import r
+from repositories import create_boost_suggestions, create_breakdown_by_domain, create_flagged_risk_areas, create_user_resilience
+from fastapi import UploadFile
+from ai_functions import resume_analysis, calculate_automation_risk, calculate_Ai_collab_readiness, calculate_skill_depth, generate_boost_suggestion, generate_domain_breakdown, generate_flagged_risk_areas
+
+async def resilience_analysis(file: UploadFile):
+    resume = await resume_analysis(file)
+    risk = calculate_automation_risk(resume)
+    skill_depth = calculate_skill_depth(resume)
+    ai_readiness = calculate_Ai_collab_readiness(resume)
+    # Weighted blend: inverted automation risk 50%, skill depth 30%, AI readiness 20%.
+    ResilienceScore = (1 - (risk.result / 100)) * 0.5 + (skill_depth.result / 100) * 0.3 + (ai_readiness.result / 100) * 0.2
+    flagged_risk = generate_flagged_risk_areas(resume=resume)
+    boost_suggestion = generate_boost_suggestion(resume=resume)
+    domain_breakdown = generate_domain_breakdown(resume=resume)
+    print("flagged_risk", flagged_risk)
+    print("boost_suggestion", boost_suggestion)
+    print("domain_breakdown", domain_breakdown)
+    return {"overall score": ResilienceScore, "flagged Risk": flagged_risk, "boost suggestion": boost_suggestion, "domain breakdown": domain_breakdown, "resume": resume}
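
As a worked example of the weighting (illustrative numbers, not from the code): with risk.result = 40, skill_depth.result = 70, and ai_readiness.result = 50, the score is (1 - 0.40) * 0.5 + 0.70 * 0.3 + 0.50 * 0.2 = 0.30 + 0.21 + 0.10 = 0.61.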
Ars/embedDoc.py
ADDED
@@ -0,0 +1,109 @@
+from pinecone import Pinecone
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+import os
+from dotenv import load_dotenv
+import time
+from typing import List, Dict
+
+load_dotenv()
+
+def chunk_list(lst, chunk_size):
+    """Yield successive chunks of size `chunk_size` from a list."""
+    for i in range(0, len(lst), chunk_size):
+        yield lst[i:i + chunk_size]
+
+def upsert_text_with_chunks(
+    text: str,
+    *,
+    index_host: str = "https://resume-42eo81u.svc.aped-4627-b74a.pinecone.io",
+    namespace: str = "default",
+    chunk_size: int = 1000,
+    chunk_overlap: int = 200
+) -> None:
+    """
+    Split a long text into overlapping chunks and upsert them directly into a Pinecone index
+    that has integrated embedding enabled.
+
+    Args:
+        text (str): The full text document to embed.
+        index_host (str): Pinecone index host URL.
+        namespace (str): Pinecone namespace to upsert into.
+        chunk_size (int): Max characters per chunk.
+        chunk_overlap (int): Overlap in characters between chunks.
+    """
+    api_key = os.getenv("PINECONE_API_KEY")
+    if not api_key:
+        raise EnvironmentError("Set PINECONE_API_KEY in environment")
+
+    pc = Pinecone(api_key=api_key)
+    index = pc.Index(host=index_host)
+
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+        length_function=len,
+        is_separator_regex=False
+    )
+    chunks = splitter.split_text(text)
+    if not chunks:
+        print("No chunks generated — exiting.")
+        return
+
+    records = [
+        {"_id": f"chunk-{i}", "text": chunk}
+        for i, chunk in enumerate(chunks)
+    ]
+
+    for batch in chunk_list(records, 50):
+        print("Inserting")
+        index.upsert_records(records=batch, namespace=namespace)
+        # Pause between batches to stay under embedding rate limits.
+        time.sleep(60)
+        print("resting")
+    print(f"✅ Upserted {len(records)} valid chunks (out of {len(chunks)}) into namespace '{namespace}'.")
+
+
+def search_pinecone_text(
+    query_text: str,
+    index_host: str = "https://resume-42eo81u.svc.aped-4627-b74a.pinecone.io",
+    namespace: str = "default",
+    top_k: int = 2,
+    fields: List[str] = ["category", "text"]
+) -> List[Dict]:
+    """
+    Search a Pinecone index using a text query.
+
+    Args:
+        query_text (str): The input text to search for.
+        index_host (str): The specific index host URL.
+        namespace (str): The namespace to search within.
+        top_k (int): Number of top results to return.
+        fields (List[str]): Metadata fields to include in the response.
+
+    Returns:
+        List[Dict]: The top matching results.
+    """
+    api_key = os.getenv("PINECONE_API_KEY")
+    pc = Pinecone(api_key=api_key)
+    index = pc.Index(host=index_host)
+
+    results = index.search(
+        namespace=namespace,
+        query={"inputs": {"text": query_text}, "top_k": top_k},
+        fields=fields
+    )
+    hits = results.result['hits']
+    result = []
+    for hit in hits:
+        text = hit['fields']['text']
+        score = hit['_score']
+        result.append({"text": text, "score": score})
+    return result
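
For reference, a minimal ingestion-and-query sketch, assuming the default index host above, integrated text embeddings on the index, and PINECONE_API_KEY exported; the corpus file name is a placeholder.

# Hypothetical one-off ingestion + query (not in the commit).
from embedDoc import upsert_text_with_chunks, search_pinecone_text

with open("future_of_jobs_report.txt") as f:  # placeholder corpus
    upsert_text_with_chunks(f.read(), namespace="default")

for match in search_pinecone_text(query_text="automation risk for data entry roles", top_k=2):
    print(round(match["score"], 3), match["text"][:80])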
Ars/objects.py
CHANGED
@@ -1,7 +1,69 @@
 from datetime import datetime
-from pydantic import model_validator, BaseModel
-from typing import List, Optional, Union
+from pydantic import Field, model_validator, BaseModel
+from typing import List, Optional, Union, Type, TypeVar
 from bson import ObjectId
+import openai
+from google import genai
+from google.genai import types
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+T = TypeVar("T", bound=BaseModel)
+
+class AIWrapper:
+    def __init__(self, provider='openai'):
+        self.provider = provider.lower()
+
+        if self.provider == 'openai':
+            openai.api_key = OPENAI_API_KEY
+        elif self.provider == 'gemini':
+            self.gemini_client = genai.Client(
+                api_key=GOOGLE_API_KEY,
+                http_options=types.HttpOptions(api_version='v1alpha')
+            )
+        else:
+            raise ValueError("Provider must be 'openai' or 'gemini'")
+
+    def chat(self, prompt: str, output_schema: Type[T]) -> T:
+        """
+        Generate a response from the AI provider and return it parsed into the specified schema.
+
+        Args:
+            prompt (str): The input prompt.
+            output_schema (Type[T]): A Pydantic model representing the output schema.
+
+        Returns:
+            T: Parsed AI response as an instance of the output_schema.
+        """
+        if self.provider == 'openai':
+            return self._openai_chat(prompt)
+        elif self.provider == 'gemini':
+            return self._gemini_chat(prompt, output_schema=output_schema)
+
+    def _openai_chat(self, prompt: str) -> str:
+        # Note: returns raw text; structured-output parsing is only wired up for Gemini.
+        response = openai.ChatCompletion.create(
+            model="gpt-4",
+            messages=[
+                {"role": "user", "content": prompt}
+            ]
+        )
+        return response['choices'][0]['message']['content']
+
+    def _gemini_chat(self, prompt: str, output_schema: Type[T]) -> T:
+        response = self.gemini_client.models.generate_content(
+            model='gemini-2.0-flash-001',
+            contents=prompt,
+            config=types.GenerateContentConfig(
+                response_mime_type='application/json',
+                response_schema=output_schema,
+            ),
+        )
+        return response.parsed
+
+
+ai = AIWrapper(provider='gemini')
 
 class UserResilienceScoreCreate(BaseModel):
 
@@ -30,7 +92,6 @@ class BreakDownByDomainCreate(BaseModel):
 
 class BreakDownByDomainUpdate(BaseModel):
-
 Technical:Optional[float]=None
 Creative:Optional[float]=None
 Strategy:Optional[float]=None
 
@@ -50,7 +111,81 @@ class BoostSuggestionsCreate(BaseModel):
 
 boost_suggestions:List[str]
 
+
+class ProjectExperienceDetails(BaseModel):
+    ProjectTitles: str = Field(..., description="The title(s) of the project(s) involved in.")
+    descriptions: str = Field(..., description="Detailed description of the project and what it entailed.")
+    RoleInTheProject: str = Field(..., description="The specific role played within the project.")
+
+class WorkExperienceDetails(BaseModel):
+    JobTitles: str = Field(..., description="The job titles held in past employment.")
+    JobDescriptions: str = Field(..., description="Summary of responsibilities and duties in these jobs.")
+
+class SoftTransferableSkills(BaseModel):
+    LeadershipAndCollaborationIndicators: str = Field(..., description="Evidence or examples demonstrating leadership and teamwork.")
+    CriticalThinkingOrProblemSolvingVerb: str = Field(..., description="Examples of critical thinking or problem solving performed.")
+    CommunicationSkills: Optional[str] = Field(None, description="Description of communication skills and contexts.")
+    CrossFunctionalOrInterdisciplinaryExperience: str = Field(..., description="Experience working across teams or disciplines.")
+    InitiativeAndAdaptabilityLanguage: str = Field(..., description="Examples of taking initiative and adapting to change.")
+
+class CareerPathInformation(BaseModel):
+    CurrentOrIntendedRoleOrField: str = Field(..., description="Current or intended professional role or field of work.")
+    IndustryAndSectorContext: str = Field(..., description="Context about the industry and sector related to the career path.")
+    CareerTrajectoryTrends: str = Field(..., description="Observed or expected trends in the career trajectory or sector.")
+
+class EvidenceOfUpskillingAndLifelongLearning(BaseModel):
+    CertificationsCoursesOrBootcampsListed: Optional[List[str]] = Field(None, description="List of certifications, courses, or bootcamps completed.")
+    SelfInitiatedLearningProjectsOrNonDegreeEducationalAchievements: Optional[List[str]] = Field(None, description="List of personal projects or non-degree achievements.")
+    ParticipationInHackathonsClubsOrProfessionalCommunities: Optional[List[str]] = Field(None, description="Involvement in hackathons, clubs, or professional groups.")
+
+class AIRelatedKeywords(BaseModel):
+    AiToolsAndTechnologies: Optional[List[str]] = Field(
+        None,
+        description="List of AI tools and technologies mentioned in the resume, e.g., ChatGPT, TensorFlow."
+    )
+    conceptsAndTechniques: Optional[List[str]] = Field(
+        None,
+        description="AI concepts or techniques like NLP, computer vision, or reinforcement learning."
+    )
+    aiIntegratedProjectsMentioned: Optional[List[str]] = Field(
+        None,
+        description="Names or descriptions of projects where AI was applied."
+    )
+    usageContextDescriptions: Optional[List[str]] = Field(
+        None,
+        description="Sentences or phrases describing how AI was used in projects or tasks."
+    )
+
+class ResumeData(BaseModel):
+    workExperienceDetails: Optional[List[WorkExperienceDetails]] = None
+    listOfExplicitTechnicalSkills: Optional[List[str]] = None
+    softTransferableSkills: List[SoftTransferableSkills]
+    projectExperienceDetails: Optional[List[ProjectExperienceDetails]] = None
+    careerPathInformation: CareerPathInformation
+    evidenceOfUpskillingAndLifelongLearning: Optional[EvidenceOfUpskillingAndLifelongLearning] = None
+    aiRelatedKeywords: AIRelatedKeywords
+
+class RealWorldQuestion(BaseModel):
+    question: str
+
+class AutomationRiskResult(BaseModel):
+    result: int = Field(..., description="The result of an automation risk estimation done using real-world data and the user's resume data.")
+
+class SkillDepthResult(BaseModel):
+    result: int = Field(..., description="Score based on number of years per skill, seniority of roles, certification presence.")
+
+class AICollabReadiness(BaseModel):
+    result: int = Field(..., description="Score based on how ready they are to use AI, not just compete with it.")
+
 
 class BoostSuggestionsUpdate(BaseModel):
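
To see what AIWrapper.chat returns, here is a minimal round-trip sketch with a toy schema; Greeting is hypothetical and a configured GEMINI_API_KEY is assumed.

# Hypothetical round trip through AIWrapper (not in the commit).
from pydantic import BaseModel
from objects import ai

class Greeting(BaseModel):
    message: str

g = ai.chat(prompt="Reply with a one-line greeting as JSON.", output_schema=Greeting)
print(type(g).__name__, "-", g.message)  # Greeting - <parsed text>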
Ars/repositories.py
CHANGED
@@ -1,9 +1,9 @@
-from 
+from core import db
 from bson import ObjectId
 from fastapi import HTTPException
 from typing import Optional, List
 from motor.motor_asyncio import AsyncIOMotorDatabase
-from 
+from objects import UserResilienceScoreCreate, UserResilienceScoreOut, BreakDownByDomainCreate, BreakDownByDomainOut, FlaggedRiskAreasCreate, FlaggedRiskAreasOut, BoostSuggestionsCreate, BoostSuggestionsOut, BoostSuggestionsUpdate, UserResilienceScoreUpdate, FlaggedRiskAreasUpdate, BreakDownByDomainUpdate
 
 
 async def create_user_resilience( data: UserResilienceScoreCreate) -> UserResilienceScoreOut:
Ars/routes.py
CHANGED
@@ -0,0 +1,15 @@
+import base64
+from fastapi import FastAPI, File, UploadFile, HTTPException
+from controllers import resilience_analysis
+from embedDoc import upsert_text_with_chunks, search_pinecone_text
+
+ARS = FastAPI()
+
+@ARS.post("/risk-analysis")
+async def perform_risk_analysis(file: UploadFile = File(...)):
+    if file.content_type != "application/pdf":
+        # Raise (not return) so FastAPI actually sends a 400 response.
+        raise HTTPException(status_code=400, detail={"error": "File must be a PDF."})
+    ResilienceScore = await resilience_analysis(file=file)
+    return ResilienceScore
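
To exercise the endpoint, a minimal client sketch; it assumes the app is served with `uvicorn routes:ARS --port 8000` and that a resume.pdf exists locally (both assumptions, not part of the commit).

# Hypothetical client call against the /risk-analysis route.
import requests

with open("resume.pdf", "rb") as f:  # placeholder PDF
    resp = requests.post(
        "http://localhost:8000/risk-analysis",
        files={"file": ("resume.pdf", f, "application/pdf")},
    )
print(resp.status_code, resp.json().get("overall score"))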