Ayush Rai committed on
Commit
15e9798
·
1 Parent(s): 0d691e8

Initial commit with LFS-tracked PDF

Browse files
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ .env
4
+ /data/
5
+ chroma_db/
Dockerfile ADDED
FROM python:3.10-slim

# OS packages needed by PyMuPDF and common Python build steps.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential libgl1 git curl \
    && rm -rf /var/lib/apt/lists/*

# Run as a non-root user (expected by Hugging Face Spaces).
RUN useradd -m -u 1000 appuser
USER appuser
ENV PATH="/home/appuser/.local/bin:$PATH"
WORKDIR /home/appuser/app

# Install dependencies first so the layer cache survives source-code edits.
COPY --chown=appuser:appuser requirements.txt /home/appuser/app/requirements.txt
# Fix: one RUN layer instead of two (fewer layers, smaller image);
# --no-cache-dir keeps pip's wheel cache out of the image.
RUN python -m pip install --upgrade pip && \
    pip install --no-cache-dir -r /home/appuser/app/requirements.txt

# Copy the rest of the project.
COPY --chown=appuser:appuser . /home/appuser/app

# Persist the HF model cache and Chroma store on the /data volume.
ENV HF_HOME=/data/.huggingface
ENV PERSIST_DIR=/data/chroma_db
ENV RESUME_PATH=/home/appuser/app/media/resume/resume-ayush.pdf

EXPOSE 7860
# Serve the FastAPI app; the platform routes traffic to port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
agent_startup.py ADDED
"""Startup module: loads env, credentials, embeddings, vector DB, and LLM.

Importing this module has side effects by design: it seeds the Chroma
store from the resume PDF on first boot and exposes module-level
``embedding_model``, ``vectordb`` and ``llm`` for the rest of the app.
"""
import os

from dotenv import load_dotenv

load_dotenv()

# Optionally materialize the Google service-account JSON (kept as a secret
# env var) so Gemini can authenticate via GOOGLE_APPLICATION_CREDENTIALS.
sa_json = os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
if sa_json:
    sa_path = "/data/sa.json"
    # Fix: ensure the parent directory exists before writing — the original
    # open() crashed on a fresh container where /data was not yet created.
    os.makedirs(os.path.dirname(sa_path), exist_ok=True)
    if not os.path.exists(sa_path):
        with open(sa_path, "w", encoding="utf-8") as f:
            f.write(sa_json)
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = sa_path

# --- embeddings + vector DB + LLM ---
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

print("loading embedding")
# Small CPU-friendly sentence-transformer keeps cold-start memory low.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-MiniLM-L3-v2",
    model_kwargs={'device': "cpu"}
)

persist_dir = os.getenv("PERSIST_DIR", "/data/chroma_db")
os.makedirs(persist_dir, exist_ok=True)

print("loading vectordb")
vectordb = Chroma(
    persist_directory=persist_dir,
    embedding_function=embedding_model,
    collection_name="general_embeddings"
)

# Seed the vector store with the resume PDF on first boot only.
try:
    ids = vectordb.get().get("ids", [])
except Exception:
    # Best-effort probe: an unreadable store is treated as empty so the
    # resume gets (re)ingested below.
    ids = []
if not ids:
    resume_path = os.getenv("RESUME_PATH", "media/resume/resume-ayush.pdf")
    if os.path.exists(resume_path):
        loader = PyMuPDFLoader(resume_path)
        docs = loader.load()
        # Chunk sizes tuned for short resume sections; overlap preserves
        # context across chunk boundaries.
        splitter = RecursiveCharacterTextSplitter(chunk_size=580, chunk_overlap=60)
        split_docs = splitter.split_documents(docs)
        vectordb.add_documents(split_docs)

print("loading llm")
llm = ChatGoogleGenerativeAI(temperature=0.2, model=os.getenv("GEMINI_MODEL", "gemini-2.0-flash"))
app.py ADDED
"""FastAPI entry point for the portfolio chat agent."""
import os

from dotenv import load_dotenv
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware

from portfolio_agent import portfolio_agent

load_dotenv()

app = FastAPI()

# Open CORS so the portfolio frontend (served from any origin) can call us.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
def root():
    """Lightweight health-check endpoint."""
    return {"status": "Portfolio Agent API is running"}


@app.post("/chat")
async def chat(request: Request):
    """Answer a recruiter/HR question via the RAG portfolio agent."""
    payload = await request.json()
    question = payload.get("message", "")
    # Optional site-wide extra context injected through the environment.
    extra = os.getenv("extra_context")
    reply = portfolio_agent(question, extra)
    return {"reply": reply}
media/resume/resume-ayush.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74822c224e0dbbb1271533582f7738a4046ec42393e3660afaf25dc0a0bf45de
3
+ size 108292
portfolio_agent.py ADDED
from langchain_core.prompts import PromptTemplate
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

from agent_startup import vectordb, llm

# The prompt is static, so build it once at import time instead of on
# every request as the original did.
_PROMPT = PromptTemplate(
    template="""You are my intelligent assistant, representing me to recruiters and HR professionals visiting my portfolio.
Use the following resume information: {context} and extra details: {extra_context} to answer questions as if you are me.

Your goal is to provide clear, confident, and engaging responses that highlight my strengths, achievements, and suitability for exciting opportunities.
Be professional, personable, and persuasive. Where relevant, emphasize my unique skills, experience, and passion for growth.

Question from recruiter/HR: {input}

Your answer (as me):
""",
    input_variables=["context", "extra_context", "input"]
)

# Lazily-built retrieval chain, shared across all requests.
_rag_chain = None


def _get_chain():
    """Build the RAG chain on first use and cache it for later calls."""
    global _rag_chain
    if _rag_chain is None:
        combine_docs_chain = create_stuff_documents_chain(llm, _PROMPT)
        _rag_chain = create_retrieval_chain(
            retriever=vectordb.as_retriever(search_kwargs={"k": 5}),
            combine_docs_chain=combine_docs_chain,
        )
    return _rag_chain


def portfolio_agent(question: str, extra_context: str):
    """Answer *question* in first person, grounded in the resume vector DB.

    Args:
        question: The recruiter/HR question to answer.
        extra_context: Optional extra details to weave into the prompt;
            ``None`` is treated as empty (the original rendered the literal
            string "None" into the prompt).

    Returns:
        The LLM's answer string.
    """
    result = _get_chain().invoke({
        # Note: "context" is populated by the retriever inside the chain;
        # the empty-string value the original passed was overwritten and
        # has been dropped as dead input.
        "extra_context": extra_context or "",
        "input": question,
    })
    return result["answer"]
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-dotenv
4
+ langchain
5
+ langchain-core
6
+ langchain-chroma
7
+ langchain-community
8
+ langchain-huggingface
9
+ langchain-google-genai
10
+ PyMuPDF
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.10