Ayush Rai committed · 15e9798
Parent(s): 0d691e8
Initial commit with LFS-tracked PDF
Files changed:
- .gitignore +5 -0
- Dockerfile +29 -0
- agent_startup.py +52 -0
- app.py +31 -0
- media/resume/resume-ayush.pdf +3 -0
- portfolio_agent.py +33 -0
- requirements.txt +10 -0
- runtime.txt +1 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
+__pycache__/
+*.pyc
+.env
+/data/
+chroma_db/
Dockerfile
ADDED
@@ -0,0 +1,29 @@
+FROM python:3.10-slim
+
+# install OS deps needed by PyMuPDF / some packages
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential libgl1 git curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# create a non-root user
+RUN useradd -m -u 1000 appuser
+USER appuser
+ENV PATH="/home/appuser/.local/bin:$PATH"
+WORKDIR /home/appuser/app
+
+# copy requirements and install
+COPY --chown=appuser:appuser requirements.txt /home/appuser/app/requirements.txt
+RUN python -m pip install --upgrade pip
+RUN pip install --no-cache-dir -r /home/appuser/app/requirements.txt
+
+# copy project files
+COPY --chown=appuser:appuser . /home/appuser/app
+
+# ensure HF cache + persistence go to /data
+ENV HF_HOME=/data/.huggingface
+ENV PERSIST_DIR=/data/chroma_db
+ENV RESUME_PATH=/home/appuser/app/media/resume/resume-ayush.pdf
+
+EXPOSE 7860
+# use uvicorn to serve FastAPI (HF will call this)
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
agent_startup.py
ADDED
@@ -0,0 +1,52 @@
+import os
+from dotenv import load_dotenv
+load_dotenv()
+
+# Optionally write Google SA JSON (secret) to /data/sa.json for Gemini auth
+sa_json = os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
+if sa_json:
+    sa_path = "/data/sa.json"
+    if not os.path.exists(sa_path):
+        with open(sa_path, "w", encoding="utf-8") as f:
+            f.write(sa_json)
+    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = sa_path
+
+# --- embeddings + vectordb + llm ---
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_chroma import Chroma
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+print("loading embedding")
+embedding_model = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/paraphrase-MiniLM-L3-v2",
+    model_kwargs={'device': "cpu"}
+)
+
+persist_dir = os.getenv("PERSIST_DIR", "/data/chroma_db")
+os.makedirs(persist_dir, exist_ok=True)
+
+print("loading vectordb")
+vectordb = Chroma(
+    persist_directory=persist_dir,
+    embedding_function=embedding_model,
+    collection_name="general_embeddings"
+)
+
+# Load resume into DB if empty
+try:
+    ids = vectordb.get().get("ids", [])
+except Exception:
+    ids = []
+if len(ids) == 0:
+    resume_path = os.getenv("RESUME_PATH", "media/resume/resume-ayush.pdf")
+    if os.path.exists(resume_path):
+        loader = PyMuPDFLoader(resume_path)
+        docs = loader.load()
+        splitter = RecursiveCharacterTextSplitter(chunk_size=580, chunk_overlap=60)
+        split_docs = splitter.split_documents(docs)
+        vectordb.add_documents(split_docs)
+
+print("loading llm")
+llm = ChatGoogleGenerativeAI(temperature=0.2, model=os.getenv("GEMINI_MODEL", "gemini-2.0-flash"))
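Not part of the commit, but a useful sanity check for the ingestion step above — a minimal sketch, assuming the Space has started and the collection was populated; the query string is illustrative:

# Smoke test (hypothetical): confirm resume chunks exist in "general_embeddings".
from agent_startup import vectordb

hits = vectordb.similarity_search("work experience", k=3)
for doc in hits:
    # PyMuPDF-loaded documents carry the source page number under "page" metadata
    print(doc.metadata.get("page"), doc.page_content[:80])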
app.py
ADDED
@@ -0,0 +1,31 @@
+import os
+from fastapi import FastAPI, Request
+from fastapi.middleware.cors import CORSMiddleware
+from dotenv import load_dotenv
+from portfolio_agent import portfolio_agent
+
+load_dotenv()
+
+app = FastAPI()
+
+# Allow frontend access
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.get("/")
+def root():
+    return {"status": "Portfolio Agent API is running"}
+
+@app.post("/chat")
+async def chat(request: Request):
+    data = await request.json()
+    message = data.get("message", "")
+    extra_context = os.getenv("extra_context")
+
+    answer = portfolio_agent(message, extra_context)
+    return {"reply": answer}
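For reference, a minimal sketch of how a frontend might call the /chat route — the URL is a placeholder for the deployed Space, and requests is an assumed extra dependency (it is not in requirements.txt). Note that extra_context is read from the server's environment, not from the request body:

# Hypothetical client call: POST a JSON body with a "message" key,
# then read the generated answer from the "reply" field.
import requests

resp = requests.post(
    "http://localhost:7860/chat",  # placeholder URL; substitute the Space's endpoint
    json={"message": "What is your experience with FastAPI?"},
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["reply"])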
media/resume/resume-ayush.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74822c224e0dbbb1271533582f7738a4046ec42393e3660afaf25dc0a0bf45de
+size 108292
portfolio_agent.py
ADDED
@@ -0,0 +1,33 @@
+from langchain_core.prompts import PromptTemplate
+from langchain.chains.retrieval import create_retrieval_chain
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from agent_startup import vectordb, llm
+
+def portfolio_agent(question: str, extra_context: str):
+    prompt = PromptTemplate(
+        template="""You are my intelligent assistant, representing me to recruiters and HR professionals visiting my portfolio.
+Use the following resume information: {context} and extra details: {extra_context} to answer questions as if you are me.
+
+Your goal is to provide clear, confident, and engaging responses that highlight my strengths, achievements, and suitability for exciting opportunities.
+Be professional, personable, and persuasive. Where relevant, emphasize my unique skills, experience, and passion for growth.
+
+Question from recruiter/HR: {input}
+
+Your answer (as me):
+""",
+        input_variables=["context", "extra_context", "input"]
+    )
+
+    combine_docs_chain = create_stuff_documents_chain(llm, prompt)
+    rag_chain = create_retrieval_chain(
+        retriever=vectordb.as_retriever(search_kwargs={"k": 5}),
+        combine_docs_chain=combine_docs_chain,
+    )
+
+    result = rag_chain.invoke({
+        "context": "",
+        "extra_context": extra_context,
+        "input": question
+    })
+
+    return result["answer"]
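A direct usage sketch for local experimentation (illustrative; the question and extra context are placeholders). Two observations on the code above: the "context": "" key in the invoke call is effectively a no-op, since create_retrieval_chain overwrites context with the retrieved resume chunks before the prompt is formatted, and the prompt/chain are rebuilt on every call, so hoisting them to module level would be a cheap optimization:

# Illustrative only: invoke the agent directly, bypassing the FastAPI layer.
from portfolio_agent import portfolio_agent

answer = portfolio_agent(
    "What projects have you shipped recently?",  # placeholder question
    extra_context="Open to remote roles.",       # stands in for the extra_context env var
)
print(answer)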
requirements.txt
ADDED
@@ -0,0 +1,10 @@
+fastapi
+uvicorn
+python-dotenv
+langchain
+langchain-core
+langchain-chroma
+langchain-community
+langchain-huggingface
+langchain-google-genai
+PyMuPDF
runtime.txt
ADDED
@@ -0,0 +1 @@
+python-3.10