Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,10 +10,20 @@ from anthropic import Anthropic
|
|
10 |
import pymongo
|
11 |
from dotenv import load_dotenv
|
12 |
import fitz # PyMuPDF
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# Load environment variables
|
15 |
load_dotenv()
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
# Initialize MongoDB client
|
18 |
MONGO_URI = os.getenv('MONGO_URI')
|
19 |
mongo_client = pymongo.MongoClient(MONGO_URI)
|
@@ -94,6 +104,62 @@ def extract_info_with_claude(resume_text: str) -> str:
|
|
94 |
|
95 |
return extracted_info
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
|
98 |
"""Parse a resume file and return name and projects."""
|
99 |
try:
|
@@ -135,8 +201,8 @@ def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
|
|
135 |
"projects": projects,
|
136 |
"full_content": resume_text
|
137 |
}
|
138 |
-
|
139 |
-
st.write("💾 Stored data in
|
140 |
|
141 |
return name, projects
|
142 |
|
|
|
10 |
import pymongo
|
11 |
from dotenv import load_dotenv
|
12 |
import fitz # PyMuPDF
|
13 |
+
import voyageai
|
14 |
+
from pinecone.grpc import PineconeGRPC as Pinecone
|
15 |
+
from pinecone import ServerlessSpec
|
16 |
+
from pinecone import Index
|
17 |
|
18 |
# Load environment variables
|
19 |
load_dotenv()
|
20 |
|
21 |
+
# Initialize VoyageAI constants
|
22 |
+
VOYAGEAI_BATCH_SIZE = 128
|
23 |
+
|
24 |
+
# Initialize Pinecone
|
25 |
+
PINECONE_ID = "intratalent-v2"
|
26 |
+
|
27 |
# Initialize MongoDB client
|
28 |
MONGO_URI = os.getenv('MONGO_URI')
|
29 |
mongo_client = pymongo.MongoClient(MONGO_URI)
|
|
|
104 |
|
105 |
return extracted_info
|
106 |
|
107 |
+
def get_pinecone_index(database_id: str, dimension: int = 512) -> Index:
    """Return a handle to the Pinecone index named *database_id*, creating it if needed.

    Args:
        database_id: Name of the Pinecone index to connect to.
        dimension: Vector size used only when the index has to be created.
            The default of 512 matches the embedding size of the
            'voyage-3-lite' model used elsewhere in this file (per the
            Voyage AI model documentation); pass a different value if the
            embedding model changes.

    Returns:
        The object returned by ``pc.Index(database_id)``.
    """
    # The API key is read from the environment (PINECONE_API_KEY).
    pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))

    # Create the index on first use. The dimension must be known up front:
    # the original code read it from an undefined `shape` variable.
    if not pc.has_index(database_id):
        pc.create_index(
            name=database_id,
            dimension=dimension,
            spec=ServerlessSpec(
                cloud='aws',
                region='us-east-1'
            ),
            metric='cosine'
        )

    # Connect using the name we were given and hand the index back to the
    # caller (previously an undefined `index_name` was used and nothing was
    # returned).
    return pc.Index(database_id)
|
125 |
+
|
126 |
+
def add_to_voyage(person_name: str, person_projects: list) -> None:
    """Embed a person's project descriptions with VoyageAI and upsert them into Pinecone.

    Args:
        person_name: Name of the person the projects belong to; stored in
            each record's metadata and used to derive record ids.
        person_projects: List of project dicts. Each is expected to carry a
            "description" (the text that gets embedded) and a "name".
            Projects without a non-empty string description are skipped.

    Returns:
        None. After upserting, the index statistics are written to the
        Streamlit page. If there is nothing to embed, no API call is made.
    """
    # Local import so this block does not depend on a file-level import.
    import hashlib

    # Collect (id, text, metadata) for every project that has text to embed.
    records = []
    for position, project in enumerate(person_projects or []):
        description = project.get("description")
        if not isinstance(description, str) or not description.strip():
            continue
        project_name = str(project.get("name") or "")

        # Ids must be strings and must not collide between people: a plain
        # loop counter (0, 1, 2, ...) would make every new resume overwrite
        # the previous one. Hashing person + position + project name gives a
        # deterministic ASCII id, so re-processing the same resume updates
        # records instead of duplicating them.
        raw_id = f"{person_name}\n{position}\n{project_name}"
        record_id = hashlib.sha256(raw_id.encode("utf-8")).hexdigest()

        # Metadata is a key/value mapping rather than a bare string.
        metadata = {"person": person_name, "project": project_name}
        records.append((record_id, description, metadata))

    if not records:
        return

    index = get_pinecone_index(PINECONE_ID)
    vo = voyageai.Client(api_key=os.getenv('VOYAGEAI_API_KEY'))

    # Embed and upsert in batches of VOYAGEAI_BATCH_SIZE: one embedding
    # request per batch instead of one per project.
    for start in range(0, len(records), VOYAGEAI_BATCH_SIZE):
        batch = records[start:start + VOYAGEAI_BATCH_SIZE]
        embeddings = vo.embed(
            texts=[description for _, description, _ in batch],
            model='voyage-3-lite',
            truncation=False
        ).embeddings

        index.upsert(vectors=[
            {"id": record_id, "values": embedding, "metadata": metadata}
            for (record_id, _, metadata), embedding in zip(batch, embeddings)
        ])

    # let's view the index statistics
    st.write(index.describe_index_stats())
|
162 |
+
|
163 |
def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
|
164 |
"""Parse a resume file and return name and projects."""
|
165 |
try:
|
|
|
201 |
"projects": projects,
|
202 |
"full_content": resume_text
|
203 |
}
|
204 |
+
add_to_voyage(name, projects)
|
205 |
+
st.write("💾 Stored data in VoyageAI")
|
206 |
|
207 |
return name, projects
|
208 |
|