rushankg committed on
Commit
6fcd09e
·
verified ·
1 Parent(s): a693879

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -2
app.py CHANGED
@@ -10,10 +10,20 @@ from anthropic import Anthropic
10
  import pymongo
11
  from dotenv import load_dotenv
12
  import fitz # PyMuPDF
 
 
 
 
13
 
14
  # Load environment variables
15
  load_dotenv()
16
 
 
 
 
 
 
 
17
  # Initialize MongoDB client
18
  MONGO_URI = os.getenv('MONGO_URI')
19
  mongo_client = pymongo.MongoClient(MONGO_URI)
@@ -94,6 +104,62 @@ def extract_info_with_claude(resume_text: str) -> str:
94
 
95
  return extracted_info
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
98
  """Parse a resume file and return name and projects."""
99
  try:
@@ -135,8 +201,8 @@ def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
135
  "projects": projects,
136
  "full_content": resume_text
137
  }
138
- resume_collection.insert_one(resume_data)
139
- st.write("💾 Stored data in MongoDB")
140
 
141
  return name, projects
142
 
 
10
  import pymongo
11
  from dotenv import load_dotenv
12
  import fitz # PyMuPDF
13
+ import voyageai
14
+ from pinecone.grpc import PineconeGRPC as Pinecone
15
+ from pinecone import ServerlessSpec
16
+ from pinecone import Index
17
 
18
  # Load environment variables
19
  load_dotenv()
20
 
21
+ # Initialize VoyageAI constants
22
+ VOYAGEAI_BATCH_SIZE = 128
23
+
24
+ # Initialize Pinecone
25
+ PINECONE_ID = "intratalent-v2"
26
+
27
  # Initialize MongoDB client
28
  MONGO_URI = os.getenv('MONGO_URI')
29
  mongo_client = pymongo.MongoClient(MONGO_URI)
 
104
 
105
  return extracted_info
106
 
107
def get_pinecone_index(database_id: str, dimension: int = 512) -> Index:
    """Return a handle to the Pinecone index ``database_id``, creating it if absent.

    Args:
        database_id: Name of the Pinecone index to connect to.
        dimension: Vector dimension used only when the index has to be
            created. The default of 512 is intended to match the
            ``voyage-3-lite`` embeddings produced elsewhere in this file
            (NOTE(review): confirm against the Voyage AI model docs if the
            embedding model changes).

    Returns:
        The index handle obtained from ``pc.Index(database_id)``.
    """
    # Initialize the connection to Pinecone; the key is read from the environment.
    pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))

    # Create the index on first use. The original code referenced an undefined
    # ``shape`` here; the dimension is now an explicit, defaulted parameter.
    if not pc.has_index(database_id):
        pc.create_index(
            name=database_id,
            dimension=dimension,
            metric='cosine',
            spec=ServerlessSpec(
                cloud='aws',
                region='us-east-1',
            ),
        )

    # Connect using the name we were given (the original used an undefined
    # ``index_name``) and actually hand the index back to the caller.
    return pc.Index(database_id)
125
+
126
def add_to_voyage(person_name: str, person_projects: list) -> None:
    """Embed a person's project descriptions with Voyage AI and upsert them to Pinecone.

    Each project that has a non-empty ``"description"`` becomes one vector whose
    metadata records the person's name and the project's name. Projects without
    a description are skipped because there is nothing to embed.

    Args:
        person_name: Name of the person the projects belong to.
        person_projects: List of project dicts; this function reads the
            ``"name"`` and ``"description"`` keys of each entry.

    Returns:
        None. When at least one vector is written, the index statistics are
        shown through ``st.write``.
    """
    import hashlib  # local import: only needed to derive stable record ids

    # Build (id, text, metadata) records before touching any remote service so
    # that empty input is a cheap no-op.
    records = []
    for position, project in enumerate(person_projects):
        description = project.get("description")
        if not description:
            continue
        project_name = project.get("name") or ""

        # The original used the loop counter as the id, so every person's
        # projects overwrote the previous person's vectors (ids 0..n-1).
        # Derive a deterministic, ASCII string id from the person and project
        # instead; re-uploading the same resume then updates in place.
        digest = hashlib.sha1(
            f"{person_name}\x00{project_name}".encode("utf-8")
        ).hexdigest()
        metadata = {"person_name": person_name, "project_name": project_name}
        records.append((f"{digest}-{position}", description, metadata))

    if not records:
        return

    index = get_pinecone_index(PINECONE_ID)
    vo = voyageai.Client(api_key=os.getenv('VOYAGEAI_API_KEY'))

    # Embed and upsert one batch at a time: a single embed request per batch
    # instead of one request per project.
    for start in range(0, len(records), VOYAGEAI_BATCH_SIZE):
        batch = records[start:start + VOYAGEAI_BATCH_SIZE]
        embeddings = vo.embed(
            texts=[text for _, text, _ in batch],
            model='voyage-3-lite',
            truncation=False,  # fail loudly rather than silently cut long text
        ).embeddings

        index.upsert(vectors=[
            {"id": record_id, "values": values, "metadata": metadata}
            for (record_id, _, metadata), values in zip(batch, embeddings)
        ])

    # Show the index statistics so the upload is visible in the UI.
    st.write(index.describe_index_stats())
162
+
163
  def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
164
  """Parse a resume file and return name and projects."""
165
  try:
 
201
  "projects": projects,
202
  "full_content": resume_text
203
  }
204
+ add_to_voyage(name, projects)
205
+ st.write("💾 Stored data in VoyageAI")
206
 
207
  return name, projects
208