Test
Browse files- app.py +388 -0
- requirements.txt +172 -0
app.py
ADDED
@@ -0,0 +1,388 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
from PyPDF2 import PdfReader
import psycopg2
from psycopg2 import sql
import pandas as pd
from datetime import date
import numpy as np
import spacy
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from io import StringIO
from spacy.matcher import PhraseMatcher
from skillNer.general_params import SKILL_DB
from skillNer.skill_extractor_class import SkillExtractor
from psycopg2.extensions import register_adapter, AsIs
import os
import warnings

# Let psycopg2 serialize numpy int64 values coming out of pandas frames.
register_adapter(np.int64, AsIs)
warnings.filterwarnings('ignore')

# SECURITY(review): database credentials were hard-coded in source (and are
# now in version-control history).  They can be overridden via environment
# variables; rotate the leaked password and drop the fallbacks.
db_params = {
    'host': os.environ.get('DB_HOST', 'dpg-clur07la73kc73bjt21g-a.oregon-postgres.render.com'),
    'database': os.environ.get('DB_NAME', 'anudip'),
    'user': os.environ.get('DB_USER', 'anu'),
    'password': os.environ.get('DB_PASSWORD', 'GdMdskphcmhZZblHM30cPw75gl4l8oxJ'),
}

# Heavy module-level model loads: spaCy pipeline + skillNer extractor.
# These run once per Streamlit worker process.
nlp = spacy.load("en_core_web_lg")
# init skill extractor
skill_extractor = SkillExtractor(nlp, SKILL_DB, PhraseMatcher)

with st.sidebar:
    st.title("JD Skills Extraction & Matching Engine")
    st.markdown('''
    ## About
    Goal is to extract the skills from input document and extract all the skills
    ''')
|
37 |
+
def tuple_to_int(tup):
    """Collapse a tuple of digits into one integer, e.g. (1, 2, 3) -> 123.

    A one-element tuple (the common case: a single-column DB fetch row)
    yields that element unchanged.  An empty tuple raises IndexError.
    """
    # Horner's scheme replaces the original recursion; same results.
    value = tup[0]
    for digit in tup[1:]:
        value = value * 10 + digit
    return value
|
42 |
+
|
43 |
+
def skill_check(dbQuery):
    """Run *dbQuery* (expected to select a ``skillname`` column) and return
    the matching skill names as one comma-separated string.

    Returns an empty string when the query matches no rows.
    """
    conn = psycopg2.connect(**db_params)
    try:
        df = pd.read_sql_query(dbQuery, conn)
    finally:
        conn.close()  # connection was leaked on every call before
    # join() replaces the original quadratic concatenation + [2:] strip.
    return ', '.join(df['skillname'])
|
55 |
+
def display_skills(id):
    """Fetch and print the four skill groups extracted for JD *id*.

    Returns the groups joined with '@' in this order:
    required hard @ required soft @ good-to-have soft @ good-to-have hard.
    """
    jd = str(id)
    # The four queries differ only in score threshold and skill type;
    # *id* is an internally generated integer key.
    RequiredSkills_Hard = skill_check(
        "select skillname from SkillDetails where id = " + jd + " and skillscore > 99 and skilltype = 'Hard Skill'")
    RequiredSkills_Soft = skill_check(
        "select skillname from SkillDetails where id = " + jd + " and skillscore > 50 and skilltype = 'Soft Skill'")
    RequiredSkills_G1 = skill_check(
        "select skillname from SkillDetails where id = " + jd + " and skillscore < 50 and skilltype = 'Soft Skill'")
    RequiredSkills_G2 = skill_check(
        "select skillname from SkillDetails where id = " + jd + " and skillscore < 99 and skilltype = 'Hard Skill'")

    print('')
    print("Required Skills : " + RequiredSkills_Hard)
    print('')
    print("Required Soft Skills : " + RequiredSkills_Soft)
    print('')
    print("Good to have Skills : " + RequiredSkills_G1 + " " + RequiredSkills_G2)
    return RequiredSkills_Hard + "@" + RequiredSkills_Soft + "@" + RequiredSkills_G1 + "@" + RequiredSkills_G2
|
76 |
+
|
77 |
+
def latestSkillDetails(jid):
    """Return the '@'-joined skill groups of the most recently extracted JD
    (see display_skills for the format).

    *jid* is the caller's "last seen" JD id; the header line is only printed
    when the newest JD differs from it.
    """
    query = "select * from jdmaster where isskillsextracted=1 order by jdmasterid desc limit 1 "
    conn = psycopg2.connect(**db_params)
    try:
        df = pd.read_sql_query(query, conn)
    finally:
        conn.close()  # connection was leaked on every call before
    # Positional columns: 0=jdmasterid, 2=filename, 3=uploaded date
    # (assumed from usage -- TODO confirm against the jdmaster schema).
    filename = df.iat[0, 2]
    fileId = df.iat[0, 0]
    upload = df.iat[0, 3]
    if fileId != jid:
        print("Skill Details for File : " + str(filename) + " , ID " + str(fileId) + " , Uploaded on " + str(upload))
    # Always return the data: the original returned None when fileId == jid,
    # which crashed the caller's .split('@').  (A dead `jid = df.iat[0,0]`
    # local reassignment was also removed.)
    return display_skills(fileId)
|
90 |
+
|
91 |
+
def SkillExtract():
    """Process every JD row not yet extracted (isskillsextracted = 0):
    annotate its description with skillNer, persist each detected skill
    into SkillMaster / jdSkilldetails, then mark the JD as extracted.
    """
    print("Extracting Skills for the JD...")
    # Connect to the PostgreSQL database
    conn = psycopg2.connect(**db_params)
    cursor = conn.cursor()

    # Retrieve "id" and "description" columns from the table
    #query = sql.SQL("select jdmasterid,jobdescription from JDMaster where isskillsextracted in (0)")
    query = "select jdmasterid,jobdescription,filename from JDMaster where isskillsextracted in (0)"

    # Use Pandas to read the data into a DataFrame
    df = pd.read_sql_query(query, conn)

    # Print the DataFrame (for demonstration purposes)
    #print(df)

    # Defaults reused below when inserting brand-new skills.
    skill_details = 'Programming'
    skill_type = 'Technical'
    weightage = -1.0
    is_active = True
    Skillid = 1
    jdMasterid = 1
    OldSkillCount = 0
    NewSkillCount = 0
    if(len(df.index) > 0):
        print("Total JDs for Extractraction : " + str(len(df.index)))
    for index, row in df.iterrows():
        # Access individual columns using column names
        id_value = row['jdmasterid']
        filename_jd = row['filename']
        # Per-JD counters of already-known vs newly-inserted skills.
        OldSkillCount = 0
        NewSkillCount = 0
        skill_score = 0.0
        print("Extracting Skills For ", filename_jd + " , Id : " + str(id_value) + " , Index " + str(index + 1))

        description_value = row['jobdescription']
        #print(description_value)

        # skillNer results: exact phrase hits plus scored n-gram hits.
        annotations = skill_extractor.annotate(description_value)
        matches = annotations['results']['full_matches']+annotations['results']['ngram_scored']
        skills_list = []
        for result in matches:
            if(1==1):  # NOTE(review): always-true guard, kept as-is

                isOld = "Yes"
                skill_id = result['skill_id']
                skill_name1 = skill_extractor.skills_db[skill_id]['skill_name']
                # Strip any parenthesised qualifier, e.g. "Java (Programming)".
                skill_name = skill_name1.split("(")[0].strip()
                skill_type = skill_extractor.skills_db[skill_id]['skill_type']
                skill_score = round(result['score'],2)

                # Skip duplicate skill names within the same JD.
                if( skill_name in skills_list):
                    continue
                skills_list.append(skill_name)
                #print("Skill Identified : ", j['doc_node_value'])
                query = "SELECT skillid FROM skillmaster WHERE skillDetails IN (%s)"
                params = (skill_name,)  # Replace 'Test' with your actual variable or user input
                cursor.execute(query, params)
                if cursor.rowcount > 0:
                    # Skill already known: link it to this JD unless already linked.
                    print("Skill Identified : ", skill_name)
                    result = cursor.fetchall()  # NOTE(review): shadows the loop variable `result`
                    for row in result:  # NOTE(review): shadows the outer `row`
                        row_as_int = [int(element) for element in row]
                        #print("Skill Already in SkillMaster")
                        OldSkillCount = OldSkillCount + 1
                        isOld = "Yes"
                        query = "SELECT skillid FROM jdSkilldetails WHERE skillid IN (%s) and jdMasterid in (%s)"
                        params = (row_as_int[0],id_value,)
                        cursor.execute(query, params)
                        if cursor.rowcount > 0:
                            weightage = -1.0
                            #print("Skill Already in SkillMaster and JDSkillDetails")
                        else:
                            Skillid = row_as_int[0]
                            jdMasterid = id_value
                            insert_query = sql.SQL("""INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)""")
                            cursor.execute(insert_query, (Skillid, jdMasterid))
                            conn.commit()
                            #print("Skill Already in SkillMaster and Inserted in JDSkillDetails")
                    #print(row_as_int)
                else:
                    # Brand-new skill: insert into SkillMaster, then link to this JD.
                    NewSkillCount = NewSkillCount + 1
                    isOld = "No"
                    skill_details = skill_name
                    weightage = -1.0
                    # Scores are stored as 0-100 percentages (as strings).
                    skill_score = skill_score * 100
                    skill_score1 = str(skill_score)
                    #skill_score = skill_score.astype(float)
                    #print(skill_score)
                    insert_query = sql.SQL("""INSERT INTO SkillMaster (SkillDetails, SkillType, Weightage, IsActive, skill_score)
                    VALUES (%s, %s, %s, %s, %s) RETURNING SkillID""")
                    cursor.execute(insert_query, (skill_details, skill_type, weightage, is_active, skill_score1))
                    conn.commit()
                    generated_skill_id = cursor.fetchone()[0]
                    Skillid = generated_skill_id
                    jdMasterid = id_value
                    insert_query = sql.SQL("""INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)""")
                    cursor.execute(insert_query, (Skillid, jdMasterid))
                    conn.commit()
                    print("Skill Identified : ", skill_name)
                    #print("Skill inserted in SkillMaster and Inserted in JDSkillDetails")

        # Mark the JD as processed so it is not picked up again.
        query = "update public.jdmaster set isskillsextracted = 1 where jdmasterid = (%s)"

        params = (id_value,)
        cursor.execute(query, params)
        conn.commit()
        print("Skills Updated for Skills Extraction for file ", filename_jd)
        print("Total Skills : ", len(skills_list))
|
202 |
+
|
203 |
+
def SkillExtraction(file):
    """Annotate raw JD text with skillNer and echo every detected skill
    (name, type, score) to the Streamlit page.

    Purely presentational: nothing is written to the database and nothing
    is returned.  (An unused ``skills_dict`` local was removed.)
    """
    annotations = skill_extractor.annotate(file)

    # Exact-phrase hits plus scored n-gram hits.
    matches = annotations['results']['full_matches'] + annotations['results']['ngram_scored']

    for result in matches:
        skill_id = result['skill_id']
        skill_name = skill_extractor.skills_db[skill_id]['skill_name']
        skill_type = skill_extractor.skills_db[skill_id]['skill_type']
        skill_score = round(result['score'], 2)
        st.write("Skills----------")
        st.write(skill_name)
        st.write(skill_type)
        st.write(skill_score)
        st.write("Skills----------")
|
219 |
+
|
220 |
+
def SkillMatcher():
    """Score every (JD, course) skill-set pair with sentence-embedding
    cosine similarity and return the short name of the best-matching
    course file (filename stem after the backslash).

    Pairs already present in courseskillmatch are skipped.
    """
    print("Checking Best Course for the JD...")
    conn = psycopg2.connect(**db_params)
    cursor_obj = conn.cursor()
    try:
        query = "select * from JDDetailsCoursematching"
        cursor_obj.execute(query)
        jd_data = cursor_obj.fetchall()
        print(jd_data)
        query = "select * from CourseDetailsForMatching"
        cursor_obj.execute(query)
        cv_data = cursor_obj.fetchall()
        print(cv_data)
        # Composite "jdid-courseid" keys of pairs already scored.
        query = "select jdmasterid || '-' || courseid from courseskillmatch"
        cursor_obj.execute(query)
        match_data = cursor_obj.fetchall()

        # Group skills by id: {jd_id: [skill, ...]} / {course_id: [skill, ...]}
        # (first column is assumed to be the id, second the skill name --
        # TODO confirm against the two views' schemas).
        jd_skills = {}
        for obj in jd_data:
            if obj[0] not in jd_skills:
                jd_skills[obj[0]] = []
            jd_skills[obj[0]].append(obj[1])

        cv_skills = {}
        for obj in cv_data:
            if obj[0] not in cv_skills:
                cv_skills[obj[0]] = []
            cv_skills[obj[0]].append(obj[1])

        model = SentenceTransformer('all-MiniLM-L6-v2')
        count = 0
        TopScore = 0
        CourseId = 0
        for jd in jd_skills:
            for cv in cv_skills:
                # Skip pairs already recorded in courseskillmatch.
                isAlreadyInDb = False
                match_details = str(jd) + "-" + str(cv)
                for i in match_data:
                    if i[0] == match_details:
                        print( "Already in Database -----------" + i[0])
                        isAlreadyInDb = True
                        break
                if isAlreadyInDb:
                    continue
                count += 1
                # Embed each side's skills as one space-joined sentence.
                sentence1 = " ".join(cv_skills[cv])
                sentence2 = " ".join(jd_skills[jd])
                embedding1 = model.encode(sentence1, convert_to_tensor=True)
                embedding2 = model.encode(sentence2, convert_to_tensor=True)

                # Compute cosine similarity between the two sentence embeddings
                cosine_similarit = util.cos_sim(embedding1, embedding2)
                if TopScore < cosine_similarit * 100:
                    TopScore = cosine_similarit * 100
                    CourseId = cv
                # NOTE: two permanently-disabled `if(1==2):` blocks (SkillMatchID
                # allocation + courseskillmatch insert) and an unused `common`
                # set intersection were dead code and have been removed.

        # NOTE(review): if every pair was already in the DB, CourseId stays 0
        # and the lookup below finds no rows -- confirm a fresh JD always
        # reaches this point before SkillMatcher is called.
        query = "select filename from coursemaster where masterid = " + str(CourseId)
        df = pd.read_sql_query(query, conn)
        MatchId = df.iat[0, 0].split('\\')[1].split('.')[0]
        print("------------------------Beta Results for Course - " + MatchId)
        return MatchId
    finally:
        # Originally placed after `return`, so these never executed.
        cursor_obj.close()
        conn.close()
|
313 |
+
|
314 |
+
|
315 |
+
|
316 |
+
|
317 |
+
def uploadFile(text, filePath):
    """Insert a new JD row (text + source filename) into JDMaster with
    both extraction flags cleared.

    Prints a confirmation on success; returns nothing.
    """
    conn = psycopg2.connect(**db_params)
    try:
        cursor = conn.cursor()
        query = "Select max(jdmasterid) from JdMaster"
        df = pd.read_sql_query(query, conn)
        # max() is NULL on an empty table; fall back to 0 so the first
        # upload gets id 1 instead of crashing on None + 1.
        # NOTE(review): max+1 is racy under concurrent uploads; a SERIAL /
        # sequence column would be safer.
        MasterId = (df.iat[0, 0] or 0) + 1
        query = sql.SQL("""INSERT INTO JDMaster (jdmasterid,jobdescription, filename, UploadedDate, IsDetailsExtracted,IsSkillsExtracted,source) VALUES (%s,%s,%s,%s,%s,%s,%s)""")
        cursor.execute(query, (MasterId, text, filePath, date.today(), 0, 0, "JD"))
        conn.commit()
    finally:
        conn.close()  # connection was leaked on every call before
    print("File Uploaded...")
|
328 |
+
|
329 |
+
def submit(uploaded_resume, query):
    """Form handler: extract text from the uploaded JD file, persist it,
    run skill extraction + course matching and render the results.

    Supports PDF (PyPDF2), doc/docx (decoded as UTF-8 text -- NOTE(review):
    real .doc/.docx are zip/binary, this only works for plain-text files
    with those extensions), and anything else as raw decoded bytes.
    """
    if uploaded_resume:
        fName = uploaded_resume.name
        if fName.endswith("pdf"):
            pdf_reader = PdfReader(uploaded_resume)
            text = ""
            for page in pdf_reader.pages:
                text += page.extract_text()
        elif fName.endswith("doc") or fName.endswith("docx"):
            text = StringIO(uploaded_resume.getvalue().decode("utf-8"))
            text = text.read()
        else:
            text = uploaded_resume.getvalue().decode()

        # The query branch is nested under the upload check so `text` and
        # `fName` can never be referenced before assignment.
        if query:
            with st.spinner('Processing...'):
                uploadFile(str(text), fName)
                SkillExtract()
                profile = SkillMatcher()
                details = latestSkillDetails(1).split('@')

                st.subheader('Required Skills : ', divider='rainbow')
                st.write(details[0])
                st.subheader('Required Soft Skills : ', divider='rainbow')
                st.write(details[1])
                st.subheader('Good to have Skills : ', divider='rainbow')
                st.write(details[2] + " " + details[3])
                st.success('Suggested Course - ' + profile)
        else:
            SkillMatcher()
|
372 |
+
|
373 |
+
|
374 |
+
|
375 |
+
def main():
    """Render the upload form and wire the submit handler."""
    st.header("Skills Extraction")

    form = st.form(key='some_form')
    uploaded_resume = form.file_uploader("Upload Job Description")
    query = form.text_area(
        "Skills Extraction",
        placeholder="Skills?",
        key="question"
    )
    # BUG FIX: the original passed `on_click=submit(...)` -- i.e. it *called*
    # submit during every script rerender and registered its return value
    # (None) as the callback.  Pass the function plus kwargs so it only runs
    # when the button is clicked.
    form.form_submit_button(
        "Run",
        on_click=submit,
        kwargs={"uploaded_resume": uploaded_resume, "query": query},
    )
|
386 |
+
|
387 |
+
# Script entry point: build the Streamlit page.
if __name__ == '__main__':
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiobotocore==2.3.4
|
2 |
+
aiohttp==3.8.6
|
3 |
+
aioitertools==0.11.0
|
4 |
+
aiosignal==1.3.1
|
5 |
+
annotated-types==0.6.0
|
6 |
+
anyio==4.0.0
|
7 |
+
argon2-cffi==23.1.0
|
8 |
+
argon2-cffi-bindings==21.2.0
|
9 |
+
asttokens==2.4.0
|
10 |
+
async-lru==2.0.4
|
11 |
+
async-timeout==4.0.3
|
12 |
+
attrs==23.1.0
|
13 |
+
Babel==2.13.0
|
14 |
+
backcall==0.2.0
|
15 |
+
beautifulsoup4==4.12.2
|
16 |
+
bleach==6.1.0
|
17 |
+
blis==0.7.11
|
18 |
+
boto3==1.21.21
|
19 |
+
botocore==1.24.21
|
20 |
+
catalogue==2.0.10
|
21 |
+
certifi==2023.7.22
|
22 |
+
cffi==1.16.0
|
23 |
+
charset-normalizer==3.3.0
|
24 |
+
click==8.1.7
|
25 |
+
cloudpathlib==0.15.1
|
26 |
+
colorama==0.4.6
|
27 |
+
comm==0.1.4
|
28 |
+
confection==0.1.3
|
29 |
+
cymem==2.0.8
|
30 |
+
debugpy==1.8.0
|
31 |
+
decorator==5.1.1
|
32 |
+
defusedxml==0.7.1
|
33 |
+
direnv==2020.12.3
|
34 |
+
distlib==0.3.7
|
35 |
+
distro==1.8.0
|
36 |
+
en-core-web-lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.4.1/en_core_web_lg-3.4.1-py3-none-any.whl#sha256=7b1681d44181b1ae6517044c9beed90cb71faaa0d7dc92bf18fbe590847051d5
|
37 |
+
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl#sha256=83872781dc64893d45d9dbe940e05f80df7e7196e169ea29e2e9742fed079549
|
38 |
+
env-file==2020.12.3
|
39 |
+
exceptiongroup==1.1.3
|
40 |
+
executing==2.0.0
|
41 |
+
fairscale==0.4.13
|
42 |
+
fastjsonschema==2.18.1
|
43 |
+
filelock==3.7.1
|
44 |
+
fire==0.5.0
|
45 |
+
frozenlist==1.4.0
|
46 |
+
fsspec==2022.5.0
|
47 |
+
h11==0.14.0
|
48 |
+
httpcore==1.0.2
|
49 |
+
httpx==0.25.2
|
50 |
+
huggingface-hub==0.17.3
|
51 |
+
idna==3.4
|
52 |
+
importlib-metadata==6.8.0
|
53 |
+
ipykernel==6.25.2
|
54 |
+
ipython==8.16.1
|
55 |
+
jedi==0.19.1
|
56 |
+
jellyfish==1.0.1
|
57 |
+
Jinja2==3.1.2
|
58 |
+
jmespath==1.0.1
|
59 |
+
joblib==1.3.2
|
60 |
+
json5==0.9.14
|
61 |
+
jsonschema==4.19.1
|
62 |
+
jsonschema-specifications==2023.7.1
|
63 |
+
jupyter-events==0.7.0
|
64 |
+
jupyter-lsp==2.2.0
|
65 |
+
jupyter_client==8.4.0
|
66 |
+
jupyter_core==5.4.0
|
67 |
+
jupyter_server==2.7.3
|
68 |
+
jupyter_server_terminals==0.4.4
|
69 |
+
jupyterlab==4.0.7
|
70 |
+
jupyterlab-pygments==0.2.2
|
71 |
+
jupyterlab_server==2.25.0
|
72 |
+
langcodes==3.3.0
|
73 |
+
MarkupSafe==2.1.3
|
74 |
+
matplotlib-inline==0.1.6
|
75 |
+
mistune==3.0.2
|
76 |
+
mpmath==1.3.0
|
77 |
+
multidict==6.0.4
|
78 |
+
murmurhash==1.0.10
|
79 |
+
nbclient==0.8.0
|
80 |
+
nbconvert==7.9.2
|
81 |
+
nbformat==5.9.2
|
82 |
+
nervaluate==0.1.8
|
83 |
+
nest-asyncio==1.5.8
|
84 |
+
networkx==3.1
|
85 |
+
nltk==3.8.1
|
86 |
+
notebook==7.0.4
|
87 |
+
notebook_shim==0.2.3
|
88 |
+
numpy==1.24.4
|
89 |
+
ojd-daps-skills==1.0.2
|
90 |
+
openai==0.28.0
|
91 |
+
overrides==7.4.0
|
92 |
+
packaging==23.2
|
93 |
+
pandas==1.3.5
|
94 |
+
pandocfilters==1.5.0
|
95 |
+
parso==0.8.3
|
96 |
+
pathy==0.10.2
|
97 |
+
pickleshare==0.7.5
|
98 |
+
Pillow==10.1.0
|
99 |
+
platformdirs==3.11.0
|
100 |
+
preshed==3.0.9
|
101 |
+
prometheus-client==0.17.1
|
102 |
+
prompt-toolkit==3.0.39
|
103 |
+
psutil==5.9.5
|
104 |
+
psycopg2-binary==2.9.9
|
105 |
+
pure-eval==0.2.2
|
106 |
+
pycparser==2.21
|
107 |
+
pydantic==1.9.2
|
108 |
+
pydantic_core==2.10.1
|
109 |
+
Pygments==2.16.1
|
110 |
+
PyPDF2==3.0.1
|
111 |
+
python-dateutil==2.8.2
|
112 |
+
python-json-logger==2.0.7
|
113 |
+
pytz==2023.3.post1
|
114 |
+
pywin32==306; sys_platform == "win32"
|
115 |
+
pywinpty==2.0.12; sys_platform == "win32"
|
116 |
+
PyYAML==6.0.1
|
117 |
+
pyzmq==25.1.1
|
118 |
+
referencing==0.30.2
|
119 |
+
regex==2023.10.3
|
120 |
+
requests==2.31.0
|
121 |
+
rfc3339-validator==0.1.4
|
122 |
+
rfc3986-validator==0.1.1
|
123 |
+
rpds-py==0.10.6
|
124 |
+
s3fs==2022.5.0
|
125 |
+
s3transfer==0.5.2
|
126 |
+
safetensors==0.4.0
|
127 |
+
scikit-learn==1.3.1
|
128 |
+
scipy==1.10.1
|
129 |
+
Send2Trash==1.8.2
|
130 |
+
sentence-transformers==2.2.2
|
131 |
+
sentencepiece==0.1.99
|
132 |
+
sh==1.14.2
|
133 |
+
six==1.16.0
|
134 |
+
skillNer==1.0.3
|
135 |
+
smart-open==6.4.0
|
136 |
+
sniffio==1.3.0
|
137 |
+
soupsieve==2.5
|
138 |
+
spacy==3.4.0
|
139 |
+
spacy-legacy==3.0.12
|
140 |
+
spacy-loggers==1.0.5
|
141 |
+
srsly==2.4.8
|
142 |
+
stack-data==0.6.3
|
143 |
+
sympy==1.12
|
144 |
+
termcolor==2.4.0
|
145 |
+
terminado==0.17.1
|
146 |
+
thinc==8.1.12
|
147 |
+
threadpoolctl==3.2.0
|
148 |
+
tinycss2==1.2.1
|
149 |
+
tokenizers==0.13.3
|
150 |
+
tomli==2.0.1
|
151 |
+
toolz==0.12.0
|
152 |
+
torch==2.1.0
|
153 |
+
torchvision==0.16.0
|
154 |
+
tornado==6.3.3
|
155 |
+
tqdm==4.64.0
|
156 |
+
traitlets==5.11.2
|
157 |
+
transformers==4.33.3
|
158 |
+
typer==0.4.1
|
159 |
+
typing_extensions==4.5.0
|
160 |
+
tzdata==2023.3
|
161 |
+
urllib3==1.26.18
|
162 |
+
values==2020.12.3
|
163 |
+
virtualenv==20.24.5
|
164 |
+
virtualenvwrapper-win==1.2.7; sys_platform == "win32"
|
165 |
+
wasabi==0.10.1
|
166 |
+
wcwidth==0.2.8
|
167 |
+
weasel==0.3.2
|
168 |
+
webencodings==0.5.1
|
169 |
+
websocket-client==1.6.4
|
170 |
+
wrapt==1.15.0
|
171 |
+
yarl==1.9.2
|
172 |
+
zipp==3.17.0
|