minskiter committed on
Commit
f0b9b57
1 Parent(s): 1e4d53d

feat(docker): update docker config

Browse files
README.md CHANGED
@@ -11,3 +11,28 @@ license: apache-2.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+
15
+ ### 环境
16
+
17
+ 1. 最新的Docker Engine - Community(Linux版本)
18
+ version: 20.0.4
19
+
20
+ ### 如何部署
21
+
22
+ 0. 登录Huggingface申请模型下载权限
23
+ - minskiter/resume-token-classification
24
+ https://huggingface.co/minskiter/resume-token-classification
25
+
26
+ - minskiter/resume-token-classification-name-0708
27
+ https://huggingface.co/minskiter/resume-token-classification-name-0708
28
+
29
+ 1. 首先设置环境变量
30
+ ```bash
31
+ export HF_Token={Your Huggingface Token}
32
+ ```
33
+
34
+ 2. 执行部署
35
+ ```
36
+ sudo -E docker compose up -d --build
37
+ ```
38
+ 新版使用`docker compose`,旧版使用`docker-compose`。`sudo`默认不会传递当前用户的环境变量,因此需要加上`-E`以保留上一步设置的`HF_Token`。
docker-compose.yml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
version: '3'

services:
  # Resume NER gRPC service built from the dockerfile in this directory.
  ner:
    build: .
    restart: always
    ports:
      # host port 50050 -> container port 50051 (the port the image EXPOSEs)
      - "50050:50051"
    environment:
      # Fail at `compose up` time when HF_Token is unset or empty instead of
      # starting a container that cannot authenticate and then restarts
      # forever under `restart: always`.
      - HF_Token=${HF_Token:?HF_Token must be set to your Hugging Face access token}
11
+
dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the resume NER gRPC service (entry point: server.py).
FROM python:3.8

# Path: /app
WORKDIR /app

# CACHE: copy only the requirement files first so the dependency layers are
# reused when just the application sources change.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# CACHE GRPC
COPY requirements.grpc.txt ./
RUN pip install --no-cache-dir -r requirements.grpc.txt

COPY . ./
# Invoke the script through sh explicitly: it has no shebang and its
# executable bit is not guaranteed to survive checkout on every platform.
RUN sh ./grpc.sh

EXPOSE 50051

ENTRYPOINT [ "python", "./server.py" ]
grpc.sh ADDED
@@ -0,0 +1 @@
 
 
1
#!/bin/sh
# Regenerate the Python protobuf messages, type stubs and gRPC bindings for
# protos/resume.proto. Must be run from the repository root so the generated
# modules land in ./protos/.
set -e  # abort (and fail the image build) if protoc reports an error

python -m grpc_tools.protoc -I./ --python_out=./ --pyi_out=./ --grpc_python_out=./ ./protos/resume.proto
predictor/__init__.py CHANGED
@@ -7,6 +7,8 @@ import pandas as pd
7
  import math
8
  import queue
9
  from datetime import date
 
 
10
 
11
  class Predictor():
12
 
@@ -22,6 +24,7 @@ class Predictor():
22
  raise ValueError("'common' pipeline is None")
23
  self.pipelines = pipelines
24
  self.today = today
 
25
  self.__init_split_data()
26
  self.__init_schools_data(paths)
27
  self.__init_patterns()
@@ -111,7 +114,7 @@ class Predictor():
111
  raise
112
  if datestr=="至今":
113
  return self.today
114
- return self.today
115
 
116
  def split_to_blocks(
117
  self,
@@ -158,7 +161,7 @@ class Predictor():
158
  text: str,
159
  start: int,
160
  end: int,
161
- max_expand_length=20,
162
  ):
163
  expand_l,expand_r = start,end
164
  for l in range(max(start-max_expand_length,0), start):
@@ -200,15 +203,15 @@ class Predictor():
200
  }
201
  # 获取名字,先过滤所有空白字符,防止名字中间有空格
202
  remove_blanks_text, index_mapper = self.remove_blanks(text, re.compile(r' '))
203
- for name_match in self.name_pattern.finditer(remove_blanks_text):
204
- start,end = name_match.span()
205
- expand_text, start, end = self.get_expand_span(remove_blanks_text, start, end)
206
- entities = self.pipelines['name'](expand_text)
207
  for entity in entities:
208
  if entity['entity']=='NAME' and self.name_pattern.match(entity['word']) is not None:
209
  obj = {
210
- 'start': index_mapper[start+entity['start']],
211
- 'end': index_mapper[start+entity['end']-1]+1,
212
  'entity': 'NAME',
213
  'text': entity['word']
214
  }
@@ -220,7 +223,10 @@ class Predictor():
220
  if not repeat:
221
  obj['origin'] = text[obj['start']:obj['end']]
222
  return_obj['name'].append(obj)
 
 
223
  # 获取年龄
 
224
  for age_match in self.age_patterns[0].finditer(remove_blanks_text):
225
  age = None
226
  s,e = -1,-1
@@ -266,6 +272,9 @@ class Predictor():
266
  'entity': 'AGE',
267
  'origin': text[index_mapper[s]:index_mapper[e-1]+1]
268
  })
 
 
 
269
  # 获取学校
270
  for school_match in self.school_pattern.finditer(remove_blanks_text):
271
  start,end = school_match.span()
@@ -309,6 +318,9 @@ class Predictor():
309
  obj['origin'] = text[obj['start']:obj['end']]
310
  obj['level'] = self.schools[obj['text']]
311
  return_obj['schools'].append(obj)
 
 
 
312
  # 获取学历
313
  for i,pattern in enumerate(self.edu_patterns):
314
  for edu_match in pattern.finditer(remove_blanks_text):
@@ -331,16 +343,23 @@ class Predictor():
331
  if not repeat:
332
  obj['origin'] = text[obj['start']:obj['end']]
333
  return_obj['edus'].append(obj)
 
 
 
334
  # 如果有工作经历
335
  if self.works_key_pattern.search(remove_blanks_text) is not None:
336
  for job_time_match in self.job_time_patterns.finditer(remove_blanks_text):
337
  origin_start,origin_end = job_time_match.span()
338
  # convert_to_date
339
  fr = self.to_date(job_time_match.group(1))
 
 
340
  fs,fe = job_time_match.span(1)
341
  to = self.to_date(job_time_match.group(3))
 
 
342
  ts,te = job_time_match.span(3)
343
- expand_text, start, end = self.get_expand_span(remove_blanks_text, origin_start, origin_end, max_expand_length=100)
344
  entities = self.pipelines['common'](expand_text)
345
  objs = []
346
  for entity in entities:
@@ -390,6 +409,9 @@ class Predictor():
390
  work_month += diff_y * 12 + diff_m
391
  last_end = end
392
  return_obj['work_time'] = math.ceil(work_month/12)
 
 
 
393
  # 获取手机号码
394
  for phone_match in self.phone_pattern.finditer(text):
395
  start,end = phone_match.span()
@@ -400,6 +422,9 @@ class Predictor():
400
  'origin': text[start:end],
401
  'text': re.sub('\s','',text[start:end])
402
  })
 
 
 
403
  for email_match in self.email_pattern.finditer(text):
404
  start,end = email_match.span()
405
  return_obj['email'].append({
@@ -409,6 +434,9 @@ class Predictor():
409
  'origin': text[start:end],
410
  'text': re.sub('\s','',text[start:end])
411
  })
 
 
 
412
  for gender_match in self.gender_pattern.finditer(text):
413
  start,end = gender_match.span(2)
414
  return_obj['gender'].append({
@@ -418,6 +446,9 @@ class Predictor():
418
  'word': text[start:end],
419
  'text': text[start:end]
420
  })
 
 
 
421
  for block in self.split_to_blocks(remove_blanks_text):
422
  entities = self.pipelines["common"](block["text"])
423
  for entity in entities:
@@ -436,6 +467,8 @@ class Predictor():
436
  break
437
  if not repeat:
438
  return_obj['titles'].append(obj)
 
 
439
  return return_obj
440
 
441
  def __call__(self, *args: Any, **kwds: Any) -> Any:
 
7
  import math
8
  import queue
9
  from datetime import date
10
+ import time
11
+ import logging
12
 
13
  class Predictor():
14
 
 
24
  raise ValueError("'common' pipeline is None")
25
  self.pipelines = pipelines
26
  self.today = today
27
+ self.logger = logging.getLogger(__name__)
28
  self.__init_split_data()
29
  self.__init_schools_data(paths)
30
  self.__init_patterns()
 
114
  raise
115
  if datestr=="至今":
116
  return self.today
117
+ return None
118
 
119
  def split_to_blocks(
120
  self,
 
161
  text: str,
162
  start: int,
163
  end: int,
164
+ max_expand_length=10,
165
  ):
166
  expand_l,expand_r = start,end
167
  for l in range(max(start-max_expand_length,0), start):
 
203
  }
204
  # 获取名字,先过滤所有空白字符,防止名字中间有空格
205
  remove_blanks_text, index_mapper = self.remove_blanks(text, re.compile(r' '))
206
+ start_time = time.perf_counter()
207
+ for block in self.split_to_blocks(remove_blanks_text):
208
+ block_text,block_l = block['text'],block['start']
209
+ entities = self.pipelines['name'](block_text)
210
  for entity in entities:
211
  if entity['entity']=='NAME' and self.name_pattern.match(entity['word']) is not None:
212
  obj = {
213
+ 'start': index_mapper[block_l+entity['start']],
214
+ 'end': index_mapper[block_l+entity['end']-1]+1,
215
  'entity': 'NAME',
216
  'text': entity['word']
217
  }
 
223
  if not repeat:
224
  obj['origin'] = text[obj['start']:obj['end']]
225
  return_obj['name'].append(obj)
226
+ end_time = time.perf_counter()
227
+ self.logger.info(f"process name time: {end_time-start_time}")
228
  # 获取年龄
229
+ start_time = time.perf_counter()
230
  for age_match in self.age_patterns[0].finditer(remove_blanks_text):
231
  age = None
232
  s,e = -1,-1
 
272
  'entity': 'AGE',
273
  'origin': text[index_mapper[s]:index_mapper[e-1]+1]
274
  })
275
+ end_time = time.perf_counter()
276
+ self.logger.info(f"process age time: {end_time-start_time}")
277
+ start_time = time.perf_counter()
278
  # 获取学校
279
  for school_match in self.school_pattern.finditer(remove_blanks_text):
280
  start,end = school_match.span()
 
318
  obj['origin'] = text[obj['start']:obj['end']]
319
  obj['level'] = self.schools[obj['text']]
320
  return_obj['schools'].append(obj)
321
+ end_time = time.perf_counter()
322
+ self.logger.info(f"process school time: {end_time-start_time}")
323
+ start_time = time.perf_counter()
324
  # 获取学历
325
  for i,pattern in enumerate(self.edu_patterns):
326
  for edu_match in pattern.finditer(remove_blanks_text):
 
343
  if not repeat:
344
  obj['origin'] = text[obj['start']:obj['end']]
345
  return_obj['edus'].append(obj)
346
+ end_time = time.perf_counter()
347
+ self.logger.info(f"process edu time: {end_time-start_time}")
348
+ start_time = time.perf_counter()
349
  # 如果有工作经历
350
  if self.works_key_pattern.search(remove_blanks_text) is not None:
351
  for job_time_match in self.job_time_patterns.finditer(remove_blanks_text):
352
  origin_start,origin_end = job_time_match.span()
353
  # convert_to_date
354
  fr = self.to_date(job_time_match.group(1))
355
+ if fr is None:
356
+ continue
357
  fs,fe = job_time_match.span(1)
358
  to = self.to_date(job_time_match.group(3))
359
+ if to is None:
360
+ continue
361
  ts,te = job_time_match.span(3)
362
+ expand_text, start, end = self.get_expand_span(remove_blanks_text, origin_start, origin_end, max_expand_length=50)
363
  entities = self.pipelines['common'](expand_text)
364
  objs = []
365
  for entity in entities:
 
409
  work_month += diff_y * 12 + diff_m
410
  last_end = end
411
  return_obj['work_time'] = math.ceil(work_month/12)
412
+ end_time = time.perf_counter()
413
+ self.logger.info(f"process work time: {end_time-start_time}")
414
+ start_time = time.perf_counter()
415
  # 获取手机号码
416
  for phone_match in self.phone_pattern.finditer(text):
417
  start,end = phone_match.span()
 
422
  'origin': text[start:end],
423
  'text': re.sub('\s','',text[start:end])
424
  })
425
+ end_time = time.perf_counter()
426
+ self.logger.info(f"process phone time: {end_time-start_time}")
427
+ start_time = time.perf_counter()
428
  for email_match in self.email_pattern.finditer(text):
429
  start,end = email_match.span()
430
  return_obj['email'].append({
 
434
  'origin': text[start:end],
435
  'text': re.sub('\s','',text[start:end])
436
  })
437
+ end_time = time.perf_counter()
438
+ self.logger.info(f"process email time: {end_time-start_time}")
439
+ start_time = time.perf_counter()
440
  for gender_match in self.gender_pattern.finditer(text):
441
  start,end = gender_match.span(2)
442
  return_obj['gender'].append({
 
446
  'word': text[start:end],
447
  'text': text[start:end]
448
  })
449
+ end_time = time.perf_counter()
450
+ self.logger.info(f"process gender time: {end_time-start_time}")
451
+ start_time = time.perf_counter()
452
  for block in self.split_to_blocks(remove_blanks_text):
453
  entities = self.pipelines["common"](block["text"])
454
  for entity in entities:
 
467
  break
468
  if not repeat:
469
  return_obj['titles'].append(obj)
470
+ end_time = time.perf_counter()
471
+ self.logger.info(f"process title time: {end_time-start_time}")
472
  return return_obj
473
 
474
  def __call__(self, *args: Any, **kwds: Any) -> Any:
protos/resume.proto ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
syntax = "proto3";

// Resume information-extraction service.
service Resume{
  // Get Resume Info
  rpc GetInfo(TextRequest) returns (ResumeEntitiesResponse);
  // Liveness/readiness probe. The bundled server.py answers done="OK" to
  // ping="PING" once its models are loaded.
  rpc Health(PingRequest) returns (PongResponse);
}

// Health-check request.
message PingRequest{
  string ping = 1;
}

// Health-check reply; `done` carries a status string.
message PongResponse{
  string done = 1;
}

// Raw resume text to analyse.
message TextRequest{
  string text = 1;
}

// One extracted span.
message Entity{
  // Entity label, e.g. "NAME" or "AGE".
  string entity = 1;
  // Span offsets; the predictor slices the input as text[start:end], so
  // these appear to be offsets into the request text with `end` exclusive.
  int32 start = 2;
  int32 end = 3;
  // Optional value reported for the span.
  optional string text = 4;
  // The matched slice of the input text.
  string origin = 5;
  // Optional level; the bundled server only fills it for school entities.
  optional string level = 6;
}

// One work-experience entry: a company with its start and end date spans.
message JobEntity {
  Entity start = 1;
  Entity end = 2;
  Entity company = 3;
}

// Everything extracted from one resume.
message ResumeEntitiesResponse{
  repeated Entity names = 1;
  repeated Entity emails = 2;
  repeated Entity phones = 3;
  repeated Entity schools = 4;
  repeated Entity edus = 5;
  repeated Entity ages = 6;
  repeated Entity genders = 7;
  // Total work experience; the predictor rounds accumulated months up to
  // whole years.
  int32 work_years = 8;
  repeated JobEntity jobs = 9;
  repeated Entity titles = 10;
}
48
+
protos/resume_pb2.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # source: protos/resume.proto
4
+ """Generated protocol buffer code."""
5
+ from google.protobuf.internal import builder as _builder
6
+ from google.protobuf import descriptor as _descriptor
7
+ from google.protobuf import descriptor_pool as _descriptor_pool
8
+ from google.protobuf import symbol_database as _symbol_database
9
+ # @@protoc_insertion_point(imports)
10
+
11
+ _sym_db = _symbol_database.Default()
12
+
13
+
14
+
15
+
16
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13protos/resume.proto\"\x1b\n\x0bPingRequest\x12\x0c\n\x04ping\x18\x01 \x01(\t\"\x1c\n\x0cPongResponse\x12\x0c\n\x04\x64one\x18\x01 \x01(\t\"\x1b\n\x0bTextRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\"~\n\x06\x45ntity\x12\x0e\n\x06\x65ntity\x18\x01 \x01(\t\x12\r\n\x05start\x18\x02 \x01(\x05\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x05\x12\x11\n\x04text\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x0e\n\x06origin\x18\x05 \x01(\t\x12\x12\n\x05level\x18\x06 \x01(\tH\x01\x88\x01\x01\x42\x07\n\x05_textB\x08\n\x06_level\"S\n\tJobEntity\x12\x16\n\x05start\x18\x01 \x01(\x0b\x32\x07.Entity\x12\x14\n\x03\x65nd\x18\x02 \x01(\x0b\x32\x07.Entity\x12\x18\n\x07\x63ompany\x18\x03 \x01(\x0b\x32\x07.Entity\"\x8b\x02\n\x16ResumeEntitiesResponse\x12\x16\n\x05names\x18\x01 \x03(\x0b\x32\x07.Entity\x12\x17\n\x06\x65mails\x18\x02 \x03(\x0b\x32\x07.Entity\x12\x17\n\x06phones\x18\x03 \x03(\x0b\x32\x07.Entity\x12\x18\n\x07schools\x18\x04 \x03(\x0b\x32\x07.Entity\x12\x15\n\x04\x65\x64us\x18\x05 \x03(\x0b\x32\x07.Entity\x12\x15\n\x04\x61ges\x18\x06 \x03(\x0b\x32\x07.Entity\x12\x18\n\x07genders\x18\x07 \x03(\x0b\x32\x07.Entity\x12\x12\n\nwork_years\x18\x08 \x01(\x05\x12\x18\n\x04jobs\x18\t \x03(\x0b\x32\n.JobEntity\x12\x17\n\x06titles\x18\n \x03(\x0b\x32\x07.Entity2a\n\x06Resume\x12\x30\n\x07GetInfo\x12\x0c.TextRequest\x1a\x17.ResumeEntitiesResponse\x12%\n\x06Health\x12\x0c.PingRequest\x1a\r.PongResponseb\x06proto3')
17
+
18
+ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
19
+ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'protos.resume_pb2', globals())
20
+ if _descriptor._USE_C_DESCRIPTORS == False:
21
+
22
+ DESCRIPTOR._options = None
23
+ _PINGREQUEST._serialized_start=23
24
+ _PINGREQUEST._serialized_end=50
25
+ _PONGRESPONSE._serialized_start=52
26
+ _PONGRESPONSE._serialized_end=80
27
+ _TEXTREQUEST._serialized_start=82
28
+ _TEXTREQUEST._serialized_end=109
29
+ _ENTITY._serialized_start=111
30
+ _ENTITY._serialized_end=237
31
+ _JOBENTITY._serialized_start=239
32
+ _JOBENTITY._serialized_end=322
33
+ _RESUMEENTITIESRESPONSE._serialized_start=325
34
+ _RESUMEENTITIESRESPONSE._serialized_end=592
35
+ _RESUME._serialized_start=594
36
+ _RESUME._serialized_end=691
37
+ # @@protoc_insertion_point(module_scope)
protos/resume_pb2.pyi ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.protobuf.internal import containers as _containers
2
+ from google.protobuf import descriptor as _descriptor
3
+ from google.protobuf import message as _message
4
+ from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union
5
+
6
+ DESCRIPTOR: _descriptor.FileDescriptor
7
+
8
+ class Entity(_message.Message):
9
+ __slots__ = ["end", "entity", "level", "origin", "start", "text"]
10
+ END_FIELD_NUMBER: _ClassVar[int]
11
+ ENTITY_FIELD_NUMBER: _ClassVar[int]
12
+ LEVEL_FIELD_NUMBER: _ClassVar[int]
13
+ ORIGIN_FIELD_NUMBER: _ClassVar[int]
14
+ START_FIELD_NUMBER: _ClassVar[int]
15
+ TEXT_FIELD_NUMBER: _ClassVar[int]
16
+ end: int
17
+ entity: str
18
+ level: str
19
+ origin: str
20
+ start: int
21
+ text: str
22
+ def __init__(self, entity: _Optional[str] = ..., start: _Optional[int] = ..., end: _Optional[int] = ..., text: _Optional[str] = ..., origin: _Optional[str] = ..., level: _Optional[str] = ...) -> None: ...
23
+
24
+ class JobEntity(_message.Message):
25
+ __slots__ = ["company", "end", "start"]
26
+ COMPANY_FIELD_NUMBER: _ClassVar[int]
27
+ END_FIELD_NUMBER: _ClassVar[int]
28
+ START_FIELD_NUMBER: _ClassVar[int]
29
+ company: Entity
30
+ end: Entity
31
+ start: Entity
32
+ def __init__(self, start: _Optional[_Union[Entity, _Mapping]] = ..., end: _Optional[_Union[Entity, _Mapping]] = ..., company: _Optional[_Union[Entity, _Mapping]] = ...) -> None: ...
33
+
34
+ class PingRequest(_message.Message):
35
+ __slots__ = ["ping"]
36
+ PING_FIELD_NUMBER: _ClassVar[int]
37
+ ping: str
38
+ def __init__(self, ping: _Optional[str] = ...) -> None: ...
39
+
40
+ class PongResponse(_message.Message):
41
+ __slots__ = ["done"]
42
+ DONE_FIELD_NUMBER: _ClassVar[int]
43
+ done: str
44
+ def __init__(self, done: _Optional[str] = ...) -> None: ...
45
+
46
+ class ResumeEntitiesResponse(_message.Message):
47
+ __slots__ = ["ages", "edus", "emails", "genders", "jobs", "names", "phones", "schools", "titles", "work_years"]
48
+ AGES_FIELD_NUMBER: _ClassVar[int]
49
+ EDUS_FIELD_NUMBER: _ClassVar[int]
50
+ EMAILS_FIELD_NUMBER: _ClassVar[int]
51
+ GENDERS_FIELD_NUMBER: _ClassVar[int]
52
+ JOBS_FIELD_NUMBER: _ClassVar[int]
53
+ NAMES_FIELD_NUMBER: _ClassVar[int]
54
+ PHONES_FIELD_NUMBER: _ClassVar[int]
55
+ SCHOOLS_FIELD_NUMBER: _ClassVar[int]
56
+ TITLES_FIELD_NUMBER: _ClassVar[int]
57
+ WORK_YEARS_FIELD_NUMBER: _ClassVar[int]
58
+ ages: _containers.RepeatedCompositeFieldContainer[Entity]
59
+ edus: _containers.RepeatedCompositeFieldContainer[Entity]
60
+ emails: _containers.RepeatedCompositeFieldContainer[Entity]
61
+ genders: _containers.RepeatedCompositeFieldContainer[Entity]
62
+ jobs: _containers.RepeatedCompositeFieldContainer[JobEntity]
63
+ names: _containers.RepeatedCompositeFieldContainer[Entity]
64
+ phones: _containers.RepeatedCompositeFieldContainer[Entity]
65
+ schools: _containers.RepeatedCompositeFieldContainer[Entity]
66
+ titles: _containers.RepeatedCompositeFieldContainer[Entity]
67
+ work_years: int
68
+ def __init__(self, names: _Optional[_Iterable[_Union[Entity, _Mapping]]] = ..., emails: _Optional[_Iterable[_Union[Entity, _Mapping]]] = ..., phones: _Optional[_Iterable[_Union[Entity, _Mapping]]] = ..., schools: _Optional[_Iterable[_Union[Entity, _Mapping]]] = ..., edus: _Optional[_Iterable[_Union[Entity, _Mapping]]] = ..., ages: _Optional[_Iterable[_Union[Entity, _Mapping]]] = ..., genders: _Optional[_Iterable[_Union[Entity, _Mapping]]] = ..., work_years: _Optional[int] = ..., jobs: _Optional[_Iterable[_Union[JobEntity, _Mapping]]] = ..., titles: _Optional[_Iterable[_Union[Entity, _Mapping]]] = ...) -> None: ...
69
+
70
+ class TextRequest(_message.Message):
71
+ __slots__ = ["text"]
72
+ TEXT_FIELD_NUMBER: _ClassVar[int]
73
+ text: str
74
+ def __init__(self, text: _Optional[str] = ...) -> None: ...
protos/resume_pb2_grpc.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
2
+ """Client and server classes corresponding to protobuf-defined services."""
3
+ import grpc
4
+
5
+ from protos import resume_pb2 as protos_dot_resume__pb2
6
+
7
+
8
+ class ResumeStub(object):
9
+ """Missing associated documentation comment in .proto file."""
10
+
11
+ def __init__(self, channel):
12
+ """Constructor.
13
+
14
+ Args:
15
+ channel: A grpc.Channel.
16
+ """
17
+ self.GetInfo = channel.unary_unary(
18
+ '/Resume/GetInfo',
19
+ request_serializer=protos_dot_resume__pb2.TextRequest.SerializeToString,
20
+ response_deserializer=protos_dot_resume__pb2.ResumeEntitiesResponse.FromString,
21
+ )
22
+ self.Health = channel.unary_unary(
23
+ '/Resume/Health',
24
+ request_serializer=protos_dot_resume__pb2.PingRequest.SerializeToString,
25
+ response_deserializer=protos_dot_resume__pb2.PongResponse.FromString,
26
+ )
27
+
28
+
29
+ class ResumeServicer(object):
30
+ """Missing associated documentation comment in .proto file."""
31
+
32
+ def GetInfo(self, request, context):
33
+ """Get Resume Info
34
+ """
35
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
36
+ context.set_details('Method not implemented!')
37
+ raise NotImplementedError('Method not implemented!')
38
+
39
+ def Health(self, request, context):
40
+ """Missing associated documentation comment in .proto file."""
41
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
42
+ context.set_details('Method not implemented!')
43
+ raise NotImplementedError('Method not implemented!')
44
+
45
+
46
+ def add_ResumeServicer_to_server(servicer, server):
47
+ rpc_method_handlers = {
48
+ 'GetInfo': grpc.unary_unary_rpc_method_handler(
49
+ servicer.GetInfo,
50
+ request_deserializer=protos_dot_resume__pb2.TextRequest.FromString,
51
+ response_serializer=protos_dot_resume__pb2.ResumeEntitiesResponse.SerializeToString,
52
+ ),
53
+ 'Health': grpc.unary_unary_rpc_method_handler(
54
+ servicer.Health,
55
+ request_deserializer=protos_dot_resume__pb2.PingRequest.FromString,
56
+ response_serializer=protos_dot_resume__pb2.PongResponse.SerializeToString,
57
+ ),
58
+ }
59
+ generic_handler = grpc.method_handlers_generic_handler(
60
+ 'Resume', rpc_method_handlers)
61
+ server.add_generic_rpc_handlers((generic_handler,))
62
+
63
+
64
+ # This class is part of an EXPERIMENTAL API.
65
+ class Resume(object):
66
+ """Missing associated documentation comment in .proto file."""
67
+
68
+ @staticmethod
69
+ def GetInfo(request,
70
+ target,
71
+ options=(),
72
+ channel_credentials=None,
73
+ call_credentials=None,
74
+ insecure=False,
75
+ compression=None,
76
+ wait_for_ready=None,
77
+ timeout=None,
78
+ metadata=None):
79
+ return grpc.experimental.unary_unary(request, target, '/Resume/GetInfo',
80
+ protos_dot_resume__pb2.TextRequest.SerializeToString,
81
+ protos_dot_resume__pb2.ResumeEntitiesResponse.FromString,
82
+ options, channel_credentials,
83
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
84
+
85
+ @staticmethod
86
+ def Health(request,
87
+ target,
88
+ options=(),
89
+ channel_credentials=None,
90
+ call_credentials=None,
91
+ insecure=False,
92
+ compression=None,
93
+ wait_for_ready=None,
94
+ timeout=None,
95
+ metadata=None):
96
+ return grpc.experimental.unary_unary(request, target, '/Resume/Health',
97
+ protos_dot_resume__pb2.PingRequest.SerializeToString,
98
+ protos_dot_resume__pb2.PongResponse.FromString,
99
+ options, channel_credentials,
100
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
requirements.grpc.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ grpcio==1.54.2
2
+ grpcio-tools==1.54.2
server.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import grpc
3
+ from concurrent import futures
4
+ import protos.resume_pb2
5
+ import protos.resume_pb2_grpc
6
+ from huggingface_hub import login
7
+ import os
8
+ from predictor import Predictor
9
+ from transformers import pipeline
10
+ from datetime import date
11
+
12
# Hugging Face access token. Indexing os.environ directly means a missing
# HF_Token variable raises KeyError at import time.
HF_TOKEN = os.environ["HF_Token"]
# Port the gRPC server listens on; kept as a string because serve()
# concatenates it onto the bind address.
PORT = os.environ.get("PORT", "50051")
# Log in at import time so the pipelines created in Resume.__init__ with
# use_auth_token=True can use the stored credentials.
login(HF_TOKEN)
15
+
16
class Resume(protos.resume_pb2_grpc.ResumeServicer):
    """gRPC servicer exposing the resume ``Predictor`` through ``GetInfo``/``Health``.

    The two token-classification pipelines are loaded in ``__init__``;
    ``self.done`` flips to ``True`` once loading has finished so ``Health``
    can report readiness.
    """

    # (response field, key in the predictor result) for every repeated Entity
    # field that is filled without a ``level``.
    _ENTITY_FIELDS = (
        ("names", "name"),
        ("ages", "age"),
        ("genders", "gender"),
        ("emails", "email"),
        ("phones", "phone"),
        ("edus", "edus"),
        ("titles", "titles"),
    )

    def __init__(self):
        self.done = False
        self.logger = logging.getLogger(__name__)
        self.predictor = Predictor(
            pipelines={
                "name": pipeline("nerpipe", model="minskiter/resume-token-classification-name-0708", trust_remote_code=True, use_auth_token=True),
                "common": pipeline("nerpipe", model="minskiter/resume-token-classification", trust_remote_code=True, use_auth_token=True),
            },
            paths=[
                "data/W020230619818476939351.xls",
                "data/W020230619818476975218.xls",
            ],
            # NOTE(review): "today" is pinned to 2023-04-01. The predictor
            # returns this value for the date string "至今", so open-ended
            # jobs are measured up to this fixed date -- confirm whether
            # date.today() is intended for a deployed server.
            today=date(2023, 4, 1),
        )
        self.done = True

    @staticmethod
    def _to_entity(item, with_level=False):
        """Convert one predictor result dict into a protobuf ``Entity``.

        Args:
            item: dict with ``entity``, ``start`` and ``end`` keys plus
                optional ``text``, ``origin``, ``word`` and ``level``.
            with_level: copy ``item['level']`` into the message when True.

        ``origin`` falls back to ``word`` (and then to an empty string):
        the predictor's gender entries carry the matched text under
        ``word`` rather than ``origin``, which previously raised KeyError.
        """
        fields = {
            "entity": item["entity"],
            "start": item["start"],
            "end": item["end"],
            # None leaves the optional proto field unset.
            "text": item.get("text", None),
            "origin": item.get("origin", item.get("word", "")),
        }
        if with_level:
            fields["level"] = item.get("level", None)
        return protos.resume_pb2.Entity(**fields)

    def Health(self, request, context):
        """Answer ``PING`` with ``OK`` when ready, ``Pending`` while loading."""
        self.logger.info("Health check")
        if request.ping != "PING":
            return protos.resume_pb2.PongResponse(done="PING request is not valid")
        status = "OK" if self.done else "Pending"
        return protos.resume_pb2.PongResponse(done=status)

    def GetInfo(self, request, context):
        """Run the predictor on ``request.text`` and pack the result.

        Returns:
            A ``ResumeEntitiesResponse`` holding every extracted entity
            list, the job triples and the total work years.
        """
        entities = self.predictor(request.text)
        # The payload is personal data extracted from a resume: log it through
        # the instance logger at DEBUG level rather than the root logger at INFO.
        self.logger.debug("entities: %s", entities)
        res = protos.resume_pb2.ResumeEntitiesResponse()
        for field_name, key in self._ENTITY_FIELDS:
            target = getattr(res, field_name)
            for item in entities[key]:
                target.append(self._to_entity(item))
        # Schools are the only entities that carry a level.
        for school in entities['schools']:
            res.schools.append(self._to_entity(school, with_level=True))
        # Each job is a (company, start, end) triple of entity dicts.
        for company, start, end in entities['jobs']:
            res.jobs.append(protos.resume_pb2.JobEntity(
                start=self._to_entity(start),
                end=self._to_entity(end),
                company=self._to_entity(company),
            ))
        res.work_years = entities['work_time']
        return res
139
+
140
+
141
def serve(port="50051"):
    """Start the gRPC server for ``Resume`` and block until it terminates.

    Args:
        port: TCP port, as a string, bound without TLS on all interfaces
            (``[::]``).
    """
    log = logging.getLogger(__name__)
    # Up to ten worker threads handle requests.
    worker_pool = futures.ThreadPoolExecutor(max_workers=10)
    grpc_server = grpc.server(worker_pool)
    # Building the servicer loads the models, so this call can take a while.
    servicer = Resume()
    protos.resume_pb2_grpc.add_ResumeServicer_to_server(servicer, grpc_server)
    bind_address = '[::]:' + port
    grpc_server.add_insecure_port(bind_address)
    log.info("Starting server on port %s", port)
    grpc_server.start()
    log.info("Running..")
    grpc_server.wait_for_termination()
150
+
151
# Script entry point: configure INFO-level logging with timestamps, then
# serve on the port taken from the PORT environment variable (default 50051).
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    serve(PORT)