jeanpoll committed on
Commit
61e5adb
·
1 Parent(s): c564c57

update with new model

Browse files
email_parser/_models_signatures.py CHANGED
@@ -81,15 +81,17 @@ def f_retrieve_entities_for_line(df_ner, start=0, end=1e12):
81
  df = df_ner.query(f"""(start>= {start} and end <= {end}) or (start<={start} and end>={end})""")
82
  return df
83
 
 
84
  embedder_model = SentenceTransformer("distiluse-base-multilingual-cased-v1")
85
 
 
86
  def f_create_embedding_inv_dist_feature(text1, text2):
87
- """ Computing distance between two texts based on their embedding
88
- provided by the SentenceTransformer above"""
89
- embedding_merci = embedder_model.encode(text1)
90
- embedding_line = embedder_model.encode(text2)
91
- dist = distance.cosine(embedding_merci, embedding_line)
92
- return 1 / (dist + 0.01)
93
 
94
 
95
  def f_create_email_lines_features(text, df_ner=None, position_offset=0):
 
81
  df = df_ner.query(f"""(start>= {start} and end <= {end}) or (start<={start} and end>={end})""")
82
  return df
83
 
84
+
85
  embedder_model = SentenceTransformer("distiluse-base-multilingual-cased-v1")
86
 
87
+
88
  def f_create_embedding_inv_dist_feature(text1, text2):
89
+ """ Computing distance between two texts based on their embedding
90
+ provided by the SentenceTransformer above"""
91
+ embedding_merci = embedder_model.encode(text1)
92
+ embedding_line = embedder_model.encode(text2)
93
+ dist = distance.cosine(embedding_merci, embedding_line)
94
+ return min(5, 1 / (dist + 0.0001))
95
 
96
 
97
  def f_create_email_lines_features(text, df_ner=None, position_offset=0):
email_parser/config.ini CHANGED
@@ -3,5 +3,5 @@ ner_model_fr = Jean-Baptiste/camembert-ner-with-dates
3
  ner_model_en = Jean-Baptiste/roberta-large-ner-english
4
  device = -1
5
  default_lang = en
6
- name_model_signature = model_signature_lstm_v10
7
  path_models = models
 
3
  ner_model_en = Jean-Baptiste/roberta-large-ner-english
4
  device = -1
5
  default_lang = en
6
+ name_model_signature = model_signature_lstm_v16
7
  path_models = models
email_parser/models/model_signature_lstm_v10/variables/variables.data-00000-of-00001 DELETED
Binary file (116 kB)
 
email_parser/models/model_signature_lstm_v10/variables/variables.index DELETED
Binary file (3.48 kB)
 
email_parser/models/{model_signature_lstm_v10 → model_signature_lstm_v16}/keras_metadata.pb RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1df1ebcda9b9f2ca0855f67117d5c8b7db0d89c46c346273a536f2eec13c5665
3
- size 22060
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73a9ab3d0262141747b5aafa348c57e1a5813e03cf1e5c291162b37cdf846143
3
+ size 22142
email_parser/models/{model_signature_lstm_v10 → model_signature_lstm_v16}/minmax_scaler.p RENAMED
File without changes
email_parser/models/{model_signature_lstm_v10 → model_signature_lstm_v16}/saved_model.pb RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a28bac82659a6bc1cf949dc04d01a09db681cab64c9388ff1267d53fa3d11fb2
3
- size 5272723
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70ff3069a27b192313bbdd9685f106fc46ef2082eab6479697f575bad555eb4f
3
+ size 5296390
email_parser/models/{model_signature_lstm_v10 → model_signature_lstm_v16}/standard_scaler.p RENAMED
File without changes
email_parser/models/model_signature_lstm_v16/variables/variables.data-00000-of-00001 ADDED
Binary file (117 kB). View file
 
email_parser/models/model_signature_lstm_v16/variables/variables.index ADDED
Binary file (3.51 kB). View file