Spaces:

chidamnat2002
/

intent_classifier

Running

App Files Community

Chidam Gopal committed on Oct 21, 2024

Commit

624162d

unverified ·

1 Parent(s): 788c760

included state and city in NER

Browse files

Files changed (1) hide show

infer_location.py +5 -15

infer_location.py CHANGED Viewed

@@ -22,20 +22,6 @@ class LocationFinder:
         # Load the ONNX model
         self.ort_session = ort.InferenceSession(model_path)
-        # State abbreviations list for post-processing
-        self.state_abbr = {
-            "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY",
-            "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND",
-            "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"
-        }
-    # # Helper function to correct misclassified state abbreviations
-    # def correct_state_abbreviation(self, token, predicted_label):
-    #     if token.upper() in self.state_abbr and predicted_label == "I-CITY":
-    #         return "I-STATE"
-    #     return predicted_label
     def find_location(self, sequence, verbose=False):
         inputs = self.tokenizer(sequence,
                                 return_tensors="np",  # ONNX requires inputs in NumPy format
@@ -80,6 +66,11 @@ class LocationFinder:
         state_entities = []
         org_entities = []
         city_state_entities = []
         for i, (token, predicted_id, prob) in enumerate(zip(tokens, predicted_ids[0], predicted_probs[0])):
             if prob > threshold:
                 if token in ["[CLS]", "[SEP]", "[PAD]"]:
@@ -115,7 +106,6 @@ class LocationFinder:
         return {
             'city': city_res,
             'state': state_res,
-            'organization': org_res,
         }
 if __name__ == '__main__':

         # Load the ONNX model
         self.ort_session = ort.InferenceSession(model_path)
     def find_location(self, sequence, verbose=False):
         inputs = self.tokenizer(sequence,
                                 return_tensors="np",  # ONNX requires inputs in NumPy format
         state_entities = []
         org_entities = []
         city_state_entities = []
+        city_entities = []
+        state_entities = []
+        city_state_entities = []
+        org_entities = []
         for i, (token, predicted_id, prob) in enumerate(zip(tokens, predicted_ids[0], predicted_probs[0])):
             if prob > threshold:
                 if token in ["[CLS]", "[SEP]", "[PAD]"]:
         return {
             'city': city_res,
             'state': state_res,
         }
 if __name__ == '__main__':