lyangas committed on
Commit
1bb21bd
1 Parent(s): 8e8bb53

first demo code added

app.py ADDED
@@ -0,0 +1,23 @@
+ print('INFO: import modules')
+ import gradio as gr
+ import pickle
+ from required_classes import *
+
+
+ print('INFO: loading model')
+ try:
+     with open('pretrain_logistic_regression_model.pkl', 'rb') as f:
+         model = pickle.load(f)
+     model.batch_size = 1
+     print('INFO: model loaded')
+ except Exception as e:
+     print(f"ERROR: loading model failed with: {str(e)}")
+
+ def classify(text):
+     pred_classes = model.predict([text])
+     output_text = ' '.join(pred_classes)
+     return output_text
+
+ print('INFO: starting gradio interface')
+ iface = gr.Interface(fn=classify, inputs="text", outputs="text")
+ iface.launch()
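
Once `app.py` is running, Gradio serves the interface on its default local port. A minimal smoke-test sketch against the JSON endpoint that Gradio 3.x exposes for every interface (the URL, port, and `/api/predict` route are the library's defaults, assumed here rather than taken from this commit):

```python
import requests

# Gradio 3.x serves a JSON API for each interface at /api/predict.
resp = requests.post(
    "http://127.0.0.1:7860/api/predict",
    json={"data": ["example input text"]},  # one entry per input component
)
resp.raise_for_status()
print(resp.json()["data"][0])  # the space-joined predicted class labels
```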
pretrain_logistic_regression_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b16160b18246480dcac36128ff59da49c4410472a98bc4955a4841655f7ed50
+ size 434671977
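
This entry is a Git LFS pointer, not the model itself: the ~435 MB pickle lives in LFS storage, so the repository must be cloned with git-lfs installed (or the file fetched with `git lfs pull`) before `app.py` can load it.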
required_classes.py ADDED
@@ -0,0 +1,79 @@
+ import numpy as np
+ import torch
+ from tqdm import tqdm
+ from transformers import AutoTokenizer, BertForSequenceClassification
+ from typing import List
+
+
+ class BertEmbedder:
+     def __init__(self, model_path: str, cut_head: bool = False):
+         """
+         cut_head = True if the model has a classification head
+         """
+         self.embedder = BertForSequenceClassification.from_pretrained(model_path)
+         self.max_length = self.embedder.config.max_position_embeddings
+         self.tokenizer = AutoTokenizer.from_pretrained(model_path, max_length=self.max_length)
+
+         if cut_head:
+             # keep only the bare BertModel so its outputs expose pooler_output
+             self.embedder = self.embedder.bert
+
+         self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
+         self.embedder.to(self.device)
+
+     def __call__(self, text: str):
+         encoded_input = self.tokenizer(text,
+                                        return_tensors='pt',
+                                        max_length=self.max_length,
+                                        padding=True,
+                                        truncation=True).to(self.device)
+         model_output = self.embedder(**encoded_input)
+         text_embed = model_output.pooler_output[0].cpu()
+         return text_embed
+
+     def batch_predict(self, texts: List[str]):
+         encoded_input = self.tokenizer(texts,
+                                        return_tensors='pt',
+                                        max_length=self.max_length,
+                                        padding=True,
+                                        truncation=True).to(self.device)
+         model_output = self.embedder(**encoded_input)
+         texts_embeds = model_output.pooler_output.cpu()
+         return texts_embeds
+
+ class PredictModel:
+     def __init__(self, embedder, classifier, batch_size=8):
+         self.batch_size = batch_size
+         self.embedder = embedder
+         self.classifier = classifier
+
+     def _texts2vecs(self, texts, log=False):
+         embeds = []
+         # use at least one batch so inputs shorter than batch_size do not crash np.array_split
+         batches_texts = np.array_split(texts, max(1, len(texts) // self.batch_size))
+         if log:
+             iterator = tqdm(batches_texts)
+         else:
+             iterator = batches_texts
+         for batch_texts in iterator:
+             batch_texts = batch_texts.tolist()
+             embeds += self.embedder.batch_predict(batch_texts).tolist()
+         embeds = np.array(embeds)
+         return embeds
+
+     def fit(self, texts: List[str], labels: List[str], log: bool = False):
+         if log:
+             print('Start text2vec transform')
+         embeds = self._texts2vecs(texts, log)
+         if log:
+             print('Start classifier fitting')
+         self.classifier.fit(embeds, labels)
+
+     def predict(self, texts: List[str], log: bool = False):
+         if log:
+             print('Start text2vec transform')
+         embeds = self._texts2vecs(texts, log)
+         if log:
+             print('Start classifier prediction')
+         prediction = self.classifier.predict(embeds)
+         return prediction
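
The pickled `PredictModel` bundles a `BertEmbedder` with a fitted scikit-learn classifier. A hypothetical sketch of how a compatible pickle could be produced; the base checkpoint name, training texts, and labels below are placeholders, since none of them are part of this commit:

```python
import pickle

from sklearn.linear_model import LogisticRegression

from required_classes import BertEmbedder, PredictModel

# Assumed base checkpoint; cut_head=True keeps the bare BertModel so that
# pooler_output is available for embedding.
embedder = BertEmbedder('bert-base-uncased', cut_head=True)
classifier = LogisticRegression(max_iter=1000)
model = PredictModel(embedder, classifier, batch_size=8)

texts = ['first placeholder text', 'second placeholder text']  # placeholder data
labels = ['class_a', 'class_b']
model.fit(texts, labels)

with open('pretrain_logistic_regression_model.pkl', 'wb') as f:
    pickle.dump(model, f)
```

Unpickling later only requires that `required_classes` be importable, which is why `app.py` runs `from required_classes import *` before calling `pickle.load`.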
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ numpy==1.22.4
+ torch==2.0.1
+ scikit-learn==1.2.2
+ transformers==4.29.2
+ gradio==3.11