Spaces:

mliutdchra
/

hra_qa_bot_v1

Sleeping

App Files Files Community

Mengmeng Liu committed on Jul 14, 2023

Commit

a5b8812

1 Parent(s): bbea195

initial commit

Browse files

Files changed (12) hide show

app.py +48 -0
load_model.py +13 -0
models/deepset/tinyroberta-squad/config.json +30 -0
models/deepset/tinyroberta-squad/merges.txt +0 -0
models/deepset/tinyroberta-squad/pytorch_model.bin +3 -0
models/deepset/tinyroberta-squad/special_tokens_map.json +15 -0
models/deepset/tinyroberta-squad/tokenizer.json +0 -0
models/deepset/tinyroberta-squad/tokenizer_config.json +15 -0
models/deepset/tinyroberta-squad/training_args.bin +3 -0
models/deepset/tinyroberta-squad/vocab.json +0 -0
test.py +30 -0
test_text.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from io import StringIO
+from transformers import AutoTokenizer, AutoModelForQuestionAnswering
+from transformers import Trainer
+import torch
+st.title('HRA Document QA')
+file_name = st.file_uploader("Upload the document that you want to ask questions")
+if file_name is not None:
+    text = file_name.getvalue()
+    stringio = StringIO(file_name.getvalue().decode("utf-8"))
+    context = stringio.read()
+    question = st.chat_input("Ask some questions about this document")
+    with st.chat_message("user"):
+        st.write("Hello 👋 I am an HRA chatbot~")
+        st.write("Here's the document that you uploaded:")
+        st.write(context)
+        if question:
+            st.write("You asked a question:")
+            st.write(question)
+            tokenizer = AutoTokenizer.from_pretrained("./models/deepset/tinyroberta-squad")
+            model = AutoModelForQuestionAnswering.from_pretrained("./models/deepset/tinyroberta-squad")
+            inputs = tokenizer(question, context, return_tensors="pt")
+            with torch.no_grad():
+                outputs = model(**inputs)
+            answer_start_index = outputs.start_logits.argmax()
+            answer_end_index = outputs.end_logits.argmax()
+            predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
+            st.write("Answer:")
+            st.write(tokenizer.decode(predict_answer_tokens, skip_special_tokens=True))

load_model.py ADDED Viewed

	@@ -0,0 +1,13 @@

+# Load model directly
+from transformers import AutoTokenizer, AutoModelForQuestionAnswering
+from transformers import Trainer
+import torch
+tokenizer = AutoTokenizer.from_pretrained("deepset/tinyroberta-squad2")
+model = AutoModelForQuestionAnswering.from_pretrained("deepset/tinyroberta-squad2")
+trainer = Trainer(model=model, tokenizer=tokenizer)
+trainer.save_model("./models/deepset/tinyroberta-squad")
+print("sucessfully saved model")

models/deepset/tinyroberta-squad/config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_name_or_path": "deepset/tinyroberta-squad2",
+  "architectures": [
+    "RobertaForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "language": "english",
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "name": "Roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.2",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

models/deepset/tinyroberta-squad/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

models/deepset/tinyroberta-squad/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3216bcdc78b3c899a482179b996f48da35fee3a654aa55422597315e84f180f3
+size 326155437

models/deepset/tinyroberta-squad/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

models/deepset/tinyroberta-squad/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/deepset/tinyroberta-squad/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "add_prefix_space": false,
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

models/deepset/tinyroberta-squad/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:569ee3bdcc22004d6ba63b65a3d195d9f3033a90b386bae47e1edbf619acf483
+size 3899

models/deepset/tinyroberta-squad/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

test.py ADDED Viewed

	@@ -0,0 +1,30 @@

+# Load model directly
+from transformers import AutoTokenizer, AutoModelForQuestionAnswering
+from transformers import Trainer
+import torch
+tokenizer = AutoTokenizer.from_pretrained("./models/deepset/tinyroberta-squad")
+model = AutoModelForQuestionAnswering.from_pretrained("./models/deepset/tinyroberta-squad")
+question, text = "Where did robert graduate?", "Robert A. Kauffman is president of Healthcare Risk Advisors (HRA), leading the expansion of the company’s self-insurance and risk transfer solutions for large medical practices, hospitals, and health systems. Rob previously served as senior vice president, secretary, and general counsel of FOJP Service Corporation (“FOJP”) and Hospitals Insurance Company (“HIC”).Rob has built a distinguished career in insurance and risk management. Prior to his roles at FOJP and HIC, he was senior vice president, secretary, general counsel, and chief compliance officer at Harleysville Insurance. He was also a partner at Reed Smith, an international law firm specializing in complex litigation, strategic transactions, and regulatory matters.In addition to his private sector experience, Rob served with distinction as an Assistant U.S. Attorney in the Criminal Division of the United States Attorney’s Office for the Eastern District of Pennsylvania.Mr. Kauffman earned his Bachelor of Arts and Juris Doctor degrees from the University of Pennsylvania."
+inputs = tokenizer(question, text, return_tensors="pt")
+with torch.no_grad():
+    outputs = model(**inputs)
+answer_start_index = outputs.start_logits.argmax()
+answer_end_index = outputs.end_logits.argmax()
+predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
+print(tokenizer.decode(predict_answer_tokens, skip_special_tokens=True))
+# target is "nice puppet"
+# target_start_index = torch.tensor([14])
+# target_end_index = torch.tensor([15])
+# outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
+# loss = outputs.loss
+# round(loss.item(), 2)

test_text.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+Robert A. Kauffman is president of Healthcare Risk Advisors (HRA), leading the expansion of the company’s self-insurance and risk transfer solutions for large medical practices, hospitals, and health systems. Rob previously served as senior vice president, secretary, and general counsel of FOJP Service Corporation (“FOJP”) and Hospitals Insurance Company (“HIC”).
+Rob has built a distinguished career in insurance and risk management. Prior to his roles at FOJP and HIC, he was senior vice president, secretary, general counsel, and chief compliance officer at Harleysville Insurance. He was also a partner at Reed Smith, an international law firm specializing in complex litigation, strategic transactions, and regulatory matters.
+In addition to his private sector experience, Rob served with distinction as an Assistant U.S. Attorney in the Criminal Division of the United States Attorney’s Office for the Eastern District of Pennsylvania.
+Mr. Kauffman earned his Bachelor of Arts and Juris Doctor degrees from the University of Pennsylvania.