"""HRA Document QA — Streamlit app.

Upload a UTF-8 text document, then ask questions about it in a chat box.
Answers are extracted with a local tinyroberta model fine-tuned for
extractive question answering (SQuAD-style start/end span prediction).
"""

import streamlit as st
import pandas as pd
import numpy as np
from io import StringIO
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers import Trainer
import torch

# Local checkpoint directory for the extractive-QA model.
MODEL_DIR = "./models/deepset/tinyroberta-squad"


@st.cache_resource
def _load_qa_model(model_dir: str = MODEL_DIR):
    """Load the tokenizer and QA model once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the model would be reloaded from disk for each
    question the user asks.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    model = AutoModelForQuestionAnswering.from_pretrained(model_dir)
    return tokenizer, model


st.title('HRA Document QA')

file_name = st.file_uploader("Upload the document that you want to ask questions")
if file_name is not None:
    # The upload arrives as bytes; decode directly instead of the original
    # getvalue() -> StringIO -> read() round trip (the intermediate `text`
    # variable was never used).
    context = file_name.getvalue().decode("utf-8")

    question = st.chat_input("Ask some questions about this document")
    with st.chat_message("user"):
        st.write("Hello 👋 I am an HRA chatbot~")
        st.write("Here's the document that you uploaded:")
        st.write(context)

    if question:
        st.write("You asked a question:")
        st.write(question)

        tokenizer, model = _load_qa_model()

        # Tokenize the (question, context) pair. Truncate only the context
        # ("only_second") so long documents don't overflow the model's max
        # sequence length and crash the forward pass, while the question
        # itself is kept intact.
        inputs = tokenizer(
            question,
            context,
            return_tensors="pt",
            truncation="only_second",
        )
        with torch.no_grad():
            outputs = model(**inputs)

        # Extractive QA: pick the most likely start and end token positions.
        answer_start_index = outputs.start_logits.argmax()
        answer_end_index = outputs.end_logits.argmax()
        if answer_end_index < answer_start_index:
            # Degenerate prediction (end before start) would produce an
            # empty slice; fall back to a single-token answer at the start.
            answer_end_index = answer_start_index
        predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]

        st.write("Answer:")
        st.write(tokenizer.decode(predict_answer_tokens, skip_special_tokens=True))