|
import streamlit as st |
|
import pandas as pd |
|
from transformers import pipeline, AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM |
|
from peft import PeftModel, PeftConfig |
|
import gradio as gr |
|
|
|
|
|
|
|
|
|
# --- Model setup (runs at import time; first run downloads weights from the Hub) ---

# Decoder-only (causal) generation needs LEFT padding so that newly generated
# tokens follow the prompt instead of following pad tokens.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", trust_remote_code=True, padding_side="left")
# Mistral's tokenizer ships without a dedicated pad token; reuse EOS for padding.
tokenizer.pad_token = tokenizer.eos_token
# (padding_side is already "left" from from_pretrained above; no need to set it again.)

# Load the base causal-LM weights, then wrap them with the K23_MiniMed PEFT adapter.
# NOTE(review): trust_remote_code=True executes code fetched from the Hub repo —
# keep it only for repositories you trust.
peft_model = MistralForCausalLM.from_pretrained("pseudolab/K23_MiniMed", trust_remote_code=True)
peft_model = PeftModel.from_pretrained(peft_model, "pseudolab/K23_MiniMed")

# Shared text-generation pipeline used by the Gradio handler below.
text_generator = pipeline('text-generation', model=peft_model, tokenizer=tokenizer)
|
|
|
|
|
def prepare_context(data):
    """Flatten a patient-records DataFrame into a plain-text prompt context.

    Parameters
    ----------
    data : pd.DataFrame
        Tabular patient data, as loaded from the uploaded CSV.

    Returns
    -------
    str
        The frame rendered as whitespace-separated text with neither the
        index column nor the header row, suitable for feeding straight
        into the text-generation pipeline.
    """
    # The original bound this to a variable misleadingly named `input_ids`;
    # it is a plain string (not token ids), so just return it directly.
    return data.to_string(index=False, header=False)
|
|
|
def fn(uploaded_file) -> str:
    """Gradio handler: analyze an uploaded EHR CSV with the fine-tuned model.

    Parameters
    ----------
    uploaded_file : str or file-like
        The CSV file (path or handle) supplied by the Gradio "file" input.

    Returns
    -------
    str
        The model's generation conditioned on the flattened CSV contents,
        concatenated with its generation for the fixed analyst prompt.
    """
    data = pd.read_csv(uploaded_file)
    ret = ""

    # First generation: conditioned directly on the flattened patient data.
    context = prepare_context(data)
    ret += text_generator(context)[0]['generated_text']

    # Second generation: a fixed system-style instruction describing the
    # analyst persona. (Fixed typo "do nont know" -> "do not know"; removed
    # an unused tokenizer.encode(...) call and an always-true `if prompt:`
    # guard on this non-empty literal.)
    prompt = "You are an Electronic Health Records analyst with nursing school training. Please analyze patient data that you are provided here. Give an organized, step-by-step, formatted health records analysis. You will always be truthful and if you do not know the answer say you do not know."
    ret += text_generator(prompt)[0]['generated_text']

    return ret
|
|
|
|
|
# Gradio UI: a single file-upload input wired to fn, returning plain text.
demo = gr.Interface(fn=fn, inputs="file", outputs="text", theme="pseudolab/huggingface-korea-theme")


if __name__ == "__main__":
    # show_api=False hides the auto-generated "Use via API" documentation panel.
    demo.launch(show_api=False)
|
|