# Hosting-page metadata captured together with the file (not part of the program):
#   Atharva Prashant Pawar
#   v1
#   478270a
#   raw
#   history blame
#   1.85 kB
import streamlit as st
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import transformers
# Page heading shown at the top of the Streamlit app.
st.title("Mistral Model Integration")

# Free-form prompt box; its current contents drive the rest of the script.
instruction = st.text_area(label="Enter your prompt:")
# Function to interact with Mistral Model
# def mistral_model(prompt, token_limit):
# # Your model loading and inference code here (from the code you provided)
# # ...
# return responses
def mistral_model(prompt, token_limit):
    """Generate text for *prompt* with the adapter-tuned Mistral model.

    Loads the base model and tokenizer, applies the PEFT adapter on top,
    wraps the prompt in ``[INST]...[/INST]`` and samples a completion.

    Args:
        prompt: Instruction text to send to the model.
        token_limit: Forwarded to ``generate`` as ``max_length``. Per the
            transformers documentation this bound covers the prompt tokens
            plus the newly generated ones.

    Returns:
        The list of strings returned by ``tokenizer.batch_decode`` for the
        generated ids.
    """
    model_name = "bn22/Mistral-7B-Instruct-v0.1-sharded"
    adapters_name = "atharvapawar/flaskCodemistral-7b-mj-finetuned"

    # Use the GPU when one is present; otherwise fall back to the CPU
    # instead of failing on the hard-coded "cuda" device.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # NOTE(review): the tokenizer, base weights and adapter are loaded again
    # on every call; consider caching them if repeated calls are expected.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Attach the fine-tuned adapter to the base model.
    model = PeftModel.from_pretrained(model, adapters_name)
    model.to(device)

    text = "[INST]" + prompt + "[/INST]"
    # The input tensors must live on the same device as the model, otherwise
    # generate() raises a device-mismatch error.
    encoded = tokenizer(
        text, return_tensors="pt", add_special_tokens=False
    ).to(device)

    generated_ids = model.generate(
        **encoded, max_length=token_limit, do_sample=True
    )
    return tokenizer.batch_decode(generated_ids)
# Nothing below is rendered until the prompt box contains text.
if instruction:
    # Length budget handed to the model call.
    token_limit = st.slider(
        "Select token limit",
        min_value=10,
        max_value=500,
        value=250,
    )

    # The button reports True only on the rerun triggered by its click.
    generate_clicked = st.button("Generate Response")
    if generate_clicked:
        outputs = mistral_model(instruction, token_limit)
        st.write("Generated Responses:")
        for generated_text in outputs:
            st.write(generated_text)
# NOTE: there is no `st.run()` entry point to call from this script; the app
# is started from the command line with `streamlit run <this_file>.py`.