Spaces:

atharvapawar
/

api-Codemistral-7b-mj-finetuned

Runtime error

api-Codemistral-7b-mj-finetuned / app.py

Atharva Prashant Pawar

478270a over 1 year ago

1.85 kB

	import streamlit as st
	import torch
	from peft import PeftModel
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import transformers

	# Page heading rendered at the top of the Streamlit app.
	st.title("Mistral Model Integration")

	# Multi-line input widget; `instruction` holds the text the user typed
	# and is tested for truthiness further down before any inference is offered.
	instruction = st.text_area(label="Enter your prompt:")

	# Function to interact with the Mistral model.
	# Earlier placeholder stub, kept for reference (the real definition follows):
	# def mistral_model(prompt, token_limit):
	#     # Your model loading and inference code here (from the code you provided)
	#     # ...
	#     return responses

	def mistral_model(prompt, token_limit):
	    """Generate text for ``prompt`` with the adapter-tuned Mistral model.

	    Loads the base model and tokenizer, applies the PEFT adapter, wraps the
	    prompt in ``[INST]...[/INST]`` markers and samples a completion.

	    Args:
	        prompt: User-supplied instruction text.
	        token_limit: Forwarded to ``generate`` as ``max_length``.
	            NOTE(review): ``max_length`` counts prompt tokens as well as
	            generated ones -- confirm this is the intended meaning.

	    Returns:
	        The list produced by ``tokenizer.batch_decode`` on the generated ids
	        (decoded with default options, so special tokens are not stripped).
	    """
	    model_name = "bn22/Mistral-7B-Instruct-v0.1-sharded"
	    adapters_name = "atharvapawar/flaskCodemistral-7b-mj-finetuned"
	    # Pick the GPU only when one is actually present; a hard-coded "cuda"
	    # makes `.to(device)` raise on CPU-only hosts.
	    device = "cuda" if torch.cuda.is_available() else "cpu"

	    # NOTE(review): the model is re-loaded on every call, which is slow;
	    # consider caching the loaded model across Streamlit reruns.
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    base_model = AutoModelForCausalLM.from_pretrained(model_name)

	    # Attach the fine-tuned adapter weights on top of the base model.
	    model = PeftModel.from_pretrained(base_model, adapters_name)
	    model.to(device)

	    text = "[INST]" + prompt + "[/INST]"
	    encoded = tokenizer(text, return_tensors="pt", add_special_tokens=False)
	    # The input tensors must live on the same device as the model,
	    # otherwise `generate` fails with a device-mismatch error.
	    encoded = encoded.to(device)

	    generated_ids = model.generate(**encoded, max_length=token_limit, do_sample=True)
	    return tokenizer.batch_decode(generated_ids)





	# Only offer generation controls once the user has typed a prompt.
	if instruction:
	    # Slider value is handed to the model call as its token limit.
	    token_limit = st.slider(
	        label="Select token limit",
	        min_value=10,
	        max_value=500,
	        value=250,
	    )

	    # The button reports True only on the rerun triggered by its click.
	    generate_clicked = st.button("Generate Response")
	    if generate_clicked:
	        outputs = mistral_model(instruction, token_limit)
	        st.write("Generated Responses:")
	        # One write call per decoded sequence returned by the model.
	        for generated_text in outputs:
	            st.write(generated_text)

	# NOTE: no `if __name__ == "__main__":` entry point is needed here, and
	# Streamlit provides no `st.run()` function. Launch the app from the shell:
	#     streamlit run app.py