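"""Streamlit app: Legal Query Chatbot.

Serves a LoRA fine-tuned causal language model (loaded via PEFT) that
answers questions about Indian traffic laws.

Run with: streamlit run <path-to-this-file>
"""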
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, TextStreamer
import streamlit as st
# Initialize Streamlit UI
st.title("Legal Query Chatbot")
st.write("Ask questions related to Indian traffic laws and get AI-generated responses.")
# Load the LoRA fine-tuned model and tokenizer once and cache them,
# so Streamlit does not reload the weights on every script rerun
model_path = "lora_model"
load_in_4bit = True

@st.cache_resource
def load_model_and_tokenizer():
    model = AutoPeftModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,  # fp16 for the non-quantized layers
        load_in_4bit=load_in_4bit,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model.eval()  # enable inference mode
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer()
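# Note: 4-bit loading relies on the bitsandbytes library and typically
# requires a CUDA GPU; set load_in_4bit = False to load in fp16 instead.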
# Streamlit input for user prompt
user_input = st.text_input("Enter your legal query:", "What are the penalties for breaking a red light in India?")
if user_input:
    # Prepare the prompt in the chat format the model was fine-tuned on
    messages = [{"role": "user", "content": user_input}]
    # Tokenize input with the model's chat template
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda" if torch.cuda.is_available() else "cpu")
    # Streamlit progress indicator
    with st.spinner("Generating response..."):
        # TextStreamer writes tokens to the server console as they are
        # generated; the full response is decoded below for the web UI
        text_streamer = TextStreamer(tokenizer, skip_prompt=True)
        # Generate response; do_sample=True so temperature/min_p take effect
        output = model.generate(
            input_ids=inputs,
            streamer=text_streamer,
            max_new_tokens=128,
            use_cache=True,
            do_sample=True,
            temperature=1.5,
            min_p=0.1,
        )
    # Show only the newly generated tokens in the app
    response = tokenizer.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True)
    st.write(response)
    st.success("Generation Complete!")