import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
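# Note: device_map="auto" in load_model below relies on the `accelerate`
# package, so the environment needs streamlit, transformers, torch, and
# accelerate installed.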
@st.cache_resource
def load_model(model_size: str = "32B"):
    """
    Load the model and tokenizer based on the size selection.
    Note: these are the Qwen2.5-Coder instruct checkpoints on the
    HuggingFace Hub; adjust the IDs if you use different variants.
    """
    model_map = {
        "0.5B": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
        "1.5B": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
        "3B": "Qwen/Qwen2.5-Coder-3B-Instruct",
        "7B": "Qwen/Qwen2.5-Coder-7B-Instruct",
        "14B": "Qwen/Qwen2.5-Coder-14B-Instruct",
        "32B": "Qwen/Qwen2.5-Coder-32B-Instruct",
    }
    model_id = model_map.get(model_size, "Qwen/Qwen2.5-Coder-7B-Instruct")  # default to 7B if size not found
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    return model, tokenizer
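# Note: st.cache_resource keeps one (model, tokenizer) pair per model_size
# for the lifetime of the process, so switching sizes accumulates models in
# (GPU) memory. If that becomes a problem, the cached entries can be dropped
# with load_model.clear() (standard Streamlit cache API).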
def process_query(query: str, model_size: str = "7B") -> str:
    """
    Process a single query and return the generated response.
    """
    if not query:
        return ""
    try:
        model, tokenizer = load_model(model_size)
        # Prepare the input
        inputs = tokenizer(query, return_tensors="pt").to(model.device)
        # Generate the response (sampling must be enabled for
        # temperature/top_p to take effect)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=(tokenizer.pad_token_id
                              if tokenizer.pad_token_id is not None
                              else tokenizer.eos_token_id),
            )
        # Decode only the newly generated tokens, dropping the prompt
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    except Exception as e:
        return f"Error: {str(e)}"
def main():
    st.title("Qwen2.5-Coder Interface")
    # Model size selection
    model_size = st.radio(
        "Select Model Size:",
        options=["0.5B", "1.5B", "3B", "7B", "14B", "32B"],
        index=5,  # default to 32B (last option)
    )
    # Input text area
    query = st.text_area(
        "Input",
        placeholder="Enter your query here...",
        height=150,
    )
    # Generate button
    if st.button("Generate"):
        if query:
            with st.spinner("Generating response..."):
                response = process_query(query, model_size)
            st.text_area("Output", value=response, height=300)
        else:
            st.warning("Please enter a query first.")
if __name__ == "__main__":
    main()
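# Run locally with: streamlit run app.py (the filename is assumed; use
# whatever this file is saved as).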