import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

@st.cache_resource
def load_model(model_size: str = "32B"):
    """
    Load the model and tokenizer for the selected size.
    @st.cache_resource keeps each loaded model in memory across reruns.
    """
    # Hugging Face repo IDs for the Qwen2.5-Coder instruct checkpoints
    model_map = {
        "0.5B": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
        "1.5B": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
        "3B": "Qwen/Qwen2.5-Coder-3B-Instruct",
        "7B": "Qwen/Qwen2.5-Coder-7B-Instruct",
        "14B": "Qwen/Qwen2.5-Coder-14B-Instruct",
        "32B": "Qwen/Qwen2.5-Coder-32B-Instruct",
    }
    
    model_id = model_map.get(model_size, model_map["32B"])  # fall back to the 32B default
    
    # Qwen2.5 is natively supported by transformers, so trust_remote_code
    # is not required
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    return model, tokenizer
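
# Note: @st.cache_resource keeps every size the user loads resident in
# memory for the lifetime of the process. If GPU memory is tight, clear
# the cache (e.g. load_model.clear()) before switching to another size.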

def process_query(query: str, model_size: str = "7B") -> str:
    """
    Process a single query and return the generated response.
    """
    if not query:
        return ""
    
    try:
        model, tokenizer = load_model(model_size)
        
        # Tokenize the raw query and move it to the model's device
        inputs = tokenizer(query, return_tensors="pt").to(model.device)
        
        # Generate a response; do_sample=True is required for temperature
        # and top_p to take effect (generation is greedy by default)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id
            )
        
        # Decode only the newly generated tokens, so the echoed prompt is
        # skipped without fragile string replacement
        prompt_length = inputs["input_ids"].shape[1]
        return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
        
    except Exception as e:
        return f"Error: {str(e)}"

def main():
    st.title("Qwen2.5-Coder Interface")
    
    # Model size selection
    model_size = st.radio(
        "Select Model Size:",
        options=["0.5B", "1.5B", "3B", "7B", "14B", "32B"],
        index=5  # Default to 32B (last option)
    )
    
    # Input text area
    query = st.text_area(
        "Input",
        placeholder="Enter your query here...",
        height=150
    )
    
    # Generate button
    if st.button("Generate"):
        if query:
            with st.spinner("Generating response..."):
                response = process_query(query, model_size)
                st.text_area("Output", value=response, height=300)
        else:
            st.warning("Please enter a query first.")

if __name__ == "__main__":
    main()
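
# Usage (assuming this file is saved as app.py):
#   streamlit run app.py
# The 32B checkpoint needs tens of GB of GPU memory in float16; the
# smaller sizes (0.5B, 1.5B) are safer for local testing.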