Zwounds committed on
Commit
dc70758
·
verified ·
1 Parent(s): 58c3f5a

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. demo.py +19 -45
  2. requirements.txt +1 -3
demo.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
- import torch
4
  import logging
5
 
6
  # Setup logging
@@ -36,56 +35,31 @@ SYSTEM_INSTRUCTION = """Convert natural language queries into boolean search que
36
  - Use OR with parentheses for alternatives"""
37
 
38
  def load_model():
39
- """Load the model and set up tokenizer."""
40
  logger.info("Loading model...")
41
- model = AutoModelForCausalLM.from_pretrained(
42
- "Zwounds/boolean-search-model",
43
- device_map="cpu",
44
- torch_dtype=torch.float32
45
  )
46
- tokenizer = AutoTokenizer.from_pretrained("Zwounds/boolean-search-model")
47
- tokenizer.use_default_system_prompt = False
48
  logger.info("Model loaded successfully")
49
-
50
- return model, tokenizer
51
-
52
- def extract_response(output: str) -> str:
53
- """Extract the response part from the output."""
54
- start_marker = "<|start_header_id|>assistant<|end_header_id|>"
55
- end_marker = "<|eot_id|>"
56
-
57
- start_idx = output.find(start_marker)
58
- if start_idx != -1:
59
- start_idx += len(start_marker)
60
- end_idx = output.find(end_marker, start_idx)
61
- if end_idx != -1:
62
- return output[start_idx:end_idx].strip()
63
-
64
- return output.strip()
65
 
66
- def get_boolean_query(query: str, model=None, tokenizer=None) -> str:
67
  """Generate boolean query from natural language."""
68
- # Format the conversation
69
- conversation = [
70
- {"role": "system", "content": SYSTEM_INSTRUCTION},
71
- {"role": "user", "content": query}
72
- ]
73
-
74
- # Format into chat template
75
- prompt = tokenizer.apply_chat_template(conversation, tokenize=False)
76
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
77
 
78
- # Generate response
79
  outputs = model.generate(
80
- **inputs,
81
- max_new_tokens=64,
82
- do_sample=False,
83
- use_cache=True,
84
- pad_token_id=tokenizer.pad_token_id,
85
- eos_token_id=tokenizer.eos_token_id
86
  )
87
 
88
- return extract_response(tokenizer.batch_decode(outputs)[0])
 
 
89
 
90
  # Example queries demonstrating various cases
91
  examples = [
@@ -137,7 +111,7 @@ examples = [
137
 
138
  # Load model globally
139
  logger.info("Initializing model...")
140
- model, tokenizer = load_model()
141
 
142
  # Create Gradio interface
143
  title = "Natural Language to Boolean Search"
@@ -153,7 +127,7 @@ description = """Convert natural language queries into boolean search expression
153
  """
154
 
155
  demo = gr.Interface(
156
- fn=lambda x: get_boolean_query(x, model, tokenizer),
157
  inputs=[
158
  gr.Textbox(
159
  label="Enter your natural language query",
 
1
  import gradio as gr
2
+ from vllm import LLM
 
3
  import logging
4
 
5
  # Setup logging
 
35
  - Use OR with parentheses for alternatives"""
36
 
37
  def load_model():
38
+ """Load the model using vLLM."""
39
  logger.info("Loading model...")
40
+ model = LLM(
41
+ model="Zwounds/boolean-search-model",
42
+ tensor_parallel_size=1 # For CPU
 
43
  )
 
 
44
  logger.info("Model loaded successfully")
45
+ return model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ def get_boolean_query(query: str, model=None) -> str:
48
  """Generate boolean query from natural language."""
49
+ # Format the conversation with proper markers
50
+ prompt = f"""<|start_header_id|>system<|end_header_id|>{SYSTEM_INSTRUCTION}<|start_header_id|>user<|end_header_id|>{query}<|start_header_id|>assistant<|end_header_id|>"""
 
 
 
 
 
 
 
51
 
52
+ # Generate with vllm
53
  outputs = model.generate(
54
+ prompt,
55
+ max_tokens=64,
56
+ temperature=0.0, # Deterministic
57
+ stop_tokens=["<|eot_id|>"]
 
 
58
  )
59
 
60
+ # Extract response
61
+ response = outputs[0].outputs[0].text.strip()
62
+ return response
63
 
64
  # Example queries demonstrating various cases
65
  examples = [
 
111
 
112
  # Load model globally
113
  logger.info("Initializing model...")
114
+ model = load_model()
115
 
116
  # Create Gradio interface
117
  title = "Natural Language to Boolean Search"
 
127
  """
128
 
129
  demo = gr.Interface(
130
+ fn=lambda x: get_boolean_query(x, model),
131
  inputs=[
132
  gr.Textbox(
133
  label="Enter your natural language query",
requirements.txt CHANGED
@@ -1,5 +1,3 @@
1
  gradio>=4.0.0
2
- transformers>=4.0.0
3
- torch>=1.0.0
4
  huggingface-hub>=0.19.4
5
- accelerate>=0.26.0
 
1
  gradio>=4.0.0
2
+ vllm>=0.3.0
 
3
  huggingface-hub>=0.19.4