Zwounds committed on
Commit 84dad28 · verified · 1 Parent(s): e635ed4

Upload folder using huggingface_hub

Files changed (1):
  1. demo.py +18 -28
demo.py CHANGED
@@ -8,21 +8,18 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 def load_model():
-    """Load fine-tuned model without quantization for CPU compatibility."""
-    logger.info("Loading model...")
+    """Load the GGUF model from Hugging Face."""
+    logger.info("Loading GGUF model...")
 
-    # Use explicit AutoTokenizer instead of LlamaTokenizer
-    tokenizer = AutoTokenizer.from_pretrained(
-        "Zwounds/boolean-search-model"
-    )
-
-    # Load model in the most compatible way for Spaces
+    # Load model directly from HF Hub
     model = AutoModelForCausalLM.from_pretrained(
-        "Zwounds/boolean-search-model",
-        low_cpu_mem_usage=True,
-        torch_dtype=torch.float32 # Use standard floating point for CPU
+        "Zwounds/boolean-search-model",
+        model_file="boolean-model.gguf", # Specify the exact filename
+        model_type="llama",
+        gpu_layers=0 # Use CPU only for HF Spaces compatibility
     )
-    return model, tokenizer
+
+    return model
 
 def format_prompt(query):
     """Format query with instruction prompt."""
@@ -82,28 +79,21 @@ Example conversions showing proper quoting:
 def get_boolean_query(query):
     """Generate boolean query from natural language."""
     prompt = format_prompt(query)
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    # Tokenize and generate response
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=32,
-        do_sample=False,
-        use_cache=True,
-        eos_token_id=tokenizer.eos_token_id
-    )
+
+
+    # Generate response
+    response = model(prompt, max_new_tokens=64, temperature=0)
+
+    # Extract response section
+    if "### Response:" in response:
+        response = response.split("### Response:")[-1].strip()
 
-    # Extract response section and clean output
-    full_response = tokenizer.decode(outputs[0])
-    response = full_response.split("### Response:")[-1].strip()
-    # Remove end of text token if present
     cleaned_response = response.replace("<|end_of_text|>", "").strip()
     return cleaned_response
 
 # Load model globally
 logger.info("Initializing model...")
-model, tokenizer = load_model()
+model = load_model()
 logger.info("Model loaded successfully")
 
 # Example queries using more natural language
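The three new keyword arguments (`model_file`, `model_type`, `gpu_layers`) belong to ctransformers' `AutoModelForCausalLM.from_pretrained` rather than the transformers class of the same name; the import switch sits outside the hunks shown, so that mapping is an assumption. A minimal sketch of the new loading path under that assumption:

```python
# Sketch of the new GGUF loading path. Assumes AutoModelForCausalLM is
# now imported from ctransformers (the import line is not in the hunks
# above); these keywords do not exist on the transformers class.
from ctransformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "Zwounds/boolean-search-model",
    model_file="boolean-model.gguf",  # exact GGUF file inside the repo
    model_type="llama",               # architecture hint for the GGUF loader
    gpu_layers=0,                     # 0 = keep all layers on CPU
)
```

`gpu_layers=0` keeps inference entirely on the CPU, which matches the free-tier Spaces hardware the inline comment refers to.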
 
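With ctransformers, calling the model instance on a prompt string runs generation and returns the decoded text directly, which is why the tokenizer plumbing drops out of `get_boolean_query`. A short usage sketch, assuming the `model` and `format_prompt` defined in demo.py are in scope (the query string is a made-up example, not from the commit):

```python
# Illustrative end-to-end call of the simplified pipeline.
prompt = format_prompt("articles about cats or dogs but not birds")
response = model(prompt, max_new_tokens=64, temperature=0)

# The model echoes the instruction prompt, so keep only the text after
# the response marker and strip the end-of-text token.
if "### Response:" in response:
    response = response.split("### Response:")[-1].strip()
print(response.replace("<|end_of_text|>", "").strip())
```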