Zwounds committed
Commit e34d0c9 · verified · 1 Parent(s): 84dad28

Upload folder using huggingface_hub

Files changed (2)
  1. demo.py +28 -15
  2. requirements.txt +3 -4
demo.py CHANGED
@@ -1,7 +1,8 @@
 import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
 import logging
+import os

 # Setup logging
 logging.basicConfig(level=logging.INFO)
@@ -11,12 +12,18 @@ def load_model():
     """Load the GGUF model from Hugging Face."""
     logger.info("Loading GGUF model...")

-    # Load model directly from HF Hub
-    model = AutoModelForCausalLM.from_pretrained(
-        "Zwounds/boolean-search-model",
-        model_file="boolean-model.gguf",  # Specify the exact filename
-        model_type="llama",
-        gpu_layers=0  # Use CPU only for HF Spaces compatibility
+    # Download the model from HF Hub
+    model_path = hf_hub_download(
+        repo_id="Zwounds/boolean-search-model",
+        filename="boolean-model.gguf",
+        repo_type="model"
+    )
+
+    # Load the model with llama-cpp-python
+    model = Llama(
+        model_path=model_path,
+        n_ctx=2048,  # Context window
+        n_gpu_layers=0  # Use CPU only for HF Spaces compatibility
     )

     return model
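Taken on its own, the new loading path is: fetch the GGUF file from the Hub, then hand the local path to llama-cpp-python. The removed call appears to pass ctransformers-style arguments (model_file, model_type, gpu_layers) to transformers' AutoModelForCausalLM, which does not accept them, so switching to llama_cpp avoids that mismatch. A minimal standalone sketch of the added code (parameter values taken from the diff; where the file lands on disk is just hf_hub_download's default cache behavior):

```python
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download boolean-model.gguf from the Hub; huggingface_hub caches it locally
model_path = hf_hub_download(
    repo_id="Zwounds/boolean-search-model",
    filename="boolean-model.gguf",
    repo_type="model",
)

# Load the GGUF weights on CPU only, matching the Space's hardware
llm = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=0)
```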
 
@@ -79,17 +86,23 @@ Example conversions showing proper quoting:
 def get_boolean_query(query):
     """Generate boolean query from natural language."""
     prompt = format_prompt(query)
-

     # Generate response
-    response = model(prompt, max_new_tokens=64, temperature=0)
+    response = model(
+        prompt,
+        max_tokens=64,
+        temperature=0,
+        stop=["<|end_of_text|>", "###"]  # Stop at these tokens
+    )
+
+    # Extract generated text
+    text = response["choices"][0]["text"].strip()

-    # Extract response section
-    if "### Response:" in response:
-        response = response.split("### Response:")[-1].strip()
+    # Extract response section if present
+    if "### Response:" in text:
+        text = text.split("### Response:")[-1].strip()

-    cleaned_response = response.replace("<|end_of_text|>", "").strip()
-    return cleaned_response
+    return text

 # Load model globally
 logger.info("Initializing model...")
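The generation side now goes through llama_cpp's completion API, which returns a dict rather than a plain string, hence the response["choices"][0]["text"] extraction. A sketch of the same flow outside Gradio (the prompt literal below is only a placeholder; in demo.py it comes from format_prompt(query), which this diff does not touch):

```python
# `llm` is the Llama instance built in the loading sketch above (load_model() in demo.py)
prompt = "Convert to a boolean search query: remote work and productivity\n### Response:\n"

response = llm(
    prompt,
    max_tokens=64,
    temperature=0,
    stop=["<|end_of_text|>", "###"],  # same stop tokens as in the commit
)

# llama-cpp-python returns an OpenAI-style completion dict
text = response["choices"][0]["text"].strip()
if "### Response:" in text:
    text = text.split("### Response:")[-1].strip()
print(text)
```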
requirements.txt CHANGED
@@ -1,4 +1,3 @@
-torch==2.0.1
-transformers==4.37.2
-sentencepiece==0.1.99
-gradio>=4.0.0
+gradio>=4.0.0
+llama-cpp-python==0.2.56
+huggingface-hub>=0.19.4
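The requirements swap follows from the code change: torch, transformers, and sentencepiece are no longer imported by demo.py, and the two new entries cover model download and GGUF inference. A quick sanity check that an installed environment matches the new pins (a hypothetical check, not part of the commit):

```python
import gradio
import huggingface_hub
import llama_cpp

# Should print versions satisfying gradio>=4.0.0, llama-cpp-python==0.2.56,
# and huggingface-hub>=0.19.4 from the updated requirements.txt
print("gradio", gradio.__version__)
print("llama-cpp-python", llama_cpp.__version__)
print("huggingface-hub", huggingface_hub.__version__)
```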