Upload folder using huggingface_hub
- demo.py +28 -15
- requirements.txt +3 -4
demo.py
CHANGED
@@ -1,7 +1,8 @@
 import gradio as gr
-import
-from
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
 import logging
+import os
 
 # Setup logging
 logging.basicConfig(level=logging.INFO)
@@ -11,12 +12,18 @@ def load_model():
     """Load the GGUF model from Hugging Face."""
     logger.info("Loading GGUF model...")
 
-    #
-
-        "Zwounds/boolean-search-model",
-
-
-
+    # Download the model from HF Hub
+    model_path = hf_hub_download(
+        repo_id="Zwounds/boolean-search-model",
+        filename="boolean-model.gguf",
+        repo_type="model"
+    )
+
+    # Load the model with llama-cpp-python
+    model = Llama(
+        model_path=model_path,
+        n_ctx=2048,  # Context window
+        n_gpu_layers=0  # Use CPU only for HF Spaces compatibility
     )
 
     return model
@@ -79,17 +86,23 @@ Example conversions showing proper quoting:
 def get_boolean_query(query):
     """Generate boolean query from natural language."""
     prompt = format_prompt(query)
-
 
     # Generate response
-    response = model(
+    response = model(
+        prompt,
+        max_tokens=64,
+        temperature=0,
+        stop=["<|end_of_text|>", "###"]  # Stop at these tokens
+    )
+
+    # Extract generated text
+    text = response["choices"][0]["text"].strip()
 
-    # Extract response section
-    if "### Response:" in
-
+    # Extract response section if present
+    if "### Response:" in text:
+        text = text.split("### Response:")[-1].strip()
 
-
-    return cleaned_response
+    return text
 
 # Load model globally
 logger.info("Initializing model...")
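For reference, the load-and-generate pattern introduced above can be exercised end to end with a short standalone script. This is only a sketch, not the Space's actual demo.py: the prompt template and the gr.Interface wiring are assumptions (the diff shows load_model and get_boolean_query but not format_prompt or the UI code), while the hf_hub_download and Llama calls mirror the added lines.

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the GGUF file once and load it on CPU, mirroring the diff above.
model_path = hf_hub_download(
    repo_id="Zwounds/boolean-search-model",
    filename="boolean-model.gguf",
    repo_type="model",
)
llm = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=0)

def to_boolean_query(query: str) -> str:
    # Hypothetical prompt template; the real format_prompt() is not shown in the diff.
    prompt = (
        "### Instruction:\nConvert this natural language query into a boolean search query.\n\n"
        f"### Input:\n{query}\n\n### Response:\n"
    )
    response = llm(
        prompt,
        max_tokens=64,
        temperature=0,
        stop=["<|end_of_text|>", "###"],  # Same stop tokens as the diff
    )
    text = response["choices"][0]["text"].strip()
    # Keep only the part after the response marker, if the model echoes it.
    if "### Response:" in text:
        text = text.split("### Response:")[-1].strip()
    return text

# Assumed Gradio wiring; the actual interface code is outside these hunks.
demo = gr.Interface(
    fn=to_boolean_query,
    inputs=gr.Textbox(label="Natural language query"),
    outputs=gr.Textbox(label="Boolean query"),
)

if __name__ == "__main__":
    demo.launch()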
requirements.txt
CHANGED
@@ -1,4 +1,3 @@
-
-
-
-gradio>=4.0.0
+gradio>=4.0.0
+llama-cpp-python==0.2.56
+huggingface-hub>=0.19.4
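A quick way to confirm that a Space build (or a local virtualenv) actually picked up these pins is a small version check. This is only a convenience sketch; it assumes nothing beyond the three packages listed above and the standard-library importlib.metadata module.

from importlib.metadata import PackageNotFoundError, version

# Print the installed version of each pinned dependency, or flag it if missing.
for package in ("gradio", "llama-cpp-python", "huggingface-hub"):
    try:
        print(f"{package}=={version(package)}")
    except PackageNotFoundError:
        print(f"{package} is NOT installed")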