Zwounds committed
Commit e34d0c9 · verified · 1 Parent(s): 84dad28

Upload folder using huggingface_hub

Files changed (2)
  1. demo.py +28 -15
  2. requirements.txt +3 -4
demo.py CHANGED
@@ -1,7 +1,8 @@
 import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
 import logging
+import os

 # Setup logging
 logging.basicConfig(level=logging.INFO)
@@ -11,12 +12,18 @@ def load_model():
     """Load the GGUF model from Hugging Face."""
     logger.info("Loading GGUF model...")

-    # Load model directly from HF Hub
-    model = AutoModelForCausalLM.from_pretrained(
-        "Zwounds/boolean-search-model",
-        model_file="boolean-model.gguf",  # Specify the exact filename
-        model_type="llama",
-        gpu_layers=0  # Use CPU only for HF Spaces compatibility
+    # Download the model from HF Hub
+    model_path = hf_hub_download(
+        repo_id="Zwounds/boolean-search-model",
+        filename="boolean-model.gguf",
+        repo_type="model"
+    )
+
+    # Load the model with llama-cpp-python
+    model = Llama(
+        model_path=model_path,
+        n_ctx=2048,  # Context window
+        n_gpu_layers=0  # Use CPU only for HF Spaces compatibility
     )

     return model
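Taken on its own, the new loading path is: fetch the GGUF file from the Hub, then hand the local path to llama-cpp-python. The removed call appears to pass ctransformers-style arguments (model_file, model_type, gpu_layers) to transformers' AutoModelForCausalLM, which does not accept them, so switching to llama_cpp avoids that mismatch. A minimal standalone sketch of the added code (parameter values taken from the diff; where the file lands on disk is just hf_hub_download's default cache behavior):

```python
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download boolean-model.gguf from the Hub; huggingface_hub caches it locally
model_path = hf_hub_download(
    repo_id="Zwounds/boolean-search-model",
    filename="boolean-model.gguf",
    repo_type="model",
)

# Load the GGUF weights on CPU only, matching the Space's hardware
llm = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=0)
```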
 
@@ -79,17 +86,23 @@ Example conversions showing proper quoting:
 def get_boolean_query(query):
     """Generate boolean query from natural language."""
     prompt = format_prompt(query)
-

     # Generate response
-    response = model(prompt, max_new_tokens=64, temperature=0)
+    response = model(
+        prompt,
+        max_tokens=64,
+        temperature=0,
+        stop=["<|end_of_text|>", "###"]  # Stop at these tokens
+    )
+
+    # Extract generated text
+    text = response["choices"][0]["text"].strip()

-    # Extract response section
-    if "### Response:" in response:
-        response = response.split("### Response:")[-1].strip()
+    # Extract response section if present
+    if "### Response:" in text:
+        text = text.split("### Response:")[-1].strip()

-    cleaned_response = response.replace("<|end_of_text|>", "").strip()
-    return cleaned_response
+    return text

 # Load model globally
 logger.info("Initializing model...")
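The generation side now goes through llama_cpp's completion API, which returns a dict rather than a plain string, hence the response["choices"][0]["text"] extraction. A sketch of the same flow outside Gradio (the prompt literal below is only a placeholder; in demo.py it comes from format_prompt(query), which this diff does not touch):

```python
# `llm` is the Llama instance built in the loading sketch above (load_model() in demo.py)
prompt = "Convert to a boolean search query: remote work and productivity\n### Response:\n"

response = llm(
    prompt,
    max_tokens=64,
    temperature=0,
    stop=["<|end_of_text|>", "###"],  # same stop tokens as in the commit
)

# llama-cpp-python returns an OpenAI-style completion dict
text = response["choices"][0]["text"].strip()
if "### Response:" in text:
    text = text.split("### Response:")[-1].strip()
print(text)
```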
requirements.txt CHANGED
@@ -1,4 +1,3 @@
-torch==2.0.1
-transformers==4.37.2
-sentencepiece==0.1.99
-gradio>=4.0.0
+gradio>=4.0.0
+llama-cpp-python==0.2.56
+huggingface-hub>=0.19.4
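The requirements swap follows from the code change: torch, transformers, and sentencepiece are no longer imported by demo.py, and the two new entries cover model download and GGUF inference. A quick sanity check that an installed environment matches the new pins (a hypothetical check, not part of the commit):

```python
import gradio
import huggingface_hub
import llama_cpp

# Should print versions satisfying gradio>=4.0.0, llama-cpp-python==0.2.56,
# and huggingface-hub>=0.19.4 from the updated requirements.txt
print("gradio", gradio.__version__)
print("llama-cpp-python", llama_cpp.__version__)
print("huggingface-hub", huggingface_hub.__version__)
```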