Zwounds's picture
Upload folder using huggingface_hub
e34d0c9 verified
raw
history blame
6.88 kB
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import logging
import os
# Configure the root logger at INFO level and create this module's logger,
# which the functions below use for progress messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def load_model(
    repo_id="Zwounds/boolean-search-model",
    filename="boolean-model.gguf",
    n_ctx=2048,
    n_gpu_layers=0,
):
    """Download a GGUF model from the Hugging Face Hub and load it.

    Called with no arguments this loads the default boolean-search model
    on CPU, exactly as before; the parameters only exist so a different
    checkpoint or runtime configuration can be used without editing the
    function body.

    Args:
        repo_id: Hub repository that hosts the GGUF file.
        filename: Name of the GGUF file inside that repository.
        n_ctx: Context window size passed to ``Llama``.
        n_gpu_layers: Number of layers to offload to the GPU. The default
            of 0 keeps inference on the CPU for HF Spaces compatibility.

    Returns:
        The ``Llama`` instance built from the downloaded file.
    """
    logger.info("Loading GGUF model...")

    # Download the model file from the HF Hub and get its local path.
    model_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type="model",
    )

    # Load the model with llama-cpp-python.
    return Llama(
        model_path=model_path,
        n_ctx=n_ctx,
        n_gpu_layers=n_gpu_layers,
    )
def format_prompt(query):
    """Wrap a natural-language query in the instruction prompt.

    The prompt has an instruction section (term-removal and quoting rules
    plus worked example conversions), an input section holding ``query``
    verbatim, and an empty response section for the model to complete.

    Args:
        query: The user's natural-language search request.

    Returns:
        The full prompt string, ending with the ``### Response:`` header
        followed by a newline.
    """
    # NOTE: the arrows in the examples are the single character U+2192.
    # They must stay real Unicode; mis-decoded bytes would put garbage
    # characters into every prompt sent to the model.
    return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Convert this natural language query into a boolean search query by following these rules:
1. FIRST: Remove all meta-terms from this list (they should NEVER appear in output):
- articles, papers, research, studies
- examining, investigating, analyzing
- findings, documents, literature
- publications, journals, reviews
Example: "Research examining X" → just "X"
2. SECOND: Remove generic implied terms that don't add search value:
- Remove words like "practices," "techniques," "methods," "approaches," "strategies"
- Remove words like "impacts," "effects," "influences," "role," "applications"
- For example: "sustainable agriculture practices" → "sustainable agriculture"
- For example: "teaching methodologies" → "teaching"
- For example: "leadership styles" → "leadership"
3. THEN: Format the remaining terms:
CRITICAL QUOTING RULES:
- Multi-word phrases MUST ALWAYS be in quotes - NO EXCEPTIONS
- Examples of correct quoting:
- Wrong: machine learning AND deep learning
- Right: "machine learning" AND "deep learning"
- Wrong: natural language processing
- Right: "natural language processing"
- Single words must NEVER have quotes (e.g., science, research, learning)
- Use AND to connect required concepts
- Use OR with parentheses for alternatives (e.g., ("soil health" OR biodiversity))
Example conversions showing proper quoting:
"Research on machine learning for natural language processing"
→ "machine learning" AND "natural language processing"
"Studies examining anxiety depression stress in workplace"
→ (anxiety OR depression OR stress) AND workplace
"Articles about deep learning impact on computer vision"
→ "deep learning" AND "computer vision"
"Research on sustainable agriculture practices and their impact on soil health or biodiversity"
→ "sustainable agriculture" AND ("soil health" OR biodiversity)
"Articles about effective teaching methods for second language acquisition"
→ teaching AND "second language acquisition"
### Input:
{query}
### Response:
"""
def get_boolean_query(query):
    """Generate a boolean search query from a natural-language request.

    Args:
        query: The user's natural-language query. ``None``, an empty
            string, or a whitespace-only string is treated as "no input".

    Returns:
        The model's completion with surrounding whitespace removed, or an
        empty string when there is no input to convert.
    """
    # Nothing to convert: skip the (slow) model call instead of prompting
    # the model with an empty input section.
    if not query or not query.strip():
        return ""

    prompt = format_prompt(query)

    # Greedy decoding (temperature 0) with a short token budget; generation
    # stops at the end-of-text marker or at the start of a new "###" section.
    response = model(
        prompt,
        max_tokens=64,
        temperature=0,
        stop=["<|end_of_text|>", "###"],
    )

    # Extract the generated text of the first choice.
    text = response["choices"][0]["text"].strip()

    # Defensive: if the completion still echoes the response header, keep
    # only what follows its last occurrence.
    if "### Response:" in text:
        text = text.split("### Response:")[-1].strip()

    return text
# Load the model once at import time and keep it in the module-level name
# `model`, which get_boolean_query calls for every request.
logger.info("Initializing model...")
model = load_model()
logger.info("Model loaded successfully")
# Example queries using more natural language. Each entry is a one-element
# list holding a single query string; the list is passed as `examples=` to
# the Gradio interface below. The comment above each entry names the prompt
# rule that example is meant to exercise.
examples = [
    # Testing removal of meta-terms
    ["Find research papers examining the long-term effects of meditation on brain structure"],
    # Testing removal of generic implied terms (practices, techniques, methods)
    ["Articles about deep learning techniques for natural language processing tasks"],
    # Testing removal of impact/effect terms
    ["Studies on the impact of early childhood nutrition on cognitive development"],
    # Testing handling of technology applications
    ["Information on virtual reality applications in architectural design and urban planning"],
    # Testing proper OR relationship with parentheses
    ["Research on electric vehicles adoption in urban environments or rural communities"],
    # Testing proper quoting of multi-word concepts only
    ["Articles on biodiversity loss in coral reefs and rainforest ecosystems"],
    # Testing removal of strategy/approach terms
    ["Studies about different teaching approaches for children with learning disabilities"],
    # Testing complex OR relationships
    ["Research examining social media influence on political polarization or public discourse"],
    # Testing implied terms in specific industries
    ["Articles about implementation strategies for blockchain in supply chain management or financial services"],
    # Testing qualifiers that don't add search value
    ["Research on effective leadership styles in multicultural organizations"],
    # Testing removal of multiple implied terms
    ["Studies on the effects of microplastic pollution techniques on marine ecosystem health"],
    # Testing domain-specific implied terms
    ["Articles about successful cybersecurity protection methods for critical infrastructure"],
    # Testing generalized vs specific concepts
    ["Research papers on quantum computing algorithms for cryptography or optimization problems"],
    # Testing implied terms in outcome descriptions
    ["Studies examining the relationship between sleep quality and academic performance outcomes"],
    # Testing complex nesting of concepts
    ["Articles about renewable energy integration challenges in developing countries or island nations"]
]
# Create the Gradio interface with metadata for deployment: one text input
# for the natural-language query, one text output for the generated boolean
# query, with get_boolean_query as the handler and the example queries
# defined above attached.
title = "Boolean Search Query Generator"
description = "Convert natural language queries into boolean search expressions. The model will remove search-related terms (like 'articles', 'research', etc.), handle generic implied terms (like 'practices', 'methods'), and format the core concepts using proper boolean syntax."
demo = gr.Interface(
    fn=get_boolean_query,
    inputs=[
        gr.Textbox(
            label="Enter your natural language query",
            placeholder="e.g., I'm looking for information about climate change and renewable energy"
        )
    ],
    outputs=gr.Textbox(label="Boolean Search Query"),
    title=title,
    description=description,
    examples=examples,
    theme=gr.themes.Soft()
)
# Start the Gradio app only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo.launch()