# Hugging Face Space page header captured with this file: "Spaces: Runtime error"
import logging
import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Setup logging: INFO level on the root logger, plus a module-level logger
# used by the functions below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def load_model():
    """Download the GGUF model file from the Hugging Face Hub and load it.

    Returns:
        The ``Llama`` instance built from the downloaded file, configured
        with a 2048-token context window and no GPU layers.
    """
    logger.info("Loading GGUF model...")

    # Download the model from HF Hub; the result is passed on as the
    # local model path.
    model_path = hf_hub_download(
        repo_id="Zwounds/boolean-search-model",
        filename="boolean-model.gguf",
        repo_type="model",
    )

    # Load the model with llama-cpp-python
    return Llama(
        model_path=model_path,
        n_ctx=2048,  # Context window
        n_gpu_layers=0,  # Use CPU only for HF Spaces compatibility
    )
def format_prompt(query: str) -> str:
    """Wrap ``query`` in the instruction prompt sent to the model.

    The prompt consists of a fixed instruction section (rules and worked
    examples for turning natural language into a boolean search query),
    followed by ``### Input:`` with the query and a trailing
    ``### Response:`` marker for the model to complete.

    Args:
        query: The natural-language search request, inserted verbatim.

    Returns:
        The full prompt text, ending with ``"### Response:\\n"``.
    """
    # NOTE(review): blank lines and list indentation inside the prompt may
    # have been lost when this file was captured - confirm against the
    # prompt format the model was fine-tuned on.
    return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Convert this natural language query into a boolean search query by following these rules:
1. FIRST: Remove all meta-terms from this list (they should NEVER appear in output):
- articles, papers, research, studies
- examining, investigating, analyzing
- findings, documents, literature
- publications, journals, reviews
Example: "Research examining X" → just "X"
2. SECOND: Remove generic implied terms that don't add search value:
- Remove words like "practices," "techniques," "methods," "approaches," "strategies"
- Remove words like "impacts," "effects," "influences," "role," "applications"
- For example: "sustainable agriculture practices" → "sustainable agriculture"
- For example: "teaching methodologies" → "teaching"
- For example: "leadership styles" → "leadership"
3. THEN: Format the remaining terms:
CRITICAL QUOTING RULES:
- Multi-word phrases MUST ALWAYS be in quotes - NO EXCEPTIONS
- Examples of correct quoting:
- Wrong: machine learning AND deep learning
- Right: "machine learning" AND "deep learning"
- Wrong: natural language processing
- Right: "natural language processing"
- Single words must NEVER have quotes (e.g., science, research, learning)
- Use AND to connect required concepts
- Use OR with parentheses for alternatives (e.g., ("soil health" OR biodiversity))
Example conversions showing proper quoting:
"Research on machine learning for natural language processing"
→ "machine learning" AND "natural language processing"
"Studies examining anxiety depression stress in workplace"
→ (anxiety OR depression OR stress) AND workplace
"Articles about deep learning impact on computer vision"
→ "deep learning" AND "computer vision"
"Research on sustainable agriculture practices and their impact on soil health or biodiversity"
→ "sustainable agriculture" AND ("soil health" OR biodiversity)
"Articles about effective teaching methods for second language acquisition"
→ teaching AND "second language acquisition"
### Input:
{query}
### Response:
"""
def get_boolean_query(query):
    """Generate a boolean search query from a natural-language query.

    Args:
        query: Free-text description of what the user is searching for.

    Returns:
        The generated text with surrounding whitespace removed, or an empty
        string when ``query`` is empty or contains only whitespace.
    """
    # Nothing to convert: skip inference rather than asking the model to
    # complete an empty "### Input:" section.
    if not query or not query.strip():
        return ""

    prompt = format_prompt(query)

    # Generate response
    response = model(
        prompt,
        max_tokens=64,
        temperature=0,
        stop=["<|end_of_text|>", "###"],  # Stop at these tokens
    )

    # Extract generated text
    text = response["choices"][0]["text"].strip()

    # Defensive: if the output still contains the response marker, keep only
    # what follows its last occurrence.
    if "### Response:" in text:
        text = text.split("### Response:")[-1].strip()
    return text
# Load model globally: done once at import time so get_boolean_query can
# reuse the same module-level instance on every call.
logger.info("Initializing model...")
model = load_model()
logger.info("Model loaded successfully")
# Example queries using more natural language.
# Each entry is a one-element list: one value for the single input textbox.
examples = [
    # Testing removal of meta-terms
    ["Find research papers examining the long-term effects of meditation on brain structure"],
    # Testing removal of generic implied terms (practices, techniques, methods)
    ["Articles about deep learning techniques for natural language processing tasks"],
    # Testing removal of impact/effect terms
    ["Studies on the impact of early childhood nutrition on cognitive development"],
    # Testing handling of technology applications
    ["Information on virtual reality applications in architectural design and urban planning"],
    # Testing proper OR relationship with parentheses
    ["Research on electric vehicles adoption in urban environments or rural communities"],
    # Testing proper quoting of multi-word concepts only
    ["Articles on biodiversity loss in coral reefs and rainforest ecosystems"],
    # Testing removal of strategy/approach terms
    ["Studies about different teaching approaches for children with learning disabilities"],
    # Testing complex OR relationships
    ["Research examining social media influence on political polarization or public discourse"],
    # Testing implied terms in specific industries
    ["Articles about implementation strategies for blockchain in supply chain management or financial services"],
    # Testing qualifiers that don't add search value
    ["Research on effective leadership styles in multicultural organizations"],
    # Testing removal of multiple implied terms
    ["Studies on the effects of microplastic pollution techniques on marine ecosystem health"],
    # Testing domain-specific implied terms
    ["Articles about successful cybersecurity protection methods for critical infrastructure"],
    # Testing generalized vs specific concepts
    ["Research papers on quantum computing algorithms for cryptography or optimization problems"],
    # Testing implied terms in outcome descriptions
    ["Studies examining the relationship between sleep quality and academic performance outcomes"],
    # Testing complex nesting of concepts
    ["Articles about renewable energy integration challenges in developing countries or island nations"],
]
# Create Gradio interface with metadata for deployment
title = "Boolean Search Query Generator"
# Adjacent literals are concatenated into the same single description string.
description = (
    "Convert natural language queries into boolean search expressions. "
    "The model will remove search-related terms (like 'articles', 'research', etc.), "
    "handle generic implied terms (like 'practices', 'methods'), "
    "and format the core concepts using proper boolean syntax."
)

# One text input wired to get_boolean_query, one text output for the result.
demo = gr.Interface(
    fn=get_boolean_query,
    inputs=[
        gr.Textbox(
            label="Enter your natural language query",
            placeholder="e.g., I'm looking for information about climate change and renewable energy",
        )
    ],
    outputs=gr.Textbox(label="Boolean Search Query"),
    title=title,
    description=description,
    examples=examples,
    theme=gr.themes.Soft(),
)
# Start the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()