Spaces:

Zwounds
/

Boolean_Search_Query_Model

Runtime error

App Files Files Community

Boolean_Search_Query_Model / demo.py

Zwounds

Upload folder using huggingface_hub

e34d0c9 verified 4 months ago

raw

history blame

6.88 kB

	import gradio as gr
	from llama_cpp import Llama
	from huggingface_hub import hf_hub_download
	import logging
	import os

	# Setup logging
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	def load_model(
	    repo_id="Zwounds/boolean-search-model",
	    filename="boolean-model.gguf",
	    n_ctx=2048,
	    n_gpu_layers=0,
	):
	    """Download a GGUF model file from the Hugging Face Hub and load it.

	    Called with no arguments this loads the same model, with the same
	    settings, as before; the parameters only exist so a different
	    checkpoint or runtime configuration can be used without editing
	    this function.

	    Args:
	        repo_id: Hub repository that hosts the GGUF file.
	        filename: Name of the GGUF file inside that repository.
	        n_ctx: Context window size passed to llama-cpp-python.
	        n_gpu_layers: Number of layers to offload to the GPU. The default
	            of 0 keeps inference on the CPU for HF Spaces compatibility.

	    Returns:
	        The ``Llama`` instance created from the downloaded file.
	    """
	    logger.info("Loading GGUF model...")

	    # Fetch the model file from the Hub and get its local path.
	    model_path = hf_hub_download(
	        repo_id=repo_id,
	        filename=filename,
	        repo_type="model",
	    )
	    logger.info("Model file available at %s", model_path)

	    # Load the model with llama-cpp-python.
	    return Llama(
	        model_path=model_path,
	        n_ctx=n_ctx,
	        n_gpu_layers=n_gpu_layers,
	    )

	def format_prompt(query):
	    """Wrap *query* in the instruction prompt expected by the model.

	    The prompt is made of an instruction section (the conversion rules
	    and worked examples), an input section holding *query*, and an
	    empty response section for the model to complete.
	    """
	    # The only replacement field in the template is {query}.
	    template = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

	### Instruction:
	Convert this natural language query into a boolean search query by following these rules:

	1. FIRST: Remove all meta-terms from this list (they should NEVER appear in output):
	- articles, papers, research, studies
	- examining, investigating, analyzing
	- findings, documents, literature
	- publications, journals, reviews
	Example: "Research examining X" → just "X"

	2. SECOND: Remove generic implied terms that don't add search value:
	- Remove words like "practices," "techniques," "methods," "approaches," "strategies"
	- Remove words like "impacts," "effects," "influences," "role," "applications"
	- For example: "sustainable agriculture practices" → "sustainable agriculture"
	- For example: "teaching methodologies" → "teaching"
	- For example: "leadership styles" → "leadership"

	3. THEN: Format the remaining terms:
	CRITICAL QUOTING RULES:
	- Multi-word phrases MUST ALWAYS be in quotes - NO EXCEPTIONS
	- Examples of correct quoting:
	- Wrong: machine learning AND deep learning
	- Right: "machine learning" AND "deep learning"
	- Wrong: natural language processing
	- Right: "natural language processing"
	- Single words must NEVER have quotes (e.g., science, research, learning)
	- Use AND to connect required concepts
	- Use OR with parentheses for alternatives (e.g., ("soil health" OR biodiversity))

	Example conversions showing proper quoting:
	"Research on machine learning for natural language processing"
	→ "machine learning" AND "natural language processing"

	"Studies examining anxiety depression stress in workplace"
	→ (anxiety OR depression OR stress) AND workplace

	"Articles about deep learning impact on computer vision"
	→ "deep learning" AND "computer vision"

	"Research on sustainable agriculture practices and their impact on soil health or biodiversity"
	→ "sustainable agriculture" AND ("soil health" OR biodiversity)

	"Articles about effective teaching methods for second language acquisition"
	→ teaching AND "second language acquisition"

	### Input:
	{query}

	### Response:
	"""
	    return template.format(query=query)

	def get_boolean_query(query):
	    """Generate a boolean search query from a natural-language query.

	    Args:
	        query: The natural-language request typed by the user.

	    Returns:
	        The model's completion with surrounding whitespace removed, or an
	        empty string when *query* is ``None`` or blank (the model is not
	        invoked in that case).
	    """
	    # Nothing to convert: do not spend an inference call on blank input.
	    if query is None or not str(query).strip():
	        return ""

	    prompt = format_prompt(query)

	    # temperature=0 requests greedy decoding; generation stops at the
	    # end-of-text marker or as soon as the model starts a new "###" section.
	    response = model(
	        prompt,
	        max_tokens=64,
	        temperature=0,
	        stop=["<|end_of_text|>", "###"],
	    )

	    # Extract generated text
	    text = response["choices"][0]["text"].strip()

	    # Defensive: with "###" as a stop sequence the header should not appear
	    # in the completion, but keep only what follows it if it ever does.
	    if "### Response:" in text:
	        text = text.split("### Response:")[-1].strip()

	    return text

	# Load the model once at import time and keep it in the module-level
	# name `model`, which get_boolean_query() reads on every call.
	logger.info("Initializing model...")
	model = load_model()
	logger.info("Model loaded successfully")

	# Sample natural-language queries shown in the UI. Each query is wrapped in
	# its own one-element list; the comment above each one says what it exercises.
	examples = [
	    [sample_query]
	    for sample_query in (
	        # Testing removal of meta-terms
	        "Find research papers examining the long-term effects of meditation on brain structure",
	        # Testing removal of generic implied terms (practices, techniques, methods)
	        "Articles about deep learning techniques for natural language processing tasks",
	        # Testing removal of impact/effect terms
	        "Studies on the impact of early childhood nutrition on cognitive development",
	        # Testing handling of technology applications
	        "Information on virtual reality applications in architectural design and urban planning",
	        # Testing proper OR relationship with parentheses
	        "Research on electric vehicles adoption in urban environments or rural communities",
	        # Testing proper quoting of multi-word concepts only
	        "Articles on biodiversity loss in coral reefs and rainforest ecosystems",
	        # Testing removal of strategy/approach terms
	        "Studies about different teaching approaches for children with learning disabilities",
	        # Testing complex OR relationships
	        "Research examining social media influence on political polarization or public discourse",
	        # Testing implied terms in specific industries
	        "Articles about implementation strategies for blockchain in supply chain management or financial services",
	        # Testing qualifiers that don't add search value
	        "Research on effective leadership styles in multicultural organizations",
	        # Testing removal of multiple implied terms
	        "Studies on the effects of microplastic pollution techniques on marine ecosystem health",
	        # Testing domain-specific implied terms
	        "Articles about successful cybersecurity protection methods for critical infrastructure",
	        # Testing generalized vs specific concepts
	        "Research papers on quantum computing algorithms for cryptography or optimization problems",
	        # Testing implied terms in outcome descriptions
	        "Studies examining the relationship between sleep quality and academic performance outcomes",
	        # Testing complex nesting of concepts
	        "Articles about renewable energy integration challenges in developing countries or island nations",
	    )
	]


	# Assemble the Gradio UI: one text input, one text output, with the
	# title, description and example queries shown alongside.
	title = "Boolean Search Query Generator"
	description = (
	    "Convert natural language queries into boolean search expressions. The model will remove search-related terms (like 'articles', 'research', etc.), handle generic implied terms (like 'practices', 'methods'), and format the core concepts using proper boolean syntax."
	)

	# Input box for the user's natural-language query.
	query_input = gr.Textbox(
	    label="Enter your natural language query",
	    placeholder="e.g., I'm looking for information about climate change and renewable energy",
	)
	# Output box that displays the generated boolean query.
	query_output = gr.Textbox(label="Boolean Search Query")

	demo = gr.Interface(
	    fn=get_boolean_query,
	    inputs=[query_input],
	    outputs=query_output,
	    title=title,
	    description=description,
	    examples=examples,
	    theme=gr.themes.Soft(),
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()