from typing import List, Optional

from llama_index.core import VectorStoreIndex
from llama_index.core.vector_stores import (
    FilterOperator,
    MetadataFilter,
    MetadataFilters,
)
from pydantic import BaseModel, Field

from .config import get_vector_store


class Node(BaseModel):
    """A single source chunk returned alongside a query response."""

    file_name: str = Field(description="Name of the file")
    url: str = Field(description="GitHub repo url of the file")
    score: float = Field(description="Relevance score of the node")
    content: str = Field(description="Content of the node")


class ContextResponseModel(BaseModel):
    """Shape of the payload produced for a user's query."""

    response: str = Field(description="Response for user's query")
    source_nodes: Optional[List[Node]] = Field(
        default=None, description="List of sources used to generate response"
    )


class QueryRetriever:
    """Runs retrieval-augmented queries against the indexed documentation of one repo."""

    def __init__(self, repo):
        self.vector_store_index = VectorStoreIndex.from_vector_store(get_vector_store())
        # Restrict retrieval to nodes whose metadata.repo matches the requested repo.
        self.filters = MetadataFilters(
            filters=[
                MetadataFilter(
                    key="metadata.repo",
                    value=repo,
                    operator=FilterOperator.EQ,
                )
            ]
        )

    def make_query(self, query: str, mode: str = "default") -> dict:
        """
        Retrieve relevant documentation context for a query using the specified retrieval mode.

        This method supports Retrieval-Augmented Generation (RAG) by extracting
        the most relevant context chunks from indexed documentation sources.

        Args:
            query (str): The user's input query related to the documentation.
            mode (str, optional): Retrieval strategy to use. One of:
                - "default": Standard semantic similarity search.
                - "text_search": Keyword-based search.
                - "hybrid": Combines semantic and keyword-based methods.
                Defaults to "default".

        Returns:
            dict: Dictionary with 'response' and 'source_nodes' keys.
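
        Example (illustrative; assumes the vector store already holds an index
        for the given repo, and the query string below is hypothetical):
            >>> retriever = QueryRetriever("run-llama/llama_index")
            >>> result = retriever.make_query("How do I build an index?", mode="hybrid")
            >>> result["response"]       # generated answer text
            >>> result["source_nodes"]   # list of {file_name, url, score, content}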
""" | |
        query_engine = self.vector_store_index.as_query_engine(
            similarity_top_k=5,
            vector_store_query_mode=mode,
            filters=self.filters,
            response_mode="refine",
        )
        response = query_engine.query(query)

        # Collect the retrieved source chunks in a serializable form.
        nodes = []
        for node in response.source_nodes:
            nodes.append(
                {
                    "file_name": node.metadata.get("file_name", "Unknown"),
                    "url": node.metadata.get("url", "#"),
                    "score": float(node.score) if node.score is not None else 0.0,
                    "content": node.get_content(),
                }
            )
        return {"response": str(response.response), "source_nodes": nodes}


def get_available_repos() -> List[str]:
    """Get the list of repositories available in the vector store."""
    try:
        from .config import get_available_repos as get_repos_from_db

        repos = get_repos_from_db()
        print(f"Fetched repos: {repos}")
        return repos
    except Exception as e:
        print(f"Error getting repos from database: {e}")
        # Fall back to a hardcoded list of known repos.
        return ["mindsdb/mindsdb", "run-llama/llama_index"]