Spaces:

Game4all
/

reqroup

Running

reqroup / app.py

Lucas ARRIESSE

Return solutions which are not grounded on search items

32239ae 1 day ago

7.95 kB

	import asyncio
	import logging
	import os
	import sys
	import uvicorn
	from fastapi import FastAPI
	from schemas import _SearchedSolutionModel, _SolutionCriticismOutput, CriticizeSolutionsRequest, CritiqueResponse, RequirementInfo, ReqGroupingCategory, ReqGroupingResponse, ReqGroupingRequest, _ReqGroupingCategory, _ReqGroupingOutput, SolutionCriticism, SolutionModel, SolutionSearchResponse
	from jinja2 import Environment, FileSystemLoader, StrictUndefined
	from litellm.router import Router
	from dotenv import load_dotenv

	# Root logger: timestamped records tagged with level and emitting file/line.
	logging.basicConfig(
	    format='[%(asctime)s][%(levelname)s][%(filename)s:%(lineno)d]: %(message)s',
	    datefmt='%Y-%m-%d %H:%M:%S',
	    level=logging.INFO,
	)

	# Load .env files
	load_dotenv()

	# Both the model identifier and its API key are mandatory; abort start-up
	# when either one is absent from the environment.
	if not all(var in os.environ for var in ("LLM_MODEL", "LLM_API_KEY")):
	    logging.error(
	        "No LLM token (`LLM_API_KEY`) and/or LLM model (`LLM_MODEL`) were provided in the env vars. Exiting")
	    sys.exit(-1)

	# LiteLLM router exposing a single deployment under the "chat" alias.
	# The presence of both variables was verified just above.
	llm_router = Router(
	    model_list=[
	        {
	            "model_name": "chat",
	            "litellm_params": {
	                "model": os.environ["LLM_MODEL"],
	                "api_key": os.environ["LLM_API_KEY"],
	                "rpm": 15,
	                "max_parallel_requests": 4,
	                "allowed_fails": 1,
	                "cooldown_time": 60,
	                "max_retries": 10,
	            },
	        },
	    ],
	    num_retries=10,
	    retry_after=30,
	)

	# Jinja2 environment used to load the prompt templates from ./prompts.
	# StrictUndefined makes a missing template variable an error instead of
	# rendering it as an empty string.
	prompt_env = Environment(
	    loader=FileSystemLoader('prompts'),
	    enable_async=True,
	    undefined=StrictUndefined,
	)

	# The interactive API docs are served at the root path.
	api = FastAPI(docs_url="/")


	@api.post("/categorize_requirements")
	async def categorize_reqs(params: ReqGroupingRequest) -> ReqGroupingResponse:
	    """Categorize the given service requirements into categories.

	    The ``classify.txt`` prompt is rendered with the requirements and sent to
	    the "chat" model, which answers with a ``_ReqGroupingOutput``. Requirements
	    are referenced by their position in ``params.requirements``. While at least
	    one position is missing from every category, the model is re-prompted in
	    the same conversation, for at most ``MAX_ATTEMPTS`` completions.

	    Args:
	        params: Request holding the requirements to group and the maximum
	            number of categories passed to the prompt.

	    Returns:
	        The categories of the accepted completion, each one carrying the
	        requirement objects it references. Ids that do not designate a
	        requirement of the request (likely hallucinated) are dropped.

	    Raises:
	        Exception: If some requirements are still uncategorized after
	            ``MAX_ATTEMPTS`` completions.
	    """
	    MAX_ATTEMPTS = 5

	    # The set of valid ids does not change between attempts: build it once.
	    valid_ids_universe = set(range(len(params.requirements)))

	    # categorize the requirements using their indices
	    req_prompt = await prompt_env.get_template("classify.txt").render_async(
	        requirements=[rq.model_dump() for rq in params.requirements],
	        max_n_categories=params.max_n_categories,
	        response_schema=_ReqGroupingOutput.model_json_schema(),
	    )

	    # the conversation starts with the user prompt listing the requirements
	    messages = [{"role": "user", "content": req_prompt}]

	    # ensure all requirements items are processed
	    for _ in range(MAX_ATTEMPTS):
	        req_completion = await llm_router.acompletion(
	            model="chat", messages=messages, response_format=_ReqGroupingOutput)
	        reply = req_completion.choices[0].message
	        output = _ReqGroupingOutput.model_validate_json(reply.content)

	        # quick check to ensure no requirement was left out by the LLM: every
	        # valid id must appear in at least one category. Ids outside the
	        # universe (hallucinated) are ignored by the set difference.
	        assigned_ids = {
	            req_id for cat in output.categories for req_id in cat.items}
	        unassigned_ids = valid_ids_universe - assigned_ids

	        if not unassigned_ids:
	            break

	        # feed the incomplete answer back and ask for the missing ids
	        messages.append(reply)
	        messages.append(
	            {"role": "user", "content": f"You haven't categorized the following requirements in at least one category {unassigned_ids}. Please do so."})
	    else:
	        # the loop ran out of attempts without a complete categorization
	        raise Exception("Failed to classify all requirements")

	    # build the final category objects
	    # remove the invalid (likely hallucinated) requirement IDs; the membership
	    # test also rejects negative ids, which would otherwise silently index
	    # the requirement list from its end
	    final_categories = [
	        ReqGroupingCategory(
	            id=idx,
	            title=cat.title,
	            requirements=[params.requirements[i]
	                          for i in cat.items if i in valid_ids_universe],
	        )
	        for idx, cat in enumerate(output.categories)
	    ]

	    return ReqGroupingResponse(categories=final_categories)


	@api.post("/criticize_solution", response_model=CritiqueResponse)
	async def criticize_solution(params: CriticizeSolutionsRequest) -> CritiqueResponse:
	    """Criticize the challenges, weaknesses and limitations of the provided solutions.

	    Each solution of ``params.solutions`` is criticized by its own LLM call;
	    the calls are awaited together and any failure propagates to the caller.
	    """

	    async def _critique_one(candidate: SolutionModel):
	        # Render the prompt for this single solution.
	        template = prompt_env.get_template("criticize.txt")
	        prompt_text = await template.render_async(
	            solutions=[candidate.model_dump()],
	            response_schema=_SolutionCriticismOutput.model_json_schema(),
	        )

	        # Ask the "chat" model for a structured criticism.
	        completion = await llm_router.acompletion(
	            model="chat",
	            messages=[{"role": "user", "content": prompt_text}],
	            response_format=_SolutionCriticismOutput,
	        )

	        raw_json = completion.choices[0].message.content
	        parsed = _SolutionCriticismOutput.model_validate_json(raw_json)

	        # Only one solution was submitted, so only the first criticism is kept.
	        first_criticism = parsed.criticisms[0]
	        return SolutionCriticism(solution=candidate, criticism=first_criticism)

	    pending = [_critique_one(sol) for sol in params.solutions]
	    # Default gather behaviour: the first exception raised is propagated.
	    critiques = await asyncio.gather(*pending)
	    return CritiqueResponse(critiques=critiques)


	@api.post("/search_solutions_gemini", response_model=SolutionSearchResponse)
	async def search_solutions(params: ReqGroupingResponse) -> SolutionSearchResponse:
	    """Search one solution per requirement category using Gemini grounded on Google Search.

	    For every category, a free-text solution is generated with the
	    ``googleSearch`` tool enabled, then restructured as a
	    ``_SearchedSolutionModel`` by a second completion. Categories are
	    processed concurrently.

	    Args:
	        params: The requirement categories to find solutions for.

	    Returns:
	        The solutions that could be generated. A category whose processing
	        raised is logged and left out of the response. A solution whose
	        completion carries no usable grounding metadata is still returned,
	        with an empty ``References`` list.
	    """

	    def _extract_sources(completion) -> list[str]:
	        # Format the web sources of a completion as "title - uri" strings.
	        # Returns an empty list when the grounding metadata is absent.
	        try:
	            chunks = completion["vertex_ai_grounding_metadata"][0]["groundingChunks"] or []
	        except (AttributeError, KeyError, IndexError, TypeError):
	            # the answer was produced without (readable) grounding metadata
	            return []

	        sources = []
	        for chunk in chunks:
	            try:
	                sources.append(
	                    f'{chunk["web"]["title"]} - {chunk["web"]["uri"]}')
	            except (KeyError, TypeError):
	                # skip chunks that do not describe a web source
	                continue
	        return sources

	    async def _search_inner(cat: ReqGroupingCategory) -> SolutionModel:
	        # ================== generate the solution with web grounding
	        req_prompt = await prompt_env.get_template("search_solution.txt").render_async(
	            category=cat.model_dump(),
	        )

	        # generate the completion in non-structured mode.
	        # the googleSearch tool enables grounding gemini with google search
	        # this also forces gemini to perform a tool call
	        req_completion = await llm_router.acompletion(
	            model="chat",
	            messages=[{"role": "user", "content": req_prompt}],
	            tools=[{"googleSearch": {}}],
	            tool_choice="required",
	        )

	        # ==================== structure the solution as a json ===================================

	        structured_prompt = await prompt_env.get_template("structure_solution.txt").render_async(
	            solution=req_completion.choices[0].message.content,
	            response_schema=_SearchedSolutionModel.model_json_schema(),
	        )

	        structured_completion = await llm_router.acompletion(
	            model="chat",
	            messages=[{"role": "user", "content": structured_prompt}],
	            response_format=_SearchedSolutionModel,
	        )
	        solution_model = _SearchedSolutionModel.model_validate_json(
	            structured_completion.choices[0].message.content)

	        # ======================== build the final solution object ================================

	        # keep only ids designating a requirement of this category: an
	        # out-of-range id would raise and discard the whole solution, a
	        # negative one would silently pick a requirement from the end
	        n_requirements = len(cat.requirements)
	        addressed_requirements = [
	            cat.requirements[i].requirement
	            for i in solution_model.requirement_ids
	            if 0 <= i < n_requirements
	        ]

	        return SolutionModel(
	            Context="",
	            Requirements=addressed_requirements,
	            Problem_Description=solution_model.problem_description,
	            Solution_Description=solution_model.solution_description,
	            # extract the source metadata from the search items
	            References=_extract_sources(req_completion),
	            Category_Id=cat.id,
	        )

	    solutions = await asyncio.gather(
	        *[_search_inner(cat) for cat in params.categories], return_exceptions=True)
	    logging.info(solutions)

	    final_solutions = []
	    for cat, outcome in zip(params.categories, solutions):
	        # BaseException also covers asyncio.CancelledError, which is not an
	        # Exception subclass and must not reach the response model
	        if isinstance(outcome, BaseException):
	            logging.error(
	                "Solution search failed for category %s", cat.id, exc_info=outcome)
	            continue
	        final_solutions.append(outcome)

	    return SolutionSearchResponse(solutions=final_solutions)


	# Start the HTTP server only when this file is executed as a script, so that
	# importing the module (tests, tooling) does not block on a running server.
	if __name__ == "__main__":
	    uvicorn.run(api, host="0.0.0.0", port=8000)