Lucas ARRIESSE
commited on
Commit
·
41c1aed
0
Parent(s):
Initial commit
Browse files- .gitignore +2 -0
- Dockerfile +10 -0
- README.md +9 -0
- app.py +88 -0
- prompts/classify.txt +23 -0
- requirements.txt +51 -0
- schemas.py +44 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.venv
|
2 |
+
__pycache__
|
Dockerfile
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.11-slim

WORKDIR /app

# Install dependencies first so the (slow) pip layer is cached and only
# rebuilt when requirements.txt changes — not on every source edit.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the application code after the dependency layer.
COPY . .

EXPOSE 8000

ENTRYPOINT ["python", "app.py"]
|
README.md
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Reqroup
|
3 |
+
emoji: 🤖
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: blue
|
6 |
+
sdk: docker
|
7 |
+
pinned: false
|
8 |
+
short_description: Categorize service requirements into groups (using AI)
|
9 |
+
---
|
app.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import uvicorn
|
5 |
+
from fastapi import FastAPI
|
6 |
+
from schemas import RequirementInfo, ReqGroupingCategory, ReqGroupingResponse, ReqGroupingRequest, _ReqGroupingCategory, _ReqGroupingOutput
|
7 |
+
from jinja2 import Environment, FileSystemLoader
|
8 |
+
from litellm import acompletion
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
|
11 |
+
# Log with timestamp + source location so records trace back to the code line.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s][%(levelname)s][%(filename)s:%(lineno)d]: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# Load .env files
load_dotenv()

# Fail fast when the LLM configuration is incomplete.
# (Fixed: the old message referenced a nonexistent `LLM_API_TOKEN` and
# labeled `LLM_API_KEY` as the model variable.)
if "LLM_API_MODEL" not in os.environ or "LLM_API_KEY" not in os.environ:
    logging.error(
        "No LLM model (`LLM_API_MODEL`) and/or LLM API key (`LLM_API_KEY`) were provided in the env vars. Exiting")
    sys.exit(-1)

LLM_API_MODEL = os.environ.get("LLM_API_MODEL")
LLM_API_KEY = os.environ.get("LLM_API_KEY")

# Jinja2 environment to load prompt templates
prompt_env = Environment(loader=FileSystemLoader('prompts'), enable_async=True)

fastapi = FastAPI()
|
32 |
+
|
33 |
+
|
34 |
+
@fastapi.post("/categorize_requirements")
async def categorize_reqs(params: ReqGroupingRequest) -> ReqGroupingResponse:
    """Categorize the given service requirements into categories.

    Retries up to MAX_ATTEMPTS times until the LLM has assigned every
    requirement to at least one category, then maps the LLM's 1-based
    requirement IDs (see prompts/classify.txt, Jinja `loop.index`) back
    onto the 0-based `params.requirements` list.

    Raises:
        Exception: if some requirements remain uncategorized after
            MAX_ATTEMPTS rounds.
    """

    MAX_ATTEMPTS = 5

    categories: list[_ReqGroupingCategory] = []
    messages = []

    # categorize the requirements using their indices
    # (the template numbers them with Jinja's 1-based loop.index)
    req_prompt = await prompt_env.get_template("classify.txt").render_async(**{
        "requirements": [rq.model_dump() for rq in params.requirements],
        "max_n_categories": params.max_n_categories,
        "response_schema": _ReqGroupingOutput.model_json_schema()})

    # add system prompt with requirements
    messages.append({"role": "user", "content": req_prompt})

    # ensure all requirements items are processed
    for attempt in range(MAX_ATTEMPTS):
        req_completion = await acompletion(model=LLM_API_MODEL, api_key=LLM_API_KEY, messages=messages, response_format=_ReqGroupingOutput)
        output = _ReqGroupingOutput.model_validate_json(
            req_completion.choices[0].message.content)

        # quick check to ensure no requirement was left out by the LLM by checking all IDs are contained in at least a single category
        assigned_ids = {
            req_id for cat in output.categories for req_id in cat.items}
        # IDs are 1-based, so the full set is 1..len inclusive.
        # (Fixed off-by-one: range(1, len) silently skipped the last ID.)
        unassigned_ids = set(
            range(1, len(params.requirements) + 1)) - assigned_ids

        if len(unassigned_ids) == 0:
            categories.extend(output.categories)
            break
        else:
            # Feed the assistant's own answer back plus a correction request.
            messages.append(req_completion.choices[0].message)
            messages.append(
                {"role": "user", "content": f"You haven't categorized the following requirements in at least one category {unassigned_ids}. Please do so."})

        if attempt == MAX_ATTEMPTS - 1:
            raise Exception("Failed to classify all requirements")

    # build the final category objects
    # remove the invalid (likely hallucinated) requirement IDs
    final_categories = []
    for idx, cat in enumerate(output.categories):
        final_categories.append(ReqGroupingCategory(
            id=idx,
            title=cat.title,
            # Convert the LLM's 1-based IDs to 0-based list indices.
            # (Fixed: the old code indexed with `i` directly, shifting every
            # assignment by one and dropping requirement #1 / #len.)
            requirements=[params.requirements[i - 1]
                          for i in cat.items if 1 <= i <= len(params.requirements)]
        ))

    return ReqGroupingResponse(categories=final_categories)
|
86 |
+
|
87 |
+
|
88 |
+
if __name__ == "__main__":
    # Start the API server only when executed as a script (`python app.py`,
    # as the Dockerfile does) — not on mere import of this module.
    uvicorn.run(fastapi, host="0.0.0.0", port=8000)
|
prompts/classify.txt
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<role>You are a useful assistant who excels at categorizing technical extracted requirements</role>
|
2 |
+
<task>You are tasked with classifying each element of a list of technical requirements into categories which you may arbitrarily define.
|
3 |
+
For each category indicate which requirements belong in that category using their ID. An item may appear in one category at a time.
|
4 |
+
Please make each category title indicative of what's in it.
|
5 |
+
</task>
|
6 |
+
{% if max_n_categories is none %}
|
7 |
+
<number_of_categories>You may define as many categories as you think are needed</number_of_categories>
|
8 |
+
{% else %}
|
9 |
+
<number_of_categories>You may have at most {{max_n_categories}} categories</number_of_categories>
|
10 |
+
{%endif%}
|
11 |
+
|
12 |
+
Here are the requirements:
|
13 |
+
<requirements>
|
14 |
+
{% for req in requirements %}
|
15 |
+
- {{ loop.index }}. {{ req["requirement"] }}
|
16 |
+
{% endfor %}
|
17 |
+
</requirements>
|
18 |
+
|
19 |
+
|
20 |
+
<response_format>
|
21 |
+
Reply in JSON using the following format:
|
22 |
+
{{response_schema}}
|
23 |
+
</response_format>
|
requirements.txt
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohappyeyeballs==2.6.1
|
2 |
+
aiohttp==3.12.13
|
3 |
+
aiosignal==1.3.2
|
4 |
+
annotated-types==0.7.0
|
5 |
+
anyio==4.9.0
|
6 |
+
attrs==25.3.0
|
7 |
+
certifi==2025.6.15
|
8 |
+
charset-normalizer==3.4.2
|
9 |
+
click==8.2.1
|
10 |
+
distro==1.9.0
|
11 |
+
# dotenv==0.9.9  # redundant: the app imports `dotenv` via python-dotenv (pinned below)
|
12 |
+
fastapi==0.115.12
|
13 |
+
filelock==3.18.0
|
14 |
+
frozenlist==1.7.0
|
15 |
+
fsspec==2025.5.1
|
16 |
+
h11==0.16.0
|
17 |
+
hf-xet==1.1.4
|
18 |
+
httpcore==1.0.9
|
19 |
+
httpx==0.28.1
|
20 |
+
huggingface-hub==0.33.0
|
21 |
+
idna==3.10
|
22 |
+
importlib_metadata==8.7.0
|
23 |
+
Jinja2==3.1.6
|
24 |
+
jiter==0.10.0
|
25 |
+
jsonschema==4.24.0
|
26 |
+
jsonschema-specifications==2025.4.1
|
27 |
+
litellm==1.72.6
|
28 |
+
MarkupSafe==3.0.2
|
29 |
+
multidict==6.4.4
|
30 |
+
openai==1.88.0
|
31 |
+
packaging==25.0
|
32 |
+
propcache==0.3.2
|
33 |
+
pydantic==2.11.7
|
34 |
+
pydantic_core==2.33.2
|
35 |
+
python-dotenv==1.1.0
|
36 |
+
PyYAML==6.0.2
|
37 |
+
referencing==0.36.2
|
38 |
+
regex==2024.11.6
|
39 |
+
requests==2.32.4
|
40 |
+
rpds-py==0.25.1
|
41 |
+
sniffio==1.3.1
|
42 |
+
starlette==0.46.2
|
43 |
+
tiktoken==0.9.0
|
44 |
+
tokenizers==0.21.1
|
45 |
+
tqdm==4.67.1
|
46 |
+
typing-inspection==0.4.1
|
47 |
+
typing_extensions==4.14.0
|
48 |
+
urllib3==2.4.0
|
49 |
+
uvicorn==0.34.3
|
50 |
+
yarl==1.20.1
|
51 |
+
zipp==3.23.0
|
schemas.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel, Field
|
2 |
+
from typing import List, Optional
|
3 |
+
|
4 |
+
# Shared model schemas
|
5 |
+
|
6 |
+
class RequirementInfo(BaseModel):
    """Represents an extracted requirement info"""
    # Field descriptions double as schema documentation in the generated
    # JSON/OpenAPI output, so they are the authoritative field docs.
    context: str = Field(..., description="Context for the requirement.")
    requirement: str = Field(..., description="The requirement itself.")
    document: str = Field(...,
                          description="The document the requirement is extracted from.")
|
12 |
+
|
13 |
+
|
14 |
+
class ReqGroupingCategory(BaseModel):
    """Represents the category of requirements grouped together"""
    # Assigned sequentially by the endpoint (enumeration index), not by the LLM.
    id: int = Field(..., description="ID of the grouping category")
    title: str = Field(..., description="Title given to the grouping category")
    # Fully-resolved requirement objects (not the raw IDs the LLM emits).
    requirements: List[RequirementInfo] = Field(
        ..., description="List of grouped requirements")
|
20 |
+
|
21 |
+
# Endpoint model schemas
|
22 |
+
|
23 |
+
class ReqGroupingRequest(BaseModel):
    """Request schema of a requirement grouping call."""
    # Order matters: the prompt refers to requirements by their (1-based)
    # position in this list.
    requirements: list[RequirementInfo]
    # None lets the LLM pick the category count itself (the prompt template
    # branches on `max_n_categories is none`).
    max_n_categories: Optional[int] = Field(
        default=None, description="Max number of categories to construct. Defaults to None")
|
28 |
+
|
29 |
+
|
30 |
+
class ReqGroupingResponse(BaseModel):
    """Response of a requirement grouping call."""
    # Final categories with requirement objects resolved from the LLM's IDs.
    categories: List[ReqGroupingCategory]
|
33 |
+
|
34 |
+
|
35 |
+
# INFO: keep in sync with prompt
# LLM-facing category shape: requirements are referenced by the 1-based IDs
# the prompt template assigns (Jinja loop.index), not embedded as objects.
# No docstring on purpose: model_json_schema() of the enclosing output model
# is injected into the prompt, and docstrings become schema "description"s.
class _ReqGroupingCategory(BaseModel):
    title: str = Field(..., description="Title given to the grouping category")
    items: list[int] = Field(
        ..., description="List of the IDs of the requirements belonging to the category.")
|
40 |
+
|
41 |
+
|
42 |
+
class _ReqGroupingOutput(BaseModel):
    # Root of the LLM's structured output; app.py validates completions
    # against it with model_validate_json and injects its JSON schema into
    # the prompt. Keep in sync with prompts/classify.txt.
    categories: list[_ReqGroupingCategory] = Field(
        ..., description="List of grouping categories")
|