Remove RAG functionality
app.py
CHANGED
@@ -626,92 +626,4 @@ def find_documents_batch(request: BatchDocRequest):
         results=results,
         missing=missing,
         search_time=time.time() - start_time
-    )
-
-def generate_keywords_from_rag_query(question: str):
-    llm = openai.OpenAI(
-        api_key=os.environ.get("GROQ_API_KEY"),
-        base_url="https://api.groq.com/openai/v1",
-        http_client=httpx.Client(verify=False)
-    )
-    system_prompt = """
-    You are a keyword extraction assistant specialized in technical documentation and knowledge retrieval.
-    Your task is to convert a natural language question into a concise set of search-friendly keywords that combine technical terms, abbreviations, and general descriptors.
-    Focus on terminology used in standards, technical specifications, or protocol documentation. Avoid full sentences, keep it short and focused.
-
-    Return the result as a single string, suitable for use in vector search or RAG pipelines.
-
-    Input (example):
-    "Explain the procedure for network slice selection"
-
-    Output:
-    "NSSF network slice selection"
-    """
-
-    messages = [{
-        "role": "system",
-        "content": system_prompt
-    }, {
-        "role": "user",
-        "content": f"Now process the following input: {question}"
-    }]
-
-    response = llm.chat.completions.create(messages=messages, model="llama-3.3-70b-versatile")
-    return response.choices[0].message.content
-
-class RAGRequest(BaseModel):
-    question: str
-    threshold: int
-    release: Optional[str] = None
-    working_group: Optional[str] = None
-    spec_type: Optional[Literal["TS", "TR"]] = None
-
-
-@app.post("/list-rag-docs")
-def get_docs_for_rag(req: RAGRequest):
-    keywords = generate_keywords_from_rag_query(req.question)
-    print(keywords)
-    doc_data = finder_spec.indexer_documents
-    unique_specs = []
-    documents = {}
-    results = search_spec_bm25(KeywordRequest2(keywords=keywords, threshold=req.threshold, release=req.release, working_group=req.working_group, spec_type=req.spec_type))
-
-    for result in results.results:
-        if result['id'] in unique_specs: continue
-        if result['id'] not in unique_specs:
-            unique_specs.append(result['id'])
-            content = dict(doc_data[result['id']])
-            content_bak = dict(doc_data[result['id']])
-            if isinstance(content, str): continue
-            for chapter in content_bak.keys():
-                if any(kw in chapter.lower() for kw in ["reference", "void"]) or any(kw in content_bak[chapter].lower() for kw in ["annex"]):
-                    content.pop(chapter)
-            documents[f"{result['id']}*-*{result['title']}"] = content
-
-    faiss_index = faiss.IndexFlatIP(384)
-    meta = {}
-    contents = []
-    index_counter = 0
-    for spec in documents.keys():
-        for chapter, content in documents[spec].items():
-            contents.append(content)
-            meta[index_counter] = (spec.split("*-*")[0], spec.split("*-*")[1], chapter, content)
-            index_counter += 1
-
-    print("Done contents")
-
-    embedding = model.encode(contents, convert_to_numpy=True, normalize_embeddings=True, show_progress_bar=True).astype('float32')
-    embedding = embedding.reshape(-1, 384)  # Shape (1, 384)
-    print(embedding.shape)
-    faiss_index.add(embedding)
-
-    embedding_query = model.encode(req.question, convert_to_numpy=True, normalize_embeddings=True).astype('float32')
-    embedding_query = embedding_query.reshape(1, -1)
-    distances, indices = faiss_index.search(embedding_query, 15)
-
-    outputs = []
-    for i, idx in enumerate(indices[0]):
-        if idx in meta:
-            outputs.append(f"{meta[idx]}")
-
-    return {"output": "\n".join(outputs)}
+    )
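
For context, the removed /list-rag-docs route accepted a JSON body matching the RAGRequest model above (question, threshold, and optional release / working_group / spec_type filters) and returned a single "output" string. Below is a minimal client-side sketch of how it could have been called before this commit; the base URL and the threshold value are placeholder assumptions, not taken from the repository:

import requests

payload = {
    "question": "Explain the procedure for network slice selection",
    "threshold": 60,              # RAGRequest only requires an int; 60 is an assumed example
    "release": None,              # optional filters declared on RAGRequest
    "working_group": None,
    "spec_type": "TS",            # Literal["TS", "TR"] when provided
}

resp = requests.post("http://localhost:8000/list-rag-docs", json=payload)  # placeholder host
resp.raise_for_status()
print(resp.json()["output"])      # newline-joined (spec id, title, chapter, content) tuples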
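
The retrieval step that was removed relied on L2-normalized sentence embeddings stored in a flat inner-product FAISS index, so the returned scores are cosine similarities. A standalone sketch of that technique, assuming a 384-dimensional encoder (the model name below is an assumption; the diff only shows a global model producing 384-dimensional vectors):

import faiss
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed 384-d encoder, not confirmed by the diff

chapters = [
    "5.15 Network slicing: the NSSF selects the allowed network slice instances ...",
    "4.2 Registration management procedures ...",
]

index = faiss.IndexFlatIP(384)                   # inner product over normalized vectors = cosine
emb = model.encode(chapters, convert_to_numpy=True,
                   normalize_embeddings=True).astype("float32")
index.add(emb)

query = model.encode(["Explain the procedure for network slice selection"],
                     convert_to_numpy=True, normalize_embeddings=True).astype("float32")
scores, ids = index.search(query, k=2)           # the removed endpoint asked for the top 15
for score, i in zip(scores[0], ids[0]):
    print(f"{score:.3f}  {chapters[i][:50]}")

Because every vector is normalized before indexing, IndexFlatIP ranks chapters by cosine similarity; the removed endpoint then formatted the top hits as (spec id, title, chapter, content) tuples.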