Chris4K committed
Commit e904f34 · verified · 1 Parent(s): 68a1536

Create faq_service.py
Files changed (1)
  1. services/faq_service.py +91 -0
services/faq_service.py ADDED
@@ -0,0 +1,91 @@
# services/faq_service.py
import asyncio
import logging
from typing import List, Dict, Any, Optional

import aiohttp
import faiss
from bs4 import BeautifulSoup

from config.config import settings

logger = logging.getLogger(__name__)


class FAQService:
    """Crawls the site's FAQ pages, embeds each Q/A pair, and serves semantic search over them."""

    def __init__(self, model_service):
        self.embedder = model_service.embedder
        self.faiss_index = None
        self.faq_data = []

    async def fetch_faq_pages(self) -> List[Dict[str, Any]]:
        """Discover FAQ URLs from the sitemap and fetch their contents concurrently."""
        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(f"{settings.FAQ_ROOT_URL}sitemap.xml", timeout=settings.TIMEOUT) as response:
                    if response.status == 200:
                        sitemap = await response.text()
                        soup = BeautifulSoup(sitemap, 'xml')
                        faq_urls = [loc.text for loc in soup.find_all('loc') if "/faq/" in loc.text]

                        tasks = [self.fetch_faq_content(url, session) for url in faq_urls]
                        return await asyncio.gather(*tasks)
            except Exception as e:
                logger.error(f"Error fetching FAQ sitemap: {e}")
            return []

    async def fetch_faq_content(self, url: str, session: aiohttp.ClientSession) -> Optional[Dict[str, Any]]:
        """Fetch a single FAQ page and extract its title and question/answer pairs."""
        try:
            async with session.get(url, timeout=settings.TIMEOUT) as response:
                if response.status == 200:
                    content = await response.text()
                    soup = BeautifulSoup(content, 'html.parser')

                    faq_title = soup.find('h1').text.strip() if soup.find('h1') else "Unknown Title"
                    faqs = []
                    sections = soup.find_all(['div', 'section'], class_=['faq-item', 'faq-section'])

                    for section in sections:
                        question = section.find(['h2', 'h3']).text.strip() if section.find(['h2', 'h3']) else None
                        answer = section.find('p').text.strip() if section.find('p') else None

                        if question and answer:
                            faqs.append({"question": question, "answer": answer})

                    return {"url": url, "title": faq_title, "faqs": faqs}
        except Exception as e:
            logger.error(f"Error fetching FAQ content from {url}: {e}")
        return None

    async def index_faqs(self):
        """Embed every question/answer pair and build a FAISS L2 index over the embeddings."""
        faq_pages = await self.fetch_faq_pages()
        faq_pages = [page for page in faq_pages if page]

        self.faq_data = []
        all_texts = []

        for faq_page in faq_pages:
            for item in faq_page['faqs']:
                combined_text = f"{item['question']} {item['answer']}"
                all_texts.append(combined_text)
                self.faq_data.append({
                    "question": item['question'],
                    "answer": item['answer'],
                    "source": faq_page['url']
                })

        # Guard against an empty corpus: encoding an empty list would break the index build below.
        if not all_texts:
            logger.warning("No FAQ entries found; FAISS index was not built.")
            return

        embeddings = self.embedder.encode(all_texts, convert_to_tensor=True).cpu().detach().numpy()
        dimension = embeddings.shape[1]
        self.faiss_index = faiss.IndexFlatL2(dimension)
        self.faiss_index.add(embeddings)

    async def search_faqs(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """Return the top_k FAQ entries closest to the query (lower L2 score = better match)."""
        # Build the index lazily on the first search.
        if not self.faiss_index:
            await self.index_faqs()
        if not self.faiss_index:
            # Index could not be built (e.g. no FAQ content reachable); return no results.
            return []

        query_embedding = self.embedder.encode([query], convert_to_tensor=True).cpu().detach().numpy()
        distances, indices = self.faiss_index.search(query_embedding, top_k)

        results = []
        for i, idx in enumerate(indices[0]):
            if idx < len(self.faq_data):
                result = self.faq_data[idx].copy()
                result["score"] = float(distances[0][i])
                results.append(result)

        return results
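
For orientation, a minimal usage sketch of how this service could be wired up and queried is shown below. It is not part of the commit: DummyModelService, the sentence-transformers model name, and the example query are placeholders, and it assumes config.config.settings defines the FAQ_ROOT_URL and TIMEOUT values the service reads.

# usage_sketch.py -- hypothetical example, not part of this commit
import asyncio

from sentence_transformers import SentenceTransformer

from services.faq_service import FAQService


class DummyModelService:
    # Stand-in for the project's real ModelService; FAQService only uses .embedder.
    def __init__(self):
        self.embedder = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model


async def main():
    faq_service = FAQService(DummyModelService())

    # Crawl the sitemap, embed all Q/A pairs, and build the FAISS index up front.
    await faq_service.index_faqs()

    # Lower score means a closer match (L2 distance between embeddings).
    hits = await faq_service.search_faqs("How do I reset my password?", top_k=3)
    for hit in hits:
        print(f"{hit['score']:.3f}  {hit['question']}  ({hit['source']})")


if __name__ == "__main__":
    asyncio.run(main())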