Spaces:

brestok
/

JaveaAI

Sleeping

App Files Files Community

brestok committed on Apr 29, 2024

Commit

3d560e1

verified ·

1 Parent(s): 498d1ce

Update project/bot/openai_backend.py

Browse files

Files changed (1) hide show

project/bot/openai_backend.py +5 -4

project/bot/openai_backend.py CHANGED Viewed

@@ -77,13 +77,13 @@ class SearchBot:
     async def analyze_full_response(self) -> str:
         assistant_message = self.chat_history.pop()['content']
-        nlp = pipeline("ner", model=settings.NLP_MODEL, tokenizer=settings.NLP_TOKENIZER, grouped_entities=True)
         ner_result = nlp(assistant_message)
         analyzed_assistant_message = assistant_message
         for entity in ner_result:
             if entity['entity_group'] in ("LOC", "ORG", "MISC") and entity['word'] != "Javea":
                 enriched_information = await self.enrich_information_from_google(entity['word'])
-                analyzed_assistant_message = analyzed_assistant_message.replace(entity['word'], enriched_information)
         return "ENRICHED:" + analyzed_assistant_message
     async def _convert_to_embeddings(self, text_list):
@@ -96,7 +96,7 @@ class SearchBot:
     @staticmethod
     async def _get_context_data(user_query: list[float]) -> list[dict]:
-        radius = 4
         _, distances, indices = settings.FAISS_INDEX.range_search(user_query, radius)
         indices_distances_df = pd.DataFrame({'index': indices, 'distance': distances})
         filtered_data_df = settings.products_dataset.iloc[indices].copy()
@@ -110,7 +110,8 @@ class SearchBot:
     async def create_context_str(context: List[Dict]) -> str:
         context_str = ''
         for i, chunk in enumerate(context):
-            context_str += f'{i + 1}) {chunk["chunks"]}'
         return context_str
     async def _rag(self, context: List[Dict], query: str, session: AsyncSession, country: str):

     async def analyze_full_response(self) -> str:
         """Enrich named entities in the most recent chat message.

         Pops the last entry from ``self.chat_history``, runs the NER pipeline
         over its ``'content'`` and, for every entity whose group is LOC, ORG
         or MISC (except the word "Javea"), substitutes the text returned by
         ``self.enrich_information_from_google`` for that entity.

         Returns:
             The enriched message prefixed with ``"ENRICHED:"``.
         """
         assistant_message = self.chat_history.pop()['content']
         nlp = pipeline(
             "ner",
             model=settings.NLP_MODEL,
             tokenizer=settings.NLP_TOKENIZER,
             aggregation_strategy="simple",
         )
         ner_result = nlp(assistant_message)

         # Splice by character offsets into the ORIGINAL message so that a
         # replacement can never land inside previously inserted enrichment
         # text, and so a decoded ``word`` that differs from the source text
         # still gets replaced.
         pieces = []
         cursor = 0
         # Entities without usable offsets fall back to first-occurrence replace.
         unanchored = []
         for entity in ner_result:
             word = entity['word']
             if entity['entity_group'] not in ("LOC", "ORG", "MISC") or word == "Javea":
                 continue
             enriched_information = await self.enrich_information_from_google(word)
             # NOTE(review): 'start'/'end' are expected only when the tokenizer
             # provides offsets -- confirm for settings.NLP_TOKENIZER.
             start = entity.get('start')
             end = entity.get('end')
             if start is None or end is None or start < cursor:
                 unanchored.append((word, enriched_information))
                 continue
             pieces.append(assistant_message[cursor:start])
             pieces.append(enriched_information)
             cursor = end
         pieces.append(assistant_message[cursor:])

         analyzed_assistant_message = "".join(pieces)
         for word, enriched_information in unanchored:
             analyzed_assistant_message = analyzed_assistant_message.replace(word, enriched_information, 1)
         return "ENRICHED:" + analyzed_assistant_message
     async def _convert_to_embeddings(self, text_list):
     @staticmethod
     async def _get_context_data(user_query: list[float]) -> list[dict]:
+        radius = 5
         _, distances, indices = settings.FAISS_INDEX.range_search(user_query, radius)
         indices_distances_df = pd.DataFrame({'index': indices, 'distance': distances})
         filtered_data_df = settings.products_dataset.iloc[indices].copy()
     async def create_context_str(context: List[Dict]) -> str:
         """Build the numbered context string from chunks that contain comments.

         Args:
             context: Sequence of dicts, each with a ``'chunks'`` entry holding
                 the chunk text.

         Returns:
             The concatenation of ``'<n>) <chunk text>'`` for every chunk whose
             text contains ``"Comments:"``; an empty string when none qualify.
         """
         # Numbering is the chunk's 1-based position in ``context``, so chunks
         # that are filtered out leave gaps in the sequence. Entries are joined
         # with no separator between them.
         return ''.join(
             f'{position}) {chunk["chunks"]}'
             for position, chunk in enumerate(context, start=1)
             if "Comments:" in chunk['chunks']
         )
     async def _rag(self, context: List[Dict], query: str, session: AsyncSession, country: str):