Spaces:

tuwaiq-allam
/

Group1-News_Search

Sleeping

App Files Files Community

Suku0 committed on Aug 15, 2024

Commit

a376030

verified ·

1 Parent(s): 0eb0452

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -27

app.py CHANGED Viewed

@@ -2,12 +2,88 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import os
 import openai
 openai.api_key = os.getenv("OPENAI_API_KEY")
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
@@ -18,29 +94,8 @@ def respond(
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface

 from huggingface_hub import InferenceClient
 import os
 import openai
+import pandas as pd
+from sentence_transformers import SentenceTransformer
+embedding_model = SentenceTransformer('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
 openai.api_key = os.getenv("OPENAI_API_KEY")
+db_index = faiss.read_index("db_index.faiss")
+metadata_info = pd.read_csv('clean_data.csv')
+def search(query):
+    cleaned_query = query
+    query_embedding = embedding_model.encode(cleaned_query).reshape(1, -1).astype('float32')
+    D, I = db_index.search(query_embedding, k=10)
+    results = []
+    for idx in I[0]:
+        if idx < len(metadata_embeddings):
+            doc_index = idx
+            results.append({
+                'type': 'metadata',
+                'title': df.iloc[doc_index]['title'],
+                'author': df.iloc[doc_index]['author'],
+                'publish_date': df.iloc[doc_index]['publish_date'],
+                'full_text': df.iloc[doc_index]['full_text'],
+                'source': df.iloc[doc_index]['url']
+            })
+        else:
+            chunk_index = idx - len(metadata_embeddings)
+            metadata = metadata_info[chunk_index]
+            doc_index = metadata['index']
+            chunk_text = metadata['chunk']
+            results.append({
+                'type': 'content',
+                'title': df.iloc[doc_index]['title'],
+                'author': df.iloc[doc_index]['author'],
+                'publish_date': df.iloc[doc_index]['publish_date'],
+                'content': chunk_text,
+                'source': df.iloc[doc_index]['url']
+            })
+    return results
+def generate_answer(query):
+    prompt = f"""
+    Based on the following query from a user, please generate a detailed answer based on the context
+    focusing on which is the best based on the query. You should responsd as you are a news and politician expert agent and are conversing with the
+    user in a nice cordial way. If the query question is not in the context say I don't know, and always provide the url as the source of the information.
+    Remove the special characters and (/n ) , make the output clean and concise.
+    ###########
+    query:
+    "{query}"
+    ########
+    context:"
+    "{search(query)}"
+    #####
+    Return in Markdown format with each hotel highlighted.
+    """
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": prompt}
+    ]
+    response = openai.ChatCompletion.create(
+        model="gpt-4o-mini",
+        max_tokens=1500,
+        n=1,
+        stop=None,
+        temperature=0.2, #higher temperature means more creative or more hallucination
+        messages = messages
+    )
+    # Extract the generated response from the API response
+    generated_text = response.choices[0].message['content'].strip()
+    return generated_text
 def respond(
     temperature,
     top_p,
 ):
+    response = generate_answer(message)
+    yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface