Spaces:
Sleeping
Upload 5 files
- app.py +46 -0
- data.py +2 -0
- get_hadiths.py +67 -0
- pickle_ebd.pkl +3 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,46 @@
import gradio as gr
from get_hadiths import HadithSearch
import os
from functools import partial

# The OpenAI API key is read from the Space secret "apk".
hadith_search = HadithSearch(api_key=os.environ.get("apk"))

title = "Smart Hadith"
desc = "### This is a simple AI tool. Write your problem or situation (personal or not) and quickly find relevant hadiths on this topic or a problem you have. Just type in plain English how you feel. Contact suggestions/questions: [email protected]\n\n"


warning = "Warning!\n **PLEASE READ THE DISCLAIMER BELOW** This isn't a 100% accurate tool; not all Hadiths are present in the database, and some results might be repetitive. If that's the case, try generating more Hadiths with the selector.\nMore information describing how the tool works is coming soon.\n\n## DISCLAIMER\n\nTHIS TOOL IS INTENDED FOR REFERENCE PURPOSES ONLY AND IS NOT INTENDED TO BE TAKEN AS RELIGIOUS ADVICE. THE HADITHS DISPLAYED BY THIS TOOL ARE NOT INTENDED TO BE USED AS A SOLE SOURCE OF RELIGIOUS GUIDANCE. USERS ARE RESPONSIBLE FOR CONDUCTING THEIR OWN RESEARCH AND SEEKING GUIDANCE FROM RELIGIOUS SCHOLARS.\n\nPLEASE NOTE THAT THE CONTENT DISPLAYED BY THIS TOOL IS NOT GUARANTEED TO BE ACCURATE, COMPLETE, OR UP-TO-DATE.\n\nTHE DEVELOPERS OF THIS TOOL WILL NOT BE HELD RESPONSIBLE FOR ANY DECISIONS MADE BY THE USERS OF THIS TOOL THAT ARE BASED ON THE CONTENT DISPLAYED BY THIS TOOL.\n\nHadiths gathered from this repository: https://www.kaggle.com/datasets/fahd09/hadith-dataset"


def update_results(text_area, number_to_display=10):
    # Called by the submit button: run the embedding search and return Markdown.
    results = hadith_search.search_hadiths(text_area, number_to_display)
    return results


with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(desc)

    with gr.Accordion("READ BEFORE USING THE TOOL!", open=False):
        gr.Markdown(warning)

    with gr.Row():
        with gr.Column(scale=4):
            text_area = gr.Textbox(placeholder="I heard it was advised to drink water in three segments", lines=3, label="Describe your situation/problem with your own words (story, keywords, topics, examples etc...)")
        with gr.Column(scale=1):
            number_to_display = gr.Number(value=10, label="Number of hadiths to display")
            submit_button = gr.Button(value="Find hadiths relevant to my story")

    fn = partial(update_results)

    with gr.Accordion("Relevant Hadiths (Rerun if results are not satisfying)"):
        response_display = gr.Markdown("Empty")

    submit_button.click(fn=fn, inputs=[text_area, number_to_display], outputs=[response_display])

demo.launch(max_threads=40)
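The submit wiring above follows the standard Gradio Blocks pattern: a handler function, a list of input components, and a list of output components. A minimal self-contained sketch of the same pattern, independent of the hadith search (the echo function and labels are illustrative only):

import gradio as gr

def echo(text, n=1):
    # Mirrors how update_results maps (textbox, number) -> one Markdown string.
    return "\n\n".join([text] * int(n))

with gr.Blocks() as demo:
    box = gr.Textbox(label="Input")
    count = gr.Number(value=1, label="Repeat")
    out = gr.Markdown("Empty")
    gr.Button("Run").click(fn=echo, inputs=[box, count], outputs=[out])

if __name__ == "__main__":
    demo.launch()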
data.py
ADDED
@@ -0,0 +1,2 @@
import pandas as pd

# Pre-computed hadith embeddings DataFrame shipped with the Space as a Git LFS pickle.
data = pd.read_pickle("pickle_ebd.pkl")
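A quick local sanity check can confirm the pickle loads and exposes the columns that get_hadiths.py reads later; the column list below is taken from that file, and whether every one of them exists in the shipped pickle is an assumption:

import pandas as pd

df = pd.read_pickle("pickle_ebd.pkl")

# Columns referenced downstream in get_hadiths.py; missing ones are reported rather than assumed.
expected = ["source", "chapter", "chapter_no", "hadith_no",
            "chain_indx", "text_en", "text_ar", "embeddings"]
print(df.shape)
print("missing columns:", [c for c in expected if c not in df.columns])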
get_hadiths.py
ADDED
@@ -0,0 +1,67 @@
import pandas as pd
import os
import numpy as np
from openai import OpenAI
from data import data as df


class HadithSearch:
    """Embedding-based search over the pre-computed hadith DataFrame."""

    def __init__(self, api_key):
        self.client = OpenAI(api_key=api_key)
        self.data = df

    def _cosine_similarity(self, a, b):
        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

    def _get_embedding(self, text, model="text-embedding-ada-002"):
        # GPT-3.5 first rewrites the described situation into a short list of
        # search topics; that list is then embedded for the similarity search.
        if isinstance(text, str):
            text = text.replace("\n", " ")
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": "Your task is to transform a described situation into a list of the top 3 most important things to look for in a database of Islamic hadith that could be helpful to bring answers. \n\nIt should be very specific and formatted with only the list and remove all occurrences of the word 'Hadiths', just the topics sought. JSON FORMAT!\n\nThe goal is to use this list to perform cosine similarity embedding search on the hadith database."
                },
                {
                    "role": "user",
                    "content": text
                }
            ],
            temperature=1,
            max_tokens=684,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        ).choices[0].message.content
        return self.client.embeddings.create(input=f"{response}", model=model).data[0].embedding

    def search_hadiths(self, user_input, num_hadiths=10):
        if self.data is None:
            raise ValueError("Data not loaded.")

        embedding_column_name = "embeddings"
        try:
            # Unwrap dict-shaped embeddings if the column stores them that way;
            # skipped silently when the column already holds plain vectors.
            self.data[embedding_column_name] = self.data.embeddings.apply(lambda x: x["embeding"])
        except Exception:
            pass

        embedding = self._get_embedding(user_input, model='text-embedding-ada-002')
        self.data['similarities'] = self.data.embeddings.apply(lambda x: self._cosine_similarity(x, embedding))

        # Rank all hadiths by similarity to the query embedding and keep the top matches.
        results = self.data.sort_values('similarities', ascending=False).head(int(num_hadiths)).to_dict(orient="records")
        formatted_results = self._format_results(results)
        return formatted_results

    def _format_results(self, results):
        # Build one Markdown string covering all returned hadiths.
        formatted_output = ""
        for r in results:
            formatted_output += "### Source: " + str(r["source"]) + " | Chapter name : " + str(r["chapter"]) + " | Chapter number: " + str(r["chapter_no"]) + " | Hadith number : " + str(r["hadith_no"]) + "\n\n"
            formatted_output += "Similarity with query: " + str(round(r["similarities"]*100, 2)) + "%" + " | Chain index: " + str(r["chain_indx"]) + "\n\n"
            formatted_output += "### Hadith content:" + "\n\n" + str(r["text_en"]) + "\n\n"
            formatted_output += "Arabic version: \n\n" + str(r["text_ar"])
            formatted_output += "\n\n-----------------------------------------------------------------------------------------------------\n\n"
        formatted_output = formatted_output.replace("`", "")
        return formatted_output
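A minimal end-to-end usage sketch of the class above, assuming the same apk environment variable used by app.py holds a valid OpenAI key and pickle_ebd.pkl sits in the working directory (the query string is only an example):

import os
from get_hadiths import HadithSearch

searcher = HadithSearch(api_key=os.environ.get("apk"))

# One chat call rewrites the situation into search topics, one embeddings call
# embeds them, then rows are ranked by cosine similarity and returned as Markdown.
markdown = searcher.search_hadiths("how to deal with anger", num_hadiths=5)
print(markdown)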
pickle_ebd.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c0d207391f0488a92360e7f0365c51b0888a5281220d05602982e2027773f92b
size 502886347
requirements.txt
ADDED
@@ -0,0 +1,4 @@
openai
gradio
pandas
numpy
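Because get_hadiths.py imports the OpenAI client class (from openai import OpenAI) and calls client.chat.completions.create, the v1-style openai SDK is required. A loosely pinned variant, where the version floor is an assumption rather than a tested pin, would be:

openai>=1.0
gradio
pandas
numpy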