Mohamed-Maher committed on
Commit
1c5166c
·
verified ·
1 Parent(s): 35cfe27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -65
app.py CHANGED
@@ -7,68 +7,61 @@ import nltk
7
  import gradio as gr
8
  from sklearn.metrics.pairwise import cosine_similarity
9
 
10
- import os
11
-
12
- current_path = os.getcwd()
13
-
14
- print(f"The current working directory is: {current_path}")
15
-
16
-
17
- # class HadithClassificationApp:
18
- # def __init__(self):
19
- # # Download NLTK resources if needed
20
- # nltk.download('punkt')
21
-
22
- # # Load the dataset and labels
23
- # self.dataset = pd.read_csv("Preprocess_LK_Hadith_dataset.csv")
24
- # self.labels = self.dataset['Arabic_Grade']
25
-
26
- # # Load the models
27
- # with open("tfidf_vectorizer.pkl", "rb") as f:
28
- # self.vectorizer = pickle.load(f)
29
- # with open("cosine_similarity_model.pkl", "rb") as f:
30
- # self.X = pickle.load(f)
31
-
32
- # @staticmethod
33
- # def remove_tashkeel(text):
34
- # tashkeel_pattern = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
35
- # return re.sub(tashkeel_pattern, '', text)
36
-
37
- # def preprocess_arabic_text(self, text):
38
- # text = self.remove_tashkeel(text)
39
- # tokens = nltk.word_tokenize(text)
40
- # cleaned_tokens = [token for token in tokens if token.isalnum()]
41
- # lowercase_tokens = [token.lower() for token in cleaned_tokens]
42
- # return " ".join(lowercase_tokens)
43
-
44
- # def predict_label(self, input_text, threshold=0.5):
45
- # input_text = self.preprocess_arabic_text(input_text)
46
- # input_vector = self.vectorizer.transform([input_text])
47
- # similarities = cosine_similarity(input_vector, self.X).flatten()
48
-
49
- # max_index = np.argmax(similarities)
50
- # max_similarity = similarities[max_index]
51
-
52
- # if max_similarity >= threshold:
53
- # return self.labels.iloc[max_index]
54
- # else:
55
- # return "No similar text found in dataset"
56
-
57
- # def classify_hadith(self, input_text):
58
- # return self.predict_label(input_text)
59
-
60
- # if __name__ == "__main__":
61
- # # Initialize the app
62
- # hadith_classification_app = HadithClassificationApp()
63
-
64
- # # Set up the Gradio interface
65
- # iface = gr.Interface(
66
- # fn=hadith_classification_app.classify_hadith,
67
- # inputs="text",
68
- # outputs="text",
69
- # title="Hadith Classification App",
70
- # description="Classify Hadith text based on pre-trained model."
71
- # )
72
-
73
- # # Launch the Gradio interface
74
- # iface.launch()
 
7
  import gradio as gr
8
  from sklearn.metrics.pairwise import cosine_similarity
9
 
10
+ class HadithClassificationApp:
11
+ def __init__(self):
12
+ # Download NLTK resources if needed
13
+ nltk.download('punkt')
14
+
15
+ # Load the dataset and labels
16
+ self.dataset = pd.read_csv("/home/user/app/Preprocess_LK_Hadith_dataset.csv")
17
+ self.labels = self.dataset['Arabic_Grade']
18
+
19
+ # Load the models
20
+ with open("tfidf_vectorizer.pkl", "rb") as f:
21
+ self.vectorizer = pickle.load(f)
22
+ with open("cosine_similarity_model.pkl", "rb") as f:
23
+ self.X = pickle.load(f)
24
+
25
+ @staticmethod
26
+ def remove_tashkeel(text):
27
+ tashkeel_pattern = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
28
+ return re.sub(tashkeel_pattern, '', text)
29
+
30
+ def preprocess_arabic_text(self, text):
31
+ text = self.remove_tashkeel(text)
32
+ tokens = nltk.word_tokenize(text)
33
+ cleaned_tokens = [token for token in tokens if token.isalnum()]
34
+ lowercase_tokens = [token.lower() for token in cleaned_tokens]
35
+ return " ".join(lowercase_tokens)
36
+
37
+ def predict_label(self, input_text, threshold=0.5):
38
+ input_text = self.preprocess_arabic_text(input_text)
39
+ input_vector = self.vectorizer.transform([input_text])
40
+ similarities = cosine_similarity(input_vector, self.X).flatten()
41
+
42
+ max_index = np.argmax(similarities)
43
+ max_similarity = similarities[max_index]
44
+
45
+ if max_similarity >= threshold:
46
+ return self.labels.iloc[max_index]
47
+ else:
48
+ return "No similar text found in dataset"
49
+
50
+ def classify_hadith(self, input_text):
51
+ return self.predict_label(input_text)
52
+
53
+ if __name__ == "__main__":
54
+ # Initialize the app
55
+ hadith_classification_app = HadithClassificationApp()
56
+
57
+ # Set up the Gradio interface
58
+ iface = gr.Interface(
59
+ fn=hadith_classification_app.classify_hadith,
60
+ inputs="text",
61
+ outputs="text",
62
+ title="Hadith Classification App",
63
+ description="Classify Hadith text based on pre-trained model."
64
+ )
65
+
66
+ # Launch the Gradio interface
67
+ iface.launch()