Update app.py
app.py
CHANGED
@@ -1,15 +1,60 @@
 import streamlit as st
 import tl_calamancy_lg
 import os
-import os
 import pandas as pd
 import json
+from sklearn.metrics.pairwise import cosine_similarity
 
-
-
+# Define the cache decorator for loading the spaCy model
+@st.cache_resource(allow_output_mutation=True)
+def load_nlp_model():
+    return tl_calamancy_lg.load()
+
+# Load the spaCy model using the cached function
+nlp = load_nlp_model()
+
+# Define the cache decorator for loading the DataFrame
+@st.cache_data
+def load_data(file_path):
+    # Open the JSON file
+    with open(file_path, 'r') as file:
+        # Load the JSON data
+        data = json.load(file)
+
+    # Extract patterns and responses into separate lists
+    patterns_data = []
+    responses_data = []
+
+    for intent in data["intents"]:
+        tag = intent["tag"]
+        patterns = intent.get("patterns", [])
+        responses = intent.get("responses", [])
+
+        for pattern in patterns:
+            patterns_data.append({"tag": tag, "pattern": pattern})
+
+        for response in responses:
+            responses_data.append({"tag": tag, "response": response})
+
+    # Create and return DataFrames
+    patterns_df = pd.DataFrame(patterns_data)
+    responses_df = pd.DataFrame(responses_data)
+    return patterns_df, responses_df
+
+# Get the absolute path of the script directory
+cwd = os.getcwd()
+
+# Read the CSV file
+file_path = os.path.join(cwd, "dataset_v2.json")
+
+# Load the DataFrames using the cached function
+patterns_df, responses_df = load_data(file_path)
+
+# Define the cache decorator for the similarity function
+@st.cache_data
 def get_most_similar_tag(user_query, dataframe):
     # Process user query and existing queries with spaCy
-    all_queries = list(dataframe['
+    all_queries = list(dataframe['pattern']) + [user_query]
     processed_queries = [nlp(query) for query in all_queries]
 
     # Get word vectors for each query
@@ -29,36 +74,6 @@ def get_most_similar_tag(user_query, dataframe):
 
     # Return the most similar tag and its similarity score
     return most_similar_tag, user_similarity_scores[most_similar_index]
-
-# Get the absolute path of the script directory
-cwd = os.getcwd()
-
-# Read the CSV file
-file_path = os.path.join(cwd, "dataset_v2.json")
-
-# Open the JSON file
-with open(file_path, 'r') as file:
-    # Load the JSON data
-    data = json.load(file)
-
-# Extract patterns and responses into separate lists
-patterns_data = []
-responses_data = []
-
-for intent in data["intents"]:
-    tag = intent["tag"]
-    patterns = intent.get("patterns", [])
-    responses = intent.get("responses", [])
-
-    for pattern in patterns:
-        patterns_data.append({"tag": tag, "pattern": pattern})
-
-    for response in responses:
-        responses_data.append({"tag": tag, "response": response})
-
-# Create DataFrames
-patterns_df = pd.DataFrame(patterns_data)
-responses_df = pd.DataFrame(responses_data)
 
 def main():
     # StreamLit Title
@@ -68,7 +83,8 @@ def main():
 
     st.success(doc1)
 
-
+    # Use the cached function to get the most similar tag
+    returned_tag, returned_score = get_most_similar_tag("Anong lunas sa masakit ang likod", patterns_df)
 
     st.success(returned_tag + str(returned_score))
 
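One caveat on the new caching: `allow_output_mutation` is a keyword of the legacy `st.cache` decorator. `st.cache_resource`, introduced in Streamlit 1.18, does not accept it, so the decorator line above can raise a TypeError on current Streamlit releases. A minimal sketch of the loader without the legacy argument, assuming the same calamanCy model (this is not the committed code):

```python
import streamlit as st
import tl_calamancy_lg

# Sketch: cache the calamanCy pipeline across Streamlit reruns.
# st.cache_resource takes no allow_output_mutation argument; that
# keyword belonged to the deprecated st.cache API.
@st.cache_resource
def load_nlp_model():
    return tl_calamancy_lg.load()

nlp = load_nlp_model()
```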
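The middle of `get_most_similar_tag` (new lines 61-73) falls between the hunks and is not shown. Given the `cosine_similarity` import and the names used in the visible `return`, the elided body plausibly stacks the spaCy document vectors and scores the user query against each stored pattern. Everything below is an inferred sketch, not the committed code; `processed_queries` and `dataframe` come from the visible part of the function:

```python
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Inferred sketch of the elided body: processed_queries holds the spaCy
# docs for every stored pattern plus the user query (appended last).
query_vectors = np.array([doc.vector for doc in processed_queries])
user_vector = query_vectors[-1].reshape(1, -1)
pattern_vectors = query_vectors[:-1]

# Cosine similarity of the user query against every pattern vector.
user_similarity_scores = cosine_similarity(user_vector, pattern_vectors)[0]
most_similar_index = int(user_similarity_scores.argmax())
most_similar_tag = dataframe['tag'].iloc[most_similar_index]
```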
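`load_data` walks `data["intents"]` and reads `tag`, `patterns`, and `responses` from each entry, so `dataset_v2.json` must follow the familiar intents schema (the `# Read the CSV file` comment is a leftover; the path points at a JSON file). The hard-coded test query "Anong lunas sa masakit ang likod" is Tagalog for roughly "what is the remedy for a painful back". A minimal illustration of the expected shape, where the tag and response strings are placeholders rather than repository contents:

```python
# Illustrative shape of dataset_v2.json (values are placeholders,
# not taken from the repository).
example_dataset = {
    "intents": [
        {
            "tag": "back_pain",
            "patterns": ["Anong lunas sa masakit ang likod"],
            "responses": ["Sample response text for this intent."],
        }
    ]
}
```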