Update app.py
app.py
CHANGED
@@ -1,15 +1,60 @@
 import streamlit as st
 import tl_calamancy_lg
 import os
-import os
 import pandas as pd
 import json
+from sklearn.metrics.pairwise import cosine_similarity
 
-
-
+# Define the cache decorator for loading the spaCy model
+@st.cache_resource(allow_output_mutation=True)
+def load_nlp_model():
+    return tl_calamancy_lg.load()
+
+# Load the spaCy model using the cached function
+nlp = load_nlp_model()
+
+# Define the cache decorator for loading the DataFrame
+@st.cache_data
+def load_data(file_path):
+    # Open the JSON file
+    with open(file_path, 'r') as file:
+        # Load the JSON data
+        data = json.load(file)
+
+    # Extract patterns and responses into separate lists
+    patterns_data = []
+    responses_data = []
+
+    for intent in data["intents"]:
+        tag = intent["tag"]
+        patterns = intent.get("patterns", [])
+        responses = intent.get("responses", [])
+
+        for pattern in patterns:
+            patterns_data.append({"tag": tag, "pattern": pattern})
+
+        for response in responses:
+            responses_data.append({"tag": tag, "response": response})
+
+    # Create and return DataFrames
+    patterns_df = pd.DataFrame(patterns_data)
+    responses_df = pd.DataFrame(responses_data)
+    return patterns_df, responses_df
+
+# Get the absolute path of the script directory
+cwd = os.getcwd()
+
+# Read the CSV file
+file_path = os.path.join(cwd, "dataset_v2.json")
+
+# Load the DataFrames using the cached function
+patterns_df, responses_df = load_data(file_path)
+
+# Define the cache decorator for the similarity function
+@st.cache_data
 def get_most_similar_tag(user_query, dataframe):
     # Process user query and existing queries with spaCy
-    all_queries = list(dataframe['
+    all_queries = list(dataframe['pattern']) + [user_query]
     processed_queries = [nlp(query) for query in all_queries]
 
     # Get word vectors for each query
@@ -29,36 +74,6 @@ def get_most_similar_tag(user_query, dataframe):
 
     # Return the most similar tag and its similarity score
     return most_similar_tag, user_similarity_scores[most_similar_index]
-
-# Get the absolute path of the script directory
-cwd = os.getcwd()
-
-# Read the CSV file
-file_path = os.path.join(cwd, "dataset_v2.json")
-
-# Open the JSON file
-with open(file_path, 'r') as file:
-    # Load the JSON data
-    data = json.load(file)
-
-# Extract patterns and responses into separate lists
-patterns_data = []
-responses_data = []
-
-for intent in data["intents"]:
-    tag = intent["tag"]
-    patterns = intent.get("patterns", [])
-    responses = intent.get("responses", [])
-
-    for pattern in patterns:
-        patterns_data.append({"tag": tag, "pattern": pattern})
-
-    for response in responses:
-        responses_data.append({"tag": tag, "response": response})
-
-# Create DataFrames
-patterns_df = pd.DataFrame(patterns_data)
-responses_df = pd.DataFrame(responses_data)
 
 def main():
     # StreamLit Title
@@ -68,7 +83,8 @@ def main():
 
     st.success(doc1)
 
-
+    # Use the cached function to get the most similar tag
+    returned_tag, returned_score = get_most_similar_tag("Anong lunas sa masakit ang likod", patterns_df)
 
     st.success(returned_tag + str(returned_score))
 
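One caveat on the new caching: `allow_output_mutation` is a keyword of the legacy `st.cache` decorator. `st.cache_resource`, introduced in Streamlit 1.18, does not accept it, so the decorator line above can raise a TypeError on current Streamlit releases. A minimal sketch of the loader without the legacy argument, assuming the same calamanCy model (this is not the committed code):

```python
import streamlit as st
import tl_calamancy_lg

# Sketch: cache the calamanCy pipeline across Streamlit reruns.
# st.cache_resource takes no allow_output_mutation argument; that
# keyword belonged to the deprecated st.cache API.
@st.cache_resource
def load_nlp_model():
    return tl_calamancy_lg.load()

nlp = load_nlp_model()
```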
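The middle of `get_most_similar_tag` (new lines 61-73) falls between the hunks and is not shown. Given the `cosine_similarity` import and the names used in the visible `return`, the elided body plausibly stacks the spaCy document vectors and scores the user query against each stored pattern. Everything below is an inferred sketch, not the committed code; `processed_queries` and `dataframe` come from the visible part of the function:

```python
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Inferred sketch of the elided body: processed_queries holds the spaCy
# docs for every stored pattern plus the user query (appended last).
query_vectors = np.array([doc.vector for doc in processed_queries])
user_vector = query_vectors[-1].reshape(1, -1)
pattern_vectors = query_vectors[:-1]

# Cosine similarity of the user query against every pattern vector.
user_similarity_scores = cosine_similarity(user_vector, pattern_vectors)[0]
most_similar_index = int(user_similarity_scores.argmax())
most_similar_tag = dataframe['tag'].iloc[most_similar_index]
```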
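`load_data` walks `data["intents"]` and reads `tag`, `patterns`, and `responses` from each entry, so `dataset_v2.json` must follow the familiar intents schema (the `# Read the CSV file` comment is a leftover; the path points at a JSON file). The hard-coded test query "Anong lunas sa masakit ang likod" is Tagalog for roughly "what is the remedy for a painful back". A minimal illustration of the expected shape, where the tag and response strings are placeholders rather than repository contents:

```python
# Illustrative shape of dataset_v2.json (values are placeholders,
# not taken from the repository).
example_dataset = {
    "intents": [
        {
            "tag": "back_pain",
            "patterns": ["Anong lunas sa masakit ang likod"],
            "responses": ["Sample response text for this intent."],
        }
    ]
}
```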