Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,60 +1,61 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
2 |
import nltk
|
3 |
import numpy as np
|
4 |
import tflearn
|
5 |
-
import
|
|
|
|
|
|
|
6 |
from nltk.tokenize import word_tokenize
|
7 |
from nltk.stem.lancaster import LancasterStemmer
|
8 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
9 |
import requests
|
10 |
-
import
|
11 |
-
import
|
12 |
-
import
|
13 |
-
import pickle
|
14 |
from bs4 import BeautifulSoup
|
|
|
15 |
from selenium import webdriver
|
16 |
from selenium.webdriver.chrome.options import Options
|
17 |
import chromedriver_autoinstaller
|
|
|
18 |
|
19 |
-
# Ensure NLTK resources are downloaded
|
20 |
nltk.download('punkt')
|
21 |
|
22 |
-
#
|
23 |
-
GOOGLE_MAPS_API_KEY = os.environ.get("GOOGLE_API_KEY") # Get API key from environment variable
|
24 |
-
if not GOOGLE_MAPS_API_KEY:
|
25 |
-
raise ValueError("Error: GOOGLE_MAPS_API_KEY environment variable not set.")
|
26 |
-
|
27 |
-
url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
|
28 |
-
places_details_url = "https://maps.googleapis.com/maps/api/place/details/json"
|
29 |
-
query = "therapist OR counselor OR mental health professional OR marriage and family therapist OR psychotherapist OR psychiatrist OR psychologist OR nutritionist OR wellness doctor OR holistic practitioner OR integrative medicine OR chiropractor OR naturopath"
|
30 |
-
|
31 |
-
# Chatbot
|
32 |
stemmer = LancasterStemmer()
|
33 |
|
|
|
34 |
try:
|
35 |
with open("intents.json") as file:
|
36 |
data = json.load(file)
|
37 |
except FileNotFoundError:
|
38 |
-
raise FileNotFoundError("Error: 'intents.json' file not found.")
|
39 |
|
|
|
40 |
try:
|
41 |
-
with open("data.pickle", "rb") as
|
42 |
-
words, labels, training, output = pickle.load(
|
43 |
except FileNotFoundError:
|
44 |
-
raise FileNotFoundError("Error: 'data.pickle' file not found.")
|
45 |
|
|
|
46 |
net = tflearn.input_data(shape=[None, len(training[0])])
|
47 |
net = tflearn.fully_connected(net, 8)
|
48 |
net = tflearn.fully_connected(net, 8)
|
49 |
net = tflearn.fully_connected(net, len(output[0]), activation="softmax")
|
|
|
50 |
|
|
|
51 |
model = tflearn.DNN(net)
|
52 |
-
|
53 |
try:
|
54 |
model.load("MentalHealthChatBotmodel.tflearn")
|
55 |
except FileNotFoundError:
|
56 |
raise FileNotFoundError("Error: Trained model file 'MentalHealthChatBotmodel.tflearn' not found.")
|
57 |
|
|
|
58 |
def bag_of_words(s, words):
|
59 |
bag = [0 for _ in range(len(words))]
|
60 |
s_words = word_tokenize(s)
|
@@ -65,71 +66,115 @@ def bag_of_words(s, words):
|
|
65 |
bag[i] = 1
|
66 |
return np.array(bag)
|
67 |
|
|
|
68 |
def chat(message, history):
|
69 |
history = history or []
|
70 |
message = message.lower()
|
|
|
71 |
try:
|
|
|
72 |
results = model.predict([bag_of_words(message, words)])
|
73 |
results_index = np.argmax(results)
|
74 |
tag = labels[results_index]
|
|
|
|
|
75 |
for tg in data["intents"]:
|
76 |
if tg['tag'] == tag:
|
77 |
responses = tg['responses']
|
78 |
response = random.choice(responses)
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
history.append((message, response))
|
83 |
-
return history, history
|
84 |
-
|
85 |
-
# Sentiment Analysis
|
86 |
-
tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
|
87 |
-
model_sentiment = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
|
88 |
|
89 |
-
def analyze_sentiment(text):
|
90 |
-
try:
|
91 |
-
inputs = tokenizer(text, return_tensors="pt")
|
92 |
-
with torch.no_grad():
|
93 |
-
logits = model_sentiment(**inputs).logits
|
94 |
-
sentiment = ["Negative", "Neutral", "Positive"][torch.argmax(logits)]
|
95 |
-
return f"**Predicted Sentiment:** {sentiment}"
|
96 |
except Exception as e:
|
97 |
-
|
98 |
-
|
99 |
-
# Emotion Detection
|
100 |
-
def detect_emotion(text):
|
101 |
-
# Implement your own emotion detection logic
|
102 |
-
return "Emotion detection not implemented"
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
# Implement your own suggestion generation logic
|
107 |
-
return pd.DataFrame(columns=["Subject", "Article URL", "Video URL"])
|
108 |
|
109 |
-
# Google Places API
|
110 |
def get_places_data(query, location, radius, api_key, next_page_token=None):
|
111 |
-
params = {
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
if next_page_token:
|
113 |
params["pagetoken"] = next_page_token
|
|
|
114 |
response = requests.get(url, params=params)
|
|
|
115 |
if response.status_code == 200:
|
116 |
return response.json()
|
117 |
else:
|
118 |
-
print(f"Error: {response.status_code} - {response.text}")
|
119 |
return None
|
120 |
|
|
|
121 |
def get_place_details(place_id, api_key):
|
122 |
-
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
124 |
if response.status_code == 200:
|
125 |
details_data = response.json().get("result", {})
|
126 |
return {
|
|
|
|
|
127 |
"phone_number": details_data.get("formatted_phone_number", "Not available"),
|
128 |
"website": details_data.get("website", "Not available")
|
129 |
}
|
130 |
else:
|
131 |
return {}
|
132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
def get_all_places(query, location, radius, api_key):
|
134 |
all_results = []
|
135 |
next_page_token = None
|
@@ -137,61 +182,138 @@ def get_all_places(query, location, radius, api_key):
|
|
137 |
data = get_places_data(query, location, radius, api_key, next_page_token)
|
138 |
if data:
|
139 |
results = data.get('results', [])
|
|
|
|
|
|
|
140 |
for place in results:
|
141 |
place_id = place.get("place_id")
|
142 |
name = place.get("name")
|
143 |
address = place.get("formatted_address")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
details = get_place_details(place_id, api_key)
|
145 |
phone_number = details.get("phone_number", "Not available")
|
146 |
-
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
next_page_token = data.get('next_page_token')
|
149 |
if not next_page_token:
|
150 |
break
|
|
|
|
|
151 |
else:
|
152 |
break
|
|
|
153 |
return all_results
|
154 |
|
155 |
-
#
|
156 |
-
def
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
else:
|
169 |
-
wellness_results = pd.DataFrame([["", "", "", ""]], columns=["Name", "Address", "Phone", "Website"])
|
170 |
else:
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
gr.
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import csv
import json
import os
import pickle
import random
import re
import tempfile
import time
from urllib.parse import quote_plus

import chromedriver_autoinstaller
import gradio as gr
import nltk
import numpy as np
import pandas as pd
import requests
import tensorflow as tf
import tflearn
import torch
from bs4 import BeautifulSoup
from nltk.stem.lancaster import LancasterStemmer
from nltk.tokenize import word_tokenize
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
23 |
|
24 |
+
# Ensure necessary NLTK resources are downloaded
|
25 |
nltk.download('punkt')

# Google Places configuration.
# These module-level names are read by get_places_data(), get_place_details(),
# main() and the Gradio fetch handler; without them those functions raise
# NameError.
GOOGLE_MAPS_API_KEY = os.environ.get("GOOGLE_API_KEY")  # None when the variable is unset
api_key = GOOGLE_MAPS_API_KEY

url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
places_details_url = "https://maps.googleapis.com/maps/api/place/details/json"
query = "therapist OR counselor OR mental health professional OR marriage and family therapist OR psychotherapist OR psychiatrist OR psychologist OR nutritionist OR wellness doctor OR holistic practitioner OR integrative medicine OR chiropractor OR naturopath"

# NOTE(review): the search centre and radius are not defined anywhere in the
# visible file. The defaults below (Honolulu, 50 km) are an assumption based on
# the output name "wellness_professionals_hawaii.csv" -- confirm, or override
# through the environment.
location = os.environ.get("PLACES_LOCATION", "21.3069,-157.8583")  # "lat,lng"
radius = int(os.environ.get("PLACES_RADIUS", "50000"))  # metres

+
# Initialize the stemmer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
stemmer = LancasterStemmer()
|
29 |
|
30 |
+
# Load the intent definitions; chat() reads each intent's 'tag' and 'responses'.
try:
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open("intents.json", encoding="utf-8") as file:
        data = json.load(file)
except FileNotFoundError as err:
    raise FileNotFoundError("Error: 'intents.json' file not found. Ensure it exists in the current directory.") from err

# Load the preprocessed vocabulary, labels and training matrices.
# NOTE: pickle.load executes arbitrary code from the file; only ship a
# data.pickle that was produced by this project's own training script.
try:
    with open("data.pickle", "rb") as f:
        words, labels, training, output = pickle.load(f)
except FileNotFoundError as err:
    raise FileNotFoundError("Error: 'data.pickle' file not found. Ensure it exists and matches the model.") from err

# Rebuild the network: input width comes from the training rows, output width
# from the one-hot label rows, with two 8-unit hidden layers in between.
net = tflearn.input_data(shape=[None, len(training[0])])
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, len(output[0]), activation="softmax")
net = tflearn.regression(net)

# Wrap the graph and restore the trained weights.
model = tflearn.DNN(net)
try:
    model.load("MentalHealthChatBotmodel.tflearn")
except FileNotFoundError as err:
    raise FileNotFoundError("Error: Trained model file 'MentalHealthChatBotmodel.tflearn' not found.") from err
|
57 |
|
58 |
+
# Function to process user input into a bag-of-words format
|
59 |
def bag_of_words(s, words):
|
60 |
bag = [0 for _ in range(len(words))]
|
61 |
s_words = word_tokenize(s)
|
|
|
66 |
bag[i] = 1
|
67 |
return np.array(bag)
|
68 |
|
69 |
+
# Chat function
|
70 |
def chat(message, history):
    """Answer one user message and append the exchange to the history.

    The lower-cased message is turned into a bag-of-words vector, classified
    by the module-level ``model``, and the highest-scoring label selects an
    intent from ``data["intents"]``; one of that intent's responses is picked
    at random.

    Args:
        message: Text typed by the user.
        history: List of ``(message, response)`` tuples so far; a falsy value
            starts a new list.

    Returns:
        The updated history twice, as a ``(history, history)`` tuple.
    """
    history = history or []
    message = message.lower()

    fallback = "I'm sorry, I didn't understand that. Could you please rephrase?"
    try:
        scores = model.predict([bag_of_words(message, words)])
        predicted_tag = labels[np.argmax(scores)]

        # First intent whose tag matches wins; the fallback covers no match.
        response = next(
            (
                random.choice(intent['responses'])
                for intent in data["intents"]
                if intent['tag'] == predicted_tag
            ),
            fallback,
        )
    except Exception as e:
        response = f"An error occurred: {str(e)}"

    history.append((message, response))
    return history, history
|
|
|
|
|
94 |
|
95 |
+
# Function to send a request to Google Places API and fetch places data
|
96 |
def get_places_data(query, location, radius, api_key, next_page_token=None):
    """Fetch one page of Google Places Text Search results.

    Args:
        query: Free-text search string.
        location: Search centre, sent as the ``location`` parameter.
        radius: Search radius, sent as the ``radius`` parameter.
        api_key: Google API key, sent as the ``key`` parameter.
        next_page_token: Token from a previous page; sent as ``pagetoken``
            when truthy.

    Returns:
        The decoded JSON body as a dict, or ``None`` when the request fails,
        the HTTP status is not 200, the body is not JSON, or the body carries
        an API-level error status.
    """
    params = {
        "query": query,
        "location": location,
        "radius": radius,
        "key": api_key,
    }
    if next_page_token:
        params["pagetoken"] = next_page_token

    try:
        # The timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as err:
        print(f"Error: Places search request failed: {err}")
        return None

    if response.status_code != 200:
        print(f"Error: Places search returned HTTP {response.status_code}")
        return None

    try:
        payload = response.json()
    except ValueError:
        print("Error: Places search returned a non-JSON body")
        return None

    # The API reports failures such as REQUEST_DENIED in the body while still
    # answering with HTTP 200, so the status field has to be checked as well.
    status = payload.get("status")
    if status not in (None, "OK", "ZERO_RESULTS"):
        print(f"Error: Places search status {status}")
        return None

    return payload
|
113 |
|
114 |
+
# Function to fetch detailed information for a specific place using its place_id
|
115 |
def get_place_details(place_id, api_key):
    """Fetch opening hours, reviews, phone number and website for one place.

    Args:
        place_id: Google place identifier, sent as ``place_id``.
        api_key: Google API key, sent as ``key``.

    Returns:
        A dict with the keys ``opening_hours``, ``reviews``, ``phone_number``
        and ``website`` (each ``"Not available"`` when the API omits it), or
        an empty dict when the request fails or the HTTP status is not 200.
    """
    params = {
        "place_id": place_id,
        "key": api_key,
        # Request only the fields that are read below instead of the full record.
        "fields": "opening_hours,reviews,formatted_phone_number,website",
    }

    try:
        response = requests.get(places_details_url, params=params, timeout=10)
    except requests.RequestException as err:
        print(f"Error: Place details request failed: {err}")
        return {}

    if response.status_code != 200:
        return {}

    try:
        details_data = response.json().get("result", {})
    except ValueError:
        return {}

    return {
        "opening_hours": details_data.get("opening_hours", {}).get("weekday_text", "Not available"),
        "reviews": details_data.get("reviews", "Not available"),
        "phone_number": details_data.get("formatted_phone_number", "Not available"),
        "website": details_data.get("website", "Not available"),
    }
|
133 |
|
134 |
+
# Scrape website URL from Google Maps results (using Selenium)
|
135 |
+
def scrape_website_from_google_maps(place_name):
    """Look up a place on Google Maps and return its website link.

    Starts a headless Chrome session, opens the Maps search page for
    ``place_name`` and reads the ``href`` of the first link whose aria-label
    contains both "Visit" and "website".

    Args:
        place_name: Name to search for; it is URL-encoded before use.

    Returns:
        The website URL, or ``"Not available"`` when the page cannot be
        loaded or no such link is found.
    """
    chrome_options = Options()
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        chrome_options.add_argument(flag)

    driver = webdriver.Chrome(options=chrome_options)
    try:
        # quote_plus also escapes '&', '#', '/' and similar characters that a
        # plain space-to-plus replacement would leave in the URL.
        driver.get(f"https://www.google.com/maps/search/{quote_plus(place_name)}")
        time.sleep(5)  # give the page time to render

        # find_element_by_xpath was removed in Selenium 4.3; use the By locator.
        website_element = driver.find_element(
            By.XPATH,
            '//a[contains(@aria-label, "Visit") and contains(@aria-label, "website")]',
        )
        return website_element.get_attribute('href') or "Not available"
    except WebDriverException:
        # Best-effort lookup: a missing link or a failed page load is not fatal.
        return "Not available"
    finally:
        # Always release the browser process, including on unexpected errors.
        driver.quit()
|
154 |
+
|
155 |
+
# Scraping the website to extract phone number or email
|
156 |
+
# North-American style phone numbers: optional "+1", three-digit area code
# (optionally parenthesised), then 3 + 4 digits, separated by space, dot or dash.
# The look-arounds stop the pattern from matching inside a longer digit run.
_PHONE_RE = re.compile(r'(?<!\d)(?:\+?1[\s.-]?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}(?!\d)')
_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')


def scrape_website_for_contact_info(website):
    """Download a web page and extract the first phone number and e-mail.

    Args:
        website: URL of the page to fetch.

    Returns:
        A ``(phone_number, email)`` tuple; each element is
        ``"Not available"`` when nothing matching is found or the page cannot
        be fetched.
    """
    phone_number = "Not available"
    email = "Not available"

    try:
        response = requests.get(website, timeout=5)
    except requests.RequestException as e:
        print(f"Error scraping website {website}: {e}")
        return phone_number, email

    # Extract the visible text once and run both searches against it.
    page_text = BeautifulSoup(response.content, 'html.parser').get_text()

    # A pattern made only of optional parts would match the empty string at
    # position 0 and hide the "Not available" default; this one needs ten digits.
    phone_match = _PHONE_RE.search(page_text)
    if phone_match:
        phone_number = phone_match.group().strip()

    email_match = _EMAIL_RE.search(page_text)
    if email_match:
        email = email_match.group()

    return phone_number, email
|
176 |
+
|
177 |
+
# Function to fetch all places data including pagination
|
178 |
def get_all_places(query, location, radius, api_key):
    """Collect every result page of a Places text search as table rows.

    For each place the details endpoint is queried; when no phone number is
    returned but a website is known, the website is scraped for contact
    details, and when no website is known a Google Maps lookup is tried.

    Args:
        query: Free-text search string.
        location: Search centre, passed unchanged to every page request.
        radius: Search radius.
        api_key: Google API key.

    Returns:
        A list of rows in the order: name, address, phone, rating, business
        status, user ratings total, website, types, latitude, longitude,
        opening hours, reviews, email.
    """
    all_results = []
    next_page_token = None

    while True:
        data = get_places_data(query, location, radius, api_key, next_page_token)
        if not data:
            break

        results = data.get('results', [])
        if not results:
            break

        for place in results:
            name = place.get("name")

            # Kept in its own variable: reusing `location` here would overwrite
            # the search centre that the next page request still needs.
            coordinates = place.get("geometry", {}).get("location", {})

            details = get_place_details(place.get("place_id"), api_key)
            phone_number = details.get("phone_number", "Not available")
            # Prefer the website from the details response; fall back to the
            # search result's own field if the details call yielded none.
            website = details.get("website", place.get("website", "Not available"))

            email = "Not available"
            if phone_number == "Not available" and website != "Not available":
                phone_number, email = scrape_website_for_contact_info(website)

            if website == "Not available":
                website = scrape_website_from_google_maps(name)

            all_results.append([
                name,
                place.get("formatted_address"),
                phone_number,
                place.get("rating", "Not available"),
                place.get("business_status", "Not available"),
                place.get("user_ratings_total", "Not available"),
                website,
                ", ".join(place.get("types", [])),
                coordinates.get("lat", "Not available"),
                coordinates.get("lng", "Not available"),
                details.get("opening_hours", "Not available"),
                details.get("reviews", "Not available"),
                email,
            ])

        next_page_token = data.get('next_page_token')
        if not next_page_token:
            break

        # Pause before requesting the next page with the token just received.
        time.sleep(2)

    return all_results
|
225 |
|
226 |
+
# Function to save results to CSV file
|
227 |
+
def save_to_csv(data, filename):
    """Write the place rows to ``filename`` as UTF-8 CSV with a header row.

    Args:
        data: Iterable of rows, each matching the 13 header columns.
        filename: Path of the file to create or overwrite.
    """
    header = [
        "Name", "Address", "Phone", "Rating", "Business Status",
        "User Ratings Total", "Website", "Types", "Latitude", "Longitude",
        "Opening Hours", "Reviews", "Email",
    ]
    with open(filename, mode='w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle)
        out.writerow(header)
        for row in data:
            out.writerow(row)
    print(f"Data saved to {filename}")
|
233 |
+
|
234 |
+
# Main function to execute script
|
235 |
+
def main():
    """Run the Places search and save the rows to a CSV file.

    Reads ``query``, ``location``, ``radius`` and ``api_key`` from module
    scope. Prints a notice instead of writing a file when nothing is found.
    """
    google_places_data = get_all_places(query, location, radius, api_key)
    if not google_places_data:
        print("No data found.")
        return
    save_to_csv(google_places_data, "wellness_professionals_hawaii.csv")
|
241 |
+
|
242 |
+
# Gradio UI setup
|
243 |
+
# Emotion classifier, loaded once at import time so every request reuses it.
# The names are kept distinct from `model`, which chat() reads from module
# scope for the tflearn intent classifier and must not be overwritten.
EMOTION_MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(EMOTION_MODEL_NAME)
emotion_model = AutoModelForSequenceClassification.from_pretrained(EMOTION_MODEL_NAME)
emotion_pipeline = pipeline("text-classification", model=emotion_model, tokenizer=emotion_tokenizer)

PLACES_COLUMNS = [
    "Name", "Address", "Phone", "Rating", "Business Status",
    "User Ratings Total", "Website", "Types", "Latitude", "Longitude",
    "Opening Hours", "Reviews", "Email",
]


def predict_emotion(text):
    """Return the top emotion label for ``text``, or "" for blank input."""
    if not text or not text.strip():
        return ""
    return emotion_pipeline(text)[0]['label']


def fetch_data():
    """Run the Places search and return the path of a CSV file to download.

    Returns:
        Path of a temporary CSV file, or ``None`` when the search found
        nothing (the file output is then left empty).
    """
    all_results = get_all_places(query, location, radius, api_key)
    if not all_results:
        return None

    df = pd.DataFrame(all_results, columns=PLACES_COLUMNS)
    # gr.File expects a file path, not the CSV text itself.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".csv", delete=False, newline="", encoding="utf-8"
    ) as tmp:
        df.to_csv(tmp, index=False)
    return tmp.name


# Gradio UI setup
with gr.Blocks(title="Emotion Detection and Well-Being Suggestions") as demo:
    # Display header
    gr.Markdown("# Emotion Detection and Well-Being Suggestions")

    # User input for text (emotion detection)
    user_input = gr.Textbox(lines=1, label="How are you feeling today?")
    emotion_output = gr.Textbox(label="Emotion Detected")
    user_input.change(predict_emotion, inputs=user_input, outputs=emotion_output)

    # Chatbot functionality; uses the module-level chat() rather than a nested copy.
    chatbot = gr.Chatbot(label="Chat")
    message_input = gr.Textbox(lines=1, label="Message")
    history_state = gr.State([])
    message_input.submit(chat, inputs=[message_input, history_state], outputs=[chatbot, history_state])

    # Button to fetch wellness professionals data
    fetch_button = gr.Button("Fetch Wellness Professionals Data")
    data_output = gr.File(label="Download Data")
    fetch_button.click(fetch_data, inputs=None, outputs=data_output)

# Launch Gradio interface
demo.launch()
|