redfernstech committed on
Commit
4aefa71
·
verified ·
1 Parent(s): ebb45b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -230
app.py CHANGED
@@ -1,237 +1,22 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import os
4
- import re
5
- import preprocessor as p
6
- import joblib
7
- import base64
8
 
 
 
 
9
 
 
 
 
 
10
 
11
- project_description = """
12
- # Hotel Data Analysis Project
13
 
14
- ## Overview
 
 
 
15
 
16
- I have completed a hotel data analysis project using an instant web scraper.
17
- This project involved scraping hotel data and hotel reviews separately, cleaning the data,
18
- concatenating it, and performing sentiment analysis on the DataFrame.
19
- Additionally, I clustered the hotel reviews, applied sentiment analysis, and passed
20
- those clusters to an LLM (Language Model) to extract strengths and weaknesses of hotels.
21
-
22
- ## Steps
23
-
24
- ### 1. Scraping Hotel Data
25
-
26
- - Utilized an instant web scraper to collect hotel data.
27
- - Scraped hotel data separately from hotel reviews.
28
-
29
- ### 2. Data Collection
30
-
31
- - Collected hotel data and hotel reviews data separately for each hotel.
32
-
33
- ### 3. Data Cleaning
34
-
35
- - Cleaned the collected data to remove any inconsistencies or errors.
36
- - Applied preprocessing techniques to prepare the data for analysis.
37
-
38
- ### 4. Data Concatenation
39
-
40
- - Concatenated the cleaned hotel data and hotel reviews data to create a unified dataset for analysis.
41
-
42
- ### 5. Sentiment Analysis
43
-
44
- - Performed sentiment analysis on the concatenated DataFrame.
45
- - Utilized the results to understand the overall sentiment of hotel reviews.
46
-
47
- ### 6. Clustering Hotel Reviews
48
-
49
- - Clustered the hotel reviews based on their content to identify patterns and similarities.
50
-
51
- ### 7. Extracting Strengths and Weaknesses
52
-
53
- - Passed the clustered reviews to an LLM (Language Model) to extract strengths and weaknesses of hotels.
54
- - Used the extracted information to gain insights into customer perceptions.
55
-
56
- ## Conclusion
57
-
58
- This project demonstrates the use of web scraping, data cleaning, sentiment analysis, and clustering techniques to analyze hotel data.
59
- The extracted strengths and weaknesses provide valuable insights for hotel management to improve customer satisfaction and service quality.
60
- """
61
- def create_download_link(df, filename):
62
- csv = df.to_csv(index=False)
63
- b64 = base64.b64encode(csv.encode()).decode()
64
- href = f'<a href="data:file/csv;base64,{b64}" download="{filename}.csv">Download CSV file</a>'
65
- return href
66
-
67
- # Path to the directory containing CSV files
68
- directory_path = r'hotel reviews'
69
-
70
- # Get a list of CSV files in the directory
71
- csv_files = [file for file in os.listdir(directory_path) if file.endswith('.csv')]
72
-
73
- # Function to concatenate selected columns
74
- def concatenate_columns(df, selected_columns):
75
- concatenated_data = df[selected_columns[0]].tolist() + df[selected_columns[1]].tolist()
76
- return pd.DataFrame({'ConcatenatedData': concatenated_data})
77
-
78
- # Function to display selected dataset
79
- def display_selected_dataset(selected_dataset):
80
- dataset_path = os.path.join(directory_path, selected_dataset)
81
- selected_df = pd.read_csv(dataset_path)
82
- st.subheader(f'Dataset: {selected_dataset}')
83
- st.write(selected_df)
84
- def clean_tweets(series):
85
- REPLACE_NO_SPACE = re.compile("[.;:!\'?,\"()\[\]]")
86
- REPLACE_WITH_SPACE = re.compile("(<br\s*/><br\s*/>)|(\-)|(\/)")
87
- tempArr = []
88
- for line in series:
89
- # Check if the value is NaN
90
- if pd.isnull(line):
91
- tempArr.append("")
92
- continue
93
- # Send to tweet_processor
94
- tmpL = p.clean(line)
95
- # Remove punctuation
96
- tmpL = REPLACE_NO_SPACE.sub("", tmpL.lower())
97
- # Replace specific characters with spaces
98
- tmpL = REPLACE_WITH_SPACE.sub(" ", tmpL)
99
- # Remove extra spaces
100
- tmpL = " ".join(tmpL.split())
101
- tempArr.append(tmpL)
102
- return tempArr
103
-
104
- # Streamlit app
105
- def main():
106
-
107
-
108
- # Create a menu bar
109
- menu = st.sidebar.selectbox(
110
- 'Navigation',
111
- ['Home', 'collected hotel data', 'Display Hotel Data', 'Display hotel reviews Datasets']
112
- )
113
-
114
- if menu == 'Home':
115
- st.markdown(project_description)
116
-
117
- elif menu == 'collected hotel data':
118
- # Display DataFrame
119
- df = pd.read_csv('chennai hotes.csv')
120
- df1 = pd.read_csv('stream.csv')
121
- st.subheader('Collected chennai hotes Data')
122
- st.write(df)
123
- st.subheader('preprocess applyed data')
124
- st.write(df1)
125
-
126
- elif menu == 'Display Hotel Data':
127
- # Display hotel data
128
- df = pd.read_csv('stream.csv')
129
- css = """
130
- <style>
131
- .hotel-container {
132
- border: 1px solid #ddd;
133
- border-radius: 5px;
134
- padding: 10px;
135
- margin-bottom: 20px;
136
- }
137
- .hotel-image {
138
- max-width: 100%;
139
- border-radius: 5px;
140
- margin-bottom: 10px;
141
- }
142
- .hotel-details {
143
- font-size: 16px;
144
- }
145
- </style>
146
- """
147
- st.markdown(css, unsafe_allow_html=True)
148
- for index, row in df.iterrows():
149
- st.markdown(f"""
150
- <div class="hotel-container">
151
- <img class="hotel-image" src="{row['hotel image']}">
152
- <div class="hotel-details">
153
- <h2>{row['Hotel Name']}</h2>
154
- <p><strong>Rating:</strong> {row['rating']}</p>
155
- <p><strong>Location:</strong> {row['location']} ({row['nearest places']})</p>
156
- <p><strong>Website:</strong> <a href="{row['hotel website']}">Website link</a></p>
157
- <p><strong>Number of Reviews:</strong> {row['number of reviewss 2']}</p>
158
- <p><strong>Room Type:</strong> {row['room type']}</p>
159
- <p><strong>Price:</strong> {row['price']}</p>
160
- <p><strong>Strengths:</strong> {row['Strengths']}</p>
161
- <p><strong>Weaknesses:</strong> {row['Weaknesses']}</p>
162
- </div>
163
- </div>
164
- """, unsafe_allow_html=True)
165
-
166
-
167
- elif menu == 'Display hotel reviews Datasets':
168
- selected_dataset = st.selectbox('Select Dataset', csv_files)
169
- if selected_dataset:
170
- display_selected_dataset(selected_dataset)
171
-
172
- elif menu == 'CSV Column Concatenation and Sentiment Analysis':
173
- st.title('CSV Column Concatenation and Sentiment Analysis')
174
-
175
- new_names = {
176
- 'a3332d346a': 'Reviewer Name',
177
- 'afac1f68d9': 'Reviewer Country',
178
- 'abf093bdfe': 'Room Type',
179
- 'abf093bdfe 2': 'Length of Stay',
180
- 'abf093bdfe 3': 'Review Date',
181
- 'abf093bdfe 4': 'Traveler Type',
182
- 'abf093bdfe 5': 'Second Review Date',
183
- 'f6431b446c': 'Overall Rating',
184
- 'a53cbfa6de': 'Positive Comments',
185
- 'a53cbfa6de 2': 'Negative Comments',
186
- 'a3332d346a 2': 'Hotel Response',
187
- 'a53cbfa6de 3': 'Hotel Response1'
188
- }
189
-
190
- # File upload
191
- uploaded_file = st.file_uploader('Upload CSV file', type=['csv'])
192
- if uploaded_file is not None:
193
- df = pd.read_csv(uploaded_file)
194
- df.rename(columns=new_names, inplace=True)
195
-
196
- # Show original DataFrame
197
- st.subheader('Original DataFrame:')
198
- st.write(df)
199
-
200
- # Select columns
201
- selected_columns = st.multiselect('Select columns to concatenate', df.columns)
202
-
203
- if st.button('Concatenate columns'):
204
- if len(selected_columns) == 2:
205
- # Concatenate columns
206
- new_df = concatenate_columns(df, selected_columns)
207
-
208
- # Remove null values
209
- new_df = new_df.dropna()
210
-
211
- # Drop duplicates
212
- new_df = new_df.drop_duplicates()
213
-
214
- # Reset the index
215
- new_df = new_df.reset_index(drop=True)
216
-
217
- # Clean tweets
218
- new_df['CleanedData'] = clean_tweets(new_df['ConcatenatedData'])
219
-
220
- # Load the saved model
221
- loaded_model = joblib.load('sentiment_analysis_model.pkl')
222
-
223
- # Apply sentiment analysis
224
- new_df['Sentiment'] = loaded_model.predict(new_df['CleanedData'])
225
-
226
- # Display concatenated, cleaned, and sentiment analyzed DataFrame
227
- st.subheader('Concatenated, Cleaned, and Sentiment Analyzed DataFrame:')
228
- st.write(new_df)
229
-
230
- # Create download link
231
- st.markdown(create_download_link(new_df, 'concatenated_sentiment_analyzed_data'), unsafe_allow_html=True)
232
- else:
233
- st.warning('Please select exactly two columns to concatenate.')
234
-
235
- # Run the app
236
- if __name__ == '__main__':
237
- main()
 
1
  import streamlit as st
2
  import pandas as pd
 
 
 
 
 
3
 
4
+ # Load the CSV data
5
+ file_path = 'category upwork jobs.csv'
6
+ jobs_df = pd.read_csv(file_path)
7
 
8
+ # Sidebar: Select Category
9
+ st.sidebar.header("Filter Jobs by Category")
10
+ categories = jobs_df['category'].unique() # Extract unique categories
11
+ selected_category = st.sidebar.selectbox("Choose a category:", categories)
12
 
13
+ # Filter jobs based on the selected category
14
+ filtered_jobs = jobs_df[jobs_df['category'] == selected_category]
15
 
16
+ # Main: Display filtered jobs
17
+ st.title("Jobs Dashboard")
18
+ st.write(f"Showing jobs in category: **{selected_category}**")
19
+ st.dataframe(filtered_jobs[['title']]) # Adjust columns as needed
20
 
21
+ # Optional: Show a count of jobs in the selected category
22
+ st.write(f"Total jobs in this category: {len(filtered_jobs)}")