DrishtiSharma committed on
Commit
c9c0197
·
verified ·
1 Parent(s): 74f50e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py CHANGED
@@ -22,6 +22,7 @@ from langchain_community.tools.sql_database.tool import (
22
  )
23
  from langchain_community.utilities.sql_database import SQLDatabase
24
  from datasets import load_dataset
 
25
  import tempfile
26
 
27
  st.title("SQL-RAG Using CrewAI πŸš€")
@@ -176,6 +177,69 @@ def escape_markdown(text):
176
  escape_chars = r"(\*|_|`|~)"
177
  return re.sub(escape_chars, r"\\\1", text)
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  # SQL-RAG Analysis
180
  if st.session_state.df is not None:
181
  temp_dir = tempfile.TemporaryDirectory()
 
22
  )
23
  from langchain_community.utilities.sql_database import SQLDatabase
24
  from datasets import load_dataset
25
+ from difflib import get_close_matches
26
  import tempfile
27
 
28
  st.title("SQL-RAG Using CrewAI πŸš€")
 
177
  escape_chars = r"(\*|_|`|~)"
178
  return re.sub(escape_chars, r"\\\1", text)
179
 
180
+
181
# Synonym mapping for flexible query understanding.
# Keys are dataset column names; values are lower-case phrases a user might
# type instead of the column name.
# NOTE: insertion order matters -- map_query_to_column returns the FIRST column
# (top to bottom) that has a matching synonym, so earlier entries win when a
# query mentions several columns.
COLUMN_SYNONYMS = {
    "job_title": ["job title", "job role", "role", "designation", "position", "job responsibility"],
    "experience_level": ["experience level", "seniority", "experience", "career stage"],
    "employment_type": ["employment type", "job type", "contract type"],
    "salary_in_usd": ["salary", "income", "earnings", "pay", "wage"],
    "remote_ratio": ["remote work", "work from home", "remote ratio", "remote"],
    "company_size": ["company size", "organization size", "business size"],
    "employee_residence": ["country", "residence", "location", "employee location"],
    "company_location": ["company location", "office location", "company country"],
}


# Helper function to map user query terms to dataset columns
def map_query_to_column(query):
    """Return the first column in COLUMN_SYNONYMS that the query mentions.

    Matching is case-insensitive and anchored at the start of a word, so a
    synonym also matches its plural or suffixed forms ("location" matches
    "locations") but not when it is buried inside another word ("location"
    does not match "allocation").

    Args:
        query: Free-text user query. May be empty or None.

    Returns:
        The matching column name (a key of COLUMN_SYNONYMS), or None when the
        query is empty/None or mentions no known synonym. When several columns
        match, the one defined earliest in COLUMN_SYNONYMS is returned.
    """
    if not query:
        return None

    text = query.lower()
    for col, synonyms in COLUMN_SYNONYMS.items():
        for term in synonyms:
            # \b is applied only before the term: a plain substring test would
            # also fire in the middle of unrelated words.
            if re.search(r"\b" + re.escape(term), text):
                return col
    return None
200
+
201
# Visualization generator with synonym handling
def _mean_salary_bar(df, column, title):
    """Build a bar chart of mean ``salary_in_usd`` per distinct value of ``column``."""
    grouped_df = df.groupby(column)["salary_in_usd"].mean().reset_index()
    return px.bar(grouped_df, x=column, y="salary_in_usd", title=title)


def generate_visual_from_query(query, df):
    """Turn a free-text query into a salary chart built from ``df``.

    The query is lower-cased and mapped to a grouping column through
    map_query_to_column / COLUMN_SYNONYMS; keywords in the query
    ("distribution", "average salary", "remote", ...) then pick the chart.

    Args:
        query: Free-text user request.
        df: DataFrame holding a ``salary_in_usd`` column plus the grouping
            columns named in COLUMN_SYNONYMS (assumed -- the schema is not
            visible from this function; confirm against the loaded dataset).

    Returns:
        The figure produced by ``px.box`` / ``px.bar`` (``px`` is presumably
        plotly.express, imported elsewhere in the file), or None when the
        query is not understood or an error occurs. In those two cases a
        Streamlit warning / error message is shown instead of raising.
    """
    try:
        query = query.lower()

        # Map user terms to an actual dataset column to group by.
        group_col = map_query_to_column(query)
        salary_mentioned = group_col == "salary_in_usd"
        if salary_mentioned:
            # Salary is always the measured value (y axis), never the grouping
            # key. Because almost every query says "salary", the first lookup
            # frequently lands on it and hides the real grouping column
            # ("average salary by company size"). Drop the salary terms and
            # look again.
            remainder = query
            for term in COLUMN_SYNONYMS["salary_in_usd"]:
                remainder = remainder.replace(term, " ")
            group_col = map_query_to_column(remainder)
            if group_col == "salary_in_usd":
                group_col = None

        label = group_col.replace("_", " ").title() if group_col else None

        # Handle common queries
        if "distribution" in query and group_col:
            return px.box(df, x=group_col, y="salary_in_usd", title=f"Salary Distribution by {label}")

        if "distribution" in query and salary_mentioned:
            # "salary distribution" with no grouping column: one overall box
            # instead of plotting salary against itself.
            return px.box(df, y="salary_in_usd", title="Salary Distribution")

        if "average salary" in query and group_col:
            return _mean_salary_bar(df, group_col, f"Average Salary by {label}")

        if "remote" in query:
            return _mean_salary_bar(df, "remote_ratio", "Remote Work Impact on Salary")

        if "company size" in query or "organization size" in query:
            return _mean_salary_bar(df, "company_size", "Salary by Company Size")

        if "country" in query or "location" in query:
            return _mean_salary_bar(df, "employee_residence", "Salary by Employee Residence")

        st.warning("❓ I couldn't understand the query for visualization. Try asking about salary distribution, experience level, remote work, etc.")
        return None

    except Exception as e:
        # UI boundary: report the problem in the app rather than crash the page.
        st.error(f"Error generating visualization: {e}")
        return None
242
+
243
  # SQL-RAG Analysis
244
  if st.session_state.df is not None:
245
  temp_dir = tempfile.TemporaryDirectory()