Spaces:

DrishtiSharma
/

sql-rag

Running

App Files Files Community

DrishtiSharma committed on Jan 13

Commit

c9c0197

verified ·

1 Parent(s): 74f50e7

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -0

app.py CHANGED Viewed

@@ -22,6 +22,7 @@ from langchain_community.tools.sql_database.tool import (
 )
 from langchain_community.utilities.sql_database import SQLDatabase
 from datasets import load_dataset
 import tempfile
 st.title("SQL-RAG Using CrewAI 🚀")
@@ -176,6 +177,69 @@ def escape_markdown(text):
     escape_chars = r"(\*|_|`|~)"
     return re.sub(escape_chars, r"\\\1", text)
 # SQL-RAG Analysis
 if st.session_state.df is not None:
     temp_dir = tempfile.TemporaryDirectory()

 )
 from langchain_community.utilities.sql_database import SQLDatabase
 from datasets import load_dataset
+from difflib import get_close_matches
 import tempfile
 st.title("SQL-RAG Using CrewAI 🚀")
     escape_chars = r"(\*|_|`|~)"
     return re.sub(escape_chars, r"\\\1", text)
+# Phrases a user may type, keyed by the dataset column each phrase refers to.
+# Entry order is significant: map_query_to_column walks the columns, and the
+# phrases within each column, in the order they are written here.
+COLUMN_SYNONYMS = {
+    "job_title": [
+        "job title",
+        "job role",
+        "role",
+        "designation",
+        "position",
+        "job responsibility",
+    ],
+    "experience_level": [
+        "experience level",
+        "seniority",
+        "experience",
+        "career stage",
+    ],
+    "employment_type": [
+        "employment type",
+        "job type",
+        "contract type",
+    ],
+    "salary_in_usd": [
+        "salary",
+        "income",
+        "earnings",
+        "pay",
+        "wage",
+    ],
+    "remote_ratio": [
+        "remote work",
+        "work from home",
+        "remote ratio",
+        "remote",
+    ],
+    "company_size": [
+        "company size",
+        "organization size",
+        "business size",
+    ],
+    "employee_residence": [
+        "country",
+        "residence",
+        "location",
+        "employee location",
+    ],
+    "company_location": [
+        "company location",
+        "office location",
+        "company country",
+    ],
+}
+# Helper function to map user query terms to dataset columns
+def map_query_to_column(query):
+    """Return the dataset column a free-text query refers to, or None.
+
+    The query is lower-cased and searched for every phrase in
+    COLUMN_SYNONYMS. Columns are preferred in COLUMN_SYNONYMS order, except
+    that a phrase occurrence lying entirely inside a longer matching phrase
+    of a different column is ignored. Without that rule "company location"
+    would resolve to employee_residence through its shorter synonym
+    "location", leaving company_location unreachable.
+
+    Args:
+        query: User-supplied text; may be empty or None.
+
+    Returns:
+        The matching column name, or None when nothing matches.
+    """
+    if not query:
+        return None
+    text = query.lower()
+    # Collect every phrase occurrence as (column, start, end), preserving
+    # COLUMN_SYNONYMS order so the first surviving hit keeps precedence.
+    hits = []
+    for col, synonyms in COLUMN_SYNONYMS.items():
+        for term in synonyms:
+            start = text.find(term)
+            while start != -1:
+                hits.append((col, start, start + len(term)))
+                start = text.find(term, start + 1)
+    for col, start, end in hits:
+        # A hit is shadowed when a strictly longer phrase belonging to a
+        # different column covers the same stretch of the query.
+        shadowed = any(
+            other != col
+            and o_start <= start
+            and end <= o_end
+            and (o_end - o_start) > (end - start)
+            for other, o_start, o_end in hits
+        )
+        if not shadowed:
+            return col
+    return None
+# Shared builder for the "mean salary per group" bar charts
+def _average_salary_bar(df, group_col, title):
+    """Return a bar chart of the mean salary_in_usd for each value of group_col."""
+    grouped_df = df.groupby(group_col)["salary_in_usd"].mean().reset_index()
+    return px.bar(grouped_df, x=group_col, y="salary_in_usd", title=title)
+# Visualization generator with synonym handling
+def generate_visual_from_query(query, df):
+    """Build a Plotly figure answering a natural-language salary question.
+
+    salary_in_usd is always the measured value; the query only selects the
+    column to group by. When the mapper resolves the query to salary_in_usd
+    itself, the salary phrases are removed and the remainder is mapped
+    again, so "average salary by company size" groups by company_size
+    instead of trying to group salary by salary.
+
+    Args:
+        query: User-supplied text describing the desired chart.
+        df: DataFrame containing salary_in_usd and the grouping columns.
+
+    Returns:
+        A Plotly figure, or None when the query is not understood or chart
+        construction fails (a Streamlit warning or error is shown instead).
+    """
+    try:
+        query = query.lower()
+        # Map user terms to the dataset column used for grouping
+        group_col = map_query_to_column(query)
+        salary_mentioned = group_col == "salary_in_usd"
+        if salary_mentioned:
+            # Look for a grouping column in what is left after dropping
+            # the salary phrases.
+            remainder = query
+            for term in COLUMN_SYNONYMS.get("salary_in_usd", ()):
+                remainder = remainder.replace(term, " ")
+            group_col = map_query_to_column(remainder)
+            if group_col == "salary_in_usd":
+                group_col = None
+        # Handle common queries
+        if "distribution" in query and group_col:
+            label = group_col.replace('_', ' ').title()
+            return px.box(df, x=group_col, y="salary_in_usd", title=f"Salary Distribution by {label}")
+        if "distribution" in query and salary_mentioned:
+            # No grouping column requested: show one overall box plot.
+            return px.box(df, y="salary_in_usd", title="Salary Distribution")
+        if "average salary" in query and group_col:
+            label = group_col.replace('_', ' ').title()
+            return _average_salary_bar(df, group_col, f"Average Salary by {label}")
+        if "remote" in query:
+            return _average_salary_bar(df, "remote_ratio", "Remote Work Impact on Salary")
+        if "company size" in query or "organization size" in query:
+            return _average_salary_bar(df, "company_size", "Salary by Company Size")
+        if "country" in query or "location" in query:
+            return _average_salary_bar(df, "employee_residence", "Salary by Employee Residence")
+        st.warning("❓ I couldn't understand the query for visualization. Try asking about salary distribution, experience level, remote work, etc.")
+        return None
+    except Exception as e:
+        # UI boundary: report the failure in the app rather than crash it.
+        st.error(f"Error generating visualization: {e}")
+        return None
 # SQL-RAG Analysis
 if st.session_state.df is not None:
     temp_dir = tempfile.TemporaryDirectory()