adamboom111 committed on
Commit
a1eb97f
·
verified ·
1 Parent(s): 03043dc

Update app.py

Browse files

to make it more dynamic

Files changed (1) hide show
  1. app.py +44 -145
app.py CHANGED
@@ -1,158 +1,57 @@
1
  from dotenv import load_dotenv
2
  import os
3
- from sentence_transformers import SentenceTransformer
4
  import gradio as gr
5
- from sklearn.metrics.pairwise import cosine_similarity
6
  from groq import Groq
7
 
8
-
9
  load_dotenv()
10
-
11
  api = os.getenv("groq_api_key")
12
 
13
- def create_metadata_embeddings():
14
- student="""
15
- Table: student
16
- Columns:
17
- - student_id: an integer representing the unique ID of a student.
18
- - first_name: a string containing the first name of the student.
19
- - last_name: a string containing the last name of the student.
20
- - date_of_birth: a date representing the student's birthdate.
21
- - email: a string for the student's email address.
22
- - phone_number: a string for the student's contact number.
23
- - major: a string representing the student's major field of study.
24
- - year_of_enrollment: an integer for the year the student enrolled.
25
- """
26
-
27
- employee="""
28
- Table: employee
29
- Columns:
30
- - employee_id: an integer representing the unique ID of an employee.
31
- - first_name: a string containing the first name of the employee.
32
- - last_name: a string containing the last name of the employee.
33
- - email: a string for the employee's email address.
34
- - department: a string for the department the employee works in.
35
- - position: a string representing the employee's job title.
36
- - salary: a float representing the employee's salary.
37
- - date_of_joining: a date for when the employee joined the college.
38
- """
39
-
40
- course="""
41
- Table: course_info
42
- Columns:
43
- - course_id: an integer representing the unique ID of the course.
44
- - course_name: a string containing the course's name.
45
- - course_code: a string for the course's unique code.
46
- - instructor_id: an integer for the ID of the instructor teaching the course.
47
- - department: a string for the department offering the course.
48
- - credits: an integer representing the course credits.
49
- - semester: a string for the semester when the course is offered.
50
- """
51
-
52
- metadata_list = [student, employee, course]
53
-
54
- model = SentenceTransformer('all-MiniLM-L6-v2')
55
-
56
- embeddings = model.encode(metadata_list)
57
-
58
- return embeddings,model,student,employee,course
59
-
60
- def find_best_fit(embeddings,model,user_query,student,employee,course):
61
- query_embedding = model.encode([user_query])
62
- similarities = cosine_similarity(query_embedding, embeddings)
63
- best_match_table = similarities.argmax()
64
- if(best_match_table==0):
65
- table_metadata=student
66
- elif(best_match_table==1):
67
- table_metadata=employee
68
- else:
69
- table_metadata=course
70
-
71
- return table_metadata
72
-
73
-
74
-
75
- def create_prompt(user_query,table_metadata):
76
- system_prompt="""
77
- You are a SQL query generator specialized in generating SQL queries for a single table at a time. Your task is to accurately convert natural language queries into SQL statements based on the user's intent and the provided table metadata.
78
-
79
- Rules:
80
- Single Table Only: Assume all queries are related to a single table provided in the metadata. Ignore any references to other tables.
81
- Metadata-Based Validation: Always ensure the generated query matches the table name, columns, and data types provided in the metadata.
82
- User Intent: Accurately capture the user's requirements, such as filters, sorting, or aggregations, as expressed in natural language.
83
- SQL Syntax: Use standard SQL syntax that is compatible with most relational database systems.
84
-
85
- Input Format:
86
- User Query: The user's natural language request.
87
- Table Metadata: The structure of the relevant table, including the table name, column names, and data types.
88
-
89
- Output Format:
90
- SQL Query: A valid SQL query formatted for readability.
91
- Do not output anything else except the SQL query.Not even a single word extra.Ouput the whole query in a single line only.
92
- You are ready to generate SQL queries based on the user input and table metadata.
93
- """
94
-
95
-
96
- user_prompt=f"""
97
- User Query: {user_query}
98
- Table Metadata: {table_metadata}
99
- """
100
-
101
- return system_prompt,user_prompt
102
-
103
-
104
-
105
- def generate_output(system_prompt,user_prompt):
106
- client = Groq(api_key=api,)
107
- chat_completion = client.chat.completions.create(messages=[
108
- {"role": "system", "content": system_prompt},
109
- {"role": "user","content": user_prompt,}],model="llama3-70b-8192",)
110
- res = chat_completion.choices[0].message.content
111
-
112
- select=res[0:6].lower()
113
- if(select=="select"):
114
- output=res
115
- else:
116
- output="Can't perform the task at the moment."
117
-
118
- return output
119
-
120
-
121
- def response(user_query):
122
- embeddings,model,student,employee,course=create_metadata_embeddings()
123
-
124
- table_metadata=find_best_fit(embeddings,model,user_query,student,employee,course)
125
-
126
- system_prompt,user_prompt=create_prompt(user_query,table_metadata)
127
-
128
- output=generate_output(system_prompt,user_prompt)
129
-
130
- return output
131
-
132
- desc="""
133
-
134
- There are three tables in the database:
135
-
136
-
137
- Student Table:
138
- The table contains the student's unique ID, first name, last name, date of birth, email address, phone number, major field of study, and year of enrollment.
139
-
140
-
141
- Employee Table:
142
- The table includes the employee's unique ID, first name, last name, email address, department, job position, salary, and date of joining.
143
-
144
-
145
- Course Info Table:
146
- The table holds information about the course's unique ID, name, course code, instructor ID, department offering the course, number of credits, and the semester in which the course is offered.
147
-
148
- """
149
 
150
  demo = gr.Interface(
151
  fn=response,
152
- inputs=gr.Textbox(label="Please provide the natural language query"),
153
- outputs=gr.Textbox(label="SQL Query"),
154
- title="SQL Query generator",
155
- description=desc
156
  )
157
 
158
- demo.launch(share="True")
 
1
  from dotenv import load_dotenv
2
  import os
 
3
  import gradio as gr
 
4
  from groq import Groq
5
 
 
6
  load_dotenv()
 
7
  api = os.getenv("groq_api_key")
8
 
9
def create_prompt(user_query, table_metadata):
    """Build the system and user prompts for single-table SQL generation.

    Args:
        user_query: The natural-language request to translate into SQL.
        table_metadata: Description of the target table (name, columns, types).

    Returns:
        A ``(system_prompt, user_prompt)`` tuple of strings. The system prompt
        carries only the fixed rules; the user prompt carries the actual query
        and table metadata.
    """
    # The system prompt is a plain (non-f) string, so it must not contain
    # "{user_query}"-style placeholders: they would never be substituted and
    # would reach the model as literal braces. The input format is described
    # in words here, and the real values are sent in the user prompt only.
    system_prompt = "\n".join(
        [
            "You are a SQL query generator specialized in generating SQL queries for a single table at a time.",
            "Your task is to accurately convert natural language queries into SQL statements based on the user's intent and the provided table metadata.",
            "",
            "Rules:",
            "- Single Table Only: Use only the table in the metadata.",
            "- Metadata-Based Validation: Use only columns in the metadata.",
            "- User Intent: Support filters, grouping, sorting, etc.",
            "- SQL Syntax: Use standard SQL (DuckDB compatible).",
            "- Output only valid SQL. No extra commentary.",
            "",
            "Input:",
            "User Query: the user's natural language request.",
            "Table Metadata: the table name, column names, and data types.",
            "",
            "Output:",
            "SQL Query (on a single line, nothing else).",
        ]
    )
    user_prompt = f"User Query: {user_query}\nTable Metadata: {table_metadata}"
    return system_prompt, user_prompt
29
+
30
def generate_output(system_prompt, user_prompt):
    """Ask the Groq-hosted model for a SQL query and validate the reply.

    Args:
        system_prompt: Fixed instructions for the model.
        user_prompt: The user's question together with the table metadata.

    Returns:
        The model's reply when it starts with ``select`` (case-insensitive),
        otherwise the fallback string "Can't perform the task at the moment.".
    """
    client = Groq(api_key=api)
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        model="llama3-70b-8192",
    )

    # The message content can be None (e.g. an empty completion); treat that
    # as an empty reply instead of failing on ``None.strip()``.
    sql = (chat_completion.choices[0].message.content or "").strip()

    # Models sometimes wrap the query in a markdown code fence despite the
    # instructions; unwrap it so a valid query is not rejected below.
    if sql.startswith("```"):
        sql = sql[3:]
        if sql.endswith("```"):
            sql = sql[:-3]
        sql = sql.strip()
        if sql[:3].lower() == "sql":
            sql = sql[3:].strip()

    # Only SELECT statements are passed through; anything else is refused.
    if sql.lower().startswith("select"):
        return sql
    return "Can't perform the task at the moment."
41
+
42
# Gradio handler: takes a JSON payload with the question and a dynamic table schema.
def response(payload):
    """Generate SQL for the question and table schema carried in ``payload``.

    Args:
        payload: Value delivered by the JSON input component. Expected to be
            a dict with a "question" key (natural-language query) and a
            "schema" key (table metadata).

    Returns:
        The text produced by ``generate_output`` for a dict payload, or an
        explanatory error message when ``payload`` is not a JSON object.
    """
    # The JSON input can arrive as None (nothing entered) or as a non-object
    # value such as a list or string; ``.get`` would raise on those.
    if not isinstance(payload, dict):
        return 'Invalid input: expected a JSON object with "question" and "schema" keys.'

    # ``or ""`` also covers explicit JSON nulls, which would otherwise be
    # rendered into the prompt as the text "None".
    user_query = payload.get("question") or ""
    table_metadata = payload.get("schema") or ""

    system_prompt, user_prompt = create_prompt(user_query, table_metadata)
    return generate_output(system_prompt, user_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  demo = gr.Interface(
50
  fn=response,
51
+ inputs=gr.JSON(label="Input JSON (question, schema)"),
52
+ outputs="text",
53
+ title="SQL Generator (Groq + LLaMA3)",
54
+ description="Input: question & table metadata. Output: SQL using dynamic schema."
55
  )
56
 
57
+ demo.launch()