garvit2205 committed on
Commit
c1a0d97
·
verified ·
1 Parent(s): 0275922

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from functools import lru_cache

from dotenv import load_dotenv

# Load variables from a local .env file before anything reads the environment.
load_dotenv()

api = os.getenv("groq_api_key")


from sentence_transformers import SentenceTransformer
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
from groq import Groq
12
+
13
# Plain-text descriptions of the tables the generator knows about. Their order
# in the list passed to the encoder matters: find_best_fit maps row 0 to the
# student table, row 1 to the employee table and everything else to the course
# table.
_STUDENT_METADATA = """
Table: student
Columns:
- student_id: an integer representing the unique ID of a student.
- first_name: a string containing the first name of the student.
- last_name: a string containing the last name of the student.
- date_of_birth: a date representing the student's birthdate.
- email: a string for the student's email address.
- phone_number: a string for the student's contact number.
- major: a string representing the student's major field of study.
- year_of_enrollment: an integer for the year the student enrolled.
"""

_EMPLOYEE_METADATA = """
Table: employee
Columns:
- employee_id: an integer representing the unique ID of an employee.
- first_name: a string containing the first name of the employee.
- last_name: a string containing the last name of the employee.
- email: a string for the employee's email address.
- department: a string for the department the employee works in.
- position: a string representing the employee's job title.
- salary: a float representing the employee's salary.
- date_of_joining: a date for when the employee joined the college.
"""

_COURSE_METADATA = """
Table: course_info
Columns:
- course_id: an integer representing the unique ID of the course.
- course_name: a string containing the course's name.
- course_code: a string for the course's unique code.
- instructor_id: an integer for the ID of the instructor teaching the course.
- department: a string for the department offering the course.
- credits: an integer representing the course credits.
- semester: a string for the semester when the course is offered.
"""


@lru_cache(maxsize=1)
def create_metadata_embeddings():
    """Load the sentence encoder and embed the three table descriptions.

    The result is cached: the inputs never change, so the model is loaded and
    the descriptions are encoded only on the first call instead of on every
    user request. Callers receive the same objects on each call and should
    treat them as read-only.

    Returns:
        A 5-tuple ``(embeddings, model, student, employee, course)`` where
        ``embeddings`` is the output of ``model.encode`` for the descriptions
        in the order student, employee, course; ``model`` is the
        ``SentenceTransformer`` instance; and the last three items are the
        description strings themselves.
    """
    metadata_list = [_STUDENT_METADATA, _EMPLOYEE_METADATA, _COURSE_METADATA]

    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = model.encode(metadata_list)

    return (
        embeddings,
        model,
        _STUDENT_METADATA,
        _EMPLOYEE_METADATA,
        _COURSE_METADATA,
    )
59
+
60
def find_best_fit(embeddings, model, user_query, student, employee, course):
    """Return the table description that best matches the user's request.

    Args:
        embeddings: Encoded table descriptions, compared against the query
            with cosine similarity (presumably ordered student, employee,
            course, matching the index mapping below).
        model: Encoder exposing ``encode``; used to embed ``user_query``.
        user_query: The natural-language request.
        student: Description returned when index 0 scores highest.
        employee: Description returned when index 1 scores highest.
        course: Description returned for any other index.

    Returns:
        One of ``student``, ``employee`` or ``course``.
    """
    encoded_query = model.encode([user_query])
    scores = cosine_similarity(encoded_query, embeddings)
    winner = scores.argmax()

    if winner == 0:
        return student
    if winner == 1:
        return employee
    return course
72
+
73
+
74
+
75
# Fixed instructions for the model; identical for every request.
_SYSTEM_PROMPT = """
You are a SQL query generator specialized in generating SELECT queries for a single table at a time. Your task is to accurately convert natural language queries into SQL SELECT statements based on the user's intent and the provided table metadata.
Rules:
Focus on SELECT Queries: Only generate SELECT queries. Do not generate INSERT, UPDATE, DELETE, or multi-table JOINs.
Single Table Only: Assume all queries are related to a single table provided in the metadata. Ignore any references to other tables.
Metadata-Based Validation: Always ensure the generated query matches the table name, columns, and data types provided in the metadata.
User Intent: Accurately capture the user's requirements, such as filters, sorting, or aggregations, as expressed in natural language.
SQL Syntax: Use standard SQL syntax that is compatible with most relational database systems.
Input Format:
User Query: The user's natural language request.
Table Metadata: The structure of the relevant table, including the table name, column names, and data types.
Output Format:
SQL Query: A valid SELECT query formatted for readability.
Do not output anything else except the SQL query.Not even a single word extra.Ouput the whole query in a single line only.
You are ready to generate SQL queries based on the user input and table metadata.
"""

# Per-request message template: the request first, then the chosen table.
_USER_PROMPT_TEMPLATE = "\nUser Query: {}\nTable Metadata: {}\n"


def create_prompt(user_query, table_metadata):
    """Build the system and user messages for the SQL-generation request.

    Args:
        user_query: The natural-language request typed by the user.
        table_metadata: Description of the table the query should target.

    Returns:
        A ``(system_prompt, user_prompt)`` tuple of strings. The system prompt
        is constant; the user prompt embeds both arguments.
    """
    filled_in = _USER_PROMPT_TEMPLATE.format(user_query, table_metadata)
    return _SYSTEM_PROMPT, filled_in
100
+
101
+
102
+
103
def generate_output(system_prompt, user_prompt):
    """Ask the Groq chat model for a query and accept it only if it is a SELECT.

    Args:
        system_prompt: Instructions sent as the "system" message.
        user_prompt: Request and table metadata sent as the "user" message.

    Returns:
        The model's reply with surrounding whitespace removed when it begins
        with "select" (case-insensitive); otherwise the fixed message
        "Can't perform the task at the moment.".
    """
    client = Groq(api_key=api)
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        model="llama3-70b-8192",
    )
    res = chat_completion.choices[0].message.content

    # Normalise before validating: a reply padded with whitespace or a leading
    # newline is still a usable query, and a missing (None) reply must fall
    # through to the fallback message instead of raising on the slice.
    query = (res or "").strip()
    if query[:6].lower() == "select":
        return query
    return "Can't perform the task at the moment."
117
+
118
+
119
def response(user_query):
    """Turn a natural-language request into a SQL query string.

    Runs the full pipeline: embed the table descriptions, pick the table that
    best matches ``user_query``, build the prompts, and return whatever
    ``generate_output`` produces for them.

    Args:
        user_query: The natural-language request from the UI.

    Returns:
        The string returned by ``generate_output``.
    """
    vectors, encoder, *tables = create_metadata_embeddings()
    chosen_table = find_best_fit(vectors, encoder, user_query, *tables)
    prompts = create_prompt(user_query, chosen_table)
    return generate_output(*prompts)
129
+
130
+
131
# Gradio UI: one textbox for the natural-language request, one for the SQL.
demo = gr.Interface(
    fn=response,
    inputs=gr.Textbox(label="Please provide the natural language query"),
    outputs=gr.Textbox(label="SQL Query"),
    title="SQL Query generator",
)

# `share` is a boolean flag. The original passed the string "True", which only
# behaved as intended because a non-empty string is truthy.
demo.launch(share=True)