File size: 7,649 Bytes
2342013
 
2831a6c
 
2342013
1d72851
2342013
 
 
 
 
 
 
2831a6c
2342013
 
 
2831a6c
2342013
2831a6c
308f825
2831a6c
 
 
0549e31
2831a6c
0549e31
2342013
 
2141c97
7d15bf3
2342013
 
 
 
 
fffe3d2
 
2342013
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a2596b
2a303ed
2342013
 
 
 
 
 
 
 
 
 
 
 
e0c6c19
2342013
6ebcf42
da306a0
2342013
 
 
c7100fd
2342013
 
 
 
 
308f825
c7100fd
308f825
2342013
 
 
308f825
 
2342013
308f825
 
2342013
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d5c25d
2342013
3d5c25d
2342013
 
 
 
 
 
 
 
 
 
 
 
1730d66
 
2831a6c
 
2342013
 
2831a6c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
import os
import requests
import gradio as gr

from langchain.memory import ConversationBufferMemory  # Updated import
from langchain import OpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.utilities import SQLDatabase
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain.agents import create_tool_calling_agent, AgentExecutor, Tool
from langchain.vectorstores import FAISS

from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from PyPDF2 import PdfReader

# Initialize the memory

#memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")



# API keys are read from the environment; a missing variable raises KeyError at import time.
open_api_key_token = os.environ['OPEN_AI_API']

open_weather_api_key = os.environ['OPEN_WEATHER_API']

# Copy the OpenAI key to OPENAI_API_KEY. ChatOpenAI and OpenAIEmbeddings are constructed in this
# file without an explicit key, so presumably they read it from this variable -- confirm.
os.environ['OPENAI_API_KEY'] = open_api_key_token
# NOTE(review): this URI embeds database credentials in source, and the user/host portion contains
# the literal text "[email protected]", which looks like the output of an e-mail obfuscation
# filter rather than a real host. Confirm the real value and load it from the environment.
db_uri = 'mysql+mysqlconnector://redmindgen:51([email protected]:3306/collegedb'
#db_uri = 'postgresql+psycopg2://postgres:[email protected]:5432/warehouse'
# Database setup

# Built at import time from db_uri; used by get_schema() and run_query().
db = SQLDatabase.from_uri(db_uri)

# LLM setup
# Shared chat model used by both the SQL-generation chain and the agent.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
#llm = OpenAI(temperature=0, api_key= os.environ['OPEN_AI_API'], model_name='gpt-3.5-turbo')

# Define the SQL query generation tool
# Prompt text with two placeholders: {schema} (filled from get_schema in generate_sql_query)
# and {question} (the user's natural-language question).
template_query_generation = """Based on the table schema below, write a SQL query that would answer the user's question:
{schema}

Question: {question}
SQL Query:"""
# Prompt object used as the second stage of the chain built in generate_sql_query.
prompt_query_generation = ChatPromptTemplate.from_template(template_query_generation)

def get_schema(_):
    """Return the table information reported by the module-level ``db``.

    The single positional argument is accepted but ignored; it exists so the
    function can be used as a chain step that is handed the chain input.
    """
    table_info = db.get_table_info()
    return table_info

def generate_sql_query(question):
    """Ask the LLM for a SQL query that answers ``question``.

    The chain adds the database schema to the input (via ``get_schema``),
    renders ``prompt_query_generation``, calls the model with a stop
    sequence, and parses the reply to a plain string.

    Args:
        question: Natural-language question from the user.

    Returns:
        The model's reply as a string, expected to be a SQL query.
    """
    # The schema is fetched once, inside the chain, by RunnablePassthrough.assign.
    # The earlier separate get_schema(None) call was unused and only cost an
    # extra database round trip per question.
    sql_chain = (
        RunnablePassthrough.assign(schema=get_schema)
        | prompt_query_generation
        | llm.bind(stop="\n SQL Result:")
        | StrOutputParser()
    )
    return sql_chain.invoke({"question": question})

def run_query(query):
    """Run ``query`` through the module-level ``db`` and return whatever ``db.run`` returns."""
    result = db.run(query)
    return result

# Define the database query tool
def database_tool(question):
    """Answer ``question`` from the database.

    Generates a SQL query for the question with ``generate_sql_query`` and
    returns the result of executing it with ``run_query``.
    """
    return run_query(generate_sql_query(question))

# Define the weather data retrieval tool
def get_weather_data(location="United Arab Emirates"):
    """Return a one-sentence summary of the current weather for ``location``.

    Args:
        location: City or country name. ``None`` or a blank string falls back
            to "United Arab Emirates".

    Returns:
        A sentence with the temperature in degrees Celsius and the weather
        description, "Weather data is not found." when the service does not
        return a successful payload, or a short "unavailable" message when the
        request itself fails. The function always returns a string so the
        agent calling it as a tool gets an answer instead of an exception.
    """
    if location is None or not location.strip():
        location = "United Arab Emirates"

    base_url = "https://api.openweathermap.org/data/2.5/weather"
    query = {"q": location, "appid": open_weather_api_key, "units": "metric"}

    try:
        # params= URL-encodes the location (spaces, '&', non-ASCII names), and
        # the timeout keeps a stalled connection from hanging the request.
        response = requests.get(base_url, params=query, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a response body that is not valid JSON.
        return "Weather service is currently unavailable."

    # Treat only an explicit success code as success. Checking for "not 404"
    # let other error payloads (e.g. an invalid API key) through, and they
    # then failed with KeyError on the missing "main" section.
    if not isinstance(data, dict) or str(data.get("cod")) != "200":
        return "Weather data is not found."

    try:
        temperature = data["main"]["temp"]
        weather_description = data["weather"][0]["description"]
    except (KeyError, IndexError, TypeError):
        return "Weather data is not found."

    return f"The current temperature in {location} is {temperature}°C with {weather_description}."
#get_weather_data("United Arab Emirates")

# Define the document data tool
def load_and_split_pdf(pdf_path):
    """Extract the text of a PDF and split it into overlapping chunks.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The list of text chunks produced by ``CharacterTextSplitter`` with
        ``chunk_size=1000`` and ``chunk_overlap=200``.
    """
    reader = PdfReader(pdf_path)
    # ``or ""`` tolerates a page that yields no text. Joining with a newline
    # keeps the last word of one page from fusing with the first word of the
    # next, and avoids rebuilding the string once per page.
    page_texts = [page.extract_text() or "" for page in reader.pages]
    text = "\n".join(page_texts)

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return text_splitter.split_text(text)

def create_vector_store(texts):
    """Build and return a FAISS vector store over ``texts`` using ``OpenAIEmbeddings``."""
    return FAISS.from_texts(texts, OpenAIEmbeddings())

def query_vector_store(vector_store, query):
    """Search ``vector_store`` for ``query`` and return the matching text.

    Calls ``vector_store.similarity_search(query)`` and concatenates the
    ``page_content`` of each returned document, in order, separated by a
    blank line.
    """
    hits = vector_store.similarity_search(query)
    return '\n\n'.join(hit.page_content for hit in hits)

# Load and process the PDF (ensure the PDF is accessible from your Colab environment)
#pdf_path = "The Magic of Analysing Customers Experience in Freight Forwarding Industry -BLOG.pdf"
# Relative path, so it is resolved against the process's current working directory.
pdf_path = "Inbound.pdf"
# Check if the user has the necessary permissions to access the directory
# if not os.path.isdir(pdf_path):
#     raise ValueError(f"Directory {pdf_path} does not exist or you do not have permission to access it.")

# Runs at import time: the PDF is read and chunked before the UI starts.
texts = load_and_split_pdf(pdf_path)
# Module-level index over the chunks; document_data_tool searches it.
vector_store = create_vector_store(texts)

def document_data_tool(query):
    """Return the document passages that ``query_vector_store`` finds for ``query`` in the module-level ``vector_store``."""
    retrieved = query_vector_store(vector_store, query)
    return retrieved

# Initialize the agent with the tools
# Only the weather and document tools are registered; the database tool is commented out below.
# NOTE(review): `tool_choice="required"` is passed to the Tool constructor here, not to the
# model binding -- confirm that it has any effect in this position.
tools = [
    
    Tool(name="WeatherData", func=get_weather_data, description="Tool to get current weather data for a city or country. Handles questions related to current weather conditions in specific cities or countries.", tool_choice="required"),
    Tool(name="DocumentData", func=document_data_tool, description="Tool to search and retrieve information from the uploaded document. Provide responses with the maximum of 150 words.", tool_choice="required"),
    #Tool(name="DatabaseQuery", func=database_tool, description="Tool to query the database based on the user's question. Only handles questions related to the collegedb schema, including tables such as buildings, classrooms, college, course, faculty, interns, person, section, student, and textbook. Ensure to use only the available fields in these tables.Provide responses with the maximum of 150 words.", tool_choice="required"),
]

# System instructions for the agent. This is a plain string: the previous f-string prefix
# interpolated nothing and only forced the template braces to be doubled.
prompt_template = """You are an assistant that helps with database queries, weather information, and document retrieval.

For weather-related questions, if the user specifies a city, provide the weather information for that city. If the user specifies a country or does not specify a city, provide the weather information for the specified country or the default location of 'United Arab Emirates'.
For document-related questions, search and retrieve information from the uploaded document.
"""
#For SQL database-related questions, only use the fields available in the collegedb schema, which includes tables such as buildings, classrooms, college, course, faculty, interns, person, section, student, and textbook.
#{memory.buffer}
# create_tool_calling_agent feeds the intermediate tool calls and tool results into
# `agent_scratchpad` as a list of messages, so that variable must be a messages placeholder.
# Embedding it in a single text template stringified those messages into the human turn.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", prompt_template),
        ("human", "Question: {input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

# Initialize the agent with memory
#llm_with_memory = llm.bind(memory=memory)
# Memory is currently disabled (see the commented-out line above): bind() is called with no
# arguments, so despite its name this variable carries no memory configuration.
llm_with_memory = llm.bind()
# Tool-calling agent built from the model, the registered tools and the prompt.
agent = create_tool_calling_agent(llm_with_memory, tools, prompt)
# Executor that answer_question invokes; constructed with verbose=True and no memory.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
#agent_executor = AgentExecutor(agent=agent, tools=tools, memory= memory, verbose=True)

# Define the interface function
# Maximum number of agent invocations per question before giving up.
max_iterations = 5
# Kept so the module still exposes this name. answer_question now counts attempts in a local
# variable, so simultaneous requests can no longer reset or advance each other's counter.
iterations = 0

def answer_question(user_question, city="United Arab Emirates"):
    """Run the agent on ``user_question`` and return a single-line answer.

    The agent is invoked up to ``max_iterations`` times. The first reply whose
    text does not contain "invalid" (case-insensitive) is accepted.

    Args:
        user_question: The question text entered by the user.
        city: Unused by this function; kept so existing callers that pass it
            keep working.

    Returns:
        The accepted reply with every run of whitespace (including newlines)
        collapsed to one space, or a fixed failure message when no attempt
        produced an acceptable reply.
    """
    for _attempt in range(max_iterations):
        response = agent_executor.invoke({"input": user_question})
        if isinstance(response, dict):
            candidate = response.get("output", "")
        else:
            candidate = response
        # Coerce to text so a missing or non-string output cannot crash .lower().
        candidate = "" if candidate is None else str(candidate)
        if "invalid" not in candidate.lower():
            break
    else:
        # The loop finished without a break: every attempt was rejected.
        return "The agent could not generate a valid response within the iteration limit."

    # Print memory buffer for debugging
    #print("Memory Buffer:", memory.buffer)
    # Print memory buffer for debugging
    #print("Memory Buffer11:", memory.load_memory_variables({}))

    # split()/join collapses whitespace runs of any length. A single
    # replace('  ', ' ') pass left double spaces behind for longer runs.
    return " ".join(candidate.split())

# Create the Gradio interface
# One text input and one text output are declared, so answer_question is given a single
# value from this UI and its `city` parameter keeps its default.
iface = gr.Interface(
    fn=answer_question,
    inputs="text",
    outputs="text",
    title="Chat with your data",
    description="Ask a question about the database or a document and get a response in natural language, including current weather information."
)

# Launch the Gradio interface
# NOTE(review): launch() runs at module import (there is no __main__ guard), and share=True
# presumably requests a public share link -- confirm that both are intended.
iface.launch(share=True, debug=True)