diff --git "a/climateqa/engine/talk_to_data/step_by_step_vanna.ipynb" "b/climateqa/engine/talk_to_data/step_by_step_vanna.ipynb"
new file mode 100644--- /dev/null
+++ "b/climateqa/engine/talk_to_data/step_by_step_vanna.ipynb"
@@ -0,0 +1,3221 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loading embeddings model: BAAI/bge-base-en-v1.5\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sys\n",
+ "import os\n",
+ "sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd()))))\n",
+ "\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "from main import ask_vanna\n",
+ "import sqlite3\n",
+ "import os\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Test: manual text-to-SQL prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Candidate table names offered to the LLM when it picks the table to query.\n",
+ "table_names_list = [\n",
+ "    \"Frequency_of_rainy_days_index\",\n",
+ "    \"Winter_precipitation_total\",\n",
+ "    \"Summer_precipitation_total\",\n",
+ "    \"Annual_precipitation_total\",\n",
+ "    \"Remarkable_daily_precipitation_total_(Q99)\",\n",
+ "    \"Frequency_of_remarkable_daily_precipitation\",\n",
+ "    \"Extreme_precipitation_intensity\",\n",
+ "    \"Mean_winter_temperature\",\n",
+ "    \"Mean_summer_temperature\",\n",
+ "    \"Number_of_tropical_nights\",\n",
+ "    \"Maximum_summer_temperature\",\n",
+ "    \"Number_of_days_with_Tx_above_30C\",\n",
+ "    \"Number_of_days_with_Tx_above_35C\",\n",
+ "    \"Drought_index\",\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from climateqa.engine.llm import get_llm\n",
+ "\n",
+ "# Ask the LLM which single table is relevant to the user's question.\n",
+ "user_question = \"Quel sera la température à Marseille dans les prochaines années ?\"\n",
+ "prompt = (\n",
+ "    \"You are helping to build a sql query to retrieve relevant data for a user question. \"\n",
+ "    f\"The different tables are {table_names_list}. \"\n",
+ "    f\"The user question is {user_question}. \"\n",
+ "    \"Write the relevant table to query. Answer only the table name.\"\n",
+ ")\n",
+ "\n",
+ "llm = get_llm(provider=\"openai\")\n",
+ "selection_response = llm.invoke(prompt)\n",
+ "table_name = selection_response.content\n",
+ "# llm.invoke(f\"Make the following sql query display the source table in the rows {sql_query_new_coords}. Just answer the query. The answer should not include ```sql\\n\").content\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Per-table documentation injected into the hand-built prompt:\n",
+ "#   - \"description\": plain-text explanation of the table and its columns\n",
+ "#   - \"sql_query\": the CREATE TABLE statement shown to the LLM as schema context\n",
+ "# The DDL must be valid SQL (no trailing comma after the last column), otherwise the\n",
+ "# model is given a malformed schema to reason from.\n",
+ "docs = {\"Mean_summer_temperature\": {\n",
+ " \"description\": (\n",
+ " \"The Mean summer temperature table contains information on the average summer temperature in the past and the future. \"\n",
+ " \"The variables are as follows:\\n\"\n",
+ " \"- 'y' and 'x': Lambert Paris II coordinates for the location.\\n\"\n",
+ " \"- year: Year of the observation.\\n\"\n",
+ " \"- month : Month of the observation.\\n\"\n",
+ " \"- day: Day of the observation.\\n\"\n",
+ " \"- 'LambertParisII': Indicates that the x and y coordinates are in Lambert Paris II projection.\\n\"\n",
+ " \"- 'lat' and 'lon': Latitude and longitude of the location.\\n\"\n",
+ " \"- 'TMm': Average summer temperature.\\n\"\n",
+ " ),\n",
+ " \"sql_query\": \"\"\"\n",
+ " CREATE TABLE Mean_summer_temperature (\n",
+ " y FLOAT,\n",
+ " x FLOAT,\n",
+ " year INT,\n",
+ " month INT, \n",
+ " day INT,\n",
+ " LambertParisII VARCHAR(255),\n",
+ " lat FLOAT,\n",
+ " lon FLOAT,\n",
+ " TMm FLOAT -- Température moyenne en été\n",
+ " );\n",
+ " \"\"\"}\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from climateqa.engine.talk_to_data.utils import loc2coords\n",
+ "\n",
+ "# Resolve the place name to coordinates, then substitute them for the name in the question.\n",
+ "location = \"Marseille\"\n",
+ "coords = loc2coords(location)\n",
+ "coords_text = f\"lat, long : {coords}\"\n",
+ "user_input = user_question.replace(location, coords_text)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hand-built text-to-SQL prompt using the same section layout as the Vanna prompt.\n",
+ "# The database connected in this notebook is SQLite, so the prompt targets that dialect;\n",
+ "# asking for MySQL yields functions such as YEAR(CURDATE()) that SQLite cannot execute.\n",
+ "sql_dialect = \"SQLite\"\n",
+ "\n",
+ "# The LLM answer may carry surrounding whitespace; fail with a clear message if the\n",
+ "# selected table has no entry in `docs`.\n",
+ "table_key = table_name.strip()\n",
+ "if table_key not in docs:\n",
+ "    raise KeyError(f\"No documentation for table {table_key!r}; documented tables: {list(docs)}\")\n",
+ "table_doc = docs[table_key]\n",
+ "\n",
+ "initial_prompt = (\n",
+ "    f\"You are a {sql_dialect} expert. \"\n",
+ "    \"Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. \"\n",
+ ")\n",
+ "# No literal ' + ' inside the f-string: it would be sent to the model as part of the prompt.\n",
+ "initial_prompt += f\"\\n===Tables \\n{table_doc['sql_query']}\"\n",
+ "initial_prompt += f\"\\n===Additional Context \\n\\n {table_doc['description']}\"\n",
+ "initial_prompt += (\n",
+ "    \"\\n===Response Guidelines \\n\"\n",
+ "    \"1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n\"\n",
+ "    \"2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n\"\n",
+ "    \"3. If the provided context is insufficient, please give a sql query based on your knowledge and the context provided. \\n\"\n",
+ "    \"4. Please use the most relevant table(s). \\n\"\n",
+ "    \"5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n\"\n",
+ "    f\"6. Ensure that the output SQL is {sql_dialect}-compliant and executable, and free of syntax errors. \\n\"\n",
+ ")\n",
+ "initial_prompt += f\"\\n===Question \\n\\n {user_input}\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'```sql\\nSELECT year, TMm \\nFROM Mean_summer_temperature \\nWHERE lat = 43.2961743 AND lon = 5.3699525 AND year > YEAR(CURDATE());\\n```'"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql_query = llm.invoke(initial_prompt).content\n",
+ "sql_query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Vanna Ask\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "SQL Prompt: [{'role': 'system', 'content': \"You are a SQLite expert. Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. \\n===Tables \\nCREATE TABLE Mean_winter_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT\\n);\\n\\nCREATE TABLE Mean_winter_temperature ( y FLOAT, x FLOAT, year INT, \\n month INT, \\n day INT \\n, LambertParisII VARCHAR(255), lat FLOAT, lon FLOAT, TMm FLOAT);\\n\\n\\n===Additional Context \\n\\nThe Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the futureis greater than or equal to 35°C.The variables are as follows:- 'y' and 'x': Lambert Paris II coordinates for the location.- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.- 'lat' and 'lon': Latitude and longitude of the location.- 'TX35D': Number of days with Tx ≥ 35°C.\\n\\nThe Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the future\\nis greater than or equal to 35°C.\\nThe variables are as follows:\\n- 'y' and 'x': Lambert Paris II coordinates for the location.\\n- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n- 'lat' and 'lon': Latitude and longitude of the location.\\n- 'TX35D': Number of days with Tx ≥ 35°C.\\n\\n===Response Guidelines \\n1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n2. 
If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n3. If the provided context is insufficient, please give a sql query based on your knowledge and the context provided. \\n4. Please use the most relevant table(s). \\n5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n6. Ensure that the output SQL is SQLite-compliant and executable, and free of syntax errors. \\n\"}, {'role': 'user', 'content': 'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'}]\n",
+ "Using model gpt-4o-mini for 664.25 tokens (approx)\n",
+ "LLM Response: ```sql\n",
+ "SELECT year, month, day, TMm \n",
+ "FROM Mean_winter_temperature \n",
+ "WHERE lat = 43.2961743 AND lon = 5.3699525;\n",
+ "```\n",
+ "Extracted SQL: SELECT year, month, day, TMm \n",
+ "FROM Mean_winter_temperature \n",
+ "WHERE lat = 43.2961743 AND lon = 5.3699525;\n",
+ "Using model gpt-4o-mini for 204.75 tokens (approx)\n",
+ "execute sql query : SELECT 'Mean_winter_temperature' AS source_table, year, month, day, TMm \n",
+ "FROM Mean_winter_temperature \n",
+ "WHERE lat = 43.166954040527344 AND lon = 5.430534839630127;\n",
+ "Using model gpt-4o-mini for 159.25 tokens (approx)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "( 0 1 2 3\n",
+ " 0 2031 1 14 9.952474\n",
+ " 1 2031 1 14 9.952474\n",
+ " 2 2032 1 15 10.142323\n",
+ " 3 2032 1 15 10.142323\n",
+ " 4 2033 1 14 9.907943\n",
+ " 5 2033 1 14 9.907943\n",
+ " 6 2034 1 14 9.548874\n",
+ " 7 2034 1 14 9.548874\n",
+ " 8 2035 1 14 10.284758\n",
+ " 9 2035 1 14 10.284758\n",
+ " 10 2036 1 15 10.372100\n",
+ " 11 2036 1 15 10.372100\n",
+ " 12 2037 1 14 9.985710\n",
+ " 13 2037 1 14 9.985710\n",
+ " 14 2038 1 14 10.221372\n",
+ " 15 2038 1 14 10.221372\n",
+ " 16 2039 1 14 10.222609\n",
+ " 17 2039 1 14 10.222609\n",
+ " 18 2040 1 15 10.473663\n",
+ " 19 2040 1 15 10.473663\n",
+ " 20 2041 1 14 10.427641\n",
+ " 21 2041 1 14 10.427641\n",
+ " 22 2042 1 14 10.364736\n",
+ " 23 2042 1 14 10.364736\n",
+ " 24 2043 1 14 10.112911\n",
+ " 25 2043 1 14 10.112911\n",
+ " 26 2044 1 15 10.250792\n",
+ " 27 2044 1 15 10.250792\n",
+ " 28 2045 1 14 10.166119\n",
+ " 29 2045 1 14 10.166119\n",
+ " 30 2046 1 14 10.728998\n",
+ " 31 2046 1 14 10.728998\n",
+ " 32 2047 1 14 10.347249\n",
+ " 33 2047 1 14 10.347249\n",
+ " 34 2048 1 15 10.706604\n",
+ " 35 2048 1 15 10.706604\n",
+ " 36 2049 1 14 10.592438\n",
+ " 37 2049 1 14 10.592438\n",
+ " 38 2050 1 14 10.632255\n",
+ " 39 2050 1 14 10.632255,\n",
+ " Figure({\n",
+ " 'data': [{'name': 'Bar Chart',\n",
+ " 'type': 'bar',\n",
+ " 'x': array([2031, 2031, 2032, 2032, 2033, 2033, 2034, 2034, 2035, 2035, 2036, 2036,\n",
+ " 2037, 2037, 2038, 2038, 2039, 2039, 2040, 2040, 2041, 2041, 2042, 2042,\n",
+ " 2043, 2043, 2044, 2044, 2045, 2045, 2046, 2046, 2047, 2047, 2048, 2048,\n",
+ " 2049, 2049, 2050, 2050]),\n",
+ " 'y': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])},\n",
+ " {'mode': 'lines+markers',\n",
+ " 'name': 'Line Chart',\n",
+ " 'type': 'scatter',\n",
+ " 'x': array([2031, 2031, 2032, 2032, 2033, 2033, 2034, 2034, 2035, 2035, 2036, 2036,\n",
+ " 2037, 2037, 2038, 2038, 2039, 2039, 2040, 2040, 2041, 2041, 2042, 2042,\n",
+ " 2043, 2043, 2044, 2044, 2045, 2045, 2046, 2046, 2047, 2047, 2048, 2048,\n",
+ " 2049, 2049, 2050, 2050]),\n",
+ " 'y': array([ 9.95247412, 9.95247412, 10.14232294, 10.14232294, 9.90794294,\n",
+ " 9.90794294, 9.54887353, 9.54887353, 10.28475824, 10.28475824,\n",
+ " 10.3721 , 10.3721 , 9.98571 , 9.98571 , 10.22137235,\n",
+ " 10.22137235, 10.22260941, 10.22260941, 10.47366294, 10.47366294,\n",
+ " 10.42764059, 10.42764059, 10.36473647, 10.36473647, 10.11291059,\n",
+ " 10.11291059, 10.25079235, 10.25079235, 10.16611941, 10.16611941,\n",
+ " 10.72899765, 10.72899765, 10.34724882, 10.34724882, 10.70660412,\n",
+ " 10.70660412, 10.59243765, 10.59243765, 10.63225529, 10.63225529])}],\n",
+ " 'layout': {'template': '...'}\n",
+ " }))"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res = ask_vanna(\"Quelle sera la température à Marseille sur les prochaines années ?\")\n",
+ "res"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2031 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.952474 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2031 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.952474 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2032 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.142323 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2032 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.142323 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2033 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.907943 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 2033 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.907943 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 2034 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.548874 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 2034 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.548874 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 2035 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.284758 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 2035 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.284758 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 2036 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.372100 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 2036 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.372100 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 2037 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.985710 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 2037 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.985710 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 2038 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.221372 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 2038 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.221372 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 2039 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.222609 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 2039 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.222609 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 2040 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.473663 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 2040 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.473663 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 2041 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.427641 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 2041 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.427641 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 2042 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.364736 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 2042 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.364736 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 2043 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.112911 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 2043 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.112911 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 2044 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.250792 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 2044 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.250792 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 2045 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.166119 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 2045 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.166119 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 2046 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.728998 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " 2046 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.728998 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 2047 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.347249 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " 2047 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.347249 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " 2048 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.706604 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " 2048 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.706604 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 2049 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.592438 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " 2049 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.592438 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " 2050 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.632255 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " 2050 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.632255 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3\n",
+ "0 2031 1 14 9.952474\n",
+ "1 2031 1 14 9.952474\n",
+ "2 2032 1 15 10.142323\n",
+ "3 2032 1 15 10.142323\n",
+ "4 2033 1 14 9.907943\n",
+ "5 2033 1 14 9.907943\n",
+ "6 2034 1 14 9.548874\n",
+ "7 2034 1 14 9.548874\n",
+ "8 2035 1 14 10.284758\n",
+ "9 2035 1 14 10.284758\n",
+ "10 2036 1 15 10.372100\n",
+ "11 2036 1 15 10.372100\n",
+ "12 2037 1 14 9.985710\n",
+ "13 2037 1 14 9.985710\n",
+ "14 2038 1 14 10.221372\n",
+ "15 2038 1 14 10.221372\n",
+ "16 2039 1 14 10.222609\n",
+ "17 2039 1 14 10.222609\n",
+ "18 2040 1 15 10.473663\n",
+ "19 2040 1 15 10.473663\n",
+ "20 2041 1 14 10.427641\n",
+ "21 2041 1 14 10.427641\n",
+ "22 2042 1 14 10.364736\n",
+ "23 2042 1 14 10.364736\n",
+ "24 2043 1 14 10.112911\n",
+ "25 2043 1 14 10.112911\n",
+ "26 2044 1 15 10.250792\n",
+ "27 2044 1 15 10.250792\n",
+ "28 2045 1 14 10.166119\n",
+ "29 2045 1 14 10.166119\n",
+ "30 2046 1 14 10.728998\n",
+ "31 2046 1 14 10.728998\n",
+ "32 2047 1 14 10.347249\n",
+ "33 2047 1 14 10.347249\n",
+ "34 2048 1 15 10.706604\n",
+ "35 2048 1 15 10.706604\n",
+ "36 2049 1 14 10.592438\n",
+ "37 2049 1 14 10.592438\n",
+ "38 2050 1 14 10.632255\n",
+ "39 2050 1 14 10.632255"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loading embeddings model: BAAI/bge-base-en-v1.5\n"
+ ]
+ }
+ ],
+ "source": [
+ "from climateqa.engine.talk_to_data.myVanna import MyVanna\n",
+ "from climateqa.engine.talk_to_data.utils import loc2coords, detect_location_with_openai, detectTable, nearestNeighbourSQL\n",
+ "\n",
+ "from climateqa.engine.llm import get_llm\n",
+ "\n",
+ "from dotenv import load_dotenv\n",
+ "\n",
+ "# Load environment variables before anything reads credentials.\n",
+ "load_dotenv()\n",
+ "\n",
+ "llm = get_llm(provider=\"openai\")\n",
+ "\n",
+ "# API keys and Vanna settings come from the environment; missing ones are None.\n",
+ "OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')\n",
+ "PC_API_KEY = os.environ.get('VANNA_PINECONE_API_KEY')\n",
+ "INDEX_NAME = os.environ.get('VANNA_INDEX_NAME')\n",
+ "VANNA_MODEL = os.environ.get('VANNA_MODEL')\n",
+ "\n",
+ "\n",
+ "# Vanna object\n",
+ "vanna_config = {\n",
+ "    \"temperature\": 0,\n",
+ "    \"api_key\": OPENAI_API_KEY,\n",
+ "    'model': VANNA_MODEL,\n",
+ "    'pc_api_key': PC_API_KEY,\n",
+ "    'index_name': INDEX_NAME,\n",
+ "    \"top_k\": 5,\n",
+ "}\n",
+ "vn = MyVanna(config=vanna_config)\n",
+ "\n",
+ "# Connect Vanna to the local SQLite database file.\n",
+ "db_vanna_path = \"database/drias.db\"\n",
+ "vn.connect_to_sqlite(db_vanna_path)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query = \"Quelle sera la température à Marseille sur les prochaines années ?\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Detect location"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Marseille\n"
+ ]
+ }
+ ],
+ "source": [
+ "location = detect_location_with_openai(OPENAI_API_KEY, query)\n",
+ "print(location)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Convert location to latitude, longitude coordinates"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?\n"
+ ]
+ }
+ ],
+ "source": [
+ "coords = loc2coords(location)\n",
+ "user_input = query.replace(location, f\"lat, long : {coords}\")\n",
+ "print(user_input)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Query DRIAS on this location"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_input"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "SQL Prompt: [{'role': 'system', 'content': \"You are a SQLite expert. Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. \\n===Tables \\nCREATE TABLE Mean_winter_temperature ( y FLOAT, x FLOAT, year INT, \\n month INT, \\n day INT \\n, LambertParisII VARCHAR(255), lat FLOAT, lon FLOAT, TMm FLOAT);\\n\\nCREATE TABLE Mean_winter_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT\\n);\\n\\nCREATE TABLE Mean_summer_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT\\n);\\n\\nCREATE TABLE Mean_summer_temperature ( y FLOAT, x FLOAT, year INT, \\n month INT, \\n day INT \\n, LambertParisII VARCHAR(255), lat FLOAT, lon FLOAT, TMm FLOAT);\\n\\nCREATE TABLE Maximum_summer_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TXm FLOAT\\n);\\n\\n\\n===Additional Context \\n\\nThe Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the futureis greater than or equal to 35°C.The variables are as follows:- 'y' and 'x': Lambert Paris II coordinates for the location.- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.- 'lat' and 'lon': Latitude and longitude of the location.- 'TX35D': Number of days with Tx ≥ 35°C.\\n\\nThe Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the future\\nis greater than or equal to 35°C.\\nThe variables are as follows:\\n- 
'y' and 'x': Lambert Paris II coordinates for the location.\\n- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n- 'lat' and 'lon': Latitude and longitude of the location.\\n- 'TX35D': Number of days with Tx ≥ 35°C.\\n\\nThe Number of days with Tx above 30C table contains information on the number of days when the maximum temperature in the past and the futureis greater than or equal to 30°C.The variables are as follows:- 'y' and 'x': Lambert Paris II coordinates for the location.- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.- 'lat' and 'lon': Latitude and longitude of the location.- 'TX30D': Number of days with Tx ≥ 30°C.\\n\\nThe Number of days with Tx above 30C table contains information on the number of days when the maximum temperature in the past and the future\\nis greater than or equal to 30°C.\\nThe variables are as follows:\\n- 'y' and 'x': Lambert Paris II coordinates for the location.\\n- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n- 'lat' and 'lon': Latitude and longitude of the location.\\n- 'TX30D': Number of days with Tx ≥ 30°C.\\n\\nThe Mean winter temperature table contains information on the average (mean) winter temperature in the past and the future.\\nThe variables are as follows:\\n- 'y' and 'x': Lambert Paris II coordinates for the location.\\n- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II 
projection.\\n- 'lat' and 'lon': Latitude and longitude of the location.\\n- 'TMm': Average winter temperature.\\n\\n===Response Guidelines \\n1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n3. If the provided context is insufficient, please give a sql query based on your knowledge and the context provided. \\n4. Please use the most relevant table(s). \\n5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n6. Ensure that the output SQL is SQLite-compliant and executable, and free of syntax errors. \\n\"}, {'role': 'user', 'content': 'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'}]\n",
+ "Using model gpt-4o-mini for 1235.0 tokens (approx)\n",
+ "LLM Response: ```sql\n",
+ "SELECT year, month, day, TMm \n",
+ "FROM Mean_winter_temperature \n",
+ "WHERE lat = 43.2961743 AND lon = 5.3699525;\n",
+ "```\n",
+ "Extracted SQL: SELECT year, month, day, TMm \n",
+ "FROM Mean_winter_temperature \n",
+ "WHERE lat = 43.2961743 AND lon = 5.3699525;\n",
+ "Using model gpt-4o-mini for 204.75 tokens (approx)\n",
+ "\n",
+ "SQL QUERY : SELECT year, month, day, TMm \n",
+ "FROM Mean_winter_temperature \n",
+ "WHERE lat = 43.2961743 AND lon = 5.3699525;\n"
+ ]
+ }
+ ],
+ "source": [
+ "sql_query, result_dataframe, figure = vn.ask(user_input, print_results=False, allow_llm_to_see_data=True)\n",
+ "# sql_query, result_dataframe, figure = vn.ask(approx_user_input, print_results=False, allow_llm_to_see_data=True)\n",
+ "print(\"\\nSQL QUERY :\", sql_query)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " y | \n",
+ " x | \n",
+ " LambertParisII | \n",
+ " lat | \n",
+ " lon | \n",
+ " TMm | \n",
+ " year | \n",
+ " month | \n",
+ " day | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1801000.0 | \n",
+ " 852000.0 | \n",
+ " 0.999877 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 9.952474 | \n",
+ " 2031 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1801000.0 | \n",
+ " 852000.0 | \n",
+ " 0.999877 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 9.952474 | \n",
+ " 2031 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1801000.0 | \n",
+ " 852000.0 | \n",
+ " 0.999877 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 10.142323 | \n",
+ " 2032 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1801000.0 | \n",
+ " 852000.0 | \n",
+ " 0.999877 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 10.142323 | \n",
+ " 2032 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1801000.0 | \n",
+ " 852000.0 | \n",
+ " 0.999877 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 9.907943 | \n",
+ " 2033 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " y x LambertParisII lat lon TMm year \\\n",
+ "0 1801000.0 852000.0 0.999877 43.166954 5.430535 9.952474 2031 \n",
+ "1 1801000.0 852000.0 0.999877 43.166954 5.430535 9.952474 2031 \n",
+ "2 1801000.0 852000.0 0.999877 43.166954 5.430535 10.142323 2032 \n",
+ "3 1801000.0 852000.0 0.999877 43.166954 5.430535 10.142323 2032 \n",
+ "4 1801000.0 852000.0 0.999877 43.166954 5.430535 9.907943 2033 \n",
+ "\n",
+ " month day \n",
+ "0 1 14 \n",
+ "1 1 14 \n",
+ "2 1 15 \n",
+ "3 1 15 \n",
+ "4 1 14 "
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# vn.run_sql(\"SELECT * FROM Mean_winter_temperature WHERE lat = 43.166954040527344 AND lon = 5.430534839630127 LIMIT 5;\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Replace the coordinates with the nearest existing coordinates in the table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Mean_winter_temperature']\n"
+ ]
+ }
+ ],
+ "source": [
+ "table = detectTable(sql_query)\n",
+ "print(table)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[(43.166954040527344, 5.430534839630127)]"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# One nearestNeighbourSQL lookup per table referenced by the generated query\n",
+ "# (presumably the closest point stored in that table - confirm in utils).\n",
+ "coords_tables = [\n",
+ "    nearestNeighbourSQL(db_vanna_path, coords, queried_table)\n",
+ "    for queried_table in table\n",
+ "]\n",
+ "coords_tables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'SELECT year, month, day, TMm \\nFROM Mean_winter_temperature \\nWHERE lat = 43.166954040527344 AND lon = 5.430534839630127;'"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Swap the geocoded coordinates in the generated SQL for the coordinates resolved per table.\n",
+ "# The i-th occurrence of the original (lat, lon) pair is mapped to the i-th entry of coords_tables.\n",
+ "orig_lat, orig_lon = str(coords[0]), str(coords[1])\n",
+ "n = sql_query.count(orig_lat)\n",
+ "\n",
+ "# Check up front instead of failing with an IndexError halfway through the rewrite.\n",
+ "if n > len(coords_tables):\n",
+ "    raise ValueError(f\"Latitude {orig_lat} occurs {n} time(s) in the query but only {len(coords_tables)} table coordinate(s) were resolved\")\n",
+ "\n",
+ "sql_query_new_coords = sql_query\n",
+ "for nearest in coords_tables[:n]:\n",
+ "    # count=1 so each pass rewrites only the next remaining occurrence\n",
+ "    sql_query_new_coords = sql_query_new_coords.replace(orig_lat, str(nearest[0]), 1)\n",
+ "    sql_query_new_coords = sql_query_new_coords.replace(orig_lon, str(nearest[1]), 1)\n",
+ "sql_query_new_coords"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# sql_with_table_names = llm.invoke(f\"Make the following sql query display the source table in the rows {sql_query_new_coords}. Just answer the query. The answer should not include ```sql\\n\").content\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Query back the DB"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sql_query_new_coords = sql_query.replace(f\"{coords[0]}\", f\"{coords2[0]}\").replace(f\"{coords[1]}\", f\"{coords2[1]}\").replace(\"\\n\", \"\")\n",
+ "# sql_query_new_coords"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'SELECT year, month, day, TMm \\nFROM Mean_winter_temperature \\nWHERE lat = 43.2961743 AND lon = 5.3699525;'"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql_query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# res_new = vn.ask('SELECT year, TMm \\nFROM Mean_winter_temperature \\nWHERE lat = 43.2961743 AND lon = 5.3699525\\nUNION ALL\\nSELECT year, TMm \\nFROM Mean_summer_temperature \\nWHERE lat = 43.2961743 AND lon = 5.3699525;')\n",
+ "# res_new"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sql_query_new_coords = 'SELECT \"Mean_winter_temperature\" AS table_name, lat, lon, year, TMm \\nFROM Mean_winter_temperature \\nWHERE lat = 43.166954040527344 AND lon = 5.430534839630127\\nUNION ALL\\nSELECT \"Mean_summer_temperature\" AS table_name, lat, lon, year, TMm \\nFROM Mean_summer_temperature \\nWHERE lat = 43.166954040527344 AND lon = 5.430534839630127;'\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'year, month, day, TMm'"
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql_query_new_coords.split(\"SELECT\")[1].split(\"FROM\")[0].strip()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " lat | \n",
+ " lon | \n",
+ " year | \n",
+ " month | \n",
+ " day | \n",
+ " TMm | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2031 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.952474 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2031 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.952474 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2032 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.142323 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2032 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.142323 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2033 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.907943 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2033 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.907943 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2034 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.548874 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2034 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.548874 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2035 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.284758 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2035 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.284758 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2036 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.372100 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2036 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.372100 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2037 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.985710 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2037 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 9.985710 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2038 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.221372 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2038 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.221372 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2039 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.222609 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2039 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.222609 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2040 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.473663 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2040 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.473663 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2041 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.427641 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2041 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.427641 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2042 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.364736 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2042 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.364736 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2043 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.112911 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2043 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.112911 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2044 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.250792 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2044 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.250792 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2045 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.166119 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2045 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.166119 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2046 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.728998 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2046 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.728998 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2047 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.347249 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2047 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.347249 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2048 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.706604 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2048 | \n",
+ " 1 | \n",
+ " 15 | \n",
+ " 10.706604 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2049 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.592438 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2049 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.592438 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2050 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.632255 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2050 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 10.632255 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " lat lon year month day TMm\n",
+ "0 43.166954 5.430535 2031 1 14 9.952474\n",
+ "1 43.166954 5.430535 2031 1 14 9.952474\n",
+ "2 43.166954 5.430535 2032 1 15 10.142323\n",
+ "3 43.166954 5.430535 2032 1 15 10.142323\n",
+ "4 43.166954 5.430535 2033 1 14 9.907943\n",
+ "5 43.166954 5.430535 2033 1 14 9.907943\n",
+ "6 43.166954 5.430535 2034 1 14 9.548874\n",
+ "7 43.166954 5.430535 2034 1 14 9.548874\n",
+ "8 43.166954 5.430535 2035 1 14 10.284758\n",
+ "9 43.166954 5.430535 2035 1 14 10.284758\n",
+ "10 43.166954 5.430535 2036 1 15 10.372100\n",
+ "11 43.166954 5.430535 2036 1 15 10.372100\n",
+ "12 43.166954 5.430535 2037 1 14 9.985710\n",
+ "13 43.166954 5.430535 2037 1 14 9.985710\n",
+ "14 43.166954 5.430535 2038 1 14 10.221372\n",
+ "15 43.166954 5.430535 2038 1 14 10.221372\n",
+ "16 43.166954 5.430535 2039 1 14 10.222609\n",
+ "17 43.166954 5.430535 2039 1 14 10.222609\n",
+ "18 43.166954 5.430535 2040 1 15 10.473663\n",
+ "19 43.166954 5.430535 2040 1 15 10.473663\n",
+ "20 43.166954 5.430535 2041 1 14 10.427641\n",
+ "21 43.166954 5.430535 2041 1 14 10.427641\n",
+ "22 43.166954 5.430535 2042 1 14 10.364736\n",
+ "23 43.166954 5.430535 2042 1 14 10.364736\n",
+ "24 43.166954 5.430535 2043 1 14 10.112911\n",
+ "25 43.166954 5.430535 2043 1 14 10.112911\n",
+ "26 43.166954 5.430535 2044 1 15 10.250792\n",
+ "27 43.166954 5.430535 2044 1 15 10.250792\n",
+ "28 43.166954 5.430535 2045 1 14 10.166119\n",
+ "29 43.166954 5.430535 2045 1 14 10.166119\n",
+ "30 43.166954 5.430535 2046 1 14 10.728998\n",
+ "31 43.166954 5.430535 2046 1 14 10.728998\n",
+ "32 43.166954 5.430535 2047 1 14 10.347249\n",
+ "33 43.166954 5.430535 2047 1 14 10.347249\n",
+ "34 43.166954 5.430535 2048 1 15 10.706604\n",
+ "35 43.166954 5.430535 2048 1 15 10.706604\n",
+ "36 43.166954 5.430535 2049 1 14 10.592438\n",
+ "37 43.166954 5.430535 2049 1 14 10.592438\n",
+ "38 43.166954 5.430535 2050 1 14 10.632255\n",
+ "39 43.166954 5.430535 2050 1 14 10.632255"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Re-run the query with the replacement coordinates, adding lat/lon to every SELECT.\n",
+ "db = sqlite3.connect(db_vanna_path)\n",
+ "try:\n",
+ "    cursor = db.cursor().execute(sql_query_new_coords.replace(\"SELECT\", \"SELECT lat, lon,\"))\n",
+ "    result = cursor.fetchall()\n",
+ "    # Read the column names from the cursor instead of splitting the SELECT list\n",
+ "    # on \",\", which left leading spaces in the names (e.g. \" month\").\n",
+ "    column_names = [column[0] for column in cursor.description]\n",
+ "finally:\n",
+ "    db.close()  # release the SQLite handle even if the query fails\n",
+ "df = pd.DataFrame(result, columns=column_names)\n",
+ "\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Data | \n",
+ " lat | \n",
+ " lon | \n",
+ " year | \n",
+ " TMm | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Mean_winter_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2031 | \n",
+ " 9.952474 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Mean_winter_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2031 | \n",
+ " 9.952474 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Mean_winter_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2032 | \n",
+ " 10.142323 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Mean_winter_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2032 | \n",
+ " 10.142323 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Mean_winter_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2033 | \n",
+ " 9.907943 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " Mean_summer_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2048 | \n",
+ " 25.187577 | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " Mean_summer_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2049 | \n",
+ " 24.829654 | \n",
+ "
\n",
+ " \n",
+ " 77 | \n",
+ " Mean_summer_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2049 | \n",
+ " 24.829654 | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " Mean_summer_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2050 | \n",
+ " 25.053394 | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " Mean_summer_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2050 | \n",
+ " 25.053394 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
80 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Data lat lon year TMm\n",
+ "0 Mean_winter_temperature 43.166954 5.430535 2031 9.952474\n",
+ "1 Mean_winter_temperature 43.166954 5.430535 2031 9.952474\n",
+ "2 Mean_winter_temperature 43.166954 5.430535 2032 10.142323\n",
+ "3 Mean_winter_temperature 43.166954 5.430535 2032 10.142323\n",
+ "4 Mean_winter_temperature 43.166954 5.430535 2033 9.907943\n",
+ ".. ... ... ... ... ...\n",
+ "75 Mean_summer_temperature 43.166954 5.430535 2048 25.187577\n",
+ "76 Mean_summer_temperature 43.166954 5.430535 2049 24.829654\n",
+ "77 Mean_summer_temperature 43.166954 5.430535 2049 24.829654\n",
+ "78 Mean_summer_temperature 43.166954 5.430535 2050 25.053394\n",
+ "79 Mean_summer_temperature 43.166954 5.430535 2050 25.053394\n",
+ "\n",
+ "[80 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql_query_new_coords = 'SELECT \"Mean_winter_temperature\" AS table_name, lat, lon, year, TMm \\nFROM Mean_winter_temperature \\nWHERE lat = 43.166954040527344 AND lon = 5.430534839630127\\nUNION ALL\\nSELECT \"Mean_summer_temperature\" AS table_name, lat, lon, year, TMm \\nFROM Mean_summer_temperature \\nWHERE lat = 43.166954040527344 AND lon = 5.430534839630127;'\n",
+ "\n",
+ "# Run the hand-written UNION query; its first column holds the source table name.\n",
+ "db = sqlite3.connect(db_vanna_path)\n",
+ "try:\n",
+ "    result = db.cursor().execute(sql_query_new_coords).fetchall()\n",
+ "finally:\n",
+ "    db.close()  # release the SQLite handle even if the query fails\n",
+ "df = pd.DataFrame(result, columns=[\"Data\", \"lat\", \"lon\", \"year\", \"TMm\"])\n",
+ "\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Using model gpt-4o-mini for 258.0 tokens (approx)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Ask Vanna for Plotly code describing `df`, then build the figure from that code.\n",
+ "# NOTE(review): `query` appears to be assigned only in the \"Whole Vanna workflow\"\n",
+ "# section further down - confirm it is defined before this cell runs.\n",
+ "plotly_code = vn.generate_plotly_code(\n",
+ " question=query,\n",
+ " sql=sql_query_new_coords,\n",
+ " df_metadata=f\"Running df.dtypes gives:\\n {df.dtypes}\",\n",
+ " )\n",
+ "\n",
+ "fig = vn.get_plotly_figure(plotly_code=plotly_code, df=df)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Data | \n",
+ " lat | \n",
+ " lon | \n",
+ " year | \n",
+ " TMm | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Mean_winter_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2031 | \n",
+ " 9.952474 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Mean_winter_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2031 | \n",
+ " 9.952474 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Mean_winter_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2032 | \n",
+ " 10.142323 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Mean_winter_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2032 | \n",
+ " 10.142323 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Mean_winter_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2033 | \n",
+ " 9.907943 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " Mean_summer_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2048 | \n",
+ " 25.187577 | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " Mean_summer_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2049 | \n",
+ " 24.829654 | \n",
+ "
\n",
+ " \n",
+ " 77 | \n",
+ " Mean_summer_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2049 | \n",
+ " 24.829654 | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " Mean_summer_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2050 | \n",
+ " 25.053394 | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " Mean_summer_temperature | \n",
+ " 43.166954 | \n",
+ " 5.430535 | \n",
+ " 2050 | \n",
+ " 25.053394 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
80 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Data lat lon year TMm\n",
+ "0 Mean_winter_temperature 43.166954 5.430535 2031 9.952474\n",
+ "1 Mean_winter_temperature 43.166954 5.430535 2031 9.952474\n",
+ "2 Mean_winter_temperature 43.166954 5.430535 2032 10.142323\n",
+ "3 Mean_winter_temperature 43.166954 5.430535 2032 10.142323\n",
+ "4 Mean_winter_temperature 43.166954 5.430535 2033 9.907943\n",
+ ".. ... ... ... ... ...\n",
+ "75 Mean_summer_temperature 43.166954 5.430535 2048 25.187577\n",
+ "76 Mean_summer_temperature 43.166954 5.430535 2049 24.829654\n",
+ "77 Mean_summer_temperature 43.166954 5.430535 2049 24.829654\n",
+ "78 Mean_summer_temperature 43.166954 5.430535 2050 25.053394\n",
+ "79 Mean_summer_temperature 43.166954 5.430535 2050 25.053394\n",
+ "\n",
+ "[80 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "hovertemplate": "Data=Mean_winter_temperature
year=%{x}
TMm=%{y}",
+ "legendgroup": "Mean_winter_temperature",
+ "line": {
+ "color": "#636efa",
+ "dash": "solid"
+ },
+ "marker": {
+ "symbol": "circle"
+ },
+ "mode": "lines",
+ "name": "Mean_winter_temperature",
+ "orientation": "v",
+ "showlegend": true,
+ "type": "scatter",
+ "x": [
+ 2031,
+ 2031,
+ 2032,
+ 2032,
+ 2033,
+ 2033,
+ 2034,
+ 2034,
+ 2035,
+ 2035,
+ 2036,
+ 2036,
+ 2037,
+ 2037,
+ 2038,
+ 2038,
+ 2039,
+ 2039,
+ 2040,
+ 2040,
+ 2041,
+ 2041,
+ 2042,
+ 2042,
+ 2043,
+ 2043,
+ 2044,
+ 2044,
+ 2045,
+ 2045,
+ 2046,
+ 2046,
+ 2047,
+ 2047,
+ 2048,
+ 2048,
+ 2049,
+ 2049,
+ 2050,
+ 2050
+ ],
+ "xaxis": "x",
+ "y": [
+ 9.952474117647114,
+ 9.952474117647114,
+ 10.142322941176474,
+ 10.142322941176474,
+ 9.907942941176486,
+ 9.907942941176486,
+ 9.548873529411765,
+ 9.548873529411765,
+ 10.284758235294191,
+ 10.284758235294191,
+ 10.372100000000046,
+ 10.372100000000046,
+ 9.98571000000004,
+ 9.98571000000004,
+ 10.221372352941216,
+ 10.221372352941216,
+ 10.222609411764722,
+ 10.222609411764722,
+ 10.473662941176485,
+ 10.473662941176485,
+ 10.427640588235306,
+ 10.427640588235306,
+ 10.364736470588241,
+ 10.364736470588241,
+ 10.112910588235309,
+ 10.112910588235309,
+ 10.250792352941176,
+ 10.250792352941176,
+ 10.166119411764669,
+ 10.166119411764669,
+ 10.728997647058861,
+ 10.728997647058861,
+ 10.347248823529412,
+ 10.347248823529412,
+ 10.706604117647089,
+ 10.706604117647089,
+ 10.59243764705883,
+ 10.59243764705883,
+ 10.63225529411767,
+ 10.63225529411767
+ ],
+ "yaxis": "y"
+ },
+ {
+ "hovertemplate": "Data=Mean_summer_temperature
year=%{x}
TMm=%{y}",
+ "legendgroup": "Mean_summer_temperature",
+ "line": {
+ "color": "#EF553B",
+ "dash": "solid"
+ },
+ "marker": {
+ "symbol": "circle"
+ },
+ "mode": "lines",
+ "name": "Mean_summer_temperature",
+ "orientation": "v",
+ "showlegend": true,
+ "type": "scatter",
+ "x": [
+ 2031,
+ 2031,
+ 2032,
+ 2032,
+ 2033,
+ 2033,
+ 2034,
+ 2034,
+ 2035,
+ 2035,
+ 2036,
+ 2036,
+ 2037,
+ 2037,
+ 2038,
+ 2038,
+ 2039,
+ 2039,
+ 2040,
+ 2040,
+ 2041,
+ 2041,
+ 2042,
+ 2042,
+ 2043,
+ 2043,
+ 2044,
+ 2044,
+ 2045,
+ 2045,
+ 2046,
+ 2046,
+ 2047,
+ 2047,
+ 2048,
+ 2048,
+ 2049,
+ 2049,
+ 2050,
+ 2050
+ ],
+ "xaxis": "x",
+ "y": [
+ 24.061035294117687,
+ 24.061035294117687,
+ 24.530692941176483,
+ 24.530692941176483,
+ 24.722234705882386,
+ 24.722234705882386,
+ 23.84629176470588,
+ 23.84629176470588,
+ 24.231422352941195,
+ 24.231422352941195,
+ 24.488941764705885,
+ 24.488941764705885,
+ 24.79424117647062,
+ 24.79424117647062,
+ 24.730553529411793,
+ 24.730553529411793,
+ 24.44979882352942,
+ 24.44979882352942,
+ 24.40726882352942,
+ 24.40726882352942,
+ 24.768547647058824,
+ 24.768547647058824,
+ 24.53479647058822,
+ 24.53479647058822,
+ 24.769181176470624,
+ 24.769181176470624,
+ 24.489877058823538,
+ 24.489877058823538,
+ 24.448076470588262,
+ 24.448076470588262,
+ 25.111282352941203,
+ 25.111282352941203,
+ 24.72313823529413,
+ 24.72313823529413,
+ 25.187577058823535,
+ 25.187577058823535,
+ 24.829653529411814,
+ 24.829653529411814,
+ 25.053394117647144,
+ 25.053394117647144
+ ],
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "legend": {
+ "title": {
+ "text": "Data"
+ },
+ "tracegroupgap": 0
+ },
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#f2f5fa"
+ },
+ "error_y": {
+ "color": "#f2f5fa"
+ },
+ "marker": {
+ "line": {
+ "color": "rgb(17,17,17)",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "rgb(17,17,17)",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#A2B1C6",
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "minorgridcolor": "#506784",
+ "startlinecolor": "#A2B1C6"
+ },
+ "baxis": {
+ "endlinecolor": "#A2B1C6",
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "minorgridcolor": "#506784",
+ "startlinecolor": "#A2B1C6"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "marker": {
+ "line": {
+ "color": "#283442"
+ }
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "line": {
+ "color": "#283442"
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#506784"
+ },
+ "line": {
+ "color": "rgb(17,17,17)"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#2a3f5f"
+ },
+ "line": {
+ "color": "rgb(17,17,17)"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#f2f5fa",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#f2f5fa"
+ },
+ "geo": {
+ "bgcolor": "rgb(17,17,17)",
+ "lakecolor": "rgb(17,17,17)",
+ "landcolor": "rgb(17,17,17)",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "#506784"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "dark"
+ },
+ "paper_bgcolor": "rgb(17,17,17)",
+ "plot_bgcolor": "rgb(17,17,17)",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "ticks": ""
+ },
+ "bgcolor": "rgb(17,17,17)",
+ "radialaxis": {
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "rgb(17,17,17)",
+ "gridcolor": "#506784",
+ "gridwidth": 2,
+ "linecolor": "#506784",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "#C8D4E3"
+ },
+ "yaxis": {
+ "backgroundcolor": "rgb(17,17,17)",
+ "gridcolor": "#506784",
+ "gridwidth": 2,
+ "linecolor": "#506784",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "#C8D4E3"
+ },
+ "zaxis": {
+ "backgroundcolor": "rgb(17,17,17)",
+ "gridcolor": "#506784",
+ "gridwidth": 2,
+ "linecolor": "#506784",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "#C8D4E3"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#f2f5fa"
+ }
+ },
+ "sliderdefaults": {
+ "bgcolor": "#C8D4E3",
+ "bordercolor": "rgb(17,17,17)",
+ "borderwidth": 1,
+ "tickwidth": 0
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "ticks": ""
+ },
+ "bgcolor": "rgb(17,17,17)",
+ "caxis": {
+ "gridcolor": "#506784",
+ "linecolor": "#506784",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "updatemenudefaults": {
+ "bgcolor": "#506784",
+ "borderwidth": 0
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "#283442",
+ "linecolor": "#506784",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "#283442",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "#283442",
+ "linecolor": "#506784",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "#283442",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "Mean Temperature in Marseille Over the Years"
+ },
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "title": {
+ "text": "year"
+ }
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "title": {
+ "text": "TMm"
+ }
+ }
+ }
+ },
+ "text/html": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "fig"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Whole Vanna workflow"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def replace_coordonates(coords, sql_query, coords_tables):\n",
+ "    \"\"\"Substitute the coordinates in `sql_query` with one coordinate pair per occurrence.\n",
+ "\n",
+ "    The k-th occurrence of `str(coords[0])` becomes `coords_tables[k][0]` and the\n",
+ "    k-th occurrence of `str(coords[1])` becomes `coords_tables[k][1]`.\n",
+ "\n",
+ "    Args:\n",
+ "        coords: (lat, lon) pair whose string forms appear in `sql_query`.\n",
+ "        sql_query: SQL text to rewrite.\n",
+ "        coords_tables: Replacement (lat, lon) pairs, in order of occurrence.\n",
+ "\n",
+ "    Returns:\n",
+ "        The rewritten SQL text.\n",
+ "\n",
+ "    Raises:\n",
+ "        IndexError: If the latitude occurs more often than there are pairs\n",
+ "            in `coords_tables`.\n",
+ "    \"\"\"\n",
+ "    import re  # function-scope import: this cell should not depend on the import cell\n",
+ "\n",
+ "    old_lat, old_lon = str(coords[0]), str(coords[1])\n",
+ "    n = sql_query.count(old_lat)\n",
+ "    if n > len(coords_tables):\n",
+ "        raise IndexError(\n",
+ "            f'{old_lat} occurs {n} times in the query but only '\n",
+ "            f'{len(coords_tables)} replacement coordinates were given'\n",
+ "        )\n",
+ "\n",
+ "    seen = [0, 0]  # occurrences consumed so far: [latitudes, longitudes]\n",
+ "\n",
+ "    def _swap(match):\n",
+ "        axis = 0 if match.group(0) == old_lat else 1\n",
+ "        k = seen[axis]\n",
+ "        seen[axis] += 1\n",
+ "        # Only the first n longitudes are rewritten; any further ones stay as they are.\n",
+ "        return str(coords_tables[k][axis]) if k < n else match.group(0)\n",
+ "\n",
+ "    # One left-to-right pass over the original text: a replacement can never be\n",
+ "    # matched again, even when a new value contains the old one (43.2 -> 43.25).\n",
+ "    pattern = '|'.join(re.escape(s) for s in sorted({old_lat, old_lon}, key=len, reverse=True))\n",
+ "    return re.sub(pattern, _swap, sql_query)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def ask_vanna(query):\n",
+ "    \"\"\"Answer a location-based question with a DataFrame and a Plotly figure.\n",
+ "\n",
+ "    Pipeline: detect the place named in `query`, turn it into coordinates, let\n",
+ "    Vanna write the SQL, swap those coordinates for the per-table ones returned\n",
+ "    by `nearestNeighbourSQL`, ask the LLM to add a source-table column, run the\n",
+ "    result against the SQLite database and have Vanna generate a chart.\n",
+ "\n",
+ "    Args:\n",
+ "        query: The user question in natural language.\n",
+ "\n",
+ "    Returns:\n",
+ "        A `(df, fig)` tuple: the query result and the figure built from it.\n",
+ "    \"\"\"\n",
+ "    location = detect_location_with_openai(OPENAI_API_KEY, query)\n",
+ "    coords = loc2coords(location)\n",
+ "    # Vanna is given the coordinates in place of the location text.\n",
+ "    user_input = query.replace(location, f\"lat, long : {coords}\")\n",
+ "    sql_query, result_dataframe, _ = vn.ask(user_input, print_results=False, allow_llm_to_see_data=True)\n",
+ "    tables = detectTable(sql_query)\n",
+ "    # presumably the nearest grid point of each table - verify against nearestNeighbourSQL\n",
+ "    coords_tables = [nearestNeighbourSQL(db_vanna_path, coords, name) for name in tables]\n",
+ "    sql_query_new_coords = replace_coordonates(coords, sql_query, coords_tables)\n",
+ "    sql_with_table_names = llm.invoke(f\"Make the following sql query display the source table in the rows {sql_query_new_coords}. Just answer the query. The answer should not include ```sql\\n\").content\n",
+ "\n",
+ "    db = sqlite3.connect(db_vanna_path)\n",
+ "    try:\n",
+ "        result = db.cursor().execute(sql_with_table_names).fetchall()\n",
+ "    finally:\n",
+ "        db.close()  # release the SQLite handle even if the LLM-written SQL fails\n",
+ "    # assumes the LLM added exactly one leading column to the SELECT list - TODO confirm\n",
+ "    df = pd.DataFrame(result, columns=[\"data_name\"] + list(result_dataframe.columns))\n",
+ "\n",
+ "    # Pass the real question and the executed SQL so the chart code matches the data.\n",
+ "    plotly_code = vn.generate_plotly_code(\n",
+ "        question=query,\n",
+ "        sql=sql_with_table_names,\n",
+ "        df_metadata=f\"Running df.dtypes gives:\\n {df.dtypes}\",\n",
+ "    )\n",
+ "\n",
+ "    fig = vn.get_plotly_figure(plotly_code=plotly_code, df=df)\n",
+ "\n",
+ "    return df, fig\n",
+ "\n",
+ "query = \"Quelle sera la température à Marseille sur les prochaines années ?\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "SQL Prompt: [{'role': 'system', 'content': \"You are a SQLite expert. Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. \\n===Tables \\n\\n CREATE TABLE Mean_winter_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en hiver\\n );\\n \\n\\n\\n CREATE TABLE Mean_summer_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en été\\n );\\n \\n\\n\\n===Additional Context \\n\\n\\n The Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 35°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX35D': Number of days with Tx ≥ 35°C.\\n \\n\\n\\n The Number of days with Tx above 30C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 30°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX30D': Number of days with Tx ≥ 30°C.\\n \\n\\n===Response 
Guidelines \\n1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n3. If the provided context is insufficient, please explain why it can't be generated. \\n4. Please use the most relevant table(s). \\n5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n6. Ensure that the output SQL is SQLite-compliant and executable, and free of syntax errors. \\n\"}, {'role': 'user', 'content': 'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'}]\n",
+ "Using model gpt-4o-mini for 828.0 tokens (approx)\n",
+ "LLM Response: intermediate_sql\n",
+ "```sql\n",
+ "SELECT DISTINCT year FROM Mean_winter_temperature WHERE lat = 43.2961743 AND lon = 5.3699525\n",
+ "UNION\n",
+ "SELECT DISTINCT year FROM Mean_summer_temperature WHERE lat = 43.2961743 AND lon = 5.3699525;\n",
+ "```\n",
+ "Extracted SQL: SELECT DISTINCT year FROM Mean_winter_temperature WHERE lat = 43.2961743 AND lon = 5.3699525\n",
+ "UNION\n",
+ "SELECT DISTINCT year FROM Mean_summer_temperature WHERE lat = 43.2961743 AND lon = 5.3699525;\n",
+ "Running Intermediate SQL: SELECT DISTINCT year FROM Mean_winter_temperature WHERE lat = 43.2961743 AND lon = 5.3699525\n",
+ "UNION\n",
+ "SELECT DISTINCT year FROM Mean_summer_temperature WHERE lat = 43.2961743 AND lon = 5.3699525;\n",
+ "Final SQL Prompt: [{'role': 'system', 'content': \"You are a SQLite expert. Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. \\n===Tables \\n\\n CREATE TABLE Mean_winter_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en hiver\\n );\\n \\n\\n\\n CREATE TABLE Mean_summer_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en été\\n );\\n \\n\\n\\n===Additional Context \\n\\n\\n The Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 35°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX35D': Number of days with Tx ≥ 35°C.\\n \\n\\n\\n The Number of days with Tx above 30C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 30°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX30D': Number of days with Tx ≥ 30°C.\\n \\n\\nThe 
following is a pandas DataFrame with the results of the intermediate SQL query SELECT DISTINCT year FROM Mean_winter_temperature WHERE lat = 43.2961743 AND lon = 5.3699525\\nUNION\\nSELECT DISTINCT year FROM Mean_summer_temperature WHERE lat = 43.2961743 AND lon = 5.3699525;: \\n| year |\\n|--------|\\n\\n===Response Guidelines \\n1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n3. If the provided context is insufficient, please explain why it can't be generated. \\n4. Please use the most relevant table(s). \\n5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n6. Ensure that the output SQL is SQLite-compliant and executable, and free of syntax errors. \\n\"}, {'role': 'user', 'content': 'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'}]\n",
+ "Using model gpt-4o-mini for 903.25 tokens (approx)\n",
+ "LLM Response: La context fourni ne contient pas d'informations sur les prévisions de température pour les prochaines années. Par conséquent, je ne peux pas générer une requête SQL pour répondre à cette question.\n",
+ "Couldn't run sql: Execution failed on sql 'La context fourni ne contient pas d'informations sur les prévisions de température pour les prochaines années. Par conséquent, je ne peux pas générer une requête SQL pour répondre à cette question.': near \"La\": syntax error\n",
+ "execute sql query : To display the source table in the rows, you can use a simple SELECT statement. For example:\n",
+ "\n",
+ "SELECT * FROM your_table_name; \n",
+ "\n",
+ "Replace \"your_table_name\" with the actual name of your source table. This query will retrieve all columns and rows from the specified table.\n"
+ ]
+ },
+ {
+ "ename": "OperationalError",
+ "evalue": "near \"To\": syntax error",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mOperationalError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df, fig \u001b[38;5;241m=\u001b[39m \u001b[43mask_vanna\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(df)\n\u001b[1;32m 3\u001b[0m fig\n",
+ "File \u001b[0;32m~/ai4s/climate_qa/climate-question-answering/climateqa/engine/talk_to_data/main.py:71\u001b[0m, in \u001b[0;36mask_vanna\u001b[0;34m(query)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexecute sql query : \u001b[39m\u001b[38;5;124m\"\u001b[39m, sql_with_table_names)\n\u001b[1;32m 70\u001b[0m db \u001b[38;5;241m=\u001b[39m sqlite3\u001b[38;5;241m.\u001b[39mconnect(db_vanna_path)\n\u001b[0;32m---> 71\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcursor\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql_with_table_names\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mfetchall()\n\u001b[1;32m 72\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(result, columns\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_name\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(result_dataframe\u001b[38;5;241m.\u001b[39mcolumns))\n\u001b[1;32m 74\u001b[0m plotly_code \u001b[38;5;241m=\u001b[39m vn\u001b[38;5;241m.\u001b[39mgenerate_plotly_code(\n\u001b[1;32m 75\u001b[0m question\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 76\u001b[0m sql\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msql_with_table_names\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 77\u001b[0m df_metadata\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRunning df.dtypes gives:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdf\u001b[38;5;241m.\u001b[39mdtypes\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 78\u001b[0m )\n",
+ "\u001b[0;31mOperationalError\u001b[0m: near \"To\": syntax error"
+ ]
+ }
+ ],
+ "source": [
+ "df, fig = ask_vanna(query)\n",
+ "display(df)  # rich DataFrame rendering instead of a plain-text print\n",
+ "fig"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "SQL Prompt: [{'role': 'system', 'content': \"You are a SQLite expert. Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. \\n===Tables \\n\\n CREATE TABLE Mean_winter_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en hiver\\n );\\n \\n\\n\\n CREATE TABLE Mean_summer_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en été\\n );\\n \\n\\n\\n===Additional Context \\n\\n\\n The Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 35°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX35D': Number of days with Tx ≥ 35°C.\\n \\n\\n\\n The Number of days with Tx above 30C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 30°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX30D': Number of days with Tx ≥ 30°C.\\n \\n\\n===Response 
Guidelines \\n1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n3. If the provided context is insufficient, please explain why it can't be generated. \\n4. Please use the most relevant table(s). \\n5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n6. Ensure that the output SQL is SQLite-compliant and executable, and free of syntax errors. \\n\"}, {'role': 'user', 'content': 'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'}]\n",
+ "Using model gpt-4o-mini for 828.0 tokens (approx)\n",
+ "LLM Response: ```sql\n",
+ "SELECT year, TMm \n",
+ "FROM Mean_winter_temperature \n",
+ "WHERE lat = 43.2961743 AND lon = 5.3699525\n",
+ "UNION ALL\n",
+ "SELECT year, TMm \n",
+ "FROM Mean_summer_temperature \n",
+ "WHERE lat = 43.2961743 AND lon = 5.3699525;\n",
+ "```\n",
+ "Extracted SQL: SELECT year, TMm \n",
+ "FROM Mean_winter_temperature \n",
+ "WHERE lat = 43.2961743 AND lon = 5.3699525\n",
+ "UNION ALL\n",
+ "SELECT year, TMm \n",
+ "FROM Mean_summer_temperature \n",
+ "WHERE lat = 43.2961743 AND lon = 5.3699525;\n",
+ "Using model gpt-4o-mini for 218.5 tokens (approx)\n",
+ "[(2031, 9.952474117647114), (2031, 9.952474117647114), (2032, 10.142322941176474), (2032, 10.142322941176474), (2033, 9.907942941176486), (2033, 9.907942941176486), (2034, 9.548873529411765), (2034, 9.548873529411765), (2035, 10.284758235294191), (2035, 10.284758235294191), (2036, 10.372100000000046), (2036, 10.372100000000046), (2037, 9.98571000000004), (2037, 9.98571000000004), (2038, 10.221372352941216), (2038, 10.221372352941216), (2039, 10.222609411764722), (2039, 10.222609411764722), (2040, 10.473662941176485), (2040, 10.473662941176485), (2041, 10.427640588235306), (2041, 10.427640588235306), (2042, 10.364736470588241), (2042, 10.364736470588241), (2043, 10.112910588235309), (2043, 10.112910588235309), (2044, 10.250792352941176), (2044, 10.250792352941176), (2045, 10.166119411764669), (2045, 10.166119411764669), (2046, 10.728997647058861), (2046, 10.728997647058861), (2047, 10.347248823529412), (2047, 10.347248823529412), (2048, 10.706604117647089), (2048, 10.706604117647089), (2049, 10.59243764705883), (2049, 10.59243764705883), (2050, 10.63225529411767), (2050, 10.63225529411767), (2031, 24.061035294117687), (2031, 24.061035294117687), (2032, 24.530692941176483), (2032, 24.530692941176483), (2033, 24.722234705882386), (2033, 24.722234705882386), (2034, 23.84629176470588), (2034, 23.84629176470588), (2035, 24.231422352941195), (2035, 24.231422352941195), (2036, 24.488941764705885), (2036, 24.488941764705885), (2037, 24.79424117647062), (2037, 24.79424117647062), (2038, 24.730553529411793), (2038, 24.730553529411793), (2039, 24.44979882352942), (2039, 24.44979882352942), (2040, 24.40726882352942), (2040, 24.40726882352942), (2041, 24.768547647058824), (2041, 24.768547647058824), (2042, 24.53479647058822), (2042, 24.53479647058822), (2043, 24.769181176470624), (2043, 24.769181176470624), (2044, 24.489877058823538), (2044, 24.489877058823538), (2045, 24.448076470588262), (2045, 24.448076470588262), (2046, 25.111282352941203), (2046, 25.111282352941203), 
(2047, 24.72313823529413), (2047, 24.72313823529413), (2048, 25.187577058823535), (2048, 25.187577058823535), (2049, 24.829653529411814), (2049, 24.829653529411814), (2050, 25.053394117647144), (2050, 25.053394117647144)]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# If a location is detected in the question, swap it for coordinates, let Vanna\n",
+ "# draft the SQL, then re-run that SQL with the coordinates returned by\n",
+ "# nearestNeighbourSQL (presumably the closest point stored in the table - confirm).\n",
+ "# Otherwise fall back to a plain vn.ask on the original question.\n",
+ "location = detect_location_with_openai(OPENAI_API_KEY, query)\n",
+ "if location:\n",
+ "    coords = loc2coords(location)\n",
+ "    user_input = query.replace(location, f\"lat, long : {coords}\")\n",
+ "\n",
+ "    answer = vn.ask(user_input, print_results=False, allow_llm_to_see_data=True)\n",
+ "    table = detectTable(answer[0])\n",
+ "\n",
+ "    coords2 = nearestNeighbourSQL(db_vanna_path, coords, table[0])\n",
+ "\n",
+ "    # Do not overwrite `query` (the user's question) with SQL: the cell must stay\n",
+ "    # re-runnable, so the rewritten statement gets its own names.\n",
+ "    sql_with_lat = answer[0].replace(f\"{coords[0]}\", f\"{coords2[0]}\")\n",
+ "    sql_query = sql_with_lat.replace(f\"{coords[1]}\", f\"{coords2[1]}\")\n",
+ "\n",
+ "    db = sqlite3.connect(db_vanna_path)\n",
+ "    try:\n",
+ "        cursor = db.cursor().execute(sql_query)\n",
+ "        result = cursor.fetchall()\n",
+ "        # Take the column names from the statement that was actually executed\n",
+ "        # (description is None for statements that return no rows).\n",
+ "        result_columns = [column[0] for column in (cursor.description or ())]\n",
+ "    finally:\n",
+ "        db.close()  # release the SQLite handle even when the query fails\n",
+ "    print(result)\n",
+ "    df = pd.DataFrame(result, columns=result_columns)\n",
+ "\n",
+ "else:\n",
+ "    answer = vn.ask(query, visualize=True, print_results=False, allow_llm_to_see_data=True)\n",
+ "    sql_query = answer[0]\n",
+ "    df = answer[1]"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "climateqa",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}