diff --git "a/climateqa/engine/talk_to_data/step_by_step_vanna.ipynb" "b/climateqa/engine/talk_to_data/step_by_step_vanna.ipynb" new file mode 100644--- /dev/null +++ "b/climateqa/engine/talk_to_data/step_by_step_vanna.ipynb" @@ -0,0 +1,3221 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading embeddings model: BAAI/bge-base-en-v1.5\n" + ] + } + ], + "source": [ + "import sys\n", + "import os\n", + "sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd()))))\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "from main import ask_vanna\n", + "import sqlite3\n", + "import os\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# test" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "table_names_list = [\"Frequency_of_rainy_days_index\",\n", + "\"Winter_precipitation_total\",\n", + "\"Summer_precipitation_total\",\n", + "\"Annual_precipitation_total\",\n", + "\"Remarkable_daily_precipitation_total_(Q99)\",\n", + "\"Frequency_of_remarkable_daily_precipitation\",\n", + "\"Extreme_precipitation_intensity\",\n", + "\"Mean_winter_temperature\",\n", + "\"Mean_summer_temperature\",\n", + "\"Number_of_tropical_nights\",\n", + "\"Maximum_summer_temperature\",\n", + "\"Number_of_days_with_Tx_above_30C\",\n", + "\"Number_of_days_with_Tx_above_35C\",\n", + "\"Drought_index\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from climateqa.engine.llm import get_llm\n", + "\n", + "llm = get_llm(provider=\"openai\")\n", + "user_question = \"Quel sera la température à Marseille dans les prochaines années ?\"\n", + "prompt = f\"You are helping to build a sql query to retrieve relevant data for a user question. 
The different tables are {table_names_list}. The user question is {user_question}. Write the relevant table to query. Answer only the table name.\"\n", + "# The answer is used below as a key into `docs`, so trim surrounding whitespace/newlines from the LLM reply.\n", + "table_name = llm.invoke(prompt).content.strip()\n", + "# llm.invoke(f\"Make the following sql query display the source table in the rows {sql_query_new_coords}. Just answer the query. The answer should not include ```sql\\n\").content\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# Per-table context injected into the SQL-generation prompt: a prose description and the table DDL.\n", + "docs = {\"Mean_summer_temperature\": {\n", + " \"description\": (\n", + " \"The Mean summer temperature table contains information on the average summer temperature in the past and the future. \"\n", + " \"The variables are as follows:\\n\"\n", + " \"- 'y' and 'x': Lambert Paris II coordinates for the location.\\n\"\n", + " \"- year: Year of the observation.\\n\"\n", + " \"- month : Month of the observation.\\n\"\n", + " \"- day: Day of the observation.\\n\"\n", + " \"- 'LambertParisII': Indicates that the x and y coordinates are in Lambert Paris II projection.\\n\"\n", + " \"- 'lat' and 'lon': Latitude and longitude of the location.\\n\"\n", + " \"- 'TMm': Average summer temperature.\\n\"\n", + " ),\n", + " \"sql_query\": \"\"\"\n", + " CREATE TABLE Mean_summer_temperature (\n", + " y FLOAT,\n", + " x FLOAT,\n", + " year INT,\n", + " month INT, \n", + " day INT,\n", + " LambertParisII VARCHAR(255),\n", + " lat FLOAT,\n", + " lon FLOAT,\n", + " TMm FLOAT -- Température moyenne en été\n", + " );\n", + " \"\"\"}\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "from climateqa.engine.talk_to_data.utils import loc2coords\n", + "location = \"Marseille\"\n", + "coords = loc2coords(location)\n", + "user_input = user_question.replace(location, f\"lat, long : {coords}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "initial_prompt = f\"You are 
a SQLite expert. \" + \\\n", + " \"Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. \"\n", + "# Dialect note: this notebook opens the DRIAS database with vn.connect_to_sqlite, so the prompt must ask for SQLite SQL.\n", + "# A MySQL-flavoured answer (e.g. YEAR(CURDATE())) cannot run against SQLite.\n", + "initial_prompt += f\"\\n===Tables \\n{docs[table_name]['sql_query']}\"\n", + "initial_prompt += f\"\\n===Additional Context \\n\\n {docs[table_name]['description']}\"\n", + "initial_prompt += (\n", + " \"===Response Guidelines \\n\"\n", + " \"1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n\"\n", + " \"2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n\"\n", + " \"3. If the provided context is insufficient, please give a sql query based on your knowledge and the context provided. \\n\"\n", + " \"4. Please use the most relevant table(s). \\n\"\n", + " \"5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n\"\n", + " f\"6. Ensure that the output SQL is SQLite-compliant and executable, and free of syntax errors. 
\\n\"\n", + " )\n", + "initial_prompt += f\"\\n===Question \\n\\n {user_input}\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'```sql\\nSELECT year, TMm \\nFROM Mean_summer_temperature \\nWHERE lat = 43.2961743 AND lon = 5.3699525 AND year > YEAR(CURDATE());\\n```'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_query = llm.invoke(initial_prompt).content\n", + "sql_query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Vanna Ask\n" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SQL Prompt: [{'role': 'system', 'content': \"You are a SQLite expert. Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. 
\\n===Tables \\nCREATE TABLE Mean_winter_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT\\n);\\n\\nCREATE TABLE Mean_winter_temperature ( y FLOAT, x FLOAT, year INT, \\n month INT, \\n day INT \\n, LambertParisII VARCHAR(255), lat FLOAT, lon FLOAT, TMm FLOAT);\\n\\n\\n===Additional Context \\n\\nThe Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the futureis greater than or equal to 35°C.The variables are as follows:- 'y' and 'x': Lambert Paris II coordinates for the location.- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.- 'lat' and 'lon': Latitude and longitude of the location.- 'TX35D': Number of days with Tx ≥ 35°C.\\n\\nThe Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the future\\nis greater than or equal to 35°C.\\nThe variables are as follows:\\n- 'y' and 'x': Lambert Paris II coordinates for the location.\\n- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n- 'lat' and 'lon': Latitude and longitude of the location.\\n- 'TX35D': Number of days with Tx ≥ 35°C.\\n\\n===Response Guidelines \\n1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n3. 
If the provided context is insufficient, please give a sql query based on your knowledge and the context provided. \\n4. Please use the most relevant table(s). \\n5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n6. Ensure that the output SQL is SQLite-compliant and executable, and free of syntax errors. \\n\"}, {'role': 'user', 'content': 'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'}]\n", + "Using model gpt-4o-mini for 664.25 tokens (approx)\n", + "LLM Response: ```sql\n", + "SELECT year, month, day, TMm \n", + "FROM Mean_winter_temperature \n", + "WHERE lat = 43.2961743 AND lon = 5.3699525;\n", + "```\n", + "Extracted SQL: SELECT year, month, day, TMm \n", + "FROM Mean_winter_temperature \n", + "WHERE lat = 43.2961743 AND lon = 5.3699525;\n", + "Using model gpt-4o-mini for 204.75 tokens (approx)\n", + "execute sql query : SELECT 'Mean_winter_temperature' AS source_table, year, month, day, TMm \n", + "FROM Mean_winter_temperature \n", + "WHERE lat = 43.166954040527344 AND lon = 5.430534839630127;\n", + "Using model gpt-4o-mini for 159.25 tokens (approx)\n" + ] + }, + { + "data": { + "text/plain": [ + "( 0 1 2 3\n", + " 0 2031 1 14 9.952474\n", + " 1 2031 1 14 9.952474\n", + " 2 2032 1 15 10.142323\n", + " 3 2032 1 15 10.142323\n", + " 4 2033 1 14 9.907943\n", + " 5 2033 1 14 9.907943\n", + " 6 2034 1 14 9.548874\n", + " 7 2034 1 14 9.548874\n", + " 8 2035 1 14 10.284758\n", + " 9 2035 1 14 10.284758\n", + " 10 2036 1 15 10.372100\n", + " 11 2036 1 15 10.372100\n", + " 12 2037 1 14 9.985710\n", + " 13 2037 1 14 9.985710\n", + " 14 2038 1 14 10.221372\n", + " 15 2038 1 14 10.221372\n", + " 16 2039 1 14 10.222609\n", + " 17 2039 1 14 10.222609\n", + " 18 2040 1 15 10.473663\n", + " 19 2040 1 15 10.473663\n", + " 20 2041 1 14 10.427641\n", + " 21 2041 1 14 10.427641\n", + " 22 2042 1 14 10.364736\n", + " 23 2042 1 14 10.364736\n", + " 24 2043 1 14 
10.112911\n", + " 25 2043 1 14 10.112911\n", + " 26 2044 1 15 10.250792\n", + " 27 2044 1 15 10.250792\n", + " 28 2045 1 14 10.166119\n", + " 29 2045 1 14 10.166119\n", + " 30 2046 1 14 10.728998\n", + " 31 2046 1 14 10.728998\n", + " 32 2047 1 14 10.347249\n", + " 33 2047 1 14 10.347249\n", + " 34 2048 1 15 10.706604\n", + " 35 2048 1 15 10.706604\n", + " 36 2049 1 14 10.592438\n", + " 37 2049 1 14 10.592438\n", + " 38 2050 1 14 10.632255\n", + " 39 2050 1 14 10.632255,\n", + " Figure({\n", + " 'data': [{'name': 'Bar Chart',\n", + " 'type': 'bar',\n", + " 'x': array([2031, 2031, 2032, 2032, 2033, 2033, 2034, 2034, 2035, 2035, 2036, 2036,\n", + " 2037, 2037, 2038, 2038, 2039, 2039, 2040, 2040, 2041, 2041, 2042, 2042,\n", + " 2043, 2043, 2044, 2044, 2045, 2045, 2046, 2046, 2047, 2047, 2048, 2048,\n", + " 2049, 2049, 2050, 2050]),\n", + " 'y': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])},\n", + " {'mode': 'lines+markers',\n", + " 'name': 'Line Chart',\n", + " 'type': 'scatter',\n", + " 'x': array([2031, 2031, 2032, 2032, 2033, 2033, 2034, 2034, 2035, 2035, 2036, 2036,\n", + " 2037, 2037, 2038, 2038, 2039, 2039, 2040, 2040, 2041, 2041, 2042, 2042,\n", + " 2043, 2043, 2044, 2044, 2045, 2045, 2046, 2046, 2047, 2047, 2048, 2048,\n", + " 2049, 2049, 2050, 2050]),\n", + " 'y': array([ 9.95247412, 9.95247412, 10.14232294, 10.14232294, 9.90794294,\n", + " 9.90794294, 9.54887353, 9.54887353, 10.28475824, 10.28475824,\n", + " 10.3721 , 10.3721 , 9.98571 , 9.98571 , 10.22137235,\n", + " 10.22137235, 10.22260941, 10.22260941, 10.47366294, 10.47366294,\n", + " 10.42764059, 10.42764059, 10.36473647, 10.36473647, 10.11291059,\n", + " 10.11291059, 10.25079235, 10.25079235, 10.16611941, 10.16611941,\n", + " 10.72899765, 10.72899765, 10.34724882, 10.34724882, 10.70660412,\n", + " 10.70660412, 10.59243765, 10.59243765, 10.63225529, 10.63225529])}],\n", + " 'layout': {'template': '...'}\n", + " 
}))" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res = ask_vanna(\"Quelle sera la température à Marseille sur les prochaines années ?\")\n", + "res" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123
020311149.952474
120311149.952474
2203211510.142323
3203211510.142323
420331149.907943
520331149.907943
620341149.548874
720341149.548874
8203511410.284758
9203511410.284758
10203611510.372100
11203611510.372100
1220371149.985710
1320371149.985710
14203811410.221372
15203811410.221372
16203911410.222609
17203911410.222609
18204011510.473663
19204011510.473663
20204111410.427641
21204111410.427641
22204211410.364736
23204211410.364736
24204311410.112911
25204311410.112911
26204411510.250792
27204411510.250792
28204511410.166119
29204511410.166119
30204611410.728998
31204611410.728998
32204711410.347249
33204711410.347249
34204811510.706604
35204811510.706604
36204911410.592438
37204911410.592438
38205011410.632255
39205011410.632255
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3\n", + "0 2031 1 14 9.952474\n", + "1 2031 1 14 9.952474\n", + "2 2032 1 15 10.142323\n", + "3 2032 1 15 10.142323\n", + "4 2033 1 14 9.907943\n", + "5 2033 1 14 9.907943\n", + "6 2034 1 14 9.548874\n", + "7 2034 1 14 9.548874\n", + "8 2035 1 14 10.284758\n", + "9 2035 1 14 10.284758\n", + "10 2036 1 15 10.372100\n", + "11 2036 1 15 10.372100\n", + "12 2037 1 14 9.985710\n", + "13 2037 1 14 9.985710\n", + "14 2038 1 14 10.221372\n", + "15 2038 1 14 10.221372\n", + "16 2039 1 14 10.222609\n", + "17 2039 1 14 10.222609\n", + "18 2040 1 15 10.473663\n", + "19 2040 1 15 10.473663\n", + "20 2041 1 14 10.427641\n", + "21 2041 1 14 10.427641\n", + "22 2042 1 14 10.364736\n", + "23 2042 1 14 10.364736\n", + "24 2043 1 14 10.112911\n", + "25 2043 1 14 10.112911\n", + "26 2044 1 15 10.250792\n", + "27 2044 1 15 10.250792\n", + "28 2045 1 14 10.166119\n", + "29 2045 1 14 10.166119\n", + "30 2046 1 14 10.728998\n", + "31 2046 1 14 10.728998\n", + "32 2047 1 14 10.347249\n", + "33 2047 1 14 10.347249\n", + "34 2048 1 15 10.706604\n", + "35 2048 1 15 10.706604\n", + "36 2049 1 14 10.592438\n", + "37 2049 1 14 10.592438\n", + "38 2050 1 14 10.632255\n", + "39 2050 1 14 10.632255" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading embeddings model: BAAI/bge-base-en-v1.5\n" + ] + } + ], + "source": [ + "from climateqa.engine.talk_to_data.myVanna import MyVanna\n", + "from climateqa.engine.talk_to_data.utils import loc2coords, detect_location_with_openai, detectTable, nearestNeighbourSQL\n", + "\n", + "from climateqa.engine.llm import get_llm\n", + "\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv()\n", + "\n", + "llm = get_llm(provider=\"openai\")\n", + "\n", + "OPENAI_API_KEY = 
os.getenv('OPENAI_API_KEY')\n", + "PC_API_KEY = os.getenv('VANNA_PINECONE_API_KEY')\n", + "INDEX_NAME = os.getenv('VANNA_INDEX_NAME')\n", + "VANNA_MODEL = os.getenv('VANNA_MODEL')\n", + "\n", + "\n", + "#Vanna object\n", + "vn = MyVanna(config = {\"temperature\": 0, \"api_key\": OPENAI_API_KEY, 'model': VANNA_MODEL, 'pc_api_key': PC_API_KEY, 'index_name': INDEX_NAME, \"top_k\" : 5})\n", + "db_vanna_path = \"database/drias.db\"\n", + "vn.connect_to_sqlite(db_vanna_path)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"Quelle sera la température à Marseille sur les prochaines années ?\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Detect location" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Marseille\n" + ] + } + ], + "source": [ + "location = detect_location_with_openai(OPENAI_API_KEY, query)\n", + "print(location)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Convert location to latitude, longitude coordinates" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?\n" + ] + } + ], + "source": [ + "coords = loc2coords(location)\n", + "user_input = query.replace(location, f\"lat, long : {coords}\")\n", + "print(user_input)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query DRIAS on this location" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'" + ] + }, + "execution_count": 19, + "metadata": {}, +
"output_type": "execute_result" + } + ], + "source": [ + "user_input" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SQL Prompt: [{'role': 'system', 'content': \"You are a SQLite expert. Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. \\n===Tables \\nCREATE TABLE Mean_winter_temperature ( y FLOAT, x FLOAT, year INT, \\n month INT, \\n day INT \\n, LambertParisII VARCHAR(255), lat FLOAT, lon FLOAT, TMm FLOAT);\\n\\nCREATE TABLE Mean_winter_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT\\n);\\n\\nCREATE TABLE Mean_summer_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT\\n);\\n\\nCREATE TABLE Mean_summer_temperature ( y FLOAT, x FLOAT, year INT, \\n month INT, \\n day INT \\n, LambertParisII VARCHAR(255), lat FLOAT, lon FLOAT, TMm FLOAT);\\n\\nCREATE TABLE Maximum_summer_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TXm FLOAT\\n);\\n\\n\\n===Additional Context \\n\\nThe Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the futureis greater than or equal to 35°C.The variables are as follows:- 'y' and 'x': Lambert Paris II coordinates for the location.- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.- 'lat' and 'lon': Latitude and longitude of the location.- 'TX35D': Number of days with Tx ≥ 
35°C.\\n\\nThe Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the future\\nis greater than or equal to 35°C.\\nThe variables are as follows:\\n- 'y' and 'x': Lambert Paris II coordinates for the location.\\n- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n- 'lat' and 'lon': Latitude and longitude of the location.\\n- 'TX35D': Number of days with Tx ≥ 35°C.\\n\\nThe Number of days with Tx above 30C table contains information on the number of days when the maximum temperature in the past and the futureis greater than or equal to 30°C.The variables are as follows:- 'y' and 'x': Lambert Paris II coordinates for the location.- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.- 'lat' and 'lon': Latitude and longitude of the location.- 'TX30D': Number of days with Tx ≥ 30°C.\\n\\nThe Number of days with Tx above 30C table contains information on the number of days when the maximum temperature in the past and the future\\nis greater than or equal to 30°C.\\nThe variables are as follows:\\n- 'y' and 'x': Lambert Paris II coordinates for the location.\\n- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n- 'lat' and 'lon': Latitude and longitude of the location.\\n- 'TX30D': Number of days with Tx ≥ 30°C.\\n\\nThe Mean winter temperature table contains information on the average (mean) winter temperature in the past and the future.\\nThe variables are as follows:\\n- 'y' and 'x': Lambert Paris II coordinates for the 
location.\\n- year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n- 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n- 'lat' and 'lon': Latitude and longitude of the location.\\n- 'TMm': Average winter temperature.\\n\\n===Response Guidelines \\n1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n3. If the provided context is insufficient, please give a sql query based on your knowledge and the context provided. \\n4. Please use the most relevant table(s). \\n5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n6. Ensure that the output SQL is SQLite-compliant and executable, and free of syntax errors. 
\\n\"}, {'role': 'user', 'content': 'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'}]\n", + "Using model gpt-4o-mini for 1235.0 tokens (approx)\n", + "LLM Response: ```sql\n", + "SELECT year, month, day, TMm \n", + "FROM Mean_winter_temperature \n", + "WHERE lat = 43.2961743 AND lon = 5.3699525;\n", + "```\n", + "Extracted SQL: SELECT year, month, day, TMm \n", + "FROM Mean_winter_temperature \n", + "WHERE lat = 43.2961743 AND lon = 5.3699525;\n", + "Using model gpt-4o-mini for 204.75 tokens (approx)\n", + "\n", + "SQL QUERY : SELECT year, month, day, TMm \n", + "FROM Mean_winter_temperature \n", + "WHERE lat = 43.2961743 AND lon = 5.3699525;\n" + ] + } + ], + "source": [ + "sql_query, result_dataframe, figure = vn.ask(user_input, print_results=False, allow_llm_to_see_data=True)\n", + "# sql_query, result_dataframe, figure = vn.ask(approx_user_input, print_results=False, allow_llm_to_see_data=True)\n", + "print(\"\\nSQL QUERY :\", sql_query)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yxLambertParisIIlatlonTMmyearmonthday
01801000.0852000.00.99987743.1669545.4305359.9524742031114
11801000.0852000.00.99987743.1669545.4305359.9524742031114
21801000.0852000.00.99987743.1669545.43053510.1423232032115
31801000.0852000.00.99987743.1669545.43053510.1423232032115
41801000.0852000.00.99987743.1669545.4305359.9079432033114
\n", + "
" + ], + "text/plain": [ + " y x LambertParisII lat lon TMm year \\\n", + "0 1801000.0 852000.0 0.999877 43.166954 5.430535 9.952474 2031 \n", + "1 1801000.0 852000.0 0.999877 43.166954 5.430535 9.952474 2031 \n", + "2 1801000.0 852000.0 0.999877 43.166954 5.430535 10.142323 2032 \n", + "3 1801000.0 852000.0 0.999877 43.166954 5.430535 10.142323 2032 \n", + "4 1801000.0 852000.0 0.999877 43.166954 5.430535 9.907943 2033 \n", + "\n", + " month day \n", + "0 1 14 \n", + "1 1 14 \n", + "2 1 15 \n", + "3 1 15 \n", + "4 1 14 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# vn.run_sql(\"SELECT * FROM Mean_winter_temperature WHERE lat = 43.166954040527344 AND lon = 5.430534839630127 LIMIT 5;\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Replace coordinate to existing coordinate in the table" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Mean_winter_temperature']\n" + ] + } + ], + "source": [ + "table = detectTable(sql_query)\n", + "print(table)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(43.166954040527344, 5.430534839630127)]" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coords_tables = [nearestNeighbourSQL(db_vanna_path, coords, table[i]) for i in range(len(table))]\n", + "coords_tables" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'SELECT year, month, day, TMm \\nFROM Mean_winter_temperature \\nWHERE lat = 43.166954040527344 AND lon = 5.430534839630127;'" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n = sql_query.count(str(coords[0]))\n", + 
"sql_query_new_coords = sql_query\n", + "\n", + "for i in range(n):\n", + " sql_query_new_coords = sql_query_new_coords.replace(str(coords[0]), str(coords_tables[i][0]),1)\n", + " sql_query_new_coords = sql_query_new_coords.replace(str(coords[1]), str(coords_tables[i][1]),1)\n", + "sql_query_new_coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# sql_with_table_names = llm.invoke(f\"Make the following sql query display the source table in the rows {sql_query_new_coords}. Just answer the query. The answer should not include ```sql\\n\").content\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query back the DB" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "# sql_query_new_coords = sql_query.replace(f\"{coords[0]}\", f\"{coords2[0]}\").replace(f\"{coords[1]}\", f\"{coords2[1]}\").replace(\"\\n\", \"\")\n", + "# sql_query_new_coords" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'SELECT year, month, day, TMm \\nFROM Mean_winter_temperature \\nWHERE lat = 43.2961743 AND lon = 5.3699525;'" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_query" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "# res_new = vn.ask('SELECT year, TMm \\nFROM Mean_winter_temperature \\nWHERE lat = 43.2961743 AND lon = 5.3699525\\nUNION ALL\\nSELECT year, TMm \\nFROM Mean_summer_temperature \\nWHERE lat = 43.2961743 AND lon = 5.3699525;')\n", + "# res_new" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# sql_query_new_coords = 'SELECT \"Mean_winter_temperature\" AS table_name, lat, lon, year, TMm \\nFROM Mean_winter_temperature \\nWHERE lat 
= 43.166954040527344 AND lon = 5.430534839630127\\nUNION ALL\\nSELECT \"Mean_summer_temperature\" AS table_name, lat, lon, year, TMm \\nFROM Mean_summer_temperature \\nWHERE lat = 43.166954040527344 AND lon = 5.430534839630127;'\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'year, month, day, TMm'" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_query_new_coords.split(\"SELECT\")[1].split(\"FROM\")[0].strip()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
latlonyearmonthdayTMm
043.1669545.43053520311149.952474
143.1669545.43053520311149.952474
243.1669545.430535203211510.142323
343.1669545.430535203211510.142323
443.1669545.43053520331149.907943
543.1669545.43053520331149.907943
643.1669545.43053520341149.548874
743.1669545.43053520341149.548874
843.1669545.430535203511410.284758
943.1669545.430535203511410.284758
1043.1669545.430535203611510.372100
1143.1669545.430535203611510.372100
1243.1669545.43053520371149.985710
1343.1669545.43053520371149.985710
1443.1669545.430535203811410.221372
1543.1669545.430535203811410.221372
1643.1669545.430535203911410.222609
1743.1669545.430535203911410.222609
1843.1669545.430535204011510.473663
1943.1669545.430535204011510.473663
2043.1669545.430535204111410.427641
2143.1669545.430535204111410.427641
2243.1669545.430535204211410.364736
2343.1669545.430535204211410.364736
2443.1669545.430535204311410.112911
2543.1669545.430535204311410.112911
2643.1669545.430535204411510.250792
2743.1669545.430535204411510.250792
2843.1669545.430535204511410.166119
2943.1669545.430535204511410.166119
3043.1669545.430535204611410.728998
3143.1669545.430535204611410.728998
3243.1669545.430535204711410.347249
3343.1669545.430535204711410.347249
3443.1669545.430535204811510.706604
3543.1669545.430535204811510.706604
3643.1669545.430535204911410.592438
3743.1669545.430535204911410.592438
3843.1669545.430535205011410.632255
3943.1669545.430535205011410.632255
\n", + "
" + ], + "text/plain": [ + " lat lon year month day TMm\n", + "0 43.166954 5.430535 2031 1 14 9.952474\n", + "1 43.166954 5.430535 2031 1 14 9.952474\n", + "2 43.166954 5.430535 2032 1 15 10.142323\n", + "3 43.166954 5.430535 2032 1 15 10.142323\n", + "4 43.166954 5.430535 2033 1 14 9.907943\n", + "5 43.166954 5.430535 2033 1 14 9.907943\n", + "6 43.166954 5.430535 2034 1 14 9.548874\n", + "7 43.166954 5.430535 2034 1 14 9.548874\n", + "8 43.166954 5.430535 2035 1 14 10.284758\n", + "9 43.166954 5.430535 2035 1 14 10.284758\n", + "10 43.166954 5.430535 2036 1 15 10.372100\n", + "11 43.166954 5.430535 2036 1 15 10.372100\n", + "12 43.166954 5.430535 2037 1 14 9.985710\n", + "13 43.166954 5.430535 2037 1 14 9.985710\n", + "14 43.166954 5.430535 2038 1 14 10.221372\n", + "15 43.166954 5.430535 2038 1 14 10.221372\n", + "16 43.166954 5.430535 2039 1 14 10.222609\n", + "17 43.166954 5.430535 2039 1 14 10.222609\n", + "18 43.166954 5.430535 2040 1 15 10.473663\n", + "19 43.166954 5.430535 2040 1 15 10.473663\n", + "20 43.166954 5.430535 2041 1 14 10.427641\n", + "21 43.166954 5.430535 2041 1 14 10.427641\n", + "22 43.166954 5.430535 2042 1 14 10.364736\n", + "23 43.166954 5.430535 2042 1 14 10.364736\n", + "24 43.166954 5.430535 2043 1 14 10.112911\n", + "25 43.166954 5.430535 2043 1 14 10.112911\n", + "26 43.166954 5.430535 2044 1 15 10.250792\n", + "27 43.166954 5.430535 2044 1 15 10.250792\n", + "28 43.166954 5.430535 2045 1 14 10.166119\n", + "29 43.166954 5.430535 2045 1 14 10.166119\n", + "30 43.166954 5.430535 2046 1 14 10.728998\n", + "31 43.166954 5.430535 2046 1 14 10.728998\n", + "32 43.166954 5.430535 2047 1 14 10.347249\n", + "33 43.166954 5.430535 2047 1 14 10.347249\n", + "34 43.166954 5.430535 2048 1 15 10.706604\n", + "35 43.166954 5.430535 2048 1 15 10.706604\n", + "36 43.166954 5.430535 2049 1 14 10.592438\n", + "37 43.166954 5.430535 2049 1 14 10.592438\n", + "38 43.166954 5.430535 2050 1 14 10.632255\n", + "39 43.166954 5.430535 2050 1 14 10.632255" 
# %%
# Re-run the coordinate-adjusted query against the SQLite database, prepending
# `lat` and `lon` to every SELECT so each row carries the grid point it was
# read from.
db = sqlite3.connect(db_vanna_path)
try:
    cursor = db.execute(sql_query_new_coords.replace("SELECT", "SELECT lat, lon,"))
    result = cursor.fetchall()
    # Read the labels from the cursor rather than splitting the SQL text on ",":
    # the split left a leading space on every name after the first (" month",
    # " day", " TMm") and only looked at the first SELECT list.
    column_names = [column[0] for column in cursor.description]
finally:
    # Release the database handle even when the query fails.
    db.close()

df = pd.DataFrame(result, columns=column_names)
df
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatalatlonyearTMm
0Mean_winter_temperature43.1669545.43053520319.952474
1Mean_winter_temperature43.1669545.43053520319.952474
2Mean_winter_temperature43.1669545.430535203210.142323
3Mean_winter_temperature43.1669545.430535203210.142323
4Mean_winter_temperature43.1669545.43053520339.907943
..................
75Mean_summer_temperature43.1669545.430535204825.187577
76Mean_summer_temperature43.1669545.430535204924.829654
77Mean_summer_temperature43.1669545.430535204924.829654
78Mean_summer_temperature43.1669545.430535205025.053394
79Mean_summer_temperature43.1669545.430535205025.053394
\n", + "

80 rows × 5 columns

\n", + "
# %%
# Hand-written variant of the query that tags every row with its source table.
# NOTE(review): the table labels are written with double quotes. SQLite reads
# them as string literals only because neither table has a column of that
# name; single quotes are the standard spelling for a string literal.
sql_query_new_coords = 'SELECT "Mean_winter_temperature" AS table_name, lat, lon, year, TMm \nFROM Mean_winter_temperature \nWHERE lat = 43.166954040527344 AND lon = 5.430534839630127\nUNION ALL\nSELECT "Mean_summer_temperature" AS table_name, lat, lon, year, TMm \nFROM Mean_summer_temperature \nWHERE lat = 43.166954040527344 AND lon = 5.430534839630127;'

db = sqlite3.connect(db_vanna_path)
try:
    result = db.execute(sql_query_new_coords).fetchall()
finally:
    # Release the database handle even when the query fails.
    db.close()

# The first column is labelled "Data" here although the SQL aliases it
# `table_name`.
# NOTE(review): the stored output of this cell lists every (table, year) row
# twice -- presumably the tables contain duplicate rows; confirm before plotting.
df = pd.DataFrame(result, columns=["Data", "lat", "lon", "year", "TMm"])
df

# %%
# Ask Vanna for Plotly code describing `df`, then build the figure from it.
# NOTE(review): in the visible part of the notebook `query` is only assigned in
# the "Whole Vanna workflow" section further down; confirm it is also defined
# before this cell so a top-to-bottom run works.
plotly_code = vn.generate_plotly_code(
    question=query,
    sql=sql_query_new_coords,
    df_metadata=f"Running df.dtypes gives:\n {df.dtypes}",
)

fig = vn.get_plotly_figure(plotly_code=plotly_code, df=df)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatalatlonyearTMm
0Mean_winter_temperature43.1669545.43053520319.952474
1Mean_winter_temperature43.1669545.43053520319.952474
2Mean_winter_temperature43.1669545.430535203210.142323
3Mean_winter_temperature43.1669545.430535203210.142323
4Mean_winter_temperature43.1669545.43053520339.907943
..................
75Mean_summer_temperature43.1669545.430535204825.187577
76Mean_summer_temperature43.1669545.430535204924.829654
77Mean_summer_temperature43.1669545.430535204924.829654
78Mean_summer_temperature43.1669545.430535205025.053394
79Mean_summer_temperature43.1669545.430535205025.053394
\n", + "

80 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Data lat lon year TMm\n", + "0 Mean_winter_temperature 43.166954 5.430535 2031 9.952474\n", + "1 Mean_winter_temperature 43.166954 5.430535 2031 9.952474\n", + "2 Mean_winter_temperature 43.166954 5.430535 2032 10.142323\n", + "3 Mean_winter_temperature 43.166954 5.430535 2032 10.142323\n", + "4 Mean_winter_temperature 43.166954 5.430535 2033 9.907943\n", + ".. ... ... ... ... ...\n", + "75 Mean_summer_temperature 43.166954 5.430535 2048 25.187577\n", + "76 Mean_summer_temperature 43.166954 5.430535 2049 24.829654\n", + "77 Mean_summer_temperature 43.166954 5.430535 2049 24.829654\n", + "78 Mean_summer_temperature 43.166954 5.430535 2050 25.053394\n", + "79 Mean_summer_temperature 43.166954 5.430535 2050 25.053394\n", + "\n", + "[80 rows x 5 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hovertemplate": "Data=Mean_winter_temperature
year=%{x}
TMm=%{y}", + "legendgroup": "Mean_winter_temperature", + "line": { + "color": "#636efa", + "dash": "solid" + }, + "marker": { + "symbol": "circle" + }, + "mode": "lines", + "name": "Mean_winter_temperature", + "orientation": "v", + "showlegend": true, + "type": "scatter", + "x": [ + 2031, + 2031, + 2032, + 2032, + 2033, + 2033, + 2034, + 2034, + 2035, + 2035, + 2036, + 2036, + 2037, + 2037, + 2038, + 2038, + 2039, + 2039, + 2040, + 2040, + 2041, + 2041, + 2042, + 2042, + 2043, + 2043, + 2044, + 2044, + 2045, + 2045, + 2046, + 2046, + 2047, + 2047, + 2048, + 2048, + 2049, + 2049, + 2050, + 2050 + ], + "xaxis": "x", + "y": [ + 9.952474117647114, + 9.952474117647114, + 10.142322941176474, + 10.142322941176474, + 9.907942941176486, + 9.907942941176486, + 9.548873529411765, + 9.548873529411765, + 10.284758235294191, + 10.284758235294191, + 10.372100000000046, + 10.372100000000046, + 9.98571000000004, + 9.98571000000004, + 10.221372352941216, + 10.221372352941216, + 10.222609411764722, + 10.222609411764722, + 10.473662941176485, + 10.473662941176485, + 10.427640588235306, + 10.427640588235306, + 10.364736470588241, + 10.364736470588241, + 10.112910588235309, + 10.112910588235309, + 10.250792352941176, + 10.250792352941176, + 10.166119411764669, + 10.166119411764669, + 10.728997647058861, + 10.728997647058861, + 10.347248823529412, + 10.347248823529412, + 10.706604117647089, + 10.706604117647089, + 10.59243764705883, + 10.59243764705883, + 10.63225529411767, + 10.63225529411767 + ], + "yaxis": "y" + }, + { + "hovertemplate": "Data=Mean_summer_temperature
year=%{x}
TMm=%{y}", + "legendgroup": "Mean_summer_temperature", + "line": { + "color": "#EF553B", + "dash": "solid" + }, + "marker": { + "symbol": "circle" + }, + "mode": "lines", + "name": "Mean_summer_temperature", + "orientation": "v", + "showlegend": true, + "type": "scatter", + "x": [ + 2031, + 2031, + 2032, + 2032, + 2033, + 2033, + 2034, + 2034, + 2035, + 2035, + 2036, + 2036, + 2037, + 2037, + 2038, + 2038, + 2039, + 2039, + 2040, + 2040, + 2041, + 2041, + 2042, + 2042, + 2043, + 2043, + 2044, + 2044, + 2045, + 2045, + 2046, + 2046, + 2047, + 2047, + 2048, + 2048, + 2049, + 2049, + 2050, + 2050 + ], + "xaxis": "x", + "y": [ + 24.061035294117687, + 24.061035294117687, + 24.530692941176483, + 24.530692941176483, + 24.722234705882386, + 24.722234705882386, + 23.84629176470588, + 23.84629176470588, + 24.231422352941195, + 24.231422352941195, + 24.488941764705885, + 24.488941764705885, + 24.79424117647062, + 24.79424117647062, + 24.730553529411793, + 24.730553529411793, + 24.44979882352942, + 24.44979882352942, + 24.40726882352942, + 24.40726882352942, + 24.768547647058824, + 24.768547647058824, + 24.53479647058822, + 24.53479647058822, + 24.769181176470624, + 24.769181176470624, + 24.489877058823538, + 24.489877058823538, + 24.448076470588262, + 24.448076470588262, + 25.111282352941203, + 25.111282352941203, + 24.72313823529413, + 24.72313823529413, + 25.187577058823535, + 25.187577058823535, + 24.829653529411814, + 24.829653529411814, + 25.053394117647144, + 25.053394117647144 + ], + "yaxis": "y" + } + ], + "layout": { + "legend": { + "title": { + "text": "Data" + }, + "tracegroupgap": 0 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#f2f5fa" + }, + "error_y": { + "color": "#f2f5fa" + }, + "marker": { + "line": { + "color": "rgb(17,17,17)", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "rgb(17,17,17)", + 
"width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#A2B1C6", + "gridcolor": "#506784", + "linecolor": "#506784", + "minorgridcolor": "#506784", + "startlinecolor": "#A2B1C6" + }, + "baxis": { + "endlinecolor": "#A2B1C6", + "gridcolor": "#506784", + "linecolor": "#506784", + "minorgridcolor": "#506784", + "startlinecolor": "#A2B1C6" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + 
"#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "line": { + "color": "#283442" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + 
"marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "line": { + "color": "#283442" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#506784" + }, + "line": { + "color": "rgb(17,17,17)" + } + }, + "header": { + "fill": { + "color": "#2a3f5f" + }, + "line": { + "color": "rgb(17,17,17)" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#f2f5fa", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + 
"colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#f2f5fa" + }, + "geo": { + "bgcolor": "rgb(17,17,17)", + "lakecolor": "rgb(17,17,17)", + "landcolor": "rgb(17,17,17)", + "showlakes": true, + "showland": true, + "subunitcolor": "#506784" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "dark" + }, + "paper_bgcolor": "rgb(17,17,17)", + "plot_bgcolor": "rgb(17,17,17)", + "polar": { + "angularaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "bgcolor": "rgb(17,17,17)", + "radialaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + } + }, + "scene": { + "xaxis": { + 
"backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + }, + "yaxis": { + "backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + }, + "zaxis": { + "backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + } + }, + "shapedefaults": { + "line": { + "color": "#f2f5fa" + } + }, + "sliderdefaults": { + "bgcolor": "#C8D4E3", + "bordercolor": "rgb(17,17,17)", + "borderwidth": 1, + "tickwidth": 0 + }, + "ternary": { + "aaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "baxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "bgcolor": "rgb(17,17,17)", + "caxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "updatemenudefaults": { + "bgcolor": "#506784", + "borderwidth": 0 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#283442", + "linecolor": "#506784", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#283442", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#283442", + "linecolor": "#506784", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#283442", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Mean Temperature in Marseille Over the Years" + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "year" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "title": { + "text": "TMm" + } + } + } + }, + "text/html": [ + "
# %% [markdown]
# # Whole Vanna workflow

# %%
def replace_coordonates(coords, sql_query, coords_tables):
    """Swap the geocoded coordinates in ``sql_query`` for per-table coordinates.

    The i-th occurrence of ``str(coords[0])`` becomes
    ``str(coords_tables[i][0])`` and the i-th occurrence of ``str(coords[1])``
    becomes ``str(coords_tables[i][1])``. At most as many longitude
    occurrences are replaced as there are latitude occurrences.

    The query is rewritten in a single left-to-right pass over the original
    text, so a replacement value that happens to contain the original value
    (e.g. 43.2 -> 43.25) is never matched again by a later substitution.

    Args:
        coords: ``(lat, lon)`` pair whose ``str()`` forms appear in the query.
        sql_query: SQL text containing the original coordinates.
        coords_tables: Sequence of ``(lat, lon)`` pairs, one per occurrence, in
            the order the occurrences appear in ``sql_query``.

    Returns:
        The SQL text with the coordinates substituted.

    Raises:
        IndexError: If the latitude occurs more often than ``coords_tables``
            has entries.
    """
    import re  # local import: the notebook's import cell does not provide `re`

    old_lat, old_lon = str(coords[0]), str(coords[1])
    # Longest literal first so a shorter one cannot shadow it in the alternation.
    literals = sorted({old_lat, old_lon}, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(literal) for literal in literals))
    n = pattern.findall(sql_query).count(old_lat)

    lat_seen = 0
    lon_seen = 0

    def _substitute(match):
        nonlocal lat_seen, lon_seen
        found = match.group(0)
        if found == old_lat:
            replacement = str(coords_tables[lat_seen][0])
            lat_seen += 1
            return replacement
        if lon_seen >= n:
            # No latitude occurrence left to pair with: keep the text as is.
            return found
        replacement = str(coords_tables[lon_seen][1])
        lon_seen += 1
        return replacement

    return pattern.sub(_substitute, sql_query)


# %%
# NOTE: this definition rebinds the `ask_vanna` name imported from `main` in
# the notebook's first cell.
def ask_vanna(query):
    """Answer a location-based question with a DataFrame and a Plotly figure.

    Steps, as written below:
      1. Extract the location from ``query`` and convert it to coordinates.
      2. Replace the location text in the question with ``lat, long : <coords>``
         and let Vanna generate SQL for it.
      3. For every table returned by ``detectTable``, look up coordinates with
         ``nearestNeighbourSQL`` and substitute them into the SQL.
      4. Ask the LLM to rewrite the SQL so each row names its source table.
      5. Run the rewritten SQL on the SQLite database at ``db_vanna_path``.
      6. Ask Vanna for Plotly code for the result and build the figure.

    Args:
        query: The user question as a string.

    Returns:
        ``(df, fig)``: the query result as a DataFrame and the figure built
        from the generated Plotly code.
    """
    location = detect_location_with_openai(OPENAI_API_KEY, query)
    coords = loc2coords(location)
    user_input = query.replace(location, f"lat, long : {coords}")
    # The figure Vanna returns here is discarded; a new one is built below from
    # the re-queried data.
    sql_query, result_dataframe, _ = vn.ask(user_input, print_results=False, allow_llm_to_see_data=True)
    table = detectTable(sql_query)
    # presumably the nearest grid point stored in each table -- TODO confirm
    coords_tables = [nearestNeighbourSQL(db_vanna_path, coords, table_name) for table_name in table]
    sql_query_new_coords = replace_coordonates(coords, sql_query, coords_tables)
    sql_with_table_names = llm.invoke(f"Make the following sql query display the source table in the rows {sql_query_new_coords}. Just answer the query. The answer should not include ```sql\n").content

    db = sqlite3.connect(db_vanna_path)
    try:
        result = db.execute(sql_with_table_names).fetchall()
    finally:
        # Release the database handle even when the LLM-written SQL fails.
        db.close()
    # Assumes the rewritten query adds exactly one leading source-table column
    # in front of the columns Vanna originally selected -- TODO confirm.
    df = pd.DataFrame(result, columns=["data_name"] + list(result_dataframe.columns))

    # Pass the actual question and SQL; the previous version passed the literal
    # strings "query" and "sql_with_table_names".
    plotly_code = vn.generate_plotly_code(
        question=query,
        sql=sql_with_table_names,
        df_metadata=f"Running df.dtypes gives:\n {df.dtypes}",
    )

    fig = vn.get_plotly_figure(plotly_code=plotly_code, df=df)

    return df, fig


query = "Quelle sera la température à Marseille sur les prochaines années ?"
\\n===Tables \\n\\n CREATE TABLE Mean_winter_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en hiver\\n );\\n \\n\\n\\n CREATE TABLE Mean_summer_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en été\\n );\\n \\n\\n\\n===Additional Context \\n\\n\\n The Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 35°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX35D': Number of days with Tx ≥ 35°C.\\n \\n\\n\\n The Number of days with Tx above 30C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 30°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX30D': Number of days with Tx ≥ 30°C.\\n \\n\\n===Response Guidelines \\n1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n2. 
If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n3. If the provided context is insufficient, please explain why it can't be generated. \\n4. Please use the most relevant table(s). \\n5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n6. Ensure that the output SQL is SQLite-compliant and executable, and free of syntax errors. \\n\"}, {'role': 'user', 'content': 'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'}]\n", + "Using model gpt-4o-mini for 828.0 tokens (approx)\n", + "LLM Response: intermediate_sql\n", + "```sql\n", + "SELECT DISTINCT year FROM Mean_winter_temperature WHERE lat = 43.2961743 AND lon = 5.3699525\n", + "UNION\n", + "SELECT DISTINCT year FROM Mean_summer_temperature WHERE lat = 43.2961743 AND lon = 5.3699525;\n", + "```\n", + "Extracted SQL: SELECT DISTINCT year FROM Mean_winter_temperature WHERE lat = 43.2961743 AND lon = 5.3699525\n", + "UNION\n", + "SELECT DISTINCT year FROM Mean_summer_temperature WHERE lat = 43.2961743 AND lon = 5.3699525;\n", + "Running Intermediate SQL: SELECT DISTINCT year FROM Mean_winter_temperature WHERE lat = 43.2961743 AND lon = 5.3699525\n", + "UNION\n", + "SELECT DISTINCT year FROM Mean_summer_temperature WHERE lat = 43.2961743 AND lon = 5.3699525;\n", + "Final SQL Prompt: [{'role': 'system', 'content': \"You are a SQLite expert. Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. 
\\n===Tables \\n\\n CREATE TABLE Mean_winter_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en hiver\\n );\\n \\n\\n\\n CREATE TABLE Mean_summer_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en été\\n );\\n \\n\\n\\n===Additional Context \\n\\n\\n The Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 35°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX35D': Number of days with Tx ≥ 35°C.\\n \\n\\n\\n The Number of days with Tx above 30C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 30°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX30D': Number of days with Tx ≥ 30°C.\\n \\n\\nThe following is a pandas DataFrame with the results of the intermediate SQL query SELECT DISTINCT year FROM Mean_winter_temperature WHERE lat = 43.2961743 AND lon = 5.3699525\\nUNION\\nSELECT DISTINCT year FROM Mean_summer_temperature WHERE lat = 43.2961743 
AND lon = 5.3699525;: \\n| year |\\n|--------|\\n\\n===Response Guidelines \\n1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n3. If the provided context is insufficient, please explain why it can't be generated. \\n4. Please use the most relevant table(s). \\n5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n6. Ensure that the output SQL is SQLite-compliant and executable, and free of syntax errors. \\n\"}, {'role': 'user', 'content': 'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'}]\n", + "Using model gpt-4o-mini for 903.25 tokens (approx)\n", + "LLM Response: La context fourni ne contient pas d'informations sur les prévisions de température pour les prochaines années. Par conséquent, je ne peux pas générer une requête SQL pour répondre à cette question.\n", + "Couldn't run sql: Execution failed on sql 'La context fourni ne contient pas d'informations sur les prévisions de température pour les prochaines années. Par conséquent, je ne peux pas générer une requête SQL pour répondre à cette question.': near \"La\": syntax error\n", + "execute sql query : To display the source table in the rows, you can use a simple SELECT statement. For example:\n", + "\n", + "SELECT * FROM your_table_name; \n", + "\n", + "Replace \"your_table_name\" with the actual name of your source table. 
This query will retrieve all columns and rows from the specified table.\n" + ] + }, + { + "ename": "OperationalError", + "evalue": "near \"To\": syntax error", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mOperationalError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df, fig \u001b[38;5;241m=\u001b[39m \u001b[43mask_vanna\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(df)\n\u001b[1;32m 3\u001b[0m fig\n", + "File \u001b[0;32m~/ai4s/climate_qa/climate-question-answering/climateqa/engine/talk_to_data/main.py:71\u001b[0m, in \u001b[0;36mask_vanna\u001b[0;34m(query)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexecute sql query : \u001b[39m\u001b[38;5;124m\"\u001b[39m, sql_with_table_names)\n\u001b[1;32m 70\u001b[0m db \u001b[38;5;241m=\u001b[39m sqlite3\u001b[38;5;241m.\u001b[39mconnect(db_vanna_path)\n\u001b[0;32m---> 71\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcursor\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql_with_table_names\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mfetchall()\n\u001b[1;32m 72\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(result, columns\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_name\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(result_dataframe\u001b[38;5;241m.\u001b[39mcolumns))\n\u001b[1;32m 74\u001b[0m plotly_code \u001b[38;5;241m=\u001b[39m vn\u001b[38;5;241m.\u001b[39mgenerate_plotly_code(\n\u001b[1;32m 
# Run the end-to-end pipeline (question -> SQL -> DataFrame -> Plotly figure)
# for the question stored in `query`.
#
# ask_vanna() executes whatever text the LLM returned. When the model answers
# in prose instead of SQL, SQLite raises OperationalError (the recorded run of
# this cell failed with: near "To": syntax error). Catch that specific error
# and report it so the notebook still runs top to bottom; any other exception
# is left to propagate.
try:
    df, fig = ask_vanna(query)
except sqlite3.OperationalError as sql_error:
    df, fig = None, None
    print(f"ask_vanna could not execute the generated SQL: {sql_error}")
else:
    # Rich display instead of print(): renders the frame as an HTML table.
    display(df)

# Last expression of the cell: renders the figure (nothing is shown on failure).
fig
\\n===Tables \\n\\n CREATE TABLE Mean_winter_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en hiver\\n );\\n \\n\\n\\n CREATE TABLE Mean_summer_temperature (\\n y FLOAT,\\n x FLOAT,\\n year INT, \\n month INT, \\n day INT \\n,\\n LambertParisII VARCHAR(255),\\n lat FLOAT,\\n lon FLOAT,\\n TMm FLOAT, -- Température moyenne en été\\n );\\n \\n\\n\\n===Additional Context \\n\\n\\n The Number of days with Tx above 35C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 35°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX35D': Number of days with Tx ≥ 35°C.\\n \\n\\n\\n The Number of days with Tx above 30C table contains information on the number of days when the maximum temperature in the past and the future\\n is greater than or equal to 30°C.\\n The variables are as follows:\\n - 'y' and 'x': Lambert Paris II coordinates for the location.\\n - year: Year of the observation.\\n\\n - month : Month of the observation.\\n\\n - day: Day of the observation.\\n\\n - 'LambertParisII': Indicates that the x, y coordinates are in the Lambert Paris II projection.\\n - 'lat' and 'lon': Latitude and longitude of the location.\\n - 'TX30D': Number of days with Tx ≥ 30°C.\\n \\n\\n===Response Guidelines \\n1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \\n2. 
If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \\n3. If the provided context is insufficient, please explain why it can't be generated. \\n4. Please use the most relevant table(s). \\n5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \\n6. Ensure that the output SQL is SQLite-compliant and executable, and free of syntax errors. \\n\"}, {'role': 'user', 'content': 'Quelle sera la température à lat, long : (43.2961743, 5.3699525) sur les prochaines années ?'}]\n", + "Using model gpt-4o-mini for 828.0 tokens (approx)\n", + "LLM Response: ```sql\n", + "SELECT year, TMm \n", + "FROM Mean_winter_temperature \n", + "WHERE lat = 43.2961743 AND lon = 5.3699525\n", + "UNION ALL\n", + "SELECT year, TMm \n", + "FROM Mean_summer_temperature \n", + "WHERE lat = 43.2961743 AND lon = 5.3699525;\n", + "```\n", + "Extracted SQL: SELECT year, TMm \n", + "FROM Mean_winter_temperature \n", + "WHERE lat = 43.2961743 AND lon = 5.3699525\n", + "UNION ALL\n", + "SELECT year, TMm \n", + "FROM Mean_summer_temperature \n", + "WHERE lat = 43.2961743 AND lon = 5.3699525;\n", + "Using model gpt-4o-mini for 218.5 tokens (approx)\n", + "[(2031, 9.952474117647114), (2031, 9.952474117647114), (2032, 10.142322941176474), (2032, 10.142322941176474), (2033, 9.907942941176486), (2033, 9.907942941176486), (2034, 9.548873529411765), (2034, 9.548873529411765), (2035, 10.284758235294191), (2035, 10.284758235294191), (2036, 10.372100000000046), (2036, 10.372100000000046), (2037, 9.98571000000004), (2037, 9.98571000000004), (2038, 10.221372352941216), (2038, 10.221372352941216), (2039, 10.222609411764722), (2039, 10.222609411764722), (2040, 10.473662941176485), (2040, 10.473662941176485), (2041, 10.427640588235306), (2041, 
# Answer `query` with data taken at the grid point closest to the place it
# mentions.
#
# Steps when a location is detected:
#   1. geocode the location and substitute it in the question by its coordinates;
#   2. let Vanna generate SQL for the rewritten question;
#   3. look up the nearest coordinates available in the first detected table
#      and swap them into the SQL (an earlier recorded run returned no rows
#      when filtering on the geocoded coordinates themselves);
#   4. execute the patched SQL and load the rows into `df`.
# Without a detected location the question is passed to Vanna unchanged.
#
# NOTE(review): only table[0] is used for the nearest-neighbour lookup even
# when the generated SQL reads several tables -- presumably all tables share
# the same grid; confirm.
location = detect_location_with_openai(OPENAI_API_KEY, query)

if location:
    coords = loc2coords(location)
    user_input = query.replace(location, f"lat, long : {coords}")

    answer = vn.ask(user_input, print_results=False, allow_llm_to_see_data=True)
    generated_sql = answer[0]
    table = detectTable(generated_sql)

    coords2 = nearestNeighbourSQL(db_vanna_path, coords, table[0])

    # Patch both coordinates into the generated SQL. The result goes into its
    # own name: `query` must keep holding the user's question, otherwise
    # re-running this cell would feed SQL text to the location detector.
    sql_query = (
        generated_sql
        .replace(f"{coords[0]}", f"{coords2[0]}")
        .replace(f"{coords[1]}", f"{coords2[1]}")
    )

    db = sqlite3.connect(db_vanna_path)
    try:
        cursor = db.cursor().execute(sql_query)
        result = cursor.fetchall()
        # Take the column names from the statement that was actually executed
        # rather than from answer[1], which comes from a different execution
        # and is not guaranteed to hold a DataFrame.
        column_names = [column[0] for column in cursor.description]
    finally:
        # `with sqlite3.connect(...)` only manages the transaction; the
        # connection has to be closed explicitly.
        db.close()

    df = pd.DataFrame(result, columns=column_names)

else:
    answer = vn.ask(query, visualize=True, print_results=False, allow_llm_to_see_data=True)
    sql_query = answer[0]
    df = answer[1]

# Show a preview instead of printing every row of the result.
df.head()