|
import re |
|
import openai |
|
import pandas as pd |
|
from geopy.geocoders import Nominatim |
|
import sqlite3 |
|
import ast |
|
|
|
|
|
def detect_location_with_openai(api_key, sentence):
    """Ask an OpenAI chat model which locations are mentioned in ``sentence``.

    The model is prompted to answer with a Python list. The reply is scanned
    for the first line that starts with ``[`` and that line is returned with
    its first two and last two characters removed. For a reply line such as
    ``['Paris']`` the result is ``Paris``; when the list holds several
    entries, the inner quotes and commas are left in the returned string.

    Args:
        api_key: OpenAI API key; assigned to the module-level
            ``openai.api_key``.
        sentence: Text to analyse.

    Returns:
        str: The trimmed list line, or ``""`` when the reply is empty,
        contains no list line, or the list is ``[]``.
    """
    openai.api_key = api_key

    prompt = f"""
    Extract all locations (cities, countries, states, or geographical areas) mentioned in the following sentence.
    Return the result as a Python list. If no locations are mentioned, return an empty list.

    Sentence: "{sentence}"
    """

    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant skilled in identifying locations in text."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=100,
        temperature=0,
    )

    # The reply may or may not be wrapped in a ```python fence, so look for
    # the list line itself instead of assuming it is always the second line
    # (which raised IndexError on single-line replies).
    content = response.choices[0].message.content or ""
    list_line = next(
        (line.strip() for line in content.splitlines() if line.strip().startswith("[")),
        "",
    )
    # Drop the leading `["` / `['` and the trailing `"]` / `']`.
    return list_line[2:-2]
|
|
|
|
|
def detectTable(sql_query):
    """Return every table reference that follows a ``FROM`` keyword.

    The keyword is matched case-insensitively. A reference is one or more
    dot-separated identifiers, each either a bare word or a name wrapped in
    backticks, double quotes or single quotes. Quotes are kept in the
    returned text.

    Args:
        sql_query: SQL text to scan.

    Returns:
        list[str]: Matched references in order of appearance; empty when the
        query has no ``FROM`` clause.
    """
    # One identifier: `quoted`, "quoted", 'quoted' or a bare word.
    identifier = r'(?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+)'
    from_clause = re.compile(
        r'(?i)\bFROM\s+(' + identifier + r'(?:\.' + identifier + r')*)'
    )
    return [found.group(1) for found in from_clause.finditer(sql_query)]
|
|
|
|
|
|
|
def loc2coords(location: str):
    """Geocode a place name to a ``(latitude, longitude)`` tuple via Nominatim.

    Args:
        location: Free-text place name handed to ``Nominatim.geocode``.

    Returns:
        tuple: ``(latitude, longitude)`` of the geocoder's result.

    Raises:
        ValueError: If the geocoder finds no match for ``location``.
    """
    geolocator = Nominatim(user_agent="city_to_latlong")
    match = geolocator.geocode(location)
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on `None.latitude`.
    if match is None:
        raise ValueError(f"Could not geocode location: {location!r}")
    return (match.latitude, match.longitude)
|
|
|
|
|
def coords2loc(coords: tuple):
    """Reverse-geocode ``coords`` to an address string via Nominatim.

    This is best-effort: any exception raised during the lookup is printed
    and the placeholder ``"Unknown Location"`` is returned instead.

    Args:
        coords: Coordinates passed unchanged to ``Nominatim.reverse``.

    Returns:
        str: The ``address`` of the reverse-geocoding result, or
        ``"Unknown Location"`` on failure.
    """
    reverse_geocoder = Nominatim(user_agent="coords_to_city")
    try:
        address = reverse_geocoder.reverse(coords).address
    except Exception as exc:
        # Deliberate catch-all: report the failure and fall back to the placeholder.
        print(f"Error: {exc}")
        address = "Unknown Location"
    return address
|
|
|
|
|
def nearestNeighbourSQL(db: str, location: tuple, table: str):
    """Return the ``(lat, lon)`` row of ``table`` closest to ``location``.

    Candidates are limited to rows whose ``lat`` and ``lon`` both lie within
    0.3 of the target (rounded to 3 decimals). Among those, the row with the
    smallest squared coordinate difference is returned.

    Args:
        db: Path of the SQLite database file.
        location: ``(latitude, longitude)`` pair.
        table: Name of a table with ``lat`` and ``lon`` columns.

    Returns:
        tuple: ``(lat, lon)`` of the closest row inside the search box.

    Raises:
        IndexError: If no row lies inside the search box.
    """
    lat = round(location[0], 3)
    lon = round(location[1], 3)

    # NOTE(security): SQL identifiers cannot be bound as parameters, so the
    # table name is still interpolated. Callers must only pass trusted or
    # whitelisted table names. All numeric values are bound.
    sql = (
        f"SELECT lat, lon FROM {table} "
        "WHERE lat BETWEEN ? AND ? AND lon BETWEEN ? AND ? "
        # Plain squared difference in degrees: adequate for ranking points
        # inside a 0.6 x 0.6 degree box.
        "ORDER BY (lat - ?) * (lat - ?) + (lon - ?) * (lon - ?) "
        "LIMIT 1"
    )
    params = (lat - 0.3, lat + 0.3, lon - 0.3, lon + 0.3, lat, lat, lon, lon)

    conn = sqlite3.connect(db)
    try:
        row = conn.execute(sql, params).fetchone()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    if row is None:
        # Same exception type the former `results[0]` raised on an empty result.
        raise IndexError(
            f"No row of {table!r} within 0.3 degrees of ({lat}, {lon})"
        )
    return row
|
|
|
def detect_relevant_tables(user_question, llm):
    """Ask ``llm`` which of the known tables are relevant to a question.

    The model is told to answer with a Python list of table names. The reply
    may be wrapped in a code fence or surrounded by prose, so the span from
    the first ``[`` to the last ``]`` is extracted and parsed with
    ``ast.literal_eval``.

    Args:
        user_question: The user's natural-language question.
        llm: Object exposing ``invoke(prompt)`` whose result has a
            ``content`` string attribute.

    Returns:
        list: The list parsed from the model's reply.

    Raises:
        ValueError: If the reply contains no bracketed list.
        SyntaxError: Propagated from ``ast.literal_eval`` when the bracketed
            text is not valid Python (it may also raise ``ValueError`` for
            non-literal content).
    """
    table_names_list = [
        "Frequency_of_rainy_days_index",
        "Winter_precipitation_total",
        "Summer_precipitation_total",
        "Annual_precipitation_total",
        "Frequency_of_remarkable_daily_precipitation",
        "Extreme_precipitation_intensity",
        "Mean_winter_temperature",
        "Mean_summer_temperature",
        "Number_of_tropical_nights",
        "Maximum_summer_temperature",
        "Number_of_days_with_Tx_above_30C",
        "Number_of_days_with_Tx_above_35C",
        "Drought_index",
    ]
    prompt = (
        "You are helping to build a sql query to retrieve relevant data for a user question."
        f"The different tables are {table_names_list}."
        f"The user question is {user_question}. Write the relevant tables to use. Answer only a python list of table name."
    )
    reply = llm.invoke(prompt).content

    # str.strip("```python\n") strips a *set of characters*, not a prefix, and
    # cannot cope with prose around the list; pull out the bracketed literal.
    list_match = re.search(r"\[.*\]", reply, re.DOTALL)
    if list_match is None:
        raise ValueError(f"No Python list found in LLM reply: {reply!r}")

    table_names = ast.literal_eval(list_match.group(0))
    return table_names
|
|
|
def replace_coordonates(coords, query, coords_tables):
    """Swap successive occurrences of ``coords`` in ``query`` for table coordinates.

    The k-th occurrence of ``str(coords[0])`` is replaced by
    ``str(coords_tables[k][0])`` and the k-th occurrence of
    ``str(coords[1])`` by ``str(coords_tables[k][1])``. The number of pairs
    handled is the number of times ``str(coords[0])`` appears in ``query``.

    The query is scanned once, left to right, so text inserted by an earlier
    substitution is never matched again. This matters when a replacement
    value equals or contains the searched value. Occurrences beyond the
    length of ``coords_tables`` are left unchanged.

    Args:
        coords: ``(lat, lon)`` pair whose string forms are searched for.
        query: Text in which to substitute.
        coords_tables: Sequence of ``(lat, lon)`` replacement pairs, one per
            occurrence, in order.

    Returns:
        str: ``query`` with the substitutions applied.
    """
    lat_text, lon_text = str(coords[0]), str(coords[1])
    pair_count = query.count(lat_text)
    if pair_count == 0:
        return query

    rows = coords_tables[:pair_count]
    if lat_text == lon_text:
        # Both coordinates look the same in the text: hand out lat, lon, lat, ...
        pending = {
            lat_text: iter([str(value) for row in rows for value in (row[0], row[1])]),
        }
    else:
        pending = {
            lat_text: iter([str(row[0]) for row in rows]),
            lon_text: iter([str(row[1]) for row in rows]),
        }

    # Longest token first, so that a token which is a prefix of the other one
    # cannot shadow it in the alternation.
    tokens = sorted(pending, key=len, reverse=True)
    token_pattern = re.compile("|".join(re.escape(token) for token in tokens))

    def _substitute(match):
        token = match.group(0)
        # Keep the original text once the replacements for this token run out.
        return next(pending[token], token)

    return token_pattern.sub(_substitute, query)