"""Helpers for locating places in user text and mapping them onto climate tables.

The module combines an LLM (location / table detection), the Nominatim
geocoder (name <-> coordinates) and a SQLite database of gridded climate
indicators (nearest grid-point lookup).
"""

import ast
import re
import sqlite3

import openai
import pandas as pd
from geopy.geocoders import Nominatim

# Matches the table reference following a FROM keyword. Each dotted part may be
# a bare word or quoted with backticks, double quotes or single quotes.
_FROM_TABLE_PATTERN = re.compile(
    r'(?i)\bFROM\s+((?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+)(?:\.(?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+))*)'
)

# Half-width, in degrees, of the search box used around a location.
_SEARCH_RADIUS_DEG = 0.3


def detect_location_with_openai(api_key, sentence):
    """Detect the locations mentioned in a sentence using OpenAI's API.

    Args:
        api_key: OpenAI API key; it is stored on the global ``openai`` module.
        sentence: Free text in which to look for locations.

    Returns:
        str: The line of the model reply that holds the Python list, with its
        first two and last two characters removed (so ``["Paris"]`` becomes
        ``Paris``). With several locations the inner quotes and commas are
        kept, e.g. ``Paris", "London``.
    """
    openai.api_key = api_key

    prompt = f"""
    Extract all locations (cities, countries, states, or geographical areas) mentioned in the following sentence.
    Return the result as a Python list. If no locations are mentioned, return an empty list.

    Sentence: "{sentence}"
    """

    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant skilled in identifying locations in text."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=100,
        temperature=0,
    )

    reply_lines = response.choices[0].message.content.split("\n")
    # The list is expected on the second line (after an opening code fence).
    # When the model answers on a single line, fall back to that line instead
    # of failing with an IndexError.
    list_line = reply_lines[1] if len(reply_lines) > 1 else reply_lines[0]
    return list_line[2:-2]


def detectTable(sql_query):
    """Return the table references that follow each ``FROM`` in a SQL query.

    Args:
        sql_query: SQL text to scan.

    Returns:
        list[str]: One entry per ``FROM`` clause, in order of appearance,
        including any quoting and schema prefix exactly as written. Tables
        introduced by other keywords (e.g. ``JOIN``) are not reported.
    """
    return _FROM_TABLE_PATTERN.findall(sql_query)


def loc2coords(location: str):
    """Geocode a place name to a ``(latitude, longitude)`` tuple via Nominatim.

    Args:
        location: Place name or address to look up.

    Returns:
        tuple: ``(latitude, longitude)`` of the geocoder's result.

    Raises:
        AttributeError: If the geocoder returns no result. (geopy documents
            that ``geocode`` returns ``None`` when nothing matches, which makes
            the attribute access below fail.)
    """
    geolocator = Nominatim(user_agent="city_to_latlong")
    match = geolocator.geocode(location)
    return (match.latitude, match.longitude)


def coords2loc(coords: tuple):
    """Reverse-geocode coordinates to a human-readable address via Nominatim.

    This is best-effort: any failure is printed and a placeholder is returned
    instead of raising.

    Args:
        coords: Coordinates passed straight to ``Nominatim.reverse``.

    Returns:
        str: The resolved address, or ``"Unknown Location"`` on any error.
    """
    geolocator = Nominatim(user_agent="coords_to_city")
    try:
        location = geolocator.reverse(coords)
        return location.address
    except Exception as exc:
        print(f"Error: {exc}")
        return "Unknown Location"


def nearestNeighbourSQL(db: str, location: tuple, table: str):
    """Return the grid point of ``table`` that is closest to ``location``.

    Only rows within +/- 0.3 degrees of the (rounded) location in both latitude
    and longitude are considered; among those, the row with the smallest
    squared distance in degrees is returned.

    Args:
        db: Path of the SQLite database file.
        location: ``(latitude, longitude)`` of the point of interest.
        table: Name of a table exposing ``lat`` and ``lon`` columns. It is
            interpolated into the SQL text (identifiers cannot be bound as
            parameters), so it must come from a trusted source.

    Returns:
        tuple: ``(lat, lon)`` of the nearest grid point found in the table.

    Raises:
        IndexError: If no row lies inside the search box.
    """
    lat = round(location[0], 3)
    lon = round(location[1], 3)

    # Values are bound as parameters; only the table identifier is formatted in.
    query = (
        f"SELECT lat, lon FROM {table} "
        "WHERE lat BETWEEN ? AND ? AND lon BETWEEN ? AND ? "
        # Planar squared distance in degrees: adequate inside such a small box.
        "ORDER BY (lat - ?) * (lat - ?) + (lon - ?) * (lon - ?) "
        "LIMIT 1"
    )
    params = (
        lat - _SEARCH_RADIUS_DEG,
        lat + _SEARCH_RADIUS_DEG,
        lon - _SEARCH_RADIUS_DEG,
        lon + _SEARCH_RADIUS_DEG,
        lat,
        lat,
        lon,
        lon,
    )

    conn = sqlite3.connect(db)
    try:
        row = conn.execute(query, params).fetchone()
    finally:
        # Always release the database handle, even when the query fails.
        conn.close()

    if row is None:
        raise IndexError(
            f"no grid point within {_SEARCH_RADIUS_DEG} degrees of "
            f"({lat}, {lon}) in table {table!r}"
        )
    return row


def _extract_list_literal(reply: str) -> str:
    """Return the text of the Python list contained in an LLM reply.

    The outermost ``[...]`` span is used when present, which tolerates code
    fences or prose around the list. Without brackets the reply is cleaned the
    legacy way (stripping fence/``python`` characters and whitespace).
    """
    bracketed = re.search(r"\[.*\]", reply, re.DOTALL)
    if bracketed:
        return bracketed.group(0)
    # NOTE: str.strip treats its argument as a set of characters, not a prefix.
    return reply.strip("```python\n").strip()


def detect_relevant_tables(user_question, llm):
    """Ask an LLM which climate tables are relevant to a user question.

    Args:
        user_question: The question asked by the user.
        llm: Object exposing ``invoke(prompt)`` whose result has a ``content``
            string attribute.

    Returns:
        The Python literal parsed from the model reply; the prompt asks for a
        list of table names.

    Raises:
        ValueError, SyntaxError: If the reply does not contain a valid Python
            literal (raised by ``ast.literal_eval``).
    """
    table_names_list = [
        "Frequency_of_rainy_days_index",
        "Winter_precipitation_total",
        "Summer_precipitation_total",
        "Annual_precipitation_total",
        # "Remarkable_daily_precipitation_total_(Q99)",
        "Frequency_of_remarkable_daily_precipitation",
        "Extreme_precipitation_intensity",
        "Mean_winter_temperature",
        "Mean_summer_temperature",
        "Number_of_tropical_nights",
        "Maximum_summer_temperature",
        "Number_of_days_with_Tx_above_30C",
        "Number_of_days_with_Tx_above_35C",
        "Drought_index",
    ]
    prompt = (
        f"You are helping to build a sql query to retrieve relevant data for a user question."
        f"The different tables are {table_names_list}."
        f"The user question is {user_question}. Write the relevant tables to use. Answer only a python list of table name."
    )
    reply = llm.invoke(prompt).content
    # literal_eval only parses literals, so the model output is never executed.
    table_names = ast.literal_eval(_extract_list_literal(reply))
    return table_names


def replace_coordonates(coords, query, coords_tables):
    """Swap the requested coordinates in a query for per-table grid coordinates.

    The latitude text is counted once up front; then, for each occurrence
    index ``i``, the first remaining latitude and the first remaining longitude
    are replaced by ``coords_tables[i]``'s latitude and longitude.

    Args:
        coords: ``(latitude, longitude)`` as they appear in ``query``.
        query: SQL text containing the coordinates.
        coords_tables: Sequence of ``(latitude, longitude)`` replacements, one
            per occurrence of the latitude in ``query``.

    Returns:
        str: The query with the coordinates substituted.

    Raises:
        IndexError: If ``coords_tables`` has fewer entries than there are
            occurrences of the latitude in ``query``.
    """
    lat_text = str(coords[0])
    lon_text = str(coords[1])
    occurrences = query.count(lat_text)
    for index in range(occurrences):
        nearest = coords_tables[index]
        query = query.replace(lat_text, str(nearest[0]), 1)
        query = query.replace(lon_text, str(nearest[1]), 1)
    return query