def create_data(description):
    """Filter accident news reports and extract structured fields with an LLM.

    Each report text is first classified as describing a *specific* accident
    or accidents *in general*; rows classified as general are discarded. For
    every remaining report the model is asked one question per output column
    (date, time, killed, injured, location, road type, pedestrian
    involvement, vehicles involved).

    Args:
        description: A pandas-style DataFrame with a ``'Description'`` column
            holding the report text as strings. A ``'Date + Desc'`` column,
            if present, is removed from the result. The input frame is not
            modified.

    Returns:
        A new DataFrame with a fresh 0..n-1 index containing the input's
        remaining columns plus ``Date``, ``Time``, ``Killed``, ``Injured``,
        ``Location``, ``Road_Characteristic``, ``Pedestrian_Involved`` and
        ``Vehicles Involved``. All extracted values are the raw strings
        returned by the model.

    Raises:
        RuntimeError: If neither ``OPENAI_API`` nor ``OPENAI_API_KEY`` is
            available in the environment (after loading ``.env``).
        KeyError: If ``description`` has no ``'Description'`` column.
    """
    import os

    from dotenv import load_dotenv
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    load_dotenv()

    # The project stores the key under OPENAI_API; the OpenAI client reads
    # OPENAI_API_KEY. Assigning None into os.environ raises an opaque
    # TypeError, so fail with a clear message instead.
    api_key = os.getenv("OPENAI_API")
    if api_key is not None:
        os.environ["OPENAI_API_KEY"] = api_key
    elif "OPENAI_API_KEY" not in os.environ:
        raise RuntimeError(
            "OpenAI API key not found: set OPENAI_API (or OPENAI_API_KEY) "
            "in the environment or in a .env file"
        )

    # Must be a list, not a set: a set has no defined iteration order, so the
    # system and user messages could otherwise be emitted in either order.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    llm = ChatOpenAI(model="gpt-3.5-turbo")
    chain = prompt | llm | StrOutputParser()

    def ask(question):
        """Send one question through the prompt -> model -> string parser chain."""
        return chain.invoke({"question": question})

    # reset_index returns a new frame, so the caller's DataFrame is left
    # untouched, and row labels become 0..n-1 regardless of the input index
    # (the original index was discarded in the result anyway).
    reports = description.reset_index(drop=True)

    # Step 1: keep only reports about one specific accident.
    classify_suffix = (
        " Is the news referring to a specific accident incident or accident "
        "in general? Answer only in a word: 'specific' or 'general'. "
        "No other words are allowed in your answer"
    )
    report_types = [ask(text + classify_suffix) for text in reports["Description"]]
    reports["Report Type"] = report_types

    general_rows = [
        row for row, verdict in enumerate(report_types)
        if "general" in verdict.lower()
    ]
    reports = reports.drop(index=general_rows).reset_index(drop=True)

    # Step 2: one question per output column for every remaining report.
    answer_only = (
        "Read the accident report carefully and provide only the answer of "
        "the question asked. Do not add any extra sentences or words except "
        "the answer: "
    )
    fallback = ". If you cannot find or deduce the answer, simply reply Not Available"
    field_prompts = {
        "Date": answer_only
        + "What is the date of accident occurrence in Day-Month-Year format. "
        "Keep in mind that news publish date and accident occurrence date "
        "may be different"
        + fallback,
        "Time": answer_only
        + "What is the time of accident occurrence in 24-hour format"
        + fallback,
        "Killed": answer_only
        + "How many people were killed in the accident?"
        + fallback,
        "Injured": answer_only
        + "How many people were injured in the accident?"
        + fallback,
        "Location": answer_only
        + "What is the name of the location where accident took place?"
        + fallback,
        "Road_Characteristic": answer_only
        + "What is the type of road where accident took place?"
        + fallback,
        "Pedestrian_Involved": answer_only
        + "Was there any pedestrian involved in the accident?"
        + fallback,
        "Vehicles Involved": (
            "Only name the type of vehicles involved in the accident. If "
            "multiple vehicles are involved, seperate them by hyphens(-). "
            "Example answers: Bus, Truck-Bus etc. If no vehicles are "
            "mentioned, your answer will be: Not Available. Your answer "
            "should only contain the vehicle name, do not include any extra "
            "sentences"
        ),
    }

    extracted = {column: [] for column in field_prompts}
    for text in reports["Description"]:
        for column, instruction in field_prompts.items():
            # A blank line separates the instruction from the report so the
            # instruction's last word is not fused with the report's first.
            extracted[column].append(ask(instruction + "\n\n" + text))

    for column, answers in extracted.items():
        reports[column] = answers

    # 'Date + Desc' is only removed when present; a missing helper column
    # should not discard the results of all the model calls made above.
    return reports.drop(
        columns=["Description", "Report Type", "Date + Desc"],
        errors="ignore",
    )