|
def create_data(description):
    """Extract structured road-accident records from news rows using a Groq LLM.

    Each row is first classified by the LLM; rows whose classification
    contains ``"General"`` are discarded.  For every remaining row the LLM is
    asked for the accident details and for the vehicles involved, and the
    answers are stored in new columns.

    Args:
        description: pandas DataFrame with at least the columns
            ``"Description"`` (news text) and ``"Publish Date"``.  It is not
            modified; missing values are replaced by ``0`` in a copy.

    Returns:
        A new DataFrame with a fresh 0..n-1 index holding the retained rows.
        The ``"Description"`` column is removed and the columns
        ``"Accident Date"``, ``"Time"``, ``"Killed"``, ``"Injured"``,
        ``"Location"``, ``"Road_Characteristic"``, ``"Pedestrian_Involved"``
        and ``"Vehicles_involved"`` are added.

    Raises:
        RuntimeError: if neither ``GROQ_API`` nor ``GROQ_API_KEY`` is set.
        KeyError: if a required column is missing from ``description``.
    """
    print("Running LLM Automation Groq")

    chain = _build_groq_chain()

    def ask(question):
        # Every prompt is sent through the template's single {question} slot.
        return chain.invoke({"question": question})

    classify_suffix = " Is the news about road accident? If no, then reply 'General'. Else if the news is about road accident then check if the news is referring to a specific accident incident or accident in general? Answer only in a word: Either specific or general."
    vehicle_suffix = " Only name the type of vehicles involved in the accident. If multiple vehicles are involved, seperate them by hyphens(-). Example answers: Bus, Truck-Bus etc. If no vehicles are mentioned, your answer will be: Not Available. Your answer should only contain the vehicle name, do not include any extra sentences"

    reports = description.fillna(0)

    # Read the column by value rather than by label so frames whose index is
    # not 0..n-1 work too.  The f-string keeps a non-string description (e.g.
    # the 0 written by fillna) from raising TypeError on concatenation.
    report_types = [
        ask(f"{text}{classify_suffix}") for text in reports["Description"].tolist()
    ]

    specific = reports.copy()
    specific["Report Type"] = report_types

    # NOTE(review): the match is case-sensitive, so a lowercase "general"
    # answer is kept - confirm that this is intended.
    kept_positions = [
        position
        for position, label in enumerate(report_types)
        if "General" not in label
    ]
    # Positional selection: row labels are irrelevant, and an empty selection
    # still yields a frame with all columns.
    specific = specific.iloc[kept_positions].reset_index(drop=True)

    detail_answers = [
        ask(_details_question(publish_date, text))
        for publish_date, text in zip(
            specific["Publish Date"].tolist(), specific["Description"].tolist()
        )
    ]
    vehicles = [
        ask(f"{text}{vehicle_suffix}") for text in specific["Description"].tolist()
    ]

    detail_columns = (
        "Accident Date",
        "Time",
        "Killed",
        "Injured",
        "Location",
        "Road_Characteristic",
        "Pedestrian_Involved",
    )
    detail_rows = [
        _split_details(answer, len(detail_columns)) for answer in detail_answers
    ]
    for position, column in enumerate(detail_columns):
        specific[column] = [fields[position] for fields in detail_rows]
    specific["Vehicles_involved"] = vehicles

    return specific.drop(columns=["Description", "Report Type"])


def _build_groq_chain():
    """Load credentials and return the prompt | Groq model | string-parser chain."""
    # Imports stay function-local so that importing this module does not
    # require the LLM dependencies.
    import os

    from dotenv import load_dotenv
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_groq import ChatGroq

    load_dotenv()

    # The key is configured as GROQ_API and copied to GROQ_API_KEY for the
    # Groq client.  Assigning None to os.environ raises an opaque TypeError,
    # so copy only when a value exists.
    api_key = os.getenv("GROQ_API")
    if api_key is not None:
        os.environ["GROQ_API_KEY"] = api_key
    elif "GROQ_API_KEY" not in os.environ:
        raise RuntimeError(
            "Groq API key not found: set GROQ_API (or GROQ_API_KEY) in the "
            "environment or in the .env file."
        )

    # A list, not a set: message order matters and sets are unordered.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    llm = ChatGroq(model="llama3-70b-8192")
    return prompt | llm | StrOutputParser()


def _details_question(publish_date, news):
    """Build the prompt asking for the seven semicolon-separated accident details."""
    # Instruction paragraphs are separated by a blank line.
    paragraphs = (
        "I will give you two strings. 1st string will contain a publish date of a news and the 2nd string will contain the accident news itself.",
        "If the 2nd string contains more than one accident incidents, only consider the 1st incident. Based on these two strings, you have to answer the following questions. Remember your answer must contain ONLY THE ANSWERS WITHOUT ANY EXTRA WORDS OR SENTENCES:",
        "what is the date (Day-Month-Year numerical format) of accident occurrence? ;",
        "Time of Accident occured; How many people were killed in the accident?;",
        "How many people were injured in the accident?;",
        "Location of the accident;",
        "Type of road where accident occured;",
        "Was there any pedestrian involved?;",
        "Do not include any extra words or sentences except the answers seperated by semicolons only. Your reply cannot contain sentences such as - 'Here are the answers to the questions'",
        f"string 1 = {publish_date}",
        f"string 2 = {news}",
    )
    return "\n\n".join(paragraphs)


def _split_details(answer, expected, placeholder="Not Available"):
    """Split a ';'-separated answer into exactly ``expected`` fields, padding if short."""
    fields = answer.split(";")[:expected]
    # The model may return fewer fields than requested; pad instead of letting
    # the caller fail with IndexError after all LLM calls have been made.
    fields.extend([placeholder] * (expected - len(fields)))
    return fields
|
|
|
|
|
|
|
|