Update LLM_automation_GPT35.py
Browse files- LLM_automation_GPT35.py +90 -90
LLM_automation_GPT35.py
CHANGED
@@ -1,91 +1,91 @@
|
|
1 |
-
# Question appended to every report to decide whether it covers one concrete accident.
_REPORT_TYPE_QUESTION = (
    " Is the news referring to a specific accident incident or accident in general?"
    " Answer only in a word: 'specific' or 'general'."
    " No other words are allowed in your answer"
)

# Shared framing of the per-field extraction prompts.
_ANSWER_ONLY_PREFIX = (
    "Read the accident report carefully and provide only the answer of the question asked."
    " Do not add any extra sentences or words except the answer: "
)
_NOT_AVAILABLE_SUFFIX = ". If you cannot find or deduce the answer, simply reply Not Available"

# Placed between the instruction and the report text so the two do not run together.
_REPORT_SEPARATOR = "\n\nAccident report: "

# Output column -> instruction sent to the model (insertion order = column order).
_FIELD_QUESTIONS = {
    "Date": _ANSWER_ONLY_PREFIX
    + "What is the date of accident occurrence in Day-Month-Year format."
    + " Keep in mind that news publish date and accident occurrence date may be different"
    + _NOT_AVAILABLE_SUFFIX,
    "Time": _ANSWER_ONLY_PREFIX
    + "What is the time of accident occurrence in 24-hour format"
    + _NOT_AVAILABLE_SUFFIX,
    "Killed": _ANSWER_ONLY_PREFIX
    + "How many people were killed in the accident?"
    + _NOT_AVAILABLE_SUFFIX,
    "Injured": _ANSWER_ONLY_PREFIX
    + "How many people were injured in the accident?"
    + _NOT_AVAILABLE_SUFFIX,
    "Location": _ANSWER_ONLY_PREFIX
    + "What is the name of the location where accident took place?"
    + _NOT_AVAILABLE_SUFFIX,
    "Road_Characteristic": _ANSWER_ONLY_PREFIX
    + "What is the type of road where accident took place?"
    + _NOT_AVAILABLE_SUFFIX,
    "Pedestrian_Involved": _ANSWER_ONLY_PREFIX
    + "Was there any pedestrian involved in the accident?"
    + _NOT_AVAILABLE_SUFFIX,
    "Vehicles Involved": "Only name the type of vehicles involved in the accident."
    " If multiple vehicles are involved, seperate them by hyphens(-)."
    " Example answers: Bus, Truck-Bus etc."
    " If no vehicles are mentioned, your answer will be: Not Available."
    " Your answer should only contain the vehicle name, do not include any extra sentences",
}


def _build_default_chain(model):
    """Build the ``prompt | ChatOpenAI | StrOutputParser`` chain for ``model``."""
    # Imported lazily so that a caller-supplied chain works without these packages.
    from dotenv import load_dotenv
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    # The API key is NOT hard-coded: it is taken from the process environment,
    # which load_dotenv() populates from a local .env file when one exists.
    load_dotenv()

    # A list (not a set) so the system message is always sent before the user message.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    # Each stage's output feeds the next: prompt -> chat model -> plain string.
    return prompt | ChatOpenAI(model=model) | StrOutputParser()


def create_data(description, chain=None, model="gpt-3.5-turbo"):
    """Extract structured accident fields from news reports with an LLM.

    Every row's ``Description`` text is first classified as a 'specific' or
    'general' report; rows whose answer contains "general" (any letter case)
    are discarded. For each remaining row the model is asked one question per
    output column (Date, Time, Killed, Injured, Location, Road_Characteristic,
    Pedestrian_Involved, Vehicles Involved).

    Args:
        description: pandas DataFrame with at least the columns
            ``Description`` and ``Date + Desc``. It is not modified; the
            function works on a copy.
        chain: Optional object exposing ``invoke({"question": str}) -> str``.
            When omitted, a LangChain OpenAI chain is built, which requires
            ``OPENAI_API_KEY`` to be set in the environment or a ``.env`` file.
        model: OpenAI chat model name used when ``chain`` is omitted.

    Returns:
        A new DataFrame, re-indexed from 0, holding the kept rows with the
        extracted columns appended and ``Description``, ``Report Type`` and
        ``Date + Desc`` removed.

    Raises:
        KeyError: if ``Description`` or ``Date + Desc`` is missing.
    """
    if chain is None:
        chain = _build_default_chain(model)

    reports = description.copy()

    # Iterate over values rather than index labels so any index layout works.
    report_types = [
        chain.invoke({"question": text + _REPORT_TYPE_QUESTION})
        for text in reports["Description"].tolist()
    ]
    reports["Report Type"] = report_types

    # Keep only reports about one concrete accident; select by position.
    specific_rows = [
        position
        for position, answer in enumerate(report_types)
        if "general" not in answer.lower()
    ]
    specific = reports.iloc[specific_rows].reset_index(drop=True)

    # Ask every field question for every remaining report, row by row.
    extracted = {column: [] for column in _FIELD_QUESTIONS}
    for text in specific["Description"].tolist():
        for column, question in _FIELD_QUESTIONS.items():
            extracted[column].append(
                chain.invoke({"question": question + _REPORT_SEPARATOR + text})
            )

    for column, answers in extracted.items():
        specific[column] = answers

    return specific.drop(columns=["Description", "Report Type", "Date + Desc"])
|
|
|
1 |
+
# Question appended to every report to decide whether it covers one concrete accident.
_REPORT_TYPE_QUESTION = (
    " Is the news referring to a specific accident incident or accident in general?"
    " Answer only in a word: 'specific' or 'general'."
    " No other words are allowed in your answer"
)

# Shared framing of the per-field extraction prompts.
_ANSWER_ONLY_PREFIX = (
    "Read the accident report carefully and provide only the answer of the question asked."
    " Do not add any extra sentences or words except the answer: "
)
_NOT_AVAILABLE_SUFFIX = ". If you cannot find or deduce the answer, simply reply Not Available"

# Placed between the instruction and the report text so the two do not run together.
_REPORT_SEPARATOR = "\n\nAccident report: "

# Output column -> instruction sent to the model (insertion order = column order).
_FIELD_QUESTIONS = {
    "Date": _ANSWER_ONLY_PREFIX
    + "What is the date of accident occurrence in Day-Month-Year format."
    + " Keep in mind that news publish date and accident occurrence date may be different"
    + _NOT_AVAILABLE_SUFFIX,
    "Time": _ANSWER_ONLY_PREFIX
    + "What is the time of accident occurrence in 24-hour format"
    + _NOT_AVAILABLE_SUFFIX,
    "Killed": _ANSWER_ONLY_PREFIX
    + "How many people were killed in the accident?"
    + _NOT_AVAILABLE_SUFFIX,
    "Injured": _ANSWER_ONLY_PREFIX
    + "How many people were injured in the accident?"
    + _NOT_AVAILABLE_SUFFIX,
    "Location": _ANSWER_ONLY_PREFIX
    + "What is the name of the location where accident took place?"
    + _NOT_AVAILABLE_SUFFIX,
    "Road_Characteristic": _ANSWER_ONLY_PREFIX
    + "What is the type of road where accident took place?"
    + _NOT_AVAILABLE_SUFFIX,
    "Pedestrian_Involved": _ANSWER_ONLY_PREFIX
    + "Was there any pedestrian involved in the accident?"
    + _NOT_AVAILABLE_SUFFIX,
    "Vehicles Involved": "Only name the type of vehicles involved in the accident."
    " If multiple vehicles are involved, seperate them by hyphens(-)."
    " Example answers: Bus, Truck-Bus etc."
    " If no vehicles are mentioned, your answer will be: Not Available."
    " Your answer should only contain the vehicle name, do not include any extra sentences",
}


def _build_default_chain(model):
    """Build the ``prompt | ChatOpenAI | StrOutputParser`` chain for ``model``."""
    # Imported lazily so that a caller-supplied chain works without these packages.
    from dotenv import load_dotenv
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    # The API key is NOT hard-coded: it is taken from the process environment,
    # which load_dotenv() populates from a local .env file when one exists.
    load_dotenv()

    # A list (not a set) so the system message is always sent before the user message.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    # Each stage's output feeds the next: prompt -> chat model -> plain string.
    return prompt | ChatOpenAI(model=model) | StrOutputParser()


def create_data(description, chain=None, model="gpt-3.5-turbo"):
    """Extract structured accident fields from news reports with an LLM.

    Every row's ``Description`` text is first classified as a 'specific' or
    'general' report; rows whose answer contains "general" (any letter case)
    are discarded. For each remaining row the model is asked one question per
    output column (Date, Time, Killed, Injured, Location, Road_Characteristic,
    Pedestrian_Involved, Vehicles Involved).

    Args:
        description: pandas DataFrame with at least the columns
            ``Description`` and ``Date + Desc``. It is not modified; the
            function works on a copy.
        chain: Optional object exposing ``invoke({"question": str}) -> str``.
            When omitted, a LangChain OpenAI chain is built, which requires
            ``OPENAI_API_KEY`` to be set in the environment or a ``.env`` file.
        model: OpenAI chat model name used when ``chain`` is omitted.

    Returns:
        A new DataFrame, re-indexed from 0, holding the kept rows with the
        extracted columns appended and ``Description``, ``Report Type`` and
        ``Date + Desc`` removed.

    Raises:
        KeyError: if ``Description`` or ``Date + Desc`` is missing.
    """
    if chain is None:
        chain = _build_default_chain(model)

    reports = description.copy()

    # Iterate over values rather than index labels so any index layout works.
    report_types = [
        chain.invoke({"question": text + _REPORT_TYPE_QUESTION})
        for text in reports["Description"].tolist()
    ]
    reports["Report Type"] = report_types

    # Keep only reports about one concrete accident; select by position.
    specific_rows = [
        position
        for position, answer in enumerate(report_types)
        if "general" not in answer.lower()
    ]
    specific = reports.iloc[specific_rows].reset_index(drop=True)

    # Ask every field question for every remaining report, row by row.
    extracted = {column: [] for column in _FIELD_QUESTIONS}
    for text in specific["Description"].tolist():
        for column, question in _FIELD_QUESTIONS.items():
            extracted[column].append(
                chain.invoke({"question": question + _REPORT_SEPARATOR + text})
            )

    for column, answers in extracted.items():
        specific[column] = answers

    return specific.drop(columns=["Description", "Report Type", "Date + Desc"])
|