Thamed-Chowdhury committed on
Commit
6ada8e5
·
verified ·
1 Parent(s): 7b0d150

Update LLM_automation_GPT35.py

Browse files
Files changed (1) hide show
  1. LLM_automation_GPT35.py +90 -90
LLM_automation_GPT35.py CHANGED
@@ -1,91 +1,91 @@
def create_data(description, chain=None):
    """Extract structured accident details from news reports with an LLM.

    Each report is first classified as describing a specific accident or
    accidents in general. General reports are discarded. For every remaining
    report the model is asked one question per output column.

    Args:
        description: pandas DataFrame with a ``Description`` column holding
            the report text. A ``Date + Desc`` helper column, if present, is
            removed from the result. The frame itself is not modified.
        chain: Optional object exposing ``invoke({"question": str}) -> str``.
            When omitted, a ``prompt | gpt-3.5-turbo | StrOutputParser`` chain
            is built, which requires ``OPENAI_API_KEY`` to be set in the
            environment or in a ``.env`` file.

    Returns:
        A new DataFrame containing the specific-accident rows (index reset to
        0..n-1) without ``Description``, ``Report Type`` and ``Date + Desc``,
        and with the added columns ``Date``, ``Time``, ``Killed``,
        ``Injured``, ``Location``, ``Road_Characteristic``,
        ``Pedestrian_Involved`` and ``Vehicles Involved``.

    Raises:
        RuntimeError: If no chain is supplied and ``OPENAI_API_KEY`` is unset.
        KeyError: If ``description`` has no ``Description`` column.
    """
    if chain is None:
        import os

        from dotenv import load_dotenv
        from langchain_core.output_parsers import StrOutputParser
        from langchain_core.prompts import ChatPromptTemplate
        from langchain_openai import ChatOpenAI

        # The key must come from the environment / .env file. Never hard-code
        # it in source: anything committed to the repository is public.
        load_dotenv()
        if not os.environ.get("OPENAI_API_KEY"):
            raise RuntimeError(
                "OPENAI_API_KEY is not set; export it or add it to a .env file"
            )

        # A list (not a set) so the system message always precedes the user
        # message; set iteration order is arbitrary.
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", "You are a helpful assistant, please respond to the queries"),
                ("user", "question: {question}"),
            ]
        )
        # Each stage's output is piped into the next: prompt -> model -> parser.
        chain = prompt | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()

    def ask(question):
        """Send one question through the chain and return the text answer."""
        return chain.invoke({"question": question})

    type_question = (
        " Is the news referring to a specific accident incident or accident in"
        " general? Answer only in a word: 'specific' or 'general'. No other"
        " words are allowed in your answer"
    )
    prefix = (
        "Read the accident report carefully and provide only the answer of the"
        " question asked. Do not add any extra sentences or words except the"
        " answer: "
    )
    suffix = " If you cannot find or deduce the answer, simply reply Not Available"
    # (output column, question) pairs, asked in this order for every report.
    fields = (
        (
            "Date",
            prefix
            + "What is the date of accident occurrence in Day-Month-Year format."
            + " Keep in mind that news publish date and accident occurrence date"
            + " may be different."
            + suffix,
        ),
        (
            "Time",
            prefix + "What is the time of accident occurrence in 24-hour format." + suffix,
        ),
        ("Killed", prefix + "How many people were killed in the accident?." + suffix),
        ("Injured", prefix + "How many people were injured in the accident?." + suffix),
        (
            "Location",
            prefix + "What is the name of the location where accident took place?." + suffix,
        ),
        (
            "Road_Characteristic",
            prefix + "What is the type of road where accident took place?." + suffix,
        ),
        (
            "Pedestrian_Involved",
            prefix + "Was there any pedestrian involved in the accident?." + suffix,
        ),
        (
            "Vehicles Involved",
            "Only name the type of vehicles involved in the accident. If multiple"
            " vehicles are involved, seperate them by hyphens(-). Example answers:"
            " Bus, Truck-Bus etc. If no vehicles are mentioned, your answer will"
            " be: Not Available. Your answer should only contain the vehicle name,"
            " do not include any extra sentences",
        ),
    )

    # Work on a copy so the caller's frame is left untouched.
    reports = description.copy()

    # Step 1: classify every report and keep only the specific accidents.
    report_types = [ask(text + type_question) for text in reports["Description"].tolist()]
    reports["Report Type"] = report_types
    # Positional selection works for any index; case-insensitive match catches
    # answers such as "GENERAL" as well.
    kept = [
        position
        for position, answer in enumerate(report_types)
        if "general" not in str(answer).lower()
    ]
    reports = reports.iloc[kept].reset_index(drop=True)

    # Step 2: ask every extraction question about every remaining report.
    extracted = {column: [] for column, _ in fields}
    for text in reports["Description"].tolist():
        for column, question in fields:
            # Blank line keeps the instructions from running into the report.
            extracted[column].append(ask(question + "\n\n" + text))

    for column, answers in extracted.items():
        reports[column] = answers

    # errors="ignore": a missing helper column should not discard the results
    # of all the model calls made above.
    return reports.drop(
        columns=["Description", "Report Type", "Date + Desc"], errors="ignore"
    )