Thamed-Chowdhury committed on
Commit
7b0d150
·
verified ·
1 Parent(s): 0ecab8f

Update LLM_automation_GPT.py

Browse files
Files changed (1) hide show
  1. LLM_automation_GPT.py +124 -124
LLM_automation_GPT.py CHANGED
@@ -1,125 +1,125 @@
1
def create_data(description):
    """Turn a table of accident news reports into structured accident data.

    Each report is first classified by GPT-4o as 'Specific' or 'General';
    reports classified as 'General' are discarded. For every remaining report
    the model is asked for the accident date, time, casualties, location, road
    type, pedestrian involvement and the vehicles involved.

    Args:
        description: pandas DataFrame that contains at least the columns
            'Date + Desc', 'Description' and 'Publish Date'. It is not
            modified.

    Returns:
        A new DataFrame with one row per specific accident report. The
        'Description', 'Date + Desc' and 'Report Type' columns are removed and
        the columns 'Accident Date', 'Time', 'Killed', 'Injured', 'Location',
        'Road_Characteristic', 'Pedestrian_Involved' and 'Vehicles Involved'
        are added.

    Raises:
        RuntimeError: if OPENAI_API_KEY is not available in the environment
            (after loading a local .env file).
    """
    print("Running THis Script")
    import os

    from dotenv import load_dotenv
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    # The key must come from the environment or a .env file. A secret must
    # never be embedded in source code, where it leaks with every commit.
    load_dotenv()
    if not os.environ.get("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it or add it to a .env file."
        )

    # A list (not a set) keeps the system message ahead of the user message;
    # set iteration order is arbitrary and can differ between runs.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    # prompt -> model -> plain-string output, chained left to right.
    chain = prompt | ChatOpenAI(model="gpt-4o") | StrOutputParser()

    def ask(question):
        return chain.invoke({"question": question})

    # Normalise the index so that row labels equal row positions no matter
    # how the caller built or filtered the frame.
    df = description.fillna(0).reset_index(drop=True)

    classify_suffix = (
        " Is the news referring to one or many specific accident incidents or accident in general? "
        "Make sure that your answer is only in one word. "
        "If a report contains more than one accident incident, classify it as a general accident incident. "
        "The word should be either 'Specific' or 'General'. "
        "Your answer should not contain any words except 'Specific' and 'General' "
    )
    # str() guards against cells that fillna() turned into the integer 0.
    report_types = [ask(str(text) + classify_suffix) for text in df["Date + Desc"]]

    df2 = df.copy()
    df2["Report Type"] = report_types

    # Discard reports the model classified as general accident coverage.
    general_rows = [
        row for row, answer in enumerate(report_types) if "General" in answer
    ]
    df2 = df2.drop(index=general_rows).reset_index(drop=True)

    def details_question(text, publish_date):
        return (
            str(text)
            + "Provide only the answers of the following question seperated by a comma only:\n"
            + f"If the news was published on {publish_date}, what is the date of accident occurrence? "
            + "The date must be in Day-Month-Year format. "
            + "Be careful because publish date and accident occurrence date may or may not be the same. "
            + "Try to deduce correct accident date and do not include Saturday Sunday etc in your date. "
            + "Only numerics are needed,\n"
            + "Time of Accident occured, How many people were killed in the accident in numeric number?,\n"
            + "How many people were injured in the accident in numeric number?,\n"
            + "Location of the accident,\n"
            + "Type of road where accident occured,\n"
            + "Was there any pedestrian involved?,\n"
            + "Do not include any other sentences except the answers seperated by comma only,\n"
            + "if you cannot find or deduce a answer simply put 'Not Available' in place of it.\n"
            + "If a report mentions more than one specific accident incidents "
            + "only consider the 1st accident incident and ignore the second one"
        )

    # One comma-separated answer string per remaining report.
    detail_answers = [
        ask(details_question(text, publish_date))
        for text, publish_date in zip(df2["Description"], df2["Publish Date"])
    ]

    vehicle_suffix = (
        " Only name the type of vehicles involved in the accident. "
        "If multiple vehicles are involved, seperate them by hyphens(-). "
        "Example answers: Bus, Truck-Bus etc. "
        "If no vehicles are mentioned, your answer will be: Not Available. "
        "Your answer should only contain the vehicle name, do not include any extra sentences"
    )
    vehicles = [ask(str(text) + vehicle_suffix) for text in df2["Date + Desc"]]

    # Column order matches the order in which the prompt asks the questions.
    detail_columns = (
        "Accident Date",
        "Time",
        "Killed",
        "Injured",
        "Location",
        "Road_Characteristic",
        "Pedestrian_Involved",
    )
    parsed = [
        _split_answer_fields(answer, len(detail_columns))
        for answer in detail_answers
    ]
    for position, column in enumerate(detail_columns):
        df2[column] = [fields[position] for fields in parsed]
    df2["Vehicles Involved"] = vehicles

    return df2.drop(columns=["Description", "Date + Desc", "Report Type"])


def _split_answer_fields(answer, expected, filler="Not Available"):
    """Split a comma-separated model answer into exactly `expected` fields.

    Surrounding whitespace is stripped from every field. Blank or missing
    fields become `filler`, so a short answer can no longer raise IndexError.
    Fields beyond `expected` are ignored; an answer that itself contains a
    comma (for example inside a location) cannot be disambiguated here.
    """
    fields = [part.strip() or filler for part in answer.split(",")]
    fields += [filler] * (expected - len(fields))
    return fields[:expected]
125
 
 
1
def create_data(description):
    """Turn a table of accident news reports into structured accident data.

    Each report is first classified by GPT-4o as 'Specific' or 'General';
    reports classified as 'General' are discarded. For every remaining report
    the model is asked for the accident date, time, casualties, location, road
    type, pedestrian involvement and the vehicles involved.

    Args:
        description: pandas DataFrame that contains at least the columns
            'Date + Desc', 'Description' and 'Publish Date'. It is not
            modified.

    Returns:
        A new DataFrame with one row per specific accident report. The
        'Description', 'Date + Desc' and 'Report Type' columns are removed and
        the columns 'Accident Date', 'Time', 'Killed', 'Injured', 'Location',
        'Road_Characteristic', 'Pedestrian_Involved' and 'Vehicles Involved'
        are added.

    Raises:
        RuntimeError: if OPENAI_API_KEY is not available in the environment
            (after loading a local .env file).
    """
    print("Running THis Script")
    import os

    from dotenv import load_dotenv
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    # The key must come from the environment or a .env file. A secret must
    # never be embedded in source code, where it leaks with every commit.
    load_dotenv()
    if not os.environ.get("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it or add it to a .env file."
        )

    # A list (not a set) keeps the system message ahead of the user message;
    # set iteration order is arbitrary and can differ between runs.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    # prompt -> model -> plain-string output, chained left to right.
    chain = prompt | ChatOpenAI(model="gpt-4o") | StrOutputParser()

    def ask(question):
        return chain.invoke({"question": question})

    # Normalise the index so that row labels equal row positions no matter
    # how the caller built or filtered the frame.
    df = description.fillna(0).reset_index(drop=True)

    classify_suffix = (
        " Is the news referring to one or many specific accident incidents or accident in general? "
        "Make sure that your answer is only in one word. "
        "If a report contains more than one accident incident, classify it as a general accident incident. "
        "The word should be either 'Specific' or 'General'. "
        "Your answer should not contain any words except 'Specific' and 'General' "
    )
    # str() guards against cells that fillna() turned into the integer 0.
    report_types = [ask(str(text) + classify_suffix) for text in df["Date + Desc"]]

    df2 = df.copy()
    df2["Report Type"] = report_types

    # Discard reports the model classified as general accident coverage.
    general_rows = [
        row for row, answer in enumerate(report_types) if "General" in answer
    ]
    df2 = df2.drop(index=general_rows).reset_index(drop=True)

    def details_question(text, publish_date):
        return (
            str(text)
            + "Provide only the answers of the following question seperated by a comma only:\n"
            + f"If the news was published on {publish_date}, what is the date of accident occurrence? "
            + "The date must be in Day-Month-Year format. "
            + "Be careful because publish date and accident occurrence date may or may not be the same. "
            + "Try to deduce correct accident date and do not include Saturday Sunday etc in your date. "
            + "Only numerics are needed,\n"
            + "Time of Accident occured, How many people were killed in the accident in numeric number?,\n"
            + "How many people were injured in the accident in numeric number?,\n"
            + "Location of the accident,\n"
            + "Type of road where accident occured,\n"
            + "Was there any pedestrian involved?,\n"
            + "Do not include any other sentences except the answers seperated by comma only,\n"
            + "if you cannot find or deduce a answer simply put 'Not Available' in place of it.\n"
            + "If a report mentions more than one specific accident incidents "
            + "only consider the 1st accident incident and ignore the second one"
        )

    # One comma-separated answer string per remaining report.
    detail_answers = [
        ask(details_question(text, publish_date))
        for text, publish_date in zip(df2["Description"], df2["Publish Date"])
    ]

    vehicle_suffix = (
        " Only name the type of vehicles involved in the accident. "
        "If multiple vehicles are involved, seperate them by hyphens(-). "
        "Example answers: Bus, Truck-Bus etc. "
        "If no vehicles are mentioned, your answer will be: Not Available. "
        "Your answer should only contain the vehicle name, do not include any extra sentences"
    )
    vehicles = [ask(str(text) + vehicle_suffix) for text in df2["Date + Desc"]]

    # Column order matches the order in which the prompt asks the questions.
    detail_columns = (
        "Accident Date",
        "Time",
        "Killed",
        "Injured",
        "Location",
        "Road_Characteristic",
        "Pedestrian_Involved",
    )
    parsed = [
        _split_answer_fields(answer, len(detail_columns))
        for answer in detail_answers
    ]
    for position, column in enumerate(detail_columns):
        df2[column] = [fields[position] for fields in parsed]
    df2["Vehicles Involved"] = vehicles

    return df2.drop(columns=["Description", "Date + Desc", "Report Type"])


def _split_answer_fields(answer, expected, filler="Not Available"):
    """Split a comma-separated model answer into exactly `expected` fields.

    Surrounding whitespace is stripped from every field. Blank or missing
    fields become `filler`, so a short answer can no longer raise IndexError.
    Fields beyond `expected` are ignored; an answer that itself contains a
    comma (for example inside a location) cannot be disambiguated here.
    """
    fields = [part.strip() or filler for part in answer.split(",")]
    fields += [filler] * (expected - len(fields))
    return fields[:expected]
125