Thamed-Chowdhury committed on
Commit
ecd42a8
·
verified ·
1 Parent(s): feda798

Update LLM_automation_Groq.py

Browse files
Files changed (1) hide show
  1. LLM_automation_Groq.py +130 -130
LLM_automation_Groq.py CHANGED
@@ -1,130 +1,130 @@
def create_data(description):
    """Turn accident news reports into a structured accident table via an LLM.

    Every report is sent to a Groq-hosted Llama 3 70B chat model: once to
    label it 'Specific' or 'General', and, for the reports that are kept,
    once to extract the accident details and once to list the vehicles.

    Args:
        description: pandas DataFrame with at least the columns
            'Date + Desc', 'Description' and 'Publish Date'. It is not
            modified; missing values are replaced by 0 in a copy.

    Returns:
        A new DataFrame, indexed from 0, containing only the rows whose
        label does not contain 'General'. The 'Description', 'Date + Desc'
        and 'Report Type' columns are removed and the columns
        'Accident Date', 'Time', 'Killed', 'Injured', 'Location',
        'Road_Characteristic', 'Pedestrian_Involved' and 'Vehicles_involved'
        are added (all as stripped strings from the model's answer).

    Raises:
        RuntimeError: if no Groq API key is available in the environment.
    """
    import os

    from dotenv import load_dotenv
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_groq import ChatGroq

    # Instructions appended to each report. The wording (typos included) is
    # part of the prompt sent to the model and is kept unchanged.
    classify_instruction = (
        " Is the news referring to one or many specific accident incidents or accident in general?"
        " Make sure that your answer is only in one word."
        " If a report contains more than one accident incident, classify it as a general accident incident."
        " The word should be either 'Specific' or 'General'."
        " Your answer should not contain any words except 'Specific' and 'General' "
    )
    details_instruction = "\n".join(
        [
            "Provide only the answers of the following question seperated by a comma only:",
            "If the news was published on {publish_date}, what is the date of accident occurrence?"
            " The date must be in Day-Month-Year format."
            " Be careful because publish date and accident occurrence date may or may not be the same."
            " Try to deduce correct accident date,",
            "Time of Accident occured, How many people were killed in the accident in numeric number?,",
            "How many people were injured in the accident in numeric number?,",
            "Location of the accident,",
            "Type of road where accident occured,",
            "Was there any pedestrian involved?,",
            "Do not include any other sentences except the answers seperated by comma only"
            " and do not include sentences such as: Here are the answers,",
            "if you cannot find or deduce a answer simply put 'Not Available' in place of it.",
            "If a report mentions more than one specific accident incidents"
            " only consider the 1st accident incident and ignore the second one",
        ]
    )
    vehicle_instruction = (
        " Only name the type of vehicles involved in the accident."
        " If multiple vehicles are involved, seperate them by hyphens(-)."
        " Example answers: Bus, Truck-Bus etc."
        " If no vehicles are mentioned, your answer will be: Not Available."
        " Your answer should only contain the vehicle name, do not include any extra sentences"
    )
    # Output columns, in the order the details prompt asks for them.
    detail_columns = [
        "Accident Date",
        "Time",
        "Killed",
        "Injured",
        "Location",
        "Road_Characteristic",
        "Pedestrian_Involved",
    ]
    missing = "Not Available"  # same placeholder the prompt tells the model to use

    load_dotenv()

    # The key must come from the environment (or a .env file), never from the
    # source code. ChatGroq is left to pick it up from GROQ_API_KEY.
    api_key = os.getenv("GROQ_API") or os.getenv("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Groq API key not found: set GROQ_API (or GROQ_API_KEY) in the environment or .env file"
        )
    os.environ["GROQ_API_KEY"] = api_key

    # A list, not a set: message order matters and set iteration order is arbitrary.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    llm = ChatGroq(model="llama3-70b-8192")
    # prompt -> model -> plain string
    chain = prompt | llm | StrOutputParser()

    def ask(text, instruction):
        """Send one report plus an instruction to the model and return its reply."""
        # str() guards against non-text cells (e.g. the 0 written by fillna).
        return chain.invoke({"question": str(text) + instruction})

    def split_answer(answer):
        """Split a comma-separated reply into exactly len(detail_columns) fields."""
        fields = [part.strip() for part in answer.split(",")]
        # Short replies are padded instead of raising IndexError; extra
        # fields beyond the expected count are ignored, as before.
        fields += [missing] * (len(detail_columns) - len(fields))
        return fields[: len(detail_columns)]

    # Work on a copy with a positional index so row positions and labels agree.
    df = description.fillna(0).reset_index(drop=True)

    # Step 1: label every report and discard the 'General' ones.
    report_types = [ask(text, classify_instruction) for text in df["Date + Desc"]]
    df2 = df.copy()
    df2["Report Type"] = report_types
    general_rows = [pos for pos, label in enumerate(report_types) if "General" in label]
    df2 = df2.drop(index=general_rows).reset_index(drop=True)

    # Step 2: one comma-separated reply per remaining report.
    details = [
        ask(text, details_instruction.format(publish_date=published))
        for text, published in zip(df2["Description"], df2["Publish Date"])
    ]

    # Step 3: vehicles involved, asked separately.
    vehicles = [ask(text, vehicle_instruction).strip() for text in df2["Date + Desc"]]

    # Step 4: spread the parsed fields over their columns.
    parsed = [split_answer(answer) for answer in details]
    for pos, column in enumerate(detail_columns):
        df2[column] = [fields[pos] for fields in parsed]
    df2["Vehicles_involved"] = vehicles

    return df2.drop(columns=["Description", "Date + Desc", "Report Type"])
 
def create_data(description):
    """Turn accident news reports into a structured accident table via an LLM.

    Every report is sent to a Groq-hosted Llama 3 70B chat model: once to
    label it 'Specific' or 'General', and, for the reports that are kept,
    once to extract the accident details and once to list the vehicles.

    Args:
        description: pandas DataFrame with at least the columns
            'Date + Desc', 'Description' and 'Publish Date'. It is not
            modified; missing values are replaced by 0 in a copy.

    Returns:
        A new DataFrame, indexed from 0, containing only the rows whose
        label does not contain 'General'. The 'Description', 'Date + Desc'
        and 'Report Type' columns are removed and the columns
        'Accident Date', 'Time', 'Killed', 'Injured', 'Location',
        'Road_Characteristic', 'Pedestrian_Involved' and 'Vehicles_involved'
        are added (all as stripped strings from the model's answer).

    Raises:
        RuntimeError: if no Groq API key is available in the environment.
    """
    import os

    from dotenv import load_dotenv
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_groq import ChatGroq

    # Instructions appended to each report. The wording (typos included) is
    # part of the prompt sent to the model and is kept unchanged.
    classify_instruction = (
        " Is the news referring to one or many specific accident incidents or accident in general?"
        " Make sure that your answer is only in one word."
        " If a report contains more than one accident incident, classify it as a general accident incident."
        " The word should be either 'Specific' or 'General'."
        " Your answer should not contain any words except 'Specific' and 'General' "
    )
    details_instruction = "\n".join(
        [
            "Provide only the answers of the following question seperated by a comma only:",
            "If the news was published on {publish_date}, what is the date of accident occurrence?"
            " The date must be in Day-Month-Year format."
            " Be careful because publish date and accident occurrence date may or may not be the same."
            " Try to deduce correct accident date,",
            "Time of Accident occured, How many people were killed in the accident in numeric number?,",
            "How many people were injured in the accident in numeric number?,",
            "Location of the accident,",
            "Type of road where accident occured,",
            "Was there any pedestrian involved?,",
            "Do not include any other sentences except the answers seperated by comma only"
            " and do not include sentences such as: Here are the answers,",
            "if you cannot find or deduce a answer simply put 'Not Available' in place of it.",
            "If a report mentions more than one specific accident incidents"
            " only consider the 1st accident incident and ignore the second one",
        ]
    )
    vehicle_instruction = (
        " Only name the type of vehicles involved in the accident."
        " If multiple vehicles are involved, seperate them by hyphens(-)."
        " Example answers: Bus, Truck-Bus etc."
        " If no vehicles are mentioned, your answer will be: Not Available."
        " Your answer should only contain the vehicle name, do not include any extra sentences"
    )
    # Output columns, in the order the details prompt asks for them.
    detail_columns = [
        "Accident Date",
        "Time",
        "Killed",
        "Injured",
        "Location",
        "Road_Characteristic",
        "Pedestrian_Involved",
    ]
    missing = "Not Available"  # same placeholder the prompt tells the model to use

    load_dotenv()

    # os.environ only accepts strings, so assigning the result of a failed
    # os.getenv() lookup would raise an opaque TypeError; fail with a clear
    # message instead. ChatGroq is left to pick the key up from GROQ_API_KEY.
    api_key = os.getenv("GROQ_API") or os.getenv("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Groq API key not found: set GROQ_API (or GROQ_API_KEY) in the environment or .env file"
        )
    os.environ["GROQ_API_KEY"] = api_key

    # A list, not a set: message order matters and set iteration order is arbitrary.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    llm = ChatGroq(model="llama3-70b-8192")
    # prompt -> model -> plain string
    chain = prompt | llm | StrOutputParser()

    def ask(text, instruction):
        """Send one report plus an instruction to the model and return its reply."""
        # str() guards against non-text cells (e.g. the 0 written by fillna).
        return chain.invoke({"question": str(text) + instruction})

    def split_answer(answer):
        """Split a comma-separated reply into exactly len(detail_columns) fields."""
        fields = [part.strip() for part in answer.split(",")]
        # Short replies are padded instead of raising IndexError; extra
        # fields beyond the expected count are ignored, as before.
        fields += [missing] * (len(detail_columns) - len(fields))
        return fields[: len(detail_columns)]

    # Work on a copy with a positional index so row positions and labels agree.
    df = description.fillna(0).reset_index(drop=True)

    # Step 1: label every report and discard the 'General' ones.
    report_types = [ask(text, classify_instruction) for text in df["Date + Desc"]]
    df2 = df.copy()
    df2["Report Type"] = report_types
    general_rows = [pos for pos, label in enumerate(report_types) if "General" in label]
    df2 = df2.drop(index=general_rows).reset_index(drop=True)

    # Step 2: one comma-separated reply per remaining report.
    details = [
        ask(text, details_instruction.format(publish_date=published))
        for text, published in zip(df2["Description"], df2["Publish Date"])
    ]

    # Step 3: vehicles involved, asked separately.
    vehicles = [ask(text, vehicle_instruction).strip() for text in df2["Date + Desc"]]

    # Step 4: spread the parsed fields over their columns.
    parsed = [split_answer(answer) for answer in details]
    for pos, column in enumerate(detail_columns):
        df2[column] = [fields[pos] for fields in parsed]
    df2["Vehicles_involved"] = vehicles

    return df2.drop(columns=["Description", "Date + Desc", "Report Type"])