Update LLM_automation_Groq.py
Browse files- LLM_automation_Groq.py +130 -130
LLM_automation_Groq.py
CHANGED
@@ -1,130 +1,130 @@
|
|
1 |
-
# Instruction appended to every article to label it 'Specific' or 'General'.
_REPORT_TYPE_QUESTION = (
    " Is the news referring to one or many specific accident incidents or accident in general?"
    " Make sure that your answer is only in one word."
    " If a report contains more than one accident incident, classify it as a general accident incident."
    " The word should be either 'Specific' or 'General'."
    " Your answer should not contain any words except 'Specific' and 'General' "
)

# Instruction appended to the article body to extract the comma-separated details.
# `{publish_date}` is filled in per row with the article's publish date.
_DETAIL_QUESTION = "\n".join(
    [
        "Provide only the answers of the following question seperated by a comma only:",
        "If the news was published on {publish_date}, what is the date of accident occurrence?"
        " The date must be in Day-Month-Year format."
        " Be careful because publish date and accident occurrence date may or may not be the same."
        " Try to deduce correct accident date,",
        "Time of Accident occured, How many people were killed in the accident in numeric number?,",
        "How many people were injured in the accident in numeric number?,",
        "Location of the accident,",
        "Type of road where accident occured,",
        "Was there any pedestrian involved?,",
        "Do not include any other sentences except the answers seperated by comma only"
        " and do not include sentences such as: Here are the answers,",
        "if you cannot find or deduce a answer simply put 'Not Available' in place of it.",
        "If a report mentions more than one specific accident incidents"
        " only consider the 1st accident incident and ignore the second one",
    ]
)

# Instruction appended to every article to extract the vehicles involved.
_VEHICLE_QUESTION = (
    " Only name the type of vehicles involved in the accident."
    " If multiple vehicles are involved, seperate them by hyphens(-)."
    " Example answers: Bus, Truck-Bus etc."
    " If no vehicles are mentioned, your answer will be: Not Available."
    " Your answer should only contain the vehicle name, do not include any extra sentences"
)

# Output columns, in the same order the detail prompt asks its questions.
_DETAIL_COLUMNS = (
    "Accident Date",
    "Time",
    "Killed",
    "Injured",
    "Location",
    "Road_Characteristic",
    "Pedestrian_Involved",
)

# Placeholder used when the model returned fewer fields than requested.
_MISSING_FIELD = "Not Available"


def _split_answer(answer, width):
    """Split a comma-separated LLM answer into exactly ``width`` fields.

    Extra fields are discarded and missing ones are padded with
    ``'Not Available'`` so a short answer cannot raise ``IndexError``.
    """
    fields = answer.split(",")[:width]
    fields.extend([_MISSING_FIELD] * (width - len(fields)))
    return fields


def create_data(description, model="llama3-70b-8192"):
    """Turn scraped accident news into a structured table using a Groq LLM.

    Each row is first classified as a 'Specific' or 'General' accident report.
    Rows whose answer contains 'General' are dropped. For the remaining rows
    the LLM is asked for the accident date, time, killed/injured counts,
    location, road type, pedestrian involvement and the vehicles involved.

    Args:
        description: DataFrame with the columns 'Date + Desc', 'Description'
            and 'Publish Date'. It is not modified; missing values are
            replaced by 0 in the working copy.
        model: Name of the Groq chat model to query.

    Returns:
        A new DataFrame (index 0..n-1) holding the specific-accident rows,
        with the columns 'Accident Date', 'Time', 'Killed', 'Injured',
        'Location', 'Road_Characteristic', 'Pedestrian_Involved' and
        'Vehicles_involved' added, and 'Description', 'Date + Desc' and
        'Report Type' removed.

    Raises:
        RuntimeError: if GROQ_API_KEY is not set in the environment or in a
            .env file.
    """
    import os

    from dotenv import load_dotenv
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_groq import ChatGroq

    load_dotenv()

    # Fail early with a clear message instead of an authentication error on
    # the first LLM call.
    if not os.getenv("GROQ_API_KEY"):
        raise RuntimeError(
            "Groq API key not found: set GROQ_API_KEY in the environment or in a .env file"
        )

    # A list (not a set) keeps the system message ahead of the user message;
    # set iteration order is arbitrary.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )

    # Chain: the output of each step before `|` is the input of the next one.
    chain = prompt | ChatGroq(model=model) | StrOutputParser()

    def ask(question):
        """Send one question through the chain and return the text answer."""
        return chain.invoke({"question": question})

    # Reset the index so positional and label access agree even when the
    # caller passes a filtered or re-indexed frame.
    df = description.fillna(0).reset_index(drop=True)

    # f-strings coerce non-text cells (e.g. the 0 written by fillna) to str
    # instead of raising TypeError on `int + str`.
    report_types = [ask(f"{text}{_REPORT_TYPE_QUESTION}") for text in df["Date + Desc"]]

    df2 = df.copy()
    df2["Report Type"] = report_types

    # Remove the general (non-incident) reports, then renumber the rows.
    general_rows = [row for row, label in enumerate(report_types) if "General" in label]
    df2 = df2.drop(index=general_rows).reset_index(drop=True)

    # One comma-separated answer per remaining article.
    details = [
        ask(f"{body}" + _DETAIL_QUESTION.format(publish_date=published))
        for body, published in zip(df2["Description"], df2["Publish Date"])
    ]

    vehicles = [ask(f"{text}{_VEHICLE_QUESTION}") for text in df2["Date + Desc"]]

    # Spread each answer over the detail columns.
    parsed = [_split_answer(answer, len(_DETAIL_COLUMNS)) for answer in details]
    for position, column in enumerate(_DETAIL_COLUMNS):
        df2[column] = [fields[position] for fields in parsed]
    df2["Vehicles_involved"] = vehicles

    return df2.drop(columns=["Description", "Date + Desc", "Report Type"])
|
128 |
-
|
129 |
-
|
130 |
-
|
|
|
1 |
+
# Instruction appended to every article to label it 'Specific' or 'General'.
_REPORT_TYPE_QUESTION = (
    " Is the news referring to one or many specific accident incidents or accident in general?"
    " Make sure that your answer is only in one word."
    " If a report contains more than one accident incident, classify it as a general accident incident."
    " The word should be either 'Specific' or 'General'."
    " Your answer should not contain any words except 'Specific' and 'General' "
)

# Instruction appended to the article body to extract the comma-separated details.
# `{publish_date}` is filled in per row with the article's publish date.
_DETAIL_QUESTION = "\n".join(
    [
        "Provide only the answers of the following question seperated by a comma only:",
        "If the news was published on {publish_date}, what is the date of accident occurrence?"
        " The date must be in Day-Month-Year format."
        " Be careful because publish date and accident occurrence date may or may not be the same."
        " Try to deduce correct accident date,",
        "Time of Accident occured, How many people were killed in the accident in numeric number?,",
        "How many people were injured in the accident in numeric number?,",
        "Location of the accident,",
        "Type of road where accident occured,",
        "Was there any pedestrian involved?,",
        "Do not include any other sentences except the answers seperated by comma only"
        " and do not include sentences such as: Here are the answers,",
        "if you cannot find or deduce a answer simply put 'Not Available' in place of it.",
        "If a report mentions more than one specific accident incidents"
        " only consider the 1st accident incident and ignore the second one",
    ]
)

# Instruction appended to every article to extract the vehicles involved.
_VEHICLE_QUESTION = (
    " Only name the type of vehicles involved in the accident."
    " If multiple vehicles are involved, seperate them by hyphens(-)."
    " Example answers: Bus, Truck-Bus etc."
    " If no vehicles are mentioned, your answer will be: Not Available."
    " Your answer should only contain the vehicle name, do not include any extra sentences"
)

# Output columns, in the same order the detail prompt asks its questions.
_DETAIL_COLUMNS = (
    "Accident Date",
    "Time",
    "Killed",
    "Injured",
    "Location",
    "Road_Characteristic",
    "Pedestrian_Involved",
)

# Placeholder used when the model returned fewer fields than requested.
_MISSING_FIELD = "Not Available"


def _split_answer(answer, width):
    """Split a comma-separated LLM answer into exactly ``width`` fields.

    Extra fields are discarded and missing ones are padded with
    ``'Not Available'`` so a short answer cannot raise ``IndexError``.
    """
    fields = answer.split(",")[:width]
    fields.extend([_MISSING_FIELD] * (width - len(fields)))
    return fields


def create_data(description, model="llama3-70b-8192"):
    """Turn scraped accident news into a structured table using a Groq LLM.

    Each row is first classified as a 'Specific' or 'General' accident report.
    Rows whose answer contains 'General' are dropped. For the remaining rows
    the LLM is asked for the accident date, time, killed/injured counts,
    location, road type, pedestrian involvement and the vehicles involved.

    Args:
        description: DataFrame with the columns 'Date + Desc', 'Description'
            and 'Publish Date'. It is not modified; missing values are
            replaced by 0 in the working copy.
        model: Name of the Groq chat model to query.

    Returns:
        A new DataFrame (index 0..n-1) holding the specific-accident rows,
        with the columns 'Accident Date', 'Time', 'Killed', 'Injured',
        'Location', 'Road_Characteristic', 'Pedestrian_Involved' and
        'Vehicles_involved' added, and 'Description', 'Date + Desc' and
        'Report Type' removed.

    Raises:
        RuntimeError: if neither GROQ_API nor GROQ_API_KEY is set in the
            environment or in a .env file.
    """
    import os

    from dotenv import load_dotenv
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_groq import ChatGroq

    load_dotenv()

    # The key is stored under GROQ_API; it is copied to GROQ_API_KEY for the
    # Groq client. os.environ only accepts strings, so a missing key must be
    # caught here rather than assigned as None (which raises an opaque
    # TypeError).
    api_key = os.getenv("GROQ_API") or os.getenv("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Groq API key not found: set GROQ_API (or GROQ_API_KEY) "
            "in the environment or in a .env file"
        )
    os.environ["GROQ_API_KEY"] = api_key

    # A list (not a set) keeps the system message ahead of the user message;
    # set iteration order is arbitrary.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )

    # Chain: the output of each step before `|` is the input of the next one.
    chain = prompt | ChatGroq(model=model) | StrOutputParser()

    def ask(question):
        """Send one question through the chain and return the text answer."""
        return chain.invoke({"question": question})

    # Reset the index so positional and label access agree even when the
    # caller passes a filtered or re-indexed frame.
    df = description.fillna(0).reset_index(drop=True)

    # f-strings coerce non-text cells (e.g. the 0 written by fillna) to str
    # instead of raising TypeError on `int + str`.
    report_types = [ask(f"{text}{_REPORT_TYPE_QUESTION}") for text in df["Date + Desc"]]

    df2 = df.copy()
    df2["Report Type"] = report_types

    # Remove the general (non-incident) reports, then renumber the rows.
    general_rows = [row for row, label in enumerate(report_types) if "General" in label]
    df2 = df2.drop(index=general_rows).reset_index(drop=True)

    # One comma-separated answer per remaining article.
    details = [
        ask(f"{body}" + _DETAIL_QUESTION.format(publish_date=published))
        for body, published in zip(df2["Description"], df2["Publish Date"])
    ]

    vehicles = [ask(f"{text}{_VEHICLE_QUESTION}") for text in df2["Date + Desc"]]

    # Spread each answer over the detail columns.
    parsed = [_split_answer(answer, len(_DETAIL_COLUMNS)) for answer in details]
    for position, column in enumerate(_DETAIL_COLUMNS):
        df2[column] = [fields[position] for fields in parsed]
    df2["Vehicles_involved"] = vehicles

    return df2.drop(columns=["Description", "Date + Desc", "Report Type"])
|
128 |
+
|
129 |
+
|
130 |
+
|