Thamed-Chowdhury committed on
Commit
29035bf
·
verified ·
1 Parent(s): 501a143

Upload 2 files

Browse files
Files changed (2) hide show
  1. LLM_automation_GPT.py +126 -123
  2. LLM_automation_Groq.py +131 -129
LLM_automation_GPT.py CHANGED
@@ -1,123 +1,126 @@
1
def create_data(description):
    """Build a table of structured accident details from news rows via GPT-4o.

    Every row is first classified by the model; rows whose classification
    contains "general" (in any letter case) are discarded.  For each
    remaining row the model is asked for seven comma-separated answers and,
    in a separate call, for the vehicles involved.

    Args:
        description: pandas DataFrame with at least the columns
            'Description', 'Publish Date' and 'Date + Desc'.

    Returns:
        A new DataFrame (index reset to 0..n-1) holding the kept rows without
        'Description', 'Date + Desc' and 'Report Type', plus the columns
        'Accident Date', 'Time', 'Killed', 'Injured', 'Location',
        'Road_Characteristic', 'Pedestrian_Involved' and 'Vehicles Involved'.

    Raises:
        RuntimeError: if no OpenAI key is available in the environment.
    """
    print("Running THis Script")
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI
    from langchain_core.output_parsers import StrOutputParser

    import os
    from dotenv import load_dotenv
    import pandas as pd

    load_dotenv()

    # Copy the key from OPENAI_API only when it is really there: assigning
    # None into os.environ raises an opaque TypeError.
    if not os.environ.get("OPENAI_API_KEY"):
        api_key = os.getenv("OPENAI_API")
        if not api_key:
            raise RuntimeError(
                "Set OPENAI_API_KEY (or OPENAI_API) in the environment or .env file"
            )
        os.environ["OPENAI_API_KEY"] = api_key

    # A list (not a set) keeps the system message ahead of the user message.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    llm = ChatOpenAI(model="gpt-4o")
    output_parser = StrOutputParser()
    # prompt -> model -> plain string
    chain = prompt | llm | output_parser

    def ask(question):
        return chain.invoke({"question": question})

    def split_answer(answer, delimiter, n_fields=7):
        # Pad short replies and ignore surplus pieces so one malformed model
        # answer cannot abort the whole run with an IndexError.
        parts = [part.strip() for part in str(answer).split(delimiter)]
        parts += ["Not Available"] * (n_fields - len(parts))
        return parts[:n_fields]

    def build_question(text, publish_date):
        return (
            str(text)
            + "Provide only the answers of the following question seperated by a comma only and your answers MUST BE IN ENGLISH:\n"
            + f"If the news was published on {publish_date}, what is the date of accident occurrence? The date must be in Day-Month-Year format. Be careful because publish date and accident occurrence date may or may not be the same. Try to deduce correct accident date and do not include Saturday Sunday etc in your date. Only numerics are needed,\n"
            + "Time of Accident occured, How many people were killed in the accident in numeric number?,\n"
            + "How many people were injured in the accident in numeric number?,\n"
            + "Location of the accident,\n"
            + "Type of road where accident occured,\n"
            + "Was there any pedestrian involved?,\n"
            + "Do not include any other sentences except the answers seperated by comma only,\n"
            + "if you cannot find or deduce a answer simply put 'Not Available' in place of it.\n"
            + "If a report mentions more than one specific accident incidents only consider the 1st accident incident and ignore the second one"
        )

    # Positional index so the input frame may carry any index labels.
    df = description.fillna(0).reset_index(drop=True)

    classify_suffix = " Is the news about road accident? If no, then reply 'General'. Else if the news is about road accident then check if the news is referring to a specific accident incident or accident in general? Answer only in a word: Either specific or general."
    # str() because fillna(0) turns a missing description into the integer 0.
    report_types = [ask(str(text) + classify_suffix) for text in df["Description"]]

    df2 = df.copy()
    df2["Report Type"] = report_types
    # The prompt allows both 'General' and 'general', so compare case-insensitively.
    keep = pd.Series(
        ["general" not in str(kind).lower() for kind in report_types],
        index=df2.index,
        dtype=bool,
    )
    df2 = df2[keep].reset_index(drop=True)

    answers = [
        ask(build_question(text, publish_date))
        for text, publish_date in zip(df2["Description"], df2["Publish Date"])
    ]

    vehicle_suffix = " Only name the type of vehicles involved in the accident. If multiple vehicles are involved, seperate them by hyphens(-). Example answers: Bus, Truck-Bus etc. If no vehicles are mentioned, your answer will be: Not Available. Your answer should only contain the vehicle name, do not include any extra sentences"
    vehicles = [ask(str(text) + vehicle_suffix) for text in df2["Date + Desc"]]

    field_columns = [
        "Accident Date",
        "Time",
        "Killed",
        "Injured",
        "Location",
        "Road_Characteristic",
        "Pedestrian_Involved",
    ]
    rows = [split_answer(answer, ",") for answer in answers]
    for position, column in enumerate(field_columns):
        df2[column] = [row[position] for row in rows]
    df2["Vehicles Involved"] = vehicles

    return df2.drop(columns=["Description", "Date + Desc", "Report Type"])
 
 
 
 
1
def create_data(description):
    """Build a table of structured accident details from news rows via GPT-4o.

    Every row is first classified by the model; rows whose classification
    contains "general" (in any letter case) are discarded.  For each
    remaining row the model is asked for seven semicolon-separated answers
    and, in a separate call, for the vehicles involved.

    Args:
        description: pandas DataFrame with at least the columns
            'Description', 'Publish Date' and 'Date + Desc'.

    Returns:
        A new DataFrame (index reset to 0..n-1) holding the kept rows without
        'Description', 'Date + Desc' and 'Report Type', plus the columns
        'Accident Date', 'Time', 'Killed', 'Injured', 'Location',
        'Road_Characteristic', 'Pedestrian_Involved' and 'Vehicles Involved'.

    Raises:
        RuntimeError: if no OpenAI key is available in the environment.
    """
    print("Running THis Script")
    print("Length of description is: ", len(description))
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI
    from langchain_core.output_parsers import StrOutputParser

    import os
    from dotenv import load_dotenv
    import pandas as pd

    load_dotenv()

    # SECURITY: the API key must come from the environment / .env file and
    # never be written into the source; a key that has been committed to a
    # repository is public and has to be revoked and replaced.
    if not os.environ.get("OPENAI_API_KEY"):
        api_key = os.getenv("OPENAI_API")
        if not api_key:
            raise RuntimeError(
                "Set OPENAI_API_KEY (or OPENAI_API) in the environment or .env file"
            )
        os.environ["OPENAI_API_KEY"] = api_key

    # A list (not a set) keeps the system message ahead of the user message.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    llm = ChatOpenAI(model="gpt-4o")
    output_parser = StrOutputParser()
    # prompt -> model -> plain string
    chain = prompt | llm | output_parser

    def ask(question):
        return chain.invoke({"question": question})

    def split_answer(answer, delimiter, n_fields=7):
        # Pad short replies and ignore surplus pieces so one malformed model
        # answer cannot abort the whole run with an IndexError.
        parts = [part.strip() for part in str(answer).split(delimiter)]
        parts += ["Not Available"] * (n_fields - len(parts))
        return parts[:n_fields]

    # Positional index so the input frame may carry any index labels.
    df = description.fillna(0).reset_index(drop=True)

    classify_suffix = " Is the news about road accident? If no, then reply 'General'. Else if the news is about road accident then check if the news is referring to a specific accident incident or accident in general? Answer only in a word: Either specific or general."
    # str() because fillna(0) turns a missing description into the integer 0.
    report_types = [ask(str(text) + classify_suffix) for text in df["Description"]]

    df2 = df.copy()
    df2["Report Type"] = report_types
    # The prompt allows both 'General' and 'general', so compare case-insensitively.
    keep = pd.Series(
        ["general" not in str(kind).lower() for kind in report_types],
        index=df2.index,
        dtype=bool,
    )
    df2 = df2[keep].reset_index(drop=True)

    field_prompt = (
        "I will give you two strings. 1st string will contain a publish date of a news and the 2nd string will contain the accident news itself.\n"
        "If the 2nd string contains more than one accident incidents, only consider the 1st incident. Based on these two strings, you have to answer the following questions. Remember your answer must contain ONLY THE ANSWERS WITHOUT ANY EXTRA WORDS OR SENTENCES:\n"
        "what is the date (Day-Month-Year numerical format) of accident occurrence? ;\n"
        "Time of Accident occured; How many people were killed in the accident?;\n"
        "How many people were injured in the accident?;\n"
        "Location of the accident;\n"
        "Type of road where accident occured;\n"
        "Was there any pedestrian involved?;\n"
        "Do not include any extra words or sentences except the answers seperated by semicolons only. Your reply cannot contain sentences such as - 'Here are the answers to the questions'\n"
    )
    answers = [
        ask(field_prompt + f"string 1 = {publish_date}\nstring 2 = {text}")
        for publish_date, text in zip(df2["Publish Date"], df2["Description"])
    ]

    vehicle_suffix = " Only name the type of vehicles involved in the accident. If multiple vehicles are involved, seperate them by hyphens(-). Example answers: Bus, Truck-Bus etc. If no vehicles are mentioned, your answer will be: Not Available. Your answer should only contain the vehicle name, do not include any extra sentences"
    vehicles = [ask(str(text) + vehicle_suffix) for text in df2["Date + Desc"]]

    field_columns = [
        "Accident Date",
        "Time",
        "Killed",
        "Injured",
        "Location",
        "Road_Characteristic",
        "Pedestrian_Involved",
    ]
    rows = [split_answer(answer, ";") for answer in answers]
    for position, column in enumerate(field_columns):
        df2[column] = [row[position] for row in rows]
    df2["Vehicles Involved"] = vehicles

    return df2.drop(columns=["Description", "Date + Desc", "Report Type"])
126
+
LLM_automation_Groq.py CHANGED
@@ -1,129 +1,131 @@
1
def create_data(description):
    """Build a table of structured accident details from news rows via Groq.

    Uses the Groq-hosted "llama3-70b-8192" chat model.  Every row is first
    classified by the model; rows whose classification contains "general"
    (in any letter case) are discarded.  For each remaining row the model is
    asked for seven comma-separated answers and, in a separate call, for the
    vehicles involved.

    Args:
        description: pandas DataFrame with at least the columns
            'Description', 'Publish Date' and 'Date + Desc'.

    Returns:
        A new DataFrame (index reset to 0..n-1) holding the kept rows without
        'Description', 'Date + Desc' and 'Report Type', plus the columns
        'Accident Date', 'Time', 'Killed', 'Injured', 'Location',
        'Road_Characteristic', 'Pedestrian_Involved' and 'Vehicles_involved'.

    Raises:
        RuntimeError: if no Groq key is available in the environment.
    """
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.output_parsers import StrOutputParser
    from langchain_groq import ChatGroq

    import os
    from dotenv import load_dotenv
    import pandas as pd

    load_dotenv()

    # Copy the key from GROQ_API only when it is really there: assigning None
    # into os.environ raises an opaque TypeError.
    if not os.environ.get("GROQ_API_KEY"):
        api_key = os.getenv("GROQ_API")
        if not api_key:
            raise RuntimeError(
                "Set GROQ_API_KEY (or GROQ_API) in the environment or .env file"
            )
        os.environ["GROQ_API_KEY"] = api_key

    # A list (not a set) keeps the system message ahead of the user message.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    # The model is served through the Groq API; no local Ollama install is used.
    llm = ChatGroq(model="llama3-70b-8192")
    output_parser = StrOutputParser()
    # prompt -> model -> plain string
    chain = prompt | llm | output_parser

    def ask(question):
        return chain.invoke({"question": question})

    def split_answer(answer, delimiter, n_fields=7):
        # Pad short replies and ignore surplus pieces so one malformed model
        # answer cannot abort the whole run with an IndexError.
        parts = [part.strip() for part in str(answer).split(delimiter)]
        parts += ["Not Available"] * (n_fields - len(parts))
        return parts[:n_fields]

    def build_question(text, publish_date):
        return (
            str(text)
            + "Provide only the answers of the following question seperated by a comma only and your answers MUST BE IN ENGLISH:\n"
            + f"If the news was published on {publish_date}, what is the date of accident occurrence? The date must be in Day-Month-Year format. Be careful because publish date and accident occurrence date may or may not be the same. Try to deduce correct accident date,\n"
            + "Time of Accident occured, How many people were killed in the accident in numeric number?,\n"
            + "How many people were injured in the accident in numeric number?,\n"
            + "Location of the accident,\n"
            + "Type of road where accident occured,\n"
            + "Was there any pedestrian involved?,\n"
            + "Do not include any other sentences except the answers seperated by comma only and do not include sentences such as: Here are the answers,\n"
            + "if you cannot find or deduce a answer simply put 'Not Available' in place of it.\n"
            + "If a report mentions more than one specific accident incidents only consider the 1st accident incident and ignore the second one"
        )

    # Positional index so the input frame may carry any index labels.
    df = description.fillna(0).reset_index(drop=True)

    classify_suffix = " Is the news about road accident? If no, then reply 'General'. Else if the news is about road accident then check if the news is referring to a specific accident incident or accident in general? Answer only in a word: Either specific or general."
    # str() because fillna(0) turns a missing description into the integer 0.
    report_types = [ask(str(text) + classify_suffix) for text in df["Description"]]

    df2 = df.copy()
    df2["Report Type"] = report_types
    # The prompt allows both 'General' and 'general', so compare case-insensitively.
    keep = pd.Series(
        ["general" not in str(kind).lower() for kind in report_types],
        index=df2.index,
        dtype=bool,
    )
    df2 = df2[keep].reset_index(drop=True)

    answers = [
        ask(build_question(text, publish_date))
        for text, publish_date in zip(df2["Description"], df2["Publish Date"])
    ]

    vehicle_suffix = " Only name the type of vehicles involved in the accident. If multiple vehicles are involved, seperate them by hyphens(-). Example answers: Bus, Truck-Bus etc. If no vehicles are mentioned, your answer will be: Not Available. Your answer should only contain the vehicle name, do not include any extra sentences"
    vehicles = [ask(str(text) + vehicle_suffix) for text in df2["Date + Desc"]]

    field_columns = [
        "Accident Date",
        "Time",
        "Killed",
        "Injured",
        "Location",
        "Road_Characteristic",
        "Pedestrian_Involved",
    ]
    rows = [split_answer(answer, ",") for answer in answers]
    for position, column in enumerate(field_columns):
        df2[column] = [row[position] for row in rows]
    df2["Vehicles_involved"] = vehicles

    return df2.drop(columns=["Description", "Date + Desc", "Report Type"])
127
-
128
-
129
-
 
 
 
1
def create_data(description):
    """Build a table of structured accident details from news rows via Groq.

    Uses the Groq-hosted "llama3-70b-8192" chat model.  Every row is first
    classified by the model; rows whose classification contains "general"
    (in any letter case) are discarded.  For each remaining row the model is
    asked for seven semicolon-separated answers and, in a separate call, for
    the vehicles involved.

    Args:
        description: pandas DataFrame with at least the columns
            'Description', 'Publish Date' and 'Date + Desc'.

    Returns:
        A new DataFrame (index reset to 0..n-1) holding the kept rows without
        'Description', 'Date + Desc' and 'Report Type', plus the columns
        'Accident Date', 'Time', 'Killed', 'Injured', 'Location',
        'Road_Characteristic', 'Pedestrian_Involved' and 'Vehicles_involved'.

    Raises:
        RuntimeError: if no Groq key is available in the environment.
    """
    print("Running LLM Automation Groq")
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.output_parsers import StrOutputParser
    from langchain_groq import ChatGroq

    import os
    from dotenv import load_dotenv
    import pandas as pd

    load_dotenv()

    # SECURITY: the API key must come from the environment / .env file and
    # never be written into the source; a key that has been committed to a
    # repository is public and has to be revoked and replaced.
    if not os.environ.get("GROQ_API_KEY"):
        api_key = os.getenv("GROQ_API")
        if not api_key:
            raise RuntimeError(
                "Set GROQ_API_KEY (or GROQ_API) in the environment or .env file"
            )
        os.environ["GROQ_API_KEY"] = api_key

    # A list (not a set) keeps the system message ahead of the user message.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant, please respond to the queries"),
            ("user", "question: {question}"),
        ]
    )
    # The model is served through the Groq API; no local Ollama install is used.
    llm = ChatGroq(model="llama3-70b-8192")
    output_parser = StrOutputParser()
    # prompt -> model -> plain string
    chain = prompt | llm | output_parser

    def ask(question):
        return chain.invoke({"question": question})

    def split_answer(answer, delimiter, n_fields=7):
        # Pad short replies and ignore surplus pieces so one malformed model
        # answer cannot abort the whole run with an IndexError.
        parts = [part.strip() for part in str(answer).split(delimiter)]
        parts += ["Not Available"] * (n_fields - len(parts))
        return parts[:n_fields]

    # Positional index so the input frame may carry any index labels.
    df = description.fillna(0).reset_index(drop=True)

    classify_suffix = " Is the news about road accident? If no, then reply 'General'. Else if the news is about road accident then check if the news is referring to a specific accident incident or accident in general? Answer only in a word: Either specific or general."
    # str() because fillna(0) turns a missing description into the integer 0.
    report_types = [ask(str(text) + classify_suffix) for text in df["Description"]]

    df2 = df.copy()
    df2["Report Type"] = report_types
    # The prompt allows both 'General' and 'general', so compare case-insensitively.
    keep = pd.Series(
        ["general" not in str(kind).lower() for kind in report_types],
        index=df2.index,
        dtype=bool,
    )
    df2 = df2[keep].reset_index(drop=True)

    field_prompt = (
        "I will give you two strings. 1st string will contain a publish date of a news and the 2nd string will contain the accident news itself.\n"
        "If the 2nd string contains more than one accident incidents, only consider the 1st incident. Based on these two strings, you have to answer the following questions. Remember your answer must contain ONLY THE ANSWERS WITHOUT ANY EXTRA WORDS OR SENTENCES:\n"
        "what is the date (Day-Month-Year numerical format) of accident occurrence? ;\n"
        "Time of Accident occured; How many people were killed in the accident?;\n"
        "How many people were injured in the accident?;\n"
        "Location of the accident;\n"
        "Type of road where accident occured;\n"
        "Was there any pedestrian involved?;\n"
        "Do not include any extra words or sentences except the answers seperated by semicolons only. Your reply cannot contain sentences such as - 'Here are the answers to the questions'\n"
    )
    answers = [
        ask(field_prompt + f"string 1 = {publish_date}\nstring 2 = {text}")
        for publish_date, text in zip(df2["Publish Date"], df2["Description"])
    ]

    vehicle_suffix = " Only name the type of vehicles involved in the accident. If multiple vehicles are involved, seperate them by hyphens(-). Example answers: Bus, Truck-Bus etc. If no vehicles are mentioned, your answer will be: Not Available. Your answer should only contain the vehicle name, do not include any extra sentences"
    vehicles = [ask(str(text) + vehicle_suffix) for text in df2["Date + Desc"]]

    field_columns = [
        "Accident Date",
        "Time",
        "Killed",
        "Injured",
        "Location",
        "Road_Characteristic",
        "Pedestrian_Involved",
    ]
    rows = [split_answer(answer, ";") for answer in answers]
    for position, column in enumerate(field_columns):
        df2[column] = [row[position] for row in rows]
    df2["Vehicles_involved"] = vehicles

    return df2.drop(columns=["Description", "Date + Desc", "Report Type"])
129
+
130
+
131
+