ashischakraborty committed
Commit 657d3ba · verified · 1 parent: 722578b

Update azure_openai.py

Files changed (1):
  1. azure_openai.py +348 -348
azure_openai.py CHANGED
@@ -1,349 +1,349 @@
 import streamlit as st
 import os
 import pandas as pd
 # from langchain.chat_models import AzureChatOpenAI
 from langchain_openai import AzureChatOpenAI
 from langchain_core.output_parsers import StrOutputParser, PydanticOutputParser
 from langchain_core.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
 from pydantic import BaseModel, Field, validator
 from langchain.output_parsers.enum import EnumOutputParser
 from langchain_core.prompts import PromptTemplate
 from enum import Enum


-os.environ["LANGCHAIN_TRACING_V2"]="true"
-os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
-LANGCHAIN_API_KEY = st.secrets['LANGCHAIN_API_KEY']
-os.environ["LANGCHAIN_PROJECT"]="UC2e2e"
+#os.environ["LANGCHAIN_TRACING_V2"]="true"
+#os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
+#LANGCHAIN_API_KEY = st.secrets['LANGCHAIN_API_KEY']
+#os.environ["LANGCHAIN_PROJECT"]="UC2e2e"

 # LLM LangChain definition
 OPENAI_API_KEY = st.secrets['OPENAI_API_KEY']
 OPENAI_API_TYPE = "azure"
 OPENAI_API_BASE = "https://davidfearn-gpt4.openai.azure.com"
 OPENAI_API_VERSION = "2024-08-01-preview"
 OPENAI_MODEL = "gpt-4o-mini"


 # Function to read file contents
 def read_file(file):
     """
     Reads the content of a text file and returns it as a string.
     :param file: The file name to read from the 'assets' directory.
     :return: The content of the file as a string, or None if an error occurs.
     """
     fp = f"assets/{file}.md"
     try:
         with open(fp, 'r', encoding='utf-8') as f:
             return f.read()
     except FileNotFoundError:
         print(f"The file at {fp} was not found.")
     except IOError:
         print(f"An error occurred while reading the file at {fp}.")
     return None

 # Function to generate structured insights
 def process_insight(chunk, topic, source):
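     """
     Generates up to five MECE insights for a text chunk and classifies each one via selectClass.
     :param chunk: The text chunk to mine for insights.
     :param topic: The topic the insights should relate to.
     :param source: "intl" selects the internal prompt templates; any other value selects the external ones.
     :return: A DataFrame with classification, insight, and chunk columns.
     """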

     GSKGlossary = read_file("GSKGlossary")
     if source == "intl":
         SystemMessage = read_file("intl_insight_system_message")
         UserMessage = read_file("intl_insight_user_message")
     else:
         SystemMessage = read_file("ext_insight_system_message")
         UserMessage = read_file("ext_insight_user_message")


     class Insights(BaseModel):
         completed: bool = Field(description="This field is used to indicate that you think the number of insights has been completed")
         insight: str = Field(description="This field is used to return the MECE insight in string format")


     llm = AzureChatOpenAI(
         openai_api_version=OPENAI_API_VERSION,
         openai_api_key=OPENAI_API_KEY,
         azure_endpoint=OPENAI_API_BASE,
         openai_api_type=OPENAI_API_TYPE,
         deployment_name=OPENAI_MODEL,
         temperature=0,
     )

     system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
     structured_llm = llm.with_structured_output(Insights)
     prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])

     chain = prompt | structured_llm

     new_insights = []
     insights_data = []

     while True:
         # Invoke the LLM with the current chunk and the insights gathered so far
         counter = 5 - len(new_insights)
         new_insight_response = chain.invoke({"chunk": chunk, "existing_insights": new_insights, "counter": counter, "GSKGlossary": GSKGlossary, "topic": topic})
         classification = selectClass(new_insight_response.insight)
         # Append the new insight to the list
         new_insights.append(new_insight_response.insight)
         insights_data.append({
             # "completed": new_insight_response.completed,
             "classification": classification,
             "insight": new_insight_response.insight,
             "chunk": chunk
         })

         # Stop once the model signals completion and at least 3 insights exist
         if new_insight_response.completed and len(new_insights) >= 3:
             return pd.DataFrame(insights_data)

         # Hard stop once 5 insights have been collected
         if len(new_insights) == 5:
             return pd.DataFrame(insights_data)

 def selectClass(insight):
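     """
     Classifies a single insight as impact, consultation, or awareness via an enum-constrained LLM call.
     :param insight: The insight text to classify.
     :return: The classification value as a lowercase string.
     """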

     classification_system_message = read_file("classification_system_message")
     classification_user_message = read_file("classification_user_message")

     class InsightClassification(Enum):
         IMPACT = "impact"
         CONSULTATION = "consultation"
         AWARENESS = "awareness"

     llm = AzureChatOpenAI(
         openai_api_version=OPENAI_API_VERSION,
         openai_api_key=OPENAI_API_KEY,
         azure_endpoint=OPENAI_API_BASE,
         openai_api_type=OPENAI_API_TYPE,
         deployment_name=OPENAI_MODEL,
         temperature=0,
     )
     parser = EnumOutputParser(enum=InsightClassification)
     system_message_template = SystemMessagePromptTemplate.from_template(classification_system_message)

     # structured_llm = llm.with_structured_output(Insights)
     prompt = ChatPromptTemplate.from_messages([system_message_template, classification_user_message]).partial(options=parser.get_format_instructions())

     chain = prompt | llm | parser

     result = chain.invoke({"insight": insight})
     return result.value

 def process_chunks(chunk, topic, source):
     """
     Processes each text chunk in the DataFrame, invokes process_insight for it,
     and combines the resulting DataFrames into one.
     :param chunk: The DataFrame whose 'ChunkText' column holds the chunks to process.
     :param topic: The topic the insights should relate to.
     :param source: Selects the internal ("intl") or external prompt templates.
     :return: A combined DataFrame of insights from all chunks.
     """
     all_insights = []

     for chunk_text in chunk["ChunkText"]:
         insights_df = process_insight(chunk_text, topic, source)
         all_insights.append(insights_df)

     return pd.concat(all_insights, ignore_index=True)


 def evaluation_llm(chunk, topic, source):
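     """
     Asks the LLM whether a single chunk relates to the topic.
     :param chunk: The text chunk to evaluate.
     :param topic: The topic to evaluate the chunk against.
     :param source: Selects the internal ("intl") or external prompt templates.
     :return: An Evaluate object with decision and justification fields.
     """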

     GSKGlossary = read_file("GSKGlossary")
     if source == "intl":
         SystemMessage = read_file("intl_eval_system_message")
         UserMessage = read_file("intl_eval_user_message")
     else:
         SystemMessage = read_file("ext_eval_system_message")
         UserMessage = read_file("ext_eval_user_message")

     class Evaluate(BaseModel):
         decision: bool = Field(description="True: The content of the document relates to the topic. False: The content of the document does not relate to the topic.")
         justification: str = Field(description="Please justify your decision in a logical and structured way.")

     llm = AzureChatOpenAI(
         openai_api_version=OPENAI_API_VERSION,
         openai_api_key=OPENAI_API_KEY,
         azure_endpoint=OPENAI_API_BASE,
         openai_api_type=OPENAI_API_TYPE,
         deployment_name=OPENAI_MODEL,
         temperature=0,
     )


     system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
     structured_llm = llm.with_structured_output(Evaluate)

     # Create a chat prompt template combining system and human messages
     prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])

     chain = prompt | structured_llm

     return chain.invoke({
         "chunk": chunk,
         "topic": topic,
         "GSKGlossary": GSKGlossary
     })

 def evaluation_process(df_chunks, topic, source):
     """
     Iterates over chunks in the DataFrame and evaluates each one with evaluation_llm.

     :param df_chunks: DataFrame whose 'ChunkText' column holds the chunks to evaluate.
     :param topic: The topic to evaluate each chunk against.
     :param source: Selects the internal ("intl") or external prompt templates.
     :return: Updated DataFrame with Decision and Justification columns, the consensus value, and the per-value counts.
     """
     decisions = []
     justifications = []

     # Avoid re-inserting columns if they already exist
     if "Decision" in df_chunks.columns:
         df_chunks = df_chunks.drop(columns=["Decision", "Justification"])

     for _, chunk in df_chunks.iterrows():
         result = evaluation_llm(chunk['ChunkText'], topic, source)
         decisions.append("True" if result.decision else "False")  # Convert bool to string
         justifications.append(result.justification)

     # Add new columns to the DataFrame
     df_chunks.insert(0, "Decision", decisions)
     df_chunks.insert(1, "Justification", justifications)

     # Count all True/False values and take the most frequent as the consensus
     consensus_count = df_chunks["Decision"].value_counts()
     consensus_value = consensus_count.idxmax()  # Most frequently occurring value

     return df_chunks, consensus_value, consensus_count


 def process_compare(insight_df, sopChunk_df, topic):
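     """
     Compares every insight against every SOP chunk and asks the LLM whether a review is needed.
     :param insight_df: DataFrame whose 'insight' column holds the extracted insights.
     :param sopChunk_df: DataFrame whose 'ChunkText' column holds the SOP chunks.
     :param topic: The topic the comparison relates to.
     :return: A DataFrame with ReviewNeeded, Justification, SOP, and Insight columns.
     """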

     GSKGlossary = read_file("GSKGlossary")

     SystemMessage = read_file("compare_system_message")
     UserMessage = read_file("compare_user_message")

     # Define the structured output model
     class Compare(BaseModel):
         review: bool = Field(description="This field is used to indicate whether a review is needed")
         justification: str = Field(description="This field is used to justify why a review is needed")

     # Initialize the LLM
     llm = AzureChatOpenAI(
         openai_api_version=OPENAI_API_VERSION,
         openai_api_key=OPENAI_API_KEY,
         azure_endpoint=OPENAI_API_BASE,
         openai_api_type=OPENAI_API_TYPE,
         deployment_name=OPENAI_MODEL,
         temperature=0,
     )

     # Create the structured output and prompt chain
     system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
     structured_llm = llm.with_structured_output(Compare)
     prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
     chain = prompt | structured_llm

     compare_data = []

     # Iterate over sopChunk_df and insight_df to process "ChunkText" and "insight"
     for sopChunk_index, sopChunk_row in sopChunk_df.iterrows():
         sop_chunk_text = sopChunk_row["ChunkText"]  # Extract the ChunkText column
         for insight_index, insight_row in insight_df.iterrows():
             insight_text = insight_row["insight"]  # Extract the insight column

             # Invoke the LLM with the extracted data
             compare_response = chain.invoke({
                 "sopChunk": sop_chunk_text,
                 "insight": insight_text,
                 "topic": topic,
                 "GSKGlossary": GSKGlossary
             })

             # Append the response to compare_data
             compare_data.append({
                 "ReviewNeeded": compare_response.review,
                 "Justification": compare_response.justification,
                 "SOP": sop_chunk_text,
                 "Insight": insight_text
             })

     # Return the comparisons as a single DataFrame
     print(compare_data)
     return pd.DataFrame(compare_data)

 def risk_score_process(compare_df, topic):
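     """
     Assigns a high/medium/low risk level to each comparison row, with justification and mitigation advice.
     :param compare_df: DataFrame produced by process_compare.
     :param topic: The topic the risk assessment relates to.
     :return: A DataFrame with RiskLevel, Justification, and advice columns alongside the inputs.
     """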

     GSKGlossary = read_file("GSKGlossary")
     SystemMessage = read_file("risk_scoring_system_message")
     UserMessage = read_file("risk_scoring_user_message")

     # Define the Enum for predefined options
     class RiskClassification(str, Enum):
         HIGH = "high"
         MEDIUM = "medium"
         LOW = "low"

     # Define the Pydantic model for the structured output
     class Risk(BaseModel):
         risk_level: RiskClassification = Field(
             description="The selected classification option."
         )
         justification: str = Field(
             description="Justify the reason for choosing this risk classification."
         )
         advice: str = Field(
             description="Suggestions for changes that could be made to the standard operating procedure to mitigate the risk."
         )


     llm = AzureChatOpenAI(
         openai_api_version=OPENAI_API_VERSION,
         openai_api_key=OPENAI_API_KEY,
         azure_endpoint=OPENAI_API_BASE,
         openai_api_type=OPENAI_API_TYPE,
         deployment_name=OPENAI_MODEL,
         temperature=0,
     )

     system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
     structured_llm = llm.with_structured_output(Risk)
     prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])

     chain = prompt | structured_llm

     risk_data = []


     # Iterate over compare_df to score each comparison
     for index, row in compare_df.iterrows():

         # Invoke the LLM with the extracted data
         risk_response = chain.invoke({
             "comparison": row['Justification'],
             "insight": row['Insight'],
             "SOPchunk": row['SOP'],
             "topic": topic
         })

         # Append the response to risk_data
         risk_data.append({
             "RiskLevel": risk_response.risk_level,
             "Justification": risk_response.justification,
             "advice": risk_response.advice,
             "comparison": row['Justification'],
             "insight": row['Insight'],
             "SOPchunk": row['SOP']
         })

     # Return the risk scores as a single DataFrame
     return pd.DataFrame(risk_data)
 
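For reference, a minimal sketch of how these functions might be chained downstream. It assumes the assets/*.md prompt templates exist, Streamlit secrets provide OPENAI_API_KEY, and chunk DataFrames with a ChunkText column come from the wider app; the sample data and topic below are illustrative.

```python
# Hypothetical end-to-end wiring of the pipeline in azure_openai.py.
# Sample DataFrames and topic are illustrative; real chunks come from the app.
import pandas as pd

from azure_openai import (
    evaluation_process,
    process_chunks,
    process_compare,
    risk_score_process,
)

guidance_chunks = pd.DataFrame({"ChunkText": ["Example guidance text ..."]})
sop_chunks = pd.DataFrame({"ChunkText": ["Example SOP text ..."]})
topic = "example topic"

# 1. Screen guidance chunks for topic relevance and take the majority vote.
evaluated, consensus, counts = evaluation_process(guidance_chunks, topic, "intl")

if consensus == "True":
    # 2. Extract up to five classified MECE insights per chunk.
    insights = process_chunks(guidance_chunks, topic, "intl")
    # 3. Compare every insight against every SOP chunk.
    comparison = process_compare(insights, sop_chunks, topic)
    # 4. Score the risk level of each comparison.
    risks = risk_score_process(comparison, topic)
    print(risks[["RiskLevel", "Justification", "advice"]])
```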