import streamlit as st
import os
import pandas as pd
# from langchain.chat_models import AzureChatOpenAI
from langchain_openai import AzureChatOpenAI
from langchain_core.output_parsers import StrOutputParser, PydanticOutputParser
from langchain_core.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from pydantic import BaseModel, Field, validator
from langchain.output_parsers.enum import EnumOutputParser
from langchain_core.prompts import PromptTemplate
from enum import Enum
#os.environ["LANGCHAIN_TRACING_V2"]="true"
#os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
#LANGCHAIN_API_KEY = st.secrets['LANGCHAIN_API_KEY']
#os.environ["LANGCHAIN_PROJECT"]="UC2e2e"
# LLM Langchain Definition
OPENAI_API_KEY = st.secrets['OPENAI_API_KEY']
OPENAI_API_TYPE = "azure"
OPENAI_API_BASE = "https://davidfearn-gpt4.openai.azure.com"
OPENAI_API_VERSION = "2024-08-01-preview"
OPENAI_MODEL = "gpt-4o-mini"
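
# The functions below each construct an identical AzureChatOpenAI client. A small
# factory like this hedged sketch could remove that duplication; the original
# functions are left with their inline construction.
def make_llm():
    return AzureChatOpenAI(
        openai_api_version=OPENAI_API_VERSION,
        openai_api_key=OPENAI_API_KEY,
        azure_endpoint=OPENAI_API_BASE,
        openai_api_type=OPENAI_API_TYPE,
        deployment_name=OPENAI_MODEL,
        temperature=0,
    )
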
# Function to read file contents
def read_file(file):
    """
    Reads the content of a Markdown file from the 'assets' directory and returns it as a string.

    :param file: The file name (without extension) to read from the 'assets' directory.
    :return: The content of the file as a string, or None if an error occurs.
    """
    fp = f"assets/{file}.md"
    try:
        with open(fp, 'r', encoding='utf-8') as f:  # avoid shadowing the 'file' parameter
            return f.read()
    except FileNotFoundError:
        print(f"The file at {fp} was not found.")
    except IOError:
        print(f"An error occurred while reading the file at {fp}.")
    return None

# Function to generate structured insights
def process_insight(chunk, topic, source):
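    """
    Generates up to five MECE insights for a single text chunk, classifying each
    insight via selectClass. Stops early once the model reports completion and at
    least three insights have been collected.

    :param chunk: The text chunk to mine for insights.
    :param topic: The topic the insights should relate to.
    :param source: "intl" to use the internal prompt templates, anything else for the external ones.
    :return: A DataFrame with one row per insight (classification, insight, chunk).
    """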
    GSKGlossary = read_file("GSKGlossary")
    if source == "intl":
        SystemMessage = read_file("intl_insight_system_message")
        UserMessage = read_file("intl_insight_user_message")
    else:
        SystemMessage = read_file("ext_insight_system_message")
        UserMessage = read_file("ext_insight_user_message")

    class Insights(BaseModel):
        completed: bool = Field(description="This field is used to indicate that you think the number of insights has been completed")
        insight: str = Field(description="This field is used to return the MECE insight in string format")

    llm = AzureChatOpenAI(
        openai_api_version=OPENAI_API_VERSION,
        openai_api_key=OPENAI_API_KEY,
        azure_endpoint=OPENAI_API_BASE,
        openai_api_type=OPENAI_API_TYPE,
        deployment_name=OPENAI_MODEL,
        temperature=0,
    )
    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
    structured_llm = llm.with_structured_output(Insights)
    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
    chain = prompt | structured_llm

    new_insights = []
    insights_data = []
    while True:
        # Invoke the LLM with the current chunk and the insights gathered so far
        counter = 5 - len(new_insights)
        new_insight_response = chain.invoke({"chunk": chunk, "existing_insights": new_insights, "counter": counter, "GSKGlossary": GSKGlossary, "topic": topic})
        classification = selectClass(new_insight_response.insight)
        # Append the new insight to the list
        new_insights.append(new_insight_response.insight)
        insights_data.append({
            # "completed": new_insight_response.completed,
            "classification": classification,
            "insight": new_insight_response.insight,
            "chunk": chunk
        })
        # Stop once the model reports completion and at least 3 insights have been collected
        if new_insight_response.completed and len(new_insights) >= 3:
            return pd.DataFrame(insights_data)
        # Hard stop once the list of insights reaches 5
        if len(new_insights) == 5:
            return pd.DataFrame(insights_data)

def selectClass(insight):
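    """
    Classifies a single insight as "impact", "consultation", or "awareness" using an EnumOutputParser.

    :param insight: The insight text to classify.
    :return: The classification value as a lowercase string.
    """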
    classification_system_message = read_file("classification_system_message")
    classification_user_message = read_file("classification_user_message")

    class InsightClassification(Enum):
        IMPACT = "impact"
        CONSULTATION = "consultation"
        AWARENESS = "awareness"

    llm = AzureChatOpenAI(
        openai_api_version=OPENAI_API_VERSION,
        openai_api_key=OPENAI_API_KEY,
        azure_endpoint=OPENAI_API_BASE,
        openai_api_type=OPENAI_API_TYPE,
        deployment_name=OPENAI_MODEL,
        temperature=0,
    )
    parser = EnumOutputParser(enum=InsightClassification)
    system_message_template = SystemMessagePromptTemplate.from_template(classification_system_message)
    prompt = ChatPromptTemplate.from_messages([system_message_template, classification_user_message]).partial(options=parser.get_format_instructions())
    chain = prompt | llm | parser
    result = chain.invoke({"insight": insight})
    return result.value

def process_chunks(df_chunks, topic, source):
    """
    Processes each chunk in the DataFrame's "ChunkText" column, invokes process_insight
    for each one, and combines the resulting DataFrames into a single DataFrame.

    :param df_chunks: The DataFrame containing the text chunks in a "ChunkText" column.
    :param topic: The topic the insights should relate to.
    :param source: "intl" to use the internal prompt templates, anything else for the external ones.
    :return: A combined DataFrame of insights from all chunks.
    """
    all_insights = []
    for chunk in df_chunks["ChunkText"]:
        insights_df = process_insight(chunk, topic, source)
        all_insights.append(insights_df)
    return pd.concat(all_insights, ignore_index=True)

def evaluation_llm(chunk, topic, source):
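    """
    Asks the LLM whether a single chunk relates to the given topic.

    :param chunk: The text chunk to evaluate.
    :param topic: The topic to evaluate against.
    :param source: "intl" to use the internal prompt templates, anything else for the external ones.
    :return: A structured Evaluate response with a boolean `decision` and a string `justification`.
    """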
    GSKGlossary = read_file("GSKGlossary")
    if source == "intl":
        SystemMessage = read_file("intl_eval_system_message")
        UserMessage = read_file("intl_eval_user_message")
    else:
        SystemMessage = read_file("ext_eval_system_message")
        UserMessage = read_file("ext_eval_user_message")

    class Evaluate(BaseModel):
        decision: bool = Field(description="True: The content of the document relates to the topic. False: The content of the document does not relate to the topic.")
        justification: str = Field(description="Please justify your decision in a logical and structured way.")

    llm = AzureChatOpenAI(
        openai_api_version=OPENAI_API_VERSION,
        openai_api_key=OPENAI_API_KEY,
        azure_endpoint=OPENAI_API_BASE,
        openai_api_type=OPENAI_API_TYPE,
        deployment_name=OPENAI_MODEL,
        temperature=0,
    )
    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
    structured_llm = llm.with_structured_output(Evaluate)
    # Create a chat prompt template combining system and human messages
    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
    chain = prompt | structured_llm
    return chain.invoke({
        "chunk": chunk,
        "topic": topic,
        "GSKGlossary": GSKGlossary
    })

def evaluation_process(df_chunks, topic, source):
    """
    Iterates over the chunks in the DataFrame, evaluates each one with evaluation_llm,
    and records the decision and justification for each chunk.

    :param df_chunks: DataFrame containing the text chunks in a "ChunkText" column.
    :param topic: The topic to evaluate each chunk against.
    :param source: "intl" to use the internal prompt templates, anything else for the external ones.
    :return: The updated DataFrame with Decision and Justification columns, the consensus value, and the per-value counts.
    """
    decisions = []
    justifications = []
    # Avoid re-inserting columns if they already exist
    if "Decision" in df_chunks.columns:
        df_chunks = df_chunks.drop(columns=["Decision", "Justification"])
    for _, chunk in df_chunks.iterrows():
        result = evaluation_llm(chunk['ChunkText'], topic, source)
        decisions.append("True" if result.decision else "False")  # Convert bool to string
        justifications.append(result.justification)
    # Add new columns to the DataFrame
    df_chunks.insert(0, "Decision", decisions)
    df_chunks.insert(1, "Justification", justifications)
    # Count all True/False values and take the most frequent value as the consensus
    consensus_count = df_chunks["Decision"].value_counts()
    consensus_value = consensus_count.idxmax()  # Most frequently occurring value
    return df_chunks, consensus_value, consensus_count

def process_compare(insight_df, sopChunk_df, topic):
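    """
    Compares every SOP chunk against every insight (a full cross join) and asks the
    LLM whether each pairing needs a review.

    :param insight_df: DataFrame of insights with an "insight" column.
    :param sopChunk_df: DataFrame of SOP chunks with a "ChunkText" column.
    :param topic: The topic both documents relate to.
    :return: A DataFrame with one row per (SOP chunk, insight) pair.
    """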
    GSKGlossary = read_file("GSKGlossary")
    SystemMessage = read_file("compare_system_message")
    UserMessage = read_file("compare_user_message")

    # Define the structured output model
    class Compare(BaseModel):
        review: bool = Field(description="This field is used to indicate whether a review is needed")
        justification: str = Field(description="This field is used to justify why a review is needed")

    # Initialize the LLM
    llm = AzureChatOpenAI(
        openai_api_version=OPENAI_API_VERSION,
        openai_api_key=OPENAI_API_KEY,
        azure_endpoint=OPENAI_API_BASE,
        openai_api_type=OPENAI_API_TYPE,
        deployment_name=OPENAI_MODEL,
        temperature=0,
    )
    # Create the structured output and prompt chain
    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
    structured_llm = llm.with_structured_output(Compare)
    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
    chain = prompt | structured_llm

    compare_data = []
    # Compare every SOP chunk against every insight
    for sopChunk_index, sopChunk_row in sopChunk_df.iterrows():
        sop_chunk_text = sopChunk_row["ChunkText"]  # Extract the ChunkText column
        for insight_index, insight_row in insight_df.iterrows():
            insight_text = insight_row["insight"]  # Extract the insight column
            # Invoke the LLM with the extracted data
            compare_response = chain.invoke({
                "sopChunk": sop_chunk_text,
                "insight": insight_text,
                "topic": topic,
                "GSKGlossary": GSKGlossary
            })
            # Append the response to compare_data
            compare_data.append({
                "ReviewNeeded": compare_response.review,
                "Justification": compare_response.justification,
                "SOP": sop_chunk_text,
                "Insight": insight_text
            })
    # Return the comparisons as a single DataFrame
    return pd.DataFrame(compare_data)

def risk_score_process(compare_df, topic):
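    """
    Assigns a risk level ("high", "medium", or "low") to each comparison row,
    together with a justification and mitigation advice.

    :param compare_df: DataFrame produced by process_compare.
    :param topic: The topic the comparison relates to.
    :return: A DataFrame with one risk assessment per comparison row.
    """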
    GSKGlossary = read_file("GSKGlossary")
    SystemMessage = read_file("risk_scoring_system_message")
    UserMessage = read_file("risk_scoring_user_message")

    # Define the Enum for predefined options
    class RiskClassification(str, Enum):
        HIGH = "high"
        MEDIUM = "medium"
        LOW = "low"

    # Define the Pydantic model for the structured output
    class Risk(BaseModel):
        risk_level: RiskClassification = Field(
            description="The selected classification option."
        )
        justification: str = Field(
            description="Justify the reason for choosing this risk classification."
        )
        advice: str = Field(
            description="Suggestions for changes that could be made to the standard operating procedure to mitigate the risk."
        )

    llm = AzureChatOpenAI(
        openai_api_version=OPENAI_API_VERSION,
        openai_api_key=OPENAI_API_KEY,
        azure_endpoint=OPENAI_API_BASE,
        openai_api_type=OPENAI_API_TYPE,
        deployment_name=OPENAI_MODEL,
        temperature=0,
    )
    system_message_template = SystemMessagePromptTemplate.from_template(SystemMessage)
    structured_llm = llm.with_structured_output(Risk)
    prompt = ChatPromptTemplate.from_messages([system_message_template, UserMessage])
    chain = prompt | structured_llm

    risk_data = []
    # Score each comparison row produced by process_compare
    for index, row in compare_df.iterrows():
        # Invoke the LLM with the extracted data
        risk_response = chain.invoke({
            "comparison": row['Justification'],
            "insight": row['Insight'],
            "SOPchunk": row['SOP'],
            "topic": topic
        })
        # Append the response to risk_data
        risk_data.append({
            "RiskLevel": risk_response.risk_level,
            "Justification": risk_response.justification,
            "advice": risk_response.advice,
            "comparison": row['Justification'],
            "insight": row['Insight'],
            "SOPchunk": row['SOP']
        })
    # Return the risk assessments as a single DataFrame
    return pd.DataFrame(risk_data)
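
# Minimal end-to-end usage sketch, assuming pre-chunked CSVs with a "ChunkText"
# column for both the source document and the SOP. The file names and topic below
# are illustrative placeholders, not part of the app.
if __name__ == "__main__":
    doc_chunks = pd.read_csv("doc_chunks.csv")  # hypothetical input file
    sop_chunks = pd.read_csv("sop_chunks.csv")  # hypothetical input file
    topic = "pharmacovigilance reporting"       # hypothetical topic
    evaluated, consensus, counts = evaluation_process(doc_chunks, topic, "intl")
    if consensus == "True":
        insights = process_chunks(doc_chunks, topic, "intl")
        comparisons = process_compare(insights, sop_chunks, topic)
        risks = risk_score_process(comparisons, topic)
        print(risks)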