seanpedrickcase committed
Commit b7f4700 · 1 Parent(s): c978ec5

Added support for using local models (specifically Gemma 2b) for topic extraction and summarisation. Generally improved output format safeguards.

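A minimal sketch (not part of the commit itself) of how the new local-model path is switched on, assuming the defaults introduced in the diffs below; RUN_LOCAL_MODEL, REPO_ID, MODEL_FILE and the "gemma_2b_it_local" model name all come from this commit:

import os

# Hypothetical setup snippet: values mirror the defaults added in this commit.
os.environ["RUN_LOCAL_MODEL"] = "1"                              # read in tools/helper_functions.py (default "0")
os.environ["REPO_ID"] = "lmstudio-community/gemma-2-2b-it-GGUF"  # optional override, read in app.py's get_model_path()
os.environ["MODEL_FILE"] = "gemma-2-2b-it-Q8_0.gguf"             # optional override, read in app.py's get_model_path()

# With RUN_LOCAL_MODEL="1", app.py downloads/loads the GGUF at startup and
# "gemma_2b_it_local" appears in the model choice list alongside the Gemini/Claude options.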
Dockerfile CHANGED
@@ -10,9 +10,12 @@ WORKDIR /src

COPY requirements.txt .

- RUN pip install --no-cache-dir --target=/install -r requirements.txt
+ RUN pip uninstall -y typing_extensions \
+ && pip install --no-cache-dir --target=/install typing_extensions==4.12.2 \
+ && pip install torch==2.5.1+cpu --target=/install --index-url https://download.pytorch.org/whl/cpu \
+ && pip install --no-cache-dir --target=/install -r requirements_cpu.txt

- RUN rm requirements.txt
+ RUN rm requirements_cpu.txt

# Stage 2: Final runtime image
FROM public.ecr.aws/docker/library/python:3.11.9-slim-bookworm
README.md CHANGED
@@ -1,9 +1,9 @@
---
- title: Large language model topic modeller
+ title: Large language model topic modelling
emoji: 📝
colorFrom: purple
colorTo: yellow
- sdk: gradio
+ sdk: 5.8.0
app_file: app.py
pinned: true
license: cc-by-nc-4.0
app.py CHANGED
@@ -1,15 +1,20 @@
import os
import socket
- from tools.helper_functions import ensure_output_folder_exists, add_folder_to_path, put_columns_in_df, get_connection_params, output_folder, get_or_create_env_var, reveal_feedback_buttons, wipe_logs, model_full_names, view_table, empty_output_vars_extract_topics, empty_output_vars_summarise
+ from tools.helper_functions import ensure_output_folder_exists, add_folder_to_path, put_columns_in_df, get_connection_params, output_folder, get_or_create_env_var, reveal_feedback_buttons, wipe_logs, model_full_names, view_table, empty_output_vars_extract_topics, empty_output_vars_summarise, RUN_LOCAL_MODEL
from tools.aws_functions import upload_file_to_s3
- from tools.llm_api_call import llm_query, load_in_data_file, load_in_previous_data_files, sample_reference_table_summaries, summarise_output_topics
+ from tools.llm_api_call import extract_topics, load_in_data_file, load_in_previous_data_files, sample_reference_table_summaries, summarise_output_topics, batch_size_default
from tools.auth import authenticate_user
from tools.prompts import initial_table_prompt, prompt2, prompt3, system_prompt, add_existing_topics_system_prompt, add_existing_topics_prompt
#from tools.aws_functions import load_data_from_aws
import gradio as gr
import pandas as pd
-
+ import tools.chatfuncs as chatf
+ from tools.chatfuncs import llama_cpp_init_config_gpu, llama_cpp_init_config_cpu
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download
+ from torch import cuda, backends
from datetime import datetime
+
today_rev = datetime.now().strftime("%Y%m%d")

ensure_output_folder_exists()
@@ -20,7 +25,87 @@ access_logs_data_folder = 'logs/' + today_rev + '/' + host_name + '/'
feedback_data_folder = 'feedback/' + today_rev + '/' + host_name + '/'
usage_data_folder = 'usage/' + today_rev + '/' + host_name + '/'

- batch_size_default = 20
+ ###
+ # Load local model
+ ###
+
+ # Check for torch cuda
+ print("Is CUDA enabled? ", cuda.is_available())
+ print("Is a CUDA device available on this computer?", backends.cudnn.enabled)
+ if cuda.is_available():
+     torch_device = "cuda"
+     os.system("nvidia-smi")
+ else:
+     torch_device = "cpu"
+
+ print("Device used is: ", torch_device)
+
+ def load_model(local_model_type:str, gpu_layers:int, max_context_length:int, gpu_config:llama_cpp_init_config_gpu=chatf.gpu_config, cpu_config:llama_cpp_init_config_cpu=chatf.cpu_config, torch_device:str=chatf.torch_device):
+     '''
+     Load in a model from Hugging Face hub via the transformers package, or using llama_cpp_python by downloading a GGUF file from Huggingface Hub.
+     '''
+     print("Loading model ", local_model_type)
+
+     if local_model_type == "Gemma 2b":
+         if torch_device == "cuda":
+             gpu_config.update_gpu(gpu_layers)
+             gpu_config.update_context(max_context_length)
+             print("Loading with", gpu_config.n_gpu_layers, "model layers sent to GPU. And a maximum context length of ", gpu_config.n_ctx)
+         else:
+             gpu_config.update_gpu(gpu_layers)
+             cpu_config.update_gpu(gpu_layers)
+
+             # Update context length according to slider
+             gpu_config.update_context(max_context_length)
+             cpu_config.update_context(max_context_length)
+
+             print("Loading with", cpu_config.n_gpu_layers, "model layers sent to GPU. And a maximum context length of ", gpu_config.n_ctx)
+
+         #print(vars(gpu_config))
+         #print(vars(cpu_config))
+
+         def get_model_path():
+             repo_id = os.environ.get("REPO_ID", "lmstudio-community/gemma-2-2b-it-GGUF") # "bartowski/Llama-3.2-3B-Instruct-GGUF") # "QuantFactory/Phi-3-mini-128k-instruct-GGUF")
+             filename = os.environ.get("MODEL_FILE", "gemma-2-2b-it-Q8_0.gguf") # "Llama-3.2-3B-Instruct-Q5_K_M.gguf") # "Phi-3-mini-128k-instruct.Q4_K_M.gguf")
+             model_dir = "model/gemma" # "model/phi" # Assuming this is your intended directory
+
+             # Construct the expected local path
+             local_path = os.path.join(model_dir, filename)
+
+             if os.path.exists(local_path):
+                 print(f"Model already exists at: {local_path}")
+                 return local_path
+             else:
+                 print("Checking default Hugging Face folder. Downloading model from Hugging Face Hub if not found")
+                 return hf_hub_download(repo_id=repo_id, filename=filename)
+
+         model_path = get_model_path()
+
+         try:
+             print(vars(gpu_config))
+             llama_model = Llama(model_path=model_path, **vars(gpu_config)) # type_k=8, type_v=8, flash_attn=True,
+
+         except Exception as e:
+             print("GPU load failed")
+             print(e)
+             llama_model = Llama(model_path=model_path, type_k=8, **vars(cpu_config)) # type_v=8, flash_attn=True,
+
+         tokenizer = []
+
+     chatf.model = llama_model
+     chatf.tokenizer = tokenizer
+     chatf.local_model_type = local_model_type
+
+     load_confirmation = "Finished loading model: " + local_model_type
+
+     print(load_confirmation)
+     return local_model_type, load_confirmation, local_model_type
+
+
+ # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
+ local_model_type = "Gemma 2b"
+ if RUN_LOCAL_MODEL == "1":
+     load_model(local_model_type, chatf.gpu_layers, chatf.context_length, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)

# Create the gradio interface
app = gr.Blocks(theme = gr.themes.Base())
@@ -94,7 +179,7 @@ with app:
with gr.Accordion("I have my own list of topics (zero shot topic modelling).", open = False):
candidate_topics = gr.File(label="Input topics from file (csv). File should have a single column with a header, and all topic keywords below.")

- context_textbox = gr.Textbox(label="Write a short description (one sentence of less) giving context to the large language model about the your consultation and any relevant context")
+ context_textbox = gr.Textbox(label="Write a short description (up to one sentence) giving context to the large language model about the your consultation and any relevant context")

extract_topics_btn = gr.Button("Extract topics from open text", variant="primary")

@@ -151,7 +236,7 @@
Define settings that affect large language model output.
""")
with gr.Accordion("Settings for LLM generation", open = True):
- temperature_slide = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, label="Choose LLM temperature setting")
+ temperature_slide = gr.Slider(minimum=0.1, maximum=1.0, value=0.1, label="Choose LLM temperature setting")
batch_size_number = gr.Number(label = "Number of responses to submit in a single LLM query", value = batch_size_default, precision=0)
random_seed = gr.Number(value=42, label="Random seed for LLM generation", visible=False)

@@ -198,13 +283,13 @@ with app:
extract_topics_btn.click(fn=empty_output_vars_extract_topics, inputs=None, outputs=[master_topic_df_state, master_unique_topics_df_state, master_reference_df_state, text_output_file, text_output_file_list_state, latest_batch_completed, log_files_output, log_files_output_list_state, conversation_metadata_textbox, estimated_time_taken_number]).\
then(load_in_data_file,
inputs = [in_data_files, in_colnames, batch_size_number], outputs = [file_data_state, data_file_names_textbox, total_number_of_batches], api_name="load_data").then(\
- fn=llm_query,
- inputs=[file_data_state, master_topic_df_state, master_reference_df_state, master_unique_topics_df_state, text_output_summary, data_file_names_textbox, total_number_of_batches, in_api_key, temperature_slide, in_colnames, model_choice, candidate_topics, latest_batch_completed, text_output_summary, text_output_file_list_state, log_files_output_list_state, first_loop_state, conversation_metadata_textbox, initial_table_prompt_textbox, prompt_2_textbox, prompt_3_textbox, system_prompt_textbox, add_to_existing_topics_system_prompt_textbox, add_to_existing_topics_prompt_textbox, number_of_prompts, batch_size_number, context_textbox, estimated_time_taken_number],
- outputs=[text_output_summary, master_topic_df_state, master_unique_topics_df_state, master_reference_df_state, text_output_file, text_output_file_list_state, latest_batch_completed, log_files_output, log_files_output_list_state, conversation_metadata_textbox, estimated_time_taken_number, summarisation_in_previous_data_files], api_name="llm_query")
+ fn=extract_topics,
+ inputs=[in_data_files, file_data_state, master_topic_df_state, master_reference_df_state, master_unique_topics_df_state, text_output_summary, data_file_names_textbox, total_number_of_batches, in_api_key, temperature_slide, in_colnames, model_choice, candidate_topics, latest_batch_completed, text_output_summary, text_output_file_list_state, log_files_output_list_state, first_loop_state, conversation_metadata_textbox, initial_table_prompt_textbox, prompt_2_textbox, prompt_3_textbox, system_prompt_textbox, add_to_existing_topics_system_prompt_textbox, add_to_existing_topics_prompt_textbox, number_of_prompts, batch_size_number, context_textbox, estimated_time_taken_number],
+ outputs=[text_output_summary, master_topic_df_state, master_unique_topics_df_state, master_reference_df_state, text_output_file, text_output_file_list_state, latest_batch_completed, log_files_output, log_files_output_list_state, conversation_metadata_textbox, estimated_time_taken_number, summarisation_in_previous_data_files], api_name="extract_topics")

# If the output file count text box changes, keep going with redacting each data file until done. Then reveal the feedback buttons.
- latest_batch_completed.change(fn=llm_query,
- inputs=[file_data_state, master_topic_df_state, master_reference_df_state, master_unique_topics_df_state, text_output_summary, data_file_names_textbox, total_number_of_batches, in_api_key, temperature_slide, in_colnames, model_choice, candidate_topics, latest_batch_completed, text_output_summary, text_output_file_list_state, log_files_output_list_state, second_loop_state, conversation_metadata_textbox, initial_table_prompt_textbox, prompt_2_textbox, prompt_3_textbox, system_prompt_textbox, add_to_existing_topics_system_prompt_textbox, add_to_existing_topics_prompt_textbox, number_of_prompts, batch_size_number, context_textbox, estimated_time_taken_number],
+ latest_batch_completed.change(fn=extract_topics,
+ inputs=[in_data_files, file_data_state, master_topic_df_state, master_reference_df_state, master_unique_topics_df_state, text_output_summary, data_file_names_textbox, total_number_of_batches, in_api_key, temperature_slide, in_colnames, model_choice, candidate_topics, latest_batch_completed, text_output_summary, text_output_file_list_state, log_files_output_list_state, second_loop_state, conversation_metadata_textbox, initial_table_prompt_textbox, prompt_2_textbox, prompt_3_textbox, system_prompt_textbox, add_to_existing_topics_system_prompt_textbox, add_to_existing_topics_prompt_textbox, number_of_prompts, batch_size_number, context_textbox, estimated_time_taken_number],
outputs=[text_output_summary, master_topic_df_state, master_unique_topics_df_state, master_reference_df_state, text_output_file, text_output_file_list_state, latest_batch_completed, log_files_output, log_files_output_list_state, conversation_metadata_textbox, estimated_time_taken_number, summarisation_in_previous_data_files]).\
then(fn = reveal_feedback_buttons,
outputs=[data_feedback_radio, data_further_details_text, data_submit_feedback_btn, data_feedback_title], scroll_to_output=True)
@@ -224,7 +309,7 @@ with app:

###
# LOGGING AND ON APP LOAD FUNCTIONS
- ###
+ ###
app.load(get_connection_params, inputs=None, outputs=[session_hash_state, s3_output_folder_state, session_hash_textbox])

# Log usernames and times of access to file (to know who is using the app when running on AWS)
@@ -259,7 +344,7 @@ print(f'The value of RUN_DIRECT_MODE is {MAX_QUEUE_SIZE}')
MAX_FILE_SIZE = get_or_create_env_var('MAX_FILE_SIZE', '100mb')
print(f'The value of MAX_FILE_SIZE is {MAX_FILE_SIZE}')

- GRADIO_SERVER_PORT = int(get_or_create_env_var('GRADIO_SERVER_PORT', '7860'))
+ GRADIO_SERVER_PORT = int(get_or_create_env_var('GRADIO_SERVER_PORT', '7861'))
print(f'The value of GRADIO_SERVER_PORT is {GRADIO_SERVER_PORT}')

if __name__ == "__main__":
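For reference, a minimal sketch (not from the repository) of the GGUF lookup order that load_model()/get_model_path() in app.py implements: a previously downloaded copy under model/gemma is preferred, otherwise the file is fetched from the Hugging Face Hub cache. The default repo and filename match the diff above and can be overridden via environment variables.

import os
from huggingface_hub import hf_hub_download

# Assumed defaults, mirroring app.py; REPO_ID / MODEL_FILE are optional overrides.
repo_id = os.environ.get("REPO_ID", "lmstudio-community/gemma-2-2b-it-GGUF")
filename = os.environ.get("MODEL_FILE", "gemma-2-2b-it-Q8_0.gguf")
local_path = os.path.join("model/gemma", filename)

# Use the local copy if present; otherwise download into the Hugging Face cache.
model_path = local_path if os.path.exists(local_path) else hf_hub_download(repo_id=repo_id, filename=filename)
print("Using GGUF at:", model_path)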
requirements.txt CHANGED
@@ -1,5 +1,5 @@
pandas==2.2.3
- gradio==5.6.0
+ gradio==5.8.0
boto3==1.35.71
pyarrow==18.1.0
openpyxl==3.1.3
@@ -10,3 +10,8 @@ google-generativeai==0.8.3
html5lib==1.1
beautifulsoup4==4.12.3
rapidfuzz==3.10.1
+ torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121
+ llama-cpp-python==0.2.90 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
+ transformers==4.47.0
+ numpy==1.26.4
+ typing_extensions==4.12.2
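An import-level sanity check (illustrative only, not part of the commit) after installing the pinned GPU stack above:

import torch, llama_cpp, transformers, numpy

print("torch:", torch.__version__, "CUDA available:", torch.cuda.is_available())
print("llama-cpp-python:", llama_cpp.__version__)
print("transformers:", transformers.__version__, "numpy:", numpy.__version__)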
requirements_cpu.txt ADDED
@@ -0,0 +1,17 @@
+ pandas==2.2.3
+ gradio==5.6.0
+ boto3==1.35.71
+ pyarrow==18.1.0
+ openpyxl==3.1.3
+ markdown==3.7
+ tabulate==0.9.0
+ lxml==5.3.0
+ google-generativeai==0.8.3
+ html5lib==1.1
+ beautifulsoup4==4.12.3
+ rapidfuzz==3.10.1
+ torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu
+ llama-cpp-python==0.2.90 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
+ transformers==4.47.0
+ numpy==1.26.4
+ typing_extensions==4.12.2
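The same kind of check for the CPU-only pins above; the wheel from the PyTorch CPU index typically reports a "+cpu" local version and no CUDA device (an assumption about the wheel's version tag, not something stated in this commit):

import torch

print(torch.__version__)          # e.g. "2.5.1+cpu" for the CPU-only wheel
print(torch.cuda.is_available())  # expected False in the CPU-only image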
tools/chatfuncs.py ADDED
@@ -0,0 +1,166 @@
+
+ from typing import TypeVar
+
+ # Model packages
+ import torch.cuda
+ from transformers import pipeline
+ import time
+
+ torch.cuda.empty_cache()
+
+ PandasDataFrame = TypeVar('pd.core.frame.DataFrame')
+
+ model_type = None # global variable setup
+
+ full_text = "" # Define dummy source text (full text) just to enable highlight function to load
+
+ model = [] # Define empty list for model functions to run
+ tokenizer = [] # Define empty list for model functions to run
+
+ # Currently set gpu_layers to 0 even with cuda due to persistent bugs in implementation with cuda
+ if torch.cuda.is_available():
+     torch_device = "cuda"
+     gpu_layers = -1
+ else:
+     torch_device = "cpu"
+     gpu_layers = 0
+
+ print("Running on device:", torch_device)
+ threads = torch.get_num_threads() # 8
+ print("CPU threads:", threads)
+
+ temperature: float = 0.1
+ top_k: int = 3
+ top_p: float = 1
+ repetition_penalty: float = 1.2 # Mild repetition penalty to prevent repeating table rows
+ last_n_tokens: int = 512
+ max_new_tokens: int = 4096 # 200
+ seed: int = 42
+ reset: bool = True
+ stream: bool = False
+ threads: int = threads
+ batch_size:int = 256
+ context_length:int = 12288
+ sample = True
+
+
+ class llama_cpp_init_config_gpu:
+     def __init__(self,
+                  last_n_tokens=last_n_tokens,
+                  seed=seed,
+                  n_threads=threads,
+                  n_batch=batch_size,
+                  n_ctx=context_length,
+                  n_gpu_layers=gpu_layers):
+
+         self.last_n_tokens = last_n_tokens
+         self.seed = seed
+         self.n_threads = n_threads
+         self.n_batch = n_batch
+         self.n_ctx = n_ctx
+         self.n_gpu_layers = n_gpu_layers
+         # self.stop: list[str] = field(default_factory=lambda: [stop_string])
+
+     def update_gpu(self, new_value):
+         self.n_gpu_layers = new_value
+
+     def update_context(self, new_value):
+         self.n_ctx = new_value
+
+ class llama_cpp_init_config_cpu(llama_cpp_init_config_gpu):
+     def __init__(self):
+         super().__init__()
+         self.n_gpu_layers = gpu_layers
+         self.n_ctx=context_length
+
+ gpu_config = llama_cpp_init_config_gpu()
+ cpu_config = llama_cpp_init_config_cpu()
+
+
+ class CtransGenGenerationConfig:
+     def __init__(self, temperature=temperature,
+                  top_k=top_k,
+                  top_p=top_p,
+                  repeat_penalty=repetition_penalty,
+                  seed=seed,
+                  stream=stream,
+                  max_tokens=max_new_tokens
+                  ):
+         self.temperature = temperature
+         self.top_k = top_k
+         self.top_p = top_p
+         self.repeat_penalty = repeat_penalty
+         self.seed = seed
+         self.max_tokens=max_tokens
+         self.stream = stream
+
+     def update_temp(self, new_value):
+         self.temperature = new_value
+
+
+ def llama_cpp_streaming(history, full_prompt, temperature=temperature):
+
+     gen_config = CtransGenGenerationConfig()
+     gen_config.update_temp(temperature)
+
+     print(vars(gen_config))
+
+     # Pull the generated text from the streamer, and update the model output.
+     start = time.time()
+     NUM_TOKENS=0
+     print('-'*4+'Start Generation'+'-'*4)
+
+     output = model(
+         full_prompt, **vars(gen_config))
+
+     history[-1][1] = ""
+     for out in output:
+
+         if "choices" in out and len(out["choices"]) > 0 and "text" in out["choices"][0]:
+             history[-1][1] += out["choices"][0]["text"]
+             NUM_TOKENS+=1
+             yield history
+         else:
+             print(f"Unexpected output structure: {out}")
+
+     time_generate = time.time() - start
+     print('\n')
+     print('-'*4+'End Generation'+'-'*4)
+     print(f'Num of generated tokens: {NUM_TOKENS}')
+     print(f'Time for complete generation: {time_generate}s')
+     print(f'Tokens per second: {NUM_TOKENS/time_generate}')
+     print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
+
+
+ def call_llama_cpp_model(formatted_string, gen_config):
+     """
+     Calls your generation model with parameters from the CtransGenGenerationConfig object.
+
+     Args:
+         formatted_string (str): The formatted input text for the model.
+         gen_config (CtransGenGenerationConfig): An object containing generation parameters.
+     """
+     # Extracting parameters from the gen_config object
+     temperature = gen_config.temperature
+     top_k = gen_config.top_k
+     top_p = gen_config.top_p
+     repeat_penalty = gen_config.repeat_penalty
+     seed = gen_config.seed
+     max_tokens = gen_config.max_tokens
+     stream = gen_config.stream
+
+     # Now you can call your model directly, passing the parameters:
+     output = model(
+         formatted_string,
+         temperature=temperature,
+         top_k=top_k,
+         top_p=top_p,
+         repeat_penalty=repeat_penalty,
+         seed=seed,
+         max_tokens=max_tokens,
+         stream=stream#,
+         #stop=["<|eot_id|>", "\n\n"]
+     )
+
+     return output
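A minimal usage sketch (not from the repository) of this new interface: llm_api_call.py builds a CtransGenGenerationConfig and hands it to call_llama_cpp_model() once app.py has assigned a loaded Llama object to chatf.model.

import tools.chatfuncs as chatf
from tools.chatfuncs import CtransGenGenerationConfig, call_llama_cpp_model

# Assumes load_model() in app.py has already run, so chatf.model holds a llama_cpp.Llama instance.
gen_config = CtransGenGenerationConfig()
gen_config.update_temp(0.1)  # same default temperature the UI slider now uses

output = call_llama_cpp_model("Extract topics from the response table below...", gen_config)

# With stream=False, llama-cpp-python returns an OpenAI-style completion dict.
print(output["choices"][0]["text"])
print(output["usage"])  # token counts, appended to conversation metadata in send_request()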
tools/helper_functions.py CHANGED
@@ -2,7 +2,6 @@ import os
import gradio as gr
import pandas as pd

-
def empty_output_vars_extract_topics():
    # Empty output objects before processing a new file

@@ -47,12 +46,19 @@ def get_or_create_env_var(var_name, default_value):
RUN_AWS_FUNCTIONS = get_or_create_env_var("RUN_AWS_FUNCTIONS", "0")
print(f'The value of RUN_AWS_FUNCTIONS is {RUN_AWS_FUNCTIONS}')

+ RUN_LOCAL_MODEL = get_or_create_env_var("RUN_LOCAL_MODEL", "0")
+ print(f'The value of RUN_LOCAL_MODEL is {RUN_LOCAL_MODEL}')
+
if RUN_AWS_FUNCTIONS == "1":
-     model_full_names = ["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0", "gemini-1.5-flash-002", "gemini-1.5-pro-002"]
-     model_short_names = ["haiku", "sonnet", "gemini_flash", "gemini_pro"]
+     model_full_names = ["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0", "gemini-1.5-flash-002", "gemini-1.5-pro-002", "gemma_2b_it_local"]
+     model_short_names = ["haiku", "sonnet", "gemini_flash", "gemini_pro", "gemma_local"]
else:
-     model_full_names = ["gemini-1.5-flash-002", "gemini-1.5-pro-002"]
-     model_short_names = ["gemini_flash", "gemini_pro"]
+     model_full_names = ["gemini-1.5-flash-002", "gemini-1.5-pro-002", "gemma_2b_it_local"]
+     model_short_names = ["gemini_flash", "gemini_pro", "gemma_local"]
+
+ if RUN_LOCAL_MODEL == "0":
+     model_full_names.remove("gemma_2b_it_local")
+     model_short_names.remove("gemma_local")

model_name_map = {short: full for short, full in zip(model_full_names, model_short_names)}

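For clarity, a worked example (not in the commit): because zip(model_full_names, model_short_names) yields (full, short) pairs, the comprehension above keys model_name_map by the full model name, which is exactly how llm_api_call.py looks it up via model_name_map[model_choice].

model_full_names = ["gemini-1.5-flash-002", "gemini-1.5-pro-002", "gemma_2b_it_local"]
model_short_names = ["gemini_flash", "gemini_pro", "gemma_local"]

model_name_map = {short: full for short, full in zip(model_full_names, model_short_names)}
print(model_name_map)
# {'gemini-1.5-flash-002': 'gemini_flash', 'gemini-1.5-pro-002': 'gemini_pro', 'gemma_2b_it_local': 'gemma_local'}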
tools/llm_api_call.py CHANGED
@@ -15,8 +15,11 @@ from gradio import Progress
15
  from typing import List, Tuple
16
  from io import StringIO
17
 
 
 
18
  from tools.prompts import initial_table_prompt, prompt2, prompt3, system_prompt, summarise_topic_descriptions_prompt, summarise_topic_descriptions_system_prompt, add_existing_topics_system_prompt, add_existing_topics_prompt
19
- from tools.helper_functions import output_folder, detect_file_type, get_file_path_end, read_file, get_or_create_env_var, model_name_map
 
20
 
21
  # ResponseObject class for AWS Bedrock calls
22
  class ResponseObject:
@@ -27,8 +30,8 @@ class ResponseObject:
27
  max_tokens = 4096
28
  timeout_wait = 30 # AWS now seems to have a 60 second minimum wait between API calls
29
  number_of_api_retry_attempts = 5
30
- max_time_for_loop = 180
31
-
32
 
33
  AWS_DEFAULT_REGION = get_or_create_env_var('AWS_DEFAULT_REGION', 'eu-west-2')
34
  print(f'The value of AWS_DEFAULT_REGION is {AWS_DEFAULT_REGION}')
@@ -64,7 +67,7 @@ def load_in_file(file_path: str, colname:str=""):
64
 
65
  file_data[colname] = file_data[colname].astype(str).str.replace("\bnan\b", "", regex=True)
66
 
67
- print(file_data[colname])
68
 
69
  return file_data, file_name
70
 
@@ -172,16 +175,24 @@ def data_file_to_markdown_table(file_data:pd.DataFrame, file_name:str, chosen_co
172
 
173
  simple_file = simple_file[start_row:end_row] # Select the current batch
174
 
175
- print("simple_file:", simple_file)
 
 
 
176
 
177
  # Remove problematic characters including ASCII and various quote marks
178
  # Remove problematic characters including control characters, special characters, and excessive leading/trailing whitespace
179
  simple_file["Response"] = simple_file["Response"].str.replace(r'[\x00-\x1F\x7F]|[""<>]|\\', '', regex=True) # Remove control and special characters
180
  simple_file["Response"] = simple_file["Response"].str.strip() # Remove leading and trailing whitespace
181
  simple_file["Response"] = simple_file["Response"].str.replace(r'\s+', ' ', regex=True) # Replace multiple spaces with a single space
 
 
182
 
183
  # Remove blank and extremely short responses
184
- simple_file = simple_file.loc[~(simple_file["Response"].isnull()) & ~(simple_file["Response"] == "None") & ~(simple_file["Response"] == " ") & ~(simple_file["Response"] == ""),:]#~(simple_file["Response"].str.len() < 5), :]
 
 
 
185
 
186
  simplified_csv_table_path = output_folder + 'simple_markdown_table_' + file_name + '_row_' + str(start_row) + '_to_' + str(end_row) + '.csv'
187
  simple_file.to_csv(simplified_csv_table_path, index=None)
@@ -353,7 +364,7 @@ def send_request(prompt: str, conversation_history: List[dict], model: object, c
353
 
354
  for i in progress_bar:
355
  try:
356
- print("Calling Gemini model")
357
  #print("full_prompt:", full_prompt)
358
  #print("generation_config:", config)
359
 
@@ -372,10 +383,10 @@ def send_request(prompt: str, conversation_history: List[dict], model: object, c
372
 
373
  if i == number_of_api_retry_attempts:
374
  return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
375
- else:
376
  for i in progress_bar:
377
  try:
378
- print("Calling AWS Claude model, attempt", i)
379
  response = call_aws_claude(prompt, system_prompt, temperature, max_tokens, model_choice)
380
 
381
  #progress_bar.close()
@@ -392,11 +403,43 @@ def send_request(prompt: str, conversation_history: List[dict], model: object, c
392
 
393
  if i == number_of_api_retry_attempts:
394
  return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
395
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
 
397
  # Update the conversation history with the new prompt and response
398
  conversation_history.append({'role': 'user', 'parts': [prompt]})
399
- conversation_history.append({'role': 'assistant', 'parts': [response.text]})
 
 
 
 
 
 
 
400
 
401
  # Print the updated conversation history
402
  #print("conversation_history:", conversation_history)
@@ -433,16 +476,22 @@ def process_requests(prompts: List[str], system_prompt: str, conversation_histor
433
  #print("prompt to LLM:", prompt)
434
 
435
  response, conversation_history = send_request(prompt, conversation_history, model=model, config=config, model_choice=model_choice, system_prompt=system_prompt, temperature=temperature)
436
-
437
- if not isinstance(response, str):
 
 
 
 
 
 
 
 
438
  #print("response.usage_metadata:", response.usage_metadata)
439
  #print("Response.text:", response.text)
440
  #print("responses:", responses)
441
- responses.append(response)
442
-
443
- # Create conversation txt object
444
- whole_conversation.append(prompt)
445
- whole_conversation.append(response.text)
446
 
447
  # Create conversation metadata
448
  if master == False:
@@ -459,12 +508,15 @@ def process_requests(prompts: List[str], system_prompt: str, conversation_histor
459
  whole_conversation_metadata.append(str(response.usage_metadata['HTTPHeaders']['x-amzn-bedrock-output-token-count']))
460
  whole_conversation_metadata.append('x-amzn-bedrock-input-token-count:')
461
  whole_conversation_metadata.append(str(response.usage_metadata['HTTPHeaders']['x-amzn-bedrock-input-token-count']))
462
- else:
463
  whole_conversation_metadata.append(str(response.usage_metadata))
 
 
464
  except KeyError as e:
465
  print(f"Key error: {e} - Check the structure of response.usage_metadata")
466
  else:
467
  print("Response is a string object.")
 
468
 
469
 
470
  return responses, conversation_history, whole_conversation, whole_conversation_metadata
@@ -494,20 +546,26 @@ def clean_markdown_table(text: str):
494
  if buffer:
495
  merged_lines.append(buffer)
496
 
497
- # Ensure consistent number of pipes in each row based on the header
498
- header_pipes = merged_lines[0].count('|') # Use the first row to count number of pipes
 
 
 
 
 
 
 
 
 
 
 
499
  result = []
 
500
 
501
  for line in merged_lines:
502
  # Strip excessive whitespace around pipes
503
  line = re.sub(r'\s*\|\s*', '|', line.strip())
504
 
505
- # Replace numbers between pipes with commas and a space
506
- line = re.sub(r'(?<=\|)(\s*\d+)(,\s*\d+)+(?=\|)', lambda m: ', '.join(m.group(0).split(',')), line)
507
-
508
- # Replace groups of numbers separated by spaces with commas and a space
509
- line = re.sub(r'(?<=\|)(\s*\d+)(\s+\d+)+(?=\|)', lambda m: ', '.join(m.group(0).split()), line)
510
-
511
  # Fix inconsistent number of pipes by adjusting them to match the header
512
  pipe_count = line.count('|')
513
  if pipe_count < header_pipes:
@@ -516,12 +574,17 @@ def clean_markdown_table(text: str):
516
  # If too many pipes, split line and keep the first `header_pipes` columns
517
  columns = line.split('|')[:header_pipes + 1] # +1 to keep last pipe at the end
518
  line = '|'.join(columns)
 
 
519
 
520
  result.append(line)
521
 
522
  # Join lines back into the cleaned markdown text
523
  cleaned_text = '\n'.join(result)
524
 
 
 
 
525
  return cleaned_text
526
 
527
  def clean_column_name(column_name, max_length=20):
@@ -642,8 +705,23 @@ def write_llm_output_and_logs(responses: List[ResponseObject],
642
  log_files_output_paths.append(whole_conversation_path_meta)
643
 
644
  # Convert output table to markdown and then to a pandas dataframe to csv
645
- # try:
646
- cleaned_response = clean_markdown_table(responses[-1].text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
647
 
648
  markdown_table = markdown.markdown(cleaned_response, extensions=['tables'])
649
 
@@ -653,20 +731,24 @@ def write_llm_output_and_logs(responses: List[ResponseObject],
653
  html_table = re.sub(r'<p>(.*?)</p>', r'\1', markdown_table)
654
  html_table = html_table.replace('<p>', '').replace('</p>', '').strip()
655
 
656
- print("html_table:", html_table)
657
-
658
  # Now ensure that the HTML structure is correct
659
  if "<table>" not in html_table:
660
  html_table = f"""
661
  <table>
 
 
 
 
 
 
 
662
  {html_table}
663
  </table>
664
  """
665
 
666
  # print("Markdown table as HTML:", html_table)
667
 
668
- html_buffer = StringIO(html_table)
669
-
670
 
671
  try:
672
  topic_with_response_df = pd.read_html(html_buffer)[0] # Assuming the first table in the HTML is the one you want
@@ -678,11 +760,16 @@ def write_llm_output_and_logs(responses: List[ResponseObject],
678
 
679
 
680
  # Rename columns to ensure consistent use of data frames later in code
681
- topic_with_response_df.columns = ["General Topic", "Subtopic", "Sentiment", "Summary", "Response References"]
682
 
683
  # Fill in NA rows with values from above (topics seem to be included only on one row):
684
  topic_with_response_df = topic_with_response_df.ffill()
685
 
 
 
 
 
 
686
  # Strip and lower case topic names to remove issues where model is randomly capitalising topics/sentiment
687
  topic_with_response_df["General Topic"] = topic_with_response_df["General Topic"].str.strip().str.lower().str.capitalize()
688
  topic_with_response_df["Subtopic"] = topic_with_response_df["Subtopic"].str.strip().str.lower().str.capitalize()
@@ -695,18 +782,32 @@ def write_llm_output_and_logs(responses: List[ResponseObject],
695
 
696
  # Iterate through each row in the original DataFrame
697
  for index, row in topic_with_response_df.iterrows():
698
- references = re.split(r',\s*|\s+', str(row.iloc[4])) if pd.notna(row.iloc[4]) else ""
 
 
 
 
699
  topic = row.iloc[0] if pd.notna(row.iloc[0]) else ""
700
  subtopic = row.iloc[1] if pd.notna(row.iloc[1]) else ""
701
  sentiment = row.iloc[2] if pd.notna(row.iloc[2]) else ""
702
- summary = row.iloc[3] if pd.notna(row.iloc[3]) else ""
 
 
 
703
 
704
  summary = row_number_string_start + summary
705
 
706
  # Create a new entry for each reference number
707
  for ref in references:
 
 
 
 
 
 
 
708
  reference_data.append({
709
- 'Response References': ref,
710
  'General Topic': topic,
711
  'Subtopic': subtopic,
712
  'Sentiment': sentiment,
@@ -716,6 +817,8 @@ def write_llm_output_and_logs(responses: List[ResponseObject],
716
 
717
  # Create a new DataFrame from the reference data
718
  new_reference_df = pd.DataFrame(reference_data)
 
 
719
 
720
  # Append on old reference data
721
  out_reference_df = pd.concat([new_reference_df, existing_reference_df]).dropna(how='all')
@@ -759,7 +862,10 @@ def write_llm_output_and_logs(responses: List[ResponseObject],
759
 
760
  return topic_table_out_path, reference_table_out_path, unique_topics_df_out_path, topic_with_response_df, markdown_table, out_reference_df, out_unique_topics_df, batch_file_path_details, is_error
761
 
762
- def llm_query(file_data:pd.DataFrame,
 
 
 
763
  existing_topics_table:pd.DataFrame,
764
  existing_reference_df:pd.DataFrame,
765
  existing_unique_topics_df:pd.DataFrame,
@@ -770,7 +876,7 @@ def llm_query(file_data:pd.DataFrame,
770
  temperature:float,
771
  chosen_cols:List[str],
772
  model_choice:str,
773
- candidate_topics: List=[],
774
  latest_batch_completed:int=0,
775
  out_message:List=[],
776
  out_file_paths:List = [],
@@ -783,7 +889,7 @@ def llm_query(file_data:pd.DataFrame,
783
  system_prompt:str=system_prompt,
784
  add_existing_topics_system_prompt:str=add_existing_topics_system_prompt,
785
  add_existing_topics_prompt:str=add_existing_topics_prompt,
786
- number_of_requests:int=1,
787
  batch_size:int=50,
788
  context_textbox:str="",
789
  time_taken:float = 0,
@@ -796,6 +902,7 @@ def llm_query(file_data:pd.DataFrame,
796
  Query an LLM (Gemini or AWS Anthropic-based) with up to three prompts about a table of open text data. Up to 'batch_size' rows will be queried at a time.
797
 
798
  Parameters:
 
799
  - file_data (pd.DataFrame): Pandas dataframe containing the consultation response data.
800
  - existing_topics_table (pd.DataFrame): Pandas dataframe containing the latest master topic table that has been iterated through batches.
801
  - existing_reference_df (pd.DataFrame): Pandas dataframe containing the list of Response reference numbers alongside the derived topics and subtopics.
@@ -806,7 +913,7 @@ def llm_query(file_data:pd.DataFrame,
806
  - in_api_key (str): The API key for authentication.
807
  - temperature (float): The temperature parameter for the model.
808
  - chosen_cols (List[str]): A list of chosen columns to process.
809
- - candidate_topics (List): A list of existing candidate topics submitted by the user.
810
  - model_choice (str): The choice of model to use.
811
  - latest_batch_completed (int): The index of the latest file completed.
812
  - out_message (list): A list to store output messages.
@@ -835,16 +942,37 @@ def llm_query(file_data:pd.DataFrame,
835
  config = ""
836
  final_time = 0.0
837
  whole_conversation_metadata = []
838
- #all_topic_tables_df = []
839
- #all_markdown_topic_tables = []
840
  is_error = False
 
 
 
 
 
 
 
 
841
 
842
  # Reset output files on each run:
843
  # out_file_paths = []
844
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
845
  #model_choice_clean = replace_punctuation_with_underscore(model_choice)
846
- model_choice_clean = model_name_map[model_choice]
847
- print("model_choice_clean:", model_choice_clean)
848
 
849
  # If this is the first time around, set variables to 0/blank
850
  if first_loop_state==True:
@@ -852,8 +980,9 @@ def llm_query(file_data:pd.DataFrame,
852
  latest_batch_completed = 0
853
  out_message = []
854
  out_file_paths = []
 
855
 
856
- print("latest_batch_completed:", str(latest_batch_completed))
857
 
858
  # If we have already redacted the last file, return the input out_message and file list to the relevant components
859
  if latest_batch_completed >= num_batches:
@@ -866,7 +995,6 @@ def llm_query(file_data:pd.DataFrame,
866
  out_time = f"Everything finished in {final_time} seconds."
867
  print(out_time)
868
 
869
-
870
  print("All summaries completed. Creating outputs.")
871
 
872
  model_choice_clean = model_name_map[model_choice]
@@ -931,7 +1059,7 @@ def llm_query(file_data:pd.DataFrame,
931
  print("out_file_paths:", out_file_paths)
932
 
933
  #final_out_message = '\n'.join(out_message)
934
- return display_table, existing_topics_table, existing_unique_topics_df, existing_reference_df, out_file_paths, out_file_paths, latest_batch_completed, log_files_output_paths, log_files_output_paths, whole_conversation_metadata_str, final_time, out_file_paths
935
 
936
 
937
 
@@ -949,18 +1077,14 @@ def llm_query(file_data:pd.DataFrame,
949
  if not out_file_paths:
950
  out_file_paths = []
951
 
952
- # Check if files and text exist
953
- if file_data.empty:
954
- out_message = "Please enter a data file to summarise."
955
- print(out_message)
956
- return out_message, existing_topics_table, existing_unique_topics_df, existing_reference_df, out_file_paths, out_file_paths, latest_batch_completed, log_files_output_paths, log_files_output_paths, whole_conversation_metadata_str, final_time, out_file_paths#, out_message
957
 
958
  if model_choice == "anthropic.claude-3-sonnet-20240229-v1:0" and file_data.shape[1] > 300:
959
  out_message = "Your data has more than 300 rows, using the Sonnet model will be too expensive. Please choose the Haiku model instead."
960
  print(out_message)
961
  return out_message, existing_topics_table, existing_unique_topics_df, existing_reference_df, out_file_paths, out_file_paths, latest_batch_completed, log_files_output_paths, log_files_output_paths, whole_conversation_metadata_str, final_time, out_file_paths#, out_message
962
 
963
- topics_loop_description = "Extracting topics from response batches (each batch of " + str(batch_size) + " responses). " + str(latest_batch_completed) + " batches completed."
964
  topics_loop = tqdm(range(latest_batch_completed, num_batches), desc = topics_loop_description, unit="batches remaining")
965
 
966
  for i in topics_loop:
@@ -994,39 +1118,53 @@ def llm_query(file_data:pd.DataFrame,
994
  if model_choice in ["gemini-1.5-flash-002", "gemini-1.5-pro-002"]:
995
  print("Using Gemini model:", model_choice)
996
  model, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=add_existing_topics_system_prompt, max_tokens=max_tokens)
997
- else:
998
  print("Using AWS Bedrock model:", model_choice)
 
 
999
 
1000
  if candidate_topics:
1001
  # 'Zero shot topics' are those supplied by the user
1002
- zero_shot_topics = read_file(candidate_topics.name)
1003
- zero_shot_topics_series = zero_shot_topics.iloc[:, 0].str.strip().str.lower().str.capitalize()
1004
- # Max 150 topics allowed
1005
- if len(zero_shot_topics_series) > 120:
1006
- print("Maximum 120 topics allowed to fit within large language model context limits.")
1007
- zero_shot_topics_series = zero_shot_topics_series.iloc[:120]
1008
-
1009
- zero_shot_topics_list = list(zero_shot_topics_series)
1010
 
1011
- print("Zero shot topics are:", zero_shot_topics_list)
1012
-
1013
- #all_topic_tables_df_merged = existing_unique_topics_df
1014
- existing_unique_topics_df["Response References"] = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1015
 
1016
 
1017
- # Create the most up to date list of topics and subtopics.
1018
- # If there are candidate topics, but the existing_unique_topics_df hasn't yet been constructed, then create.
1019
- if candidate_topics and existing_unique_topics_df.empty:
1020
- existing_unique_topics_df = pd.DataFrame(data={'General Topic':'', 'Subtopic':zero_shot_topics_list, 'Sentiment':''})
1021
-
1022
- # This part concatenates all zero shot and new topics together, so that for the next prompt the LLM will have the full list available
1023
- elif candidate_topics and not existing_unique_topics_df.empty:
1024
- zero_shot_topics_df = pd.DataFrame(data={'General Topic':'', 'Subtopic':zero_shot_topics_list, 'Sentiment':''})
1025
- existing_unique_topics_df = pd.concat([existing_unique_topics_df, zero_shot_topics_df]).drop_duplicates("Subtopic")
1026
- zero_shot_topics_list_str = zero_shot_topics_list
1027
 
1028
  #existing_unique_topics_df.to_csv(output_folder + "Existing topics with zero shot dropped.csv", index = None)
1029
 
 
 
1030
 
1031
  unique_topics_markdown = existing_unique_topics_df[["General Topic", "Subtopic", "Sentiment"]].drop_duplicates(["General Topic", "Subtopic", "Sentiment"]).to_markdown(index=False)
1032
 
@@ -1035,6 +1173,13 @@ def llm_query(file_data:pd.DataFrame,
1035
  # Format the summary prompt with the response table and topics
1036
  formatted_summary_prompt = add_existing_topics_prompt.format(response_table=normalised_simple_markdown_table, topics=unique_topics_markdown, consultation_context=context_textbox, column_name=chosen_cols)
1037
 
 
 
 
 
 
 
 
1038
  # Define the output file path for the formatted prompt
1039
  formatted_prompt_output_path = output_folder + file_name + "_full_prompt_" + model_choice_clean + "_temp_" + str(temperature) + ".txt"
1040
 
@@ -1130,7 +1275,17 @@ def llm_query(file_data:pd.DataFrame,
1130
  if prompt3: formatted_prompt3 = prompt3.format(response_table=normalised_simple_markdown_table)
1131
  else: formatted_prompt3 = prompt3
1132
 
1133
- batch_prompts = [formatted_initial_table_prompt, formatted_prompt2, formatted_prompt3][:number_of_requests] # Adjust this list to send fewer requests
 
 
 
 
 
 
 
 
 
 
1134
 
1135
  whole_conversation = [system_prompt]
1136
 
@@ -1173,15 +1328,21 @@ def llm_query(file_data:pd.DataFrame,
1173
  try:
1174
  final_table_output_path = output_folder + batch_file_path_details + "_full_final_response_" + model_choice_clean + "_temp_" + str(temperature) + ".txt"
1175
 
1176
- with open(final_table_output_path, "w", encoding='utf-8', errors='replace') as f:
1177
- f.write(responses[-1].text)
 
 
 
 
 
 
 
1178
 
1179
  log_files_output_paths.append(final_table_output_path)
1180
 
1181
  except Exception as e:
1182
  print(e)
1183
-
1184
- display_table = responses[-1].text
1185
  new_topic_df = topic_table_df
1186
  new_reference_df = reference_df
1187
 
@@ -1260,7 +1421,6 @@ def deduplicate_categories(category_series: pd.Series, join_series:pd.Series, th
1260
 
1261
  return result_df
1262
 
1263
-
1264
  def sample_reference_table_summaries(reference_df:pd.DataFrame,
1265
  unique_topics_df:pd.DataFrame,
1266
  random_seed:int,
@@ -1380,7 +1540,11 @@ def summarise_output_topics_query(model_choice:str, in_api_key:str, temperature:
1380
  print("Finished summary query")
1381
 
1382
  # Extract text from the `responses` list
1383
- response_texts = [resp.text for resp in responses]
 
 
 
 
1384
  latest_response_text = response_texts[-1]
1385
 
1386
  #print("latest_response_text:", latest_response_text)
@@ -1482,6 +1646,8 @@ def summarise_output_topics(summarised_references:pd.DataFrame,
1482
  try:
1483
  response, conversation_history, metadata = summarise_output_topics_query(model_choice, in_api_key, temperature, formatted_summary_prompt, summarise_topic_descriptions_system_prompt)
1484
  summarised_output = response
 
 
1485
  except Exception as e:
1486
  print(e)
1487
  summarised_output = ""
 
15
  from typing import List, Tuple
16
  from io import StringIO
17
 
18
+ GradioFileData = gr.FileData
19
+
20
  from tools.prompts import initial_table_prompt, prompt2, prompt3, system_prompt, summarise_topic_descriptions_prompt, summarise_topic_descriptions_system_prompt, add_existing_topics_system_prompt, add_existing_topics_prompt
21
+ from tools.helper_functions import output_folder, detect_file_type, get_file_path_end, read_file, get_or_create_env_var, model_name_map, put_columns_in_df
22
+ from tools.chatfuncs import model, CtransGenGenerationConfig, temperature, context_length, call_llama_cpp_model
23
 
24
  # ResponseObject class for AWS Bedrock calls
25
  class ResponseObject:
 
30
  max_tokens = 4096
31
  timeout_wait = 30 # AWS now seems to have a 60 second minimum wait between API calls
32
  number_of_api_retry_attempts = 5
33
+ max_time_for_loop = 99999
34
+ batch_size_default = 5
35
 
36
  AWS_DEFAULT_REGION = get_or_create_env_var('AWS_DEFAULT_REGION', 'eu-west-2')
37
  print(f'The value of AWS_DEFAULT_REGION is {AWS_DEFAULT_REGION}')
 
67
 
68
  file_data[colname] = file_data[colname].astype(str).str.replace("\bnan\b", "", regex=True)
69
 
70
+ #print(file_data[colname])
71
 
72
  return file_data, file_name
73
 
 
175
 
176
  simple_file = simple_file[start_row:end_row] # Select the current batch
177
 
178
+ # Now replace the reference numbers with numbers starting from 1
179
+ simple_file["Reference"] = simple_file["Reference"] - start_row
180
+
181
+ #print("simple_file:", simple_file)
182
 
183
  # Remove problematic characters including ASCII and various quote marks
184
  # Remove problematic characters including control characters, special characters, and excessive leading/trailing whitespace
185
  simple_file["Response"] = simple_file["Response"].str.replace(r'[\x00-\x1F\x7F]|[""<>]|\\', '', regex=True) # Remove control and special characters
186
  simple_file["Response"] = simple_file["Response"].str.strip() # Remove leading and trailing whitespace
187
  simple_file["Response"] = simple_file["Response"].str.replace(r'\s+', ' ', regex=True) # Replace multiple spaces with a single space
188
+ simple_file["Response"] = simple_file["Response"].str.replace(r'\n{2,}', '\n', regex=True) # Replace multiple line breaks with a single line break
189
+ simple_file["Response"] = simple_file["Response"].str.slice(0, 2500) # Maximum 1,500 character responses
190
 
191
  # Remove blank and extremely short responses
192
+ simple_file = simple_file.loc[~(simple_file["Response"].isnull()) &\
193
+ ~(simple_file["Response"] == "None") &\
194
+ ~(simple_file["Response"] == " ") &\
195
+ ~(simple_file["Response"] == ""),:]#~(simple_file["Response"].str.len() < 5), :]
196
 
197
  simplified_csv_table_path = output_folder + 'simple_markdown_table_' + file_name + '_row_' + str(start_row) + '_to_' + str(end_row) + '.csv'
198
  simple_file.to_csv(simplified_csv_table_path, index=None)
 
364
 
365
  for i in progress_bar:
366
  try:
367
+ print("Calling Gemini model, attempt", i + 1)
368
  #print("full_prompt:", full_prompt)
369
  #print("generation_config:", config)
370
 
 
383
 
384
  if i == number_of_api_retry_attempts:
385
  return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
386
+ elif model_choice in ["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"]:
387
  for i in progress_bar:
388
  try:
389
+ print("Calling AWS Claude model, attempt", i + 1)
390
  response = call_aws_claude(prompt, system_prompt, temperature, max_tokens, model_choice)
391
 
392
  #progress_bar.close()
 
403
 
404
  if i == number_of_api_retry_attempts:
405
  return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
406
+ else:
407
+ # This is the Gemma model
408
+ for i in progress_bar:
409
+ try:
410
+ print("Calling Gemma 2B Instruct model, attempt", i + 1)
411
+
412
+ gen_config = CtransGenGenerationConfig()
413
+ gen_config.update_temp(temperature)
414
+
415
+ response = call_llama_cpp_model(prompt, gen_config)
416
+
417
+ #progress_bar.close()
418
+ #tqdm._instances.clear()
419
+
420
+ print("Successful call to Gemma model.")
421
+ print("Response:", response)
422
+ break
423
+ except Exception as e:
424
+ # If fails, try again after X seconds in case there is a throttle limit
425
+ print("Call to Gemma model failed:", e, " Waiting for ", str(timeout_wait), "seconds and trying again.")
426
+
427
+ time.sleep(timeout_wait)
428
+ #response = call_aws_claude(prompt, system_prompt, temperature, max_tokens, model_choice)
429
+
430
+ if i == number_of_api_retry_attempts:
431
+ return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
432
 
433
  # Update the conversation history with the new prompt and response
434
  conversation_history.append({'role': 'user', 'parts': [prompt]})
435
+
436
+ # output_str = output['choices'][0]['text']
437
+
438
+ # Check if is a LLama.cpp model response
439
+ if 'choices' in response:
440
+ conversation_history.append({'role': 'assistant', 'parts': [response['choices'][0]['text']]})
441
+ else:
442
+ conversation_history.append({'role': 'assistant', 'parts': [response.text]})
443
 
444
  # Print the updated conversation history
445
  #print("conversation_history:", conversation_history)
 
476
  #print("prompt to LLM:", prompt)
477
 
478
  response, conversation_history = send_request(prompt, conversation_history, model=model, config=config, model_choice=model_choice, system_prompt=system_prompt, temperature=temperature)
479
+
480
+ if 'choices' in response:
481
+ responses.append(response)
482
+
483
+ # Create conversation txt object
484
+ whole_conversation.append(prompt)
485
+ whole_conversation.append(response['choices'][0]['text'])
486
+
487
+ else:
488
+ responses.append(response)
489
  #print("response.usage_metadata:", response.usage_metadata)
490
  #print("Response.text:", response.text)
491
  #print("responses:", responses)
492
+ # Create conversation txt object
493
+ whole_conversation.append(prompt)
494
+ whole_conversation.append(response.text)
 
 
495
 
496
  # Create conversation metadata
497
  if master == False:
 
508
  whole_conversation_metadata.append(str(response.usage_metadata['HTTPHeaders']['x-amzn-bedrock-output-token-count']))
509
  whole_conversation_metadata.append('x-amzn-bedrock-input-token-count:')
510
  whole_conversation_metadata.append(str(response.usage_metadata['HTTPHeaders']['x-amzn-bedrock-input-token-count']))
511
+ elif "gemini" in model_choice:
512
  whole_conversation_metadata.append(str(response.usage_metadata))
513
+ else:
514
+ whole_conversation_metadata.append(str(response['usage']))
515
  except KeyError as e:
516
  print(f"Key error: {e} - Check the structure of response.usage_metadata")
517
  else:
518
  print("Response is a string object.")
519
+ whole_conversation_metadata.append("Length prompt: " + str(len(prompt)) + ". Length response: " + str(len(response)))
520
 
521
 
522
  return responses, conversation_history, whole_conversation, whole_conversation_metadata
 
546
  if buffer:
547
  merged_lines.append(buffer)
548
 
549
+ # Fix the header separator row if necessary
550
+ if len(merged_lines) > 1:
551
+ header_pipes = merged_lines[0].count('|') # Count pipes in the header row
552
+ header_separator = '|---|' * (header_pipes - 1) + '|---|' # Generate proper separator
553
+
554
+ # Replace or insert the separator row
555
+ if not re.match(r'^\|[-:|]+$', merged_lines[1]): # Check if the second row is a valid separator
556
+ merged_lines.insert(1, header_separator)
557
+ else:
558
+ # Adjust the separator to match the header pipes
559
+ merged_lines[1] = '|---|' * (header_pipes - 1) + '|'
560
+
561
+ # Ensure consistent number of pipes in each row
562
  result = []
563
+ header_pipes = merged_lines[0].count('|') # Use the header row to count the number of pipes
564
 
565
  for line in merged_lines:
566
  # Strip excessive whitespace around pipes
567
  line = re.sub(r'\s*\|\s*', '|', line.strip())
568
 
 
 
 
 
 
 
569
  # Fix inconsistent number of pipes by adjusting them to match the header
570
  pipe_count = line.count('|')
571
  if pipe_count < header_pipes:
 
574
  # If too many pipes, split line and keep the first `header_pipes` columns
575
  columns = line.split('|')[:header_pipes + 1] # +1 to keep last pipe at the end
576
  line = '|'.join(columns)
577
+
578
+ line = re.sub(r'(\d),(?=\d)', r'\1, ', line)
579
 
580
  result.append(line)
581
 
582
  # Join lines back into the cleaned markdown text
583
  cleaned_text = '\n'.join(result)
584
 
585
+ # Replace numbers next to commas and other numbers with a space
586
+
587
+
588
  return cleaned_text
589
 
590
  def clean_column_name(column_name, max_length=20):
 
705
  log_files_output_paths.append(whole_conversation_path_meta)
706
 
707
  # Convert output table to markdown and then to a pandas dataframe to csv
708
+ def remove_before_last_term(input_string: str) -> str:
709
+ # Use regex to find the last occurrence of the term
710
+ match = re.search(r'(\| ?General Topic)', input_string)
711
+ if match:
712
+ # Find the last occurrence by using rfind
713
+ last_index = input_string.rfind(match.group(0))
714
+ return input_string[last_index:] # Return everything from the last match onward
715
+ return input_string # Return the original string if the term is not found
716
+
717
+ if "choices" in responses[-1]:
718
+ print("Text response:", responses[-1]["choices"][0]['text'])
719
+ start_of_table_response = remove_before_last_term(responses[-1]["choices"][0]['text'])
720
+ cleaned_response = clean_markdown_table(start_of_table_response)
721
+ print("cleaned_response:", cleaned_response)
722
+ else:
723
+ start_of_table_response = remove_before_last_term(responses[-1].text)
724
+ cleaned_response = clean_markdown_table(start_of_table_response)
725
 
726
  markdown_table = markdown.markdown(cleaned_response, extensions=['tables'])
727
 
 
731
  html_table = re.sub(r'<p>(.*?)</p>', r'\1', markdown_table)
732
  html_table = html_table.replace('<p>', '').replace('</p>', '').strip()
733
 
 
 
734
  # Now ensure that the HTML structure is correct
735
  if "<table>" not in html_table:
736
  html_table = f"""
737
  <table>
738
+ <tr>
739
+ <th>General Topic</th>
740
+ <th>Subtopic</th>
741
+ <th>Sentiment</th>
742
+ <th>Response References</th>
743
+ <th>Summary</th>
744
+ </tr>
745
  {html_table}
746
  </table>
747
  """
748
 
749
  # print("Markdown table as HTML:", html_table)
750
 
751
+ html_buffer = StringIO(html_table)
 
752
 
753
  try:
754
  topic_with_response_df = pd.read_html(html_buffer)[0] # Assuming the first table in the HTML is the one you want
 
760
 
761
 
762
  # Rename columns to ensure consistent use of data frames later in code
763
+ topic_with_response_df.columns = ["General Topic", "Subtopic", "Sentiment", "Response References", "Summary"]
764
 
765
  # Fill in NA rows with values from above (topics seem to be included only on one row):
766
  topic_with_response_df = topic_with_response_df.ffill()
767
 
768
+ #print("topic_with_response_df:", topic_with_response_df)
769
+
770
+ # For instances where you end up with float values in Response references
771
+ topic_with_response_df["Response References"] = topic_with_response_df["Response References"].astype(str).str.replace(".0", "", regex=False)
772
+
773
  # Strip and lower case topic names to remove issues where model is randomly capitalising topics/sentiment
774
  topic_with_response_df["General Topic"] = topic_with_response_df["General Topic"].str.strip().str.lower().str.capitalize()
775
  topic_with_response_df["Subtopic"] = topic_with_response_df["Subtopic"].str.strip().str.lower().str.capitalize()
 
782
 
783
  # Iterate through each row in the original DataFrame
784
  for index, row in topic_with_response_df.iterrows():
785
+ #references = re.split(r',\s*|\s+', str(row.iloc[4])) if pd.notna(row.iloc[4]) else ""
786
+ references = re.findall(r'\d+', str(row.iloc[3])) if pd.notna(row.iloc[3]) else []
787
+ # If no numbers found in the Response References column, check the Summary column in case reference numbers were put there by mistake
788
+ if not references:
789
+ references = re.findall(r'\d+', str(row.iloc[4])) if pd.notna(row.iloc[4]) else []
790
  topic = row.iloc[0] if pd.notna(row.iloc[0]) else ""
791
  subtopic = row.iloc[1] if pd.notna(row.iloc[1]) else ""
792
  sentiment = row.iloc[2] if pd.notna(row.iloc[2]) else ""
793
+ summary = row.iloc[4] if pd.notna(row.iloc[4]) else ""
794
+ # If the reference response column is very long, and there's nothing in the summary column, assume that the summary was put in the reference column
795
+ if not summary and len(str(row.iloc[3])) > 30:
796
+ summary = row.iloc[3]
797
 
798
  summary = row_number_string_start + summary
799
 
800
  # Create a new entry for each reference number
801
  for ref in references:
802
+ # Add start_row back onto reference_number
803
+ try:
804
+ response_ref_no = str(int(ref) + int(start_row))
805
+ except ValueError:
806
+ print("Reference is not a number")
807
+ continue
808
+
809
  reference_data.append({
810
+ 'Response References': response_ref_no,
811
  'General Topic': topic,
812
  'Subtopic': subtopic,
813
  'Sentiment': sentiment,
 
817
 
818
  # Create a new DataFrame from the reference data
819
  new_reference_df = pd.DataFrame(reference_data)
820
+
821
+ print("new_reference_df:", new_reference_df)
822
 
823
  # Append on old reference data
824
  out_reference_df = pd.concat([new_reference_df, existing_reference_df]).dropna(how='all')
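
A small sketch of the reference-expansion step above: pull the digits out of 'Response References', re-base them onto the full dataset using the batch's start row, and emit one row per reference. The helper name and example values are illustrative.

import re
import pandas as pd

def explode_references(row: pd.Series, start_row: int) -> list:
    # Sketch: one output record per reference number found in 'Response References'
    refs = re.findall(r"\d+", str(row["Response References"]))
    out = []
    for ref in refs:
        try:
            absolute_ref = str(int(ref) + int(start_row))  # re-base onto the full dataset
        except ValueError:
            continue  # skip anything that is not a plain number
        out.append({
            "Response References": absolute_ref,
            "General Topic": row["General Topic"],
            "Subtopic": row["Subtopic"],
            "Sentiment": row["Sentiment"],
        })
    return out

example = pd.Series({
    "General Topic": "Transport", "Subtopic": "Bus frequency",
    "Sentiment": "Negative", "Response References": "1, 3",
})
print(explode_references(example, start_row=50))
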
 
862
 
863
  return topic_table_out_path, reference_table_out_path, unique_topics_df_out_path, topic_with_response_df, markdown_table, out_reference_df, out_unique_topics_df, batch_file_path_details, is_error
864
 
865
+
866
+
867
+ def extract_topics(in_data_file,
868
+ file_data:pd.DataFrame,
869
  existing_topics_table:pd.DataFrame,
870
  existing_reference_df:pd.DataFrame,
871
  existing_unique_topics_df:pd.DataFrame,
 
876
  temperature:float,
877
  chosen_cols:List[str],
878
  model_choice:str,
879
+ candidate_topics: GradioFileData = [],
880
  latest_batch_completed:int=0,
881
  out_message:List=[],
882
  out_file_paths:List = [],
 
889
  system_prompt:str=system_prompt,
890
  add_existing_topics_system_prompt:str=add_existing_topics_system_prompt,
891
  add_existing_topics_prompt:str=add_existing_topics_prompt,
892
+ number_of_prompts_used:int=1,
893
  batch_size:int=50,
894
  context_textbox:str="",
895
  time_taken:float = 0,
 
902
  Query an LLM (Gemini or AWS Anthropic-based) with up to three prompts about a table of open text data. Up to 'batch_size' rows will be queried at a time.
903
 
904
  Parameters:
905
+ - in_data_file (gr.File): Gradio file object containing input data
906
  - file_data (pd.DataFrame): Pandas dataframe containing the consultation response data.
907
  - existing_topics_table (pd.DataFrame): Pandas dataframe containing the latest master topic table that has been iterated through batches.
908
  - existing_reference_df (pd.DataFrame): Pandas dataframe containing the list of Response reference numbers alongside the derived topics and subtopics.
 
913
  - in_api_key (str): The API key for authentication.
914
  - temperature (float): The temperature parameter for the model.
915
  - chosen_cols (List[str]): A list of chosen columns to process.
916
+ - candidate_topics (gr.FileData): A Gradio FileData object of existing candidate topics submitted by the user.
917
  - model_choice (str): The choice of model to use.
918
  - latest_batch_completed (int): The index of the latest file completed.
919
  - out_message (list): A list to store output messages.
 
942
  config = ""
943
  final_time = 0.0
944
  whole_conversation_metadata = []
 
 
945
  is_error = False
946
+ #llama_system_prefix = "<|start_header_id|>system<|end_header_id|>\n" #"<start_of_turn>user\n"
947
+ #llama_system_suffix = "<|eot_id|>" #"<end_of_turn>\n<start_of_turn>model\n"
948
+ #llama_prefix = "<|start_header_id|>system<|end_header_id|>\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.<|eot_id|><|start_header_id|>user<|end_header_id|>\n" #"<start_of_turn>user\n"
949
+ #llama_suffix = "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n" #"<end_of_turn>\n<start_of_turn>model\n"
950
+ #llama_prefix = "<|user|>\n" # This is for phi 3.5
951
+ #llama_suffix = "<|end|>\n<|assistant|>" # This is for phi 3.5
952
+ llama_prefix = "<start_of_turn>user\n"
953
+ llama_suffix = "<end_of_turn>\n<start_of_turn>model\n"
954
 
955
  # Reset output files on each run:
956
  # out_file_paths = []
957
 
958
+ # If you have a file input but no file data it hasn't yet been loaded. Load it here.
959
+ if file_data.empty:
960
+ print("No data table found, loading from file")
961
+ try:
962
+ print("in_data_file:", in_data_file)
963
+ in_colnames_drop, in_excel_sheets, file_name = put_columns_in_df(in_data_file)
964
+ print("in_colnames:", in_colnames_drop)
965
+ file_data, file_name, num_batches = load_in_data_file(in_data_file, chosen_cols, batch_size_default)
966
+ print("file_data loaded in:", file_data)
967
+ except Exception:
968
+ # Check if files and text exist
969
+ out_message = "Please enter a data file to summarise."
970
+ print(out_message)
971
+ return out_message, existing_topics_table, existing_unique_topics_df, existing_reference_df, out_file_paths, out_file_paths, latest_batch_completed, log_files_output_paths, log_files_output_paths, whole_conversation_metadata_str, final_time, out_file_paths#, out_message
972
+
973
+
974
  #model_choice_clean = replace_punctuation_with_underscore(model_choice)
975
+ model_choice_clean = model_name_map[model_choice]
 
976
 
977
  # If this is the first time around, set variables to 0/blank
978
  if first_loop_state==True:
 
980
  latest_batch_completed = 0
981
  out_message = []
982
  out_file_paths = []
983
+ print("model_choice_clean:", model_choice_clean)
984
 
985
+ #print("latest_batch_completed:", str(latest_batch_completed))
986
 
987
  # If we have already redacted the last file, return the input out_message and file list to the relevant components
988
  if latest_batch_completed >= num_batches:
 
995
  out_time = f"Everything finished in {final_time} seconds."
996
  print(out_time)
997
 
 
998
  print("All summaries completed. Creating outputs.")
999
 
1000
  model_choice_clean = model_name_map[model_choice]
 
1059
  print("out_file_paths:", out_file_paths)
1060
 
1061
  #final_out_message = '\n'.join(out_message)
1062
+ return display_table, existing_topics_table, existing_unique_topics_df, existing_reference_df, out_file_paths, out_file_paths, latest_batch_completed, log_files_output_paths, log_files_output_paths, whole_conversation_metadata_str, final_time, out_file_paths#, out_message
1063
 
1064
 
1065
 
 
1077
  if not out_file_paths:
1078
  out_file_paths = []
1079
 
1080
+
 
 
 
 
1081
 
1082
  if model_choice == "anthropic.claude-3-sonnet-20240229-v1:0" and file_data.shape[1] > 300:
1083
  out_message = "Your data has more than 300 rows, using the Sonnet model will be too expensive. Please choose the Haiku model instead."
1084
  print(out_message)
1085
  return out_message, existing_topics_table, existing_unique_topics_df, existing_reference_df, out_file_paths, out_file_paths, latest_batch_completed, log_files_output_paths, log_files_output_paths, whole_conversation_metadata_str, final_time, out_file_paths#, out_message
1086
 
1087
+ topics_loop_description = "Extracting topics from response batches (each batch of " + str(batch_size) + " responses)."
1088
  topics_loop = tqdm(range(latest_batch_completed, num_batches), desc = topics_loop_description, unit="batches remaining")
1089
 
1090
  for i in topics_loop:
 
1118
  if model_choice in ["gemini-1.5-flash-002", "gemini-1.5-pro-002"]:
1119
  print("Using Gemini model:", model_choice)
1120
  model, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=add_existing_topics_system_prompt, max_tokens=max_tokens)
1121
+ elif model_choice in ["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"]:
1122
  print("Using AWS Bedrock model:", model_choice)
1123
+ else:
1124
+ print("Using local model:", model_choice)
1125
 
1126
  if candidate_topics:
1127
  # 'Zero shot topics' are those supplied by the user
1128
+ max_topic_no = 120
 
 
 
 
 
 
 
1129
 
1130
+ zero_shot_topics = read_file(candidate_topics.name)
1131
+ if zero_shot_topics.shape[1] == 1: # Check if there is only one column
1132
+ zero_shot_topics_series = zero_shot_topics.iloc[:, 0].str.strip().str.lower().str.capitalize()
1133
+ # Max 120 topics allowed
1134
+ if len(zero_shot_topics_series) > max_topic_no:
1135
+ print("Maximum", max_topic_no, "topics allowed to fit within large language model context limits.")
1136
+ zero_shot_topics_series = zero_shot_topics_series.iloc[:max_topic_no]
1137
+
1138
+ zero_shot_topics_list = list(zero_shot_topics_series)
1139
+
1140
+ print("Zero shot topics are:", zero_shot_topics_list)
1141
+
1142
+ # Create the most up to date list of topics and subtopics.
1143
+ # If there are candidate topics, but the existing_unique_topics_df hasn't yet been constructed, then create.
1144
+ if existing_unique_topics_df.empty:
1145
+ existing_unique_topics_df = pd.DataFrame(data={'General Topic':'', 'Subtopic':zero_shot_topics_list, 'Sentiment':''})
1146
+
1147
+ # This part concatenates all zero shot and new topics together, so that for the next prompt the LLM will have the full list available
1148
+ elif not existing_unique_topics_df.empty:
1149
+ zero_shot_topics_df = pd.DataFrame(data={'General Topic':'', 'Subtopic':zero_shot_topics_list, 'Sentiment':''})
1150
+ existing_unique_topics_df = pd.concat([existing_unique_topics_df, zero_shot_topics_df]).drop_duplicates("Subtopic")
1151
+ zero_shot_topics_list_str = zero_shot_topics_list
1152
+
1153
+ elif set(["General Topic", "Subtopic", "Sentiment"]).issubset(zero_shot_topics.columns):
1154
+ # Max 120 topics allowed
1155
+ if zero_shot_topics.shape[0] > max_topic_no:
1156
+ print("Maximum", max_topic_no, "topics allowed to fit within large language model context limits.")
1157
+ zero_shot_topics = zero_shot_topics.iloc[:max_topic_no,:]
1158
+
1159
+ if existing_unique_topics_df.empty:
1160
+ existing_unique_topics_df = pd.DataFrame(data={'General Topic':zero_shot_topics.iloc[:,0], 'Subtopic':zero_shot_topics.iloc[:,1], 'Sentiment':zero_shot_topics.iloc[:,2]})
1161
 
1162
 
 
 
 
 
 
 
 
 
 
 
1163
 
1164
  #existing_unique_topics_df.to_csv(output_folder + "Existing topics with zero shot dropped.csv", index = None)
1165
 
1166
+ #all_topic_tables_df_merged = existing_unique_topics_df
1167
+ existing_unique_topics_df["Response References"] = ""
1168
 
1169
  unique_topics_markdown = existing_unique_topics_df[["General Topic", "Subtopic", "Sentiment"]].drop_duplicates(["General Topic", "Subtopic", "Sentiment"]).to_markdown(index=False)
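
A sketch of the zero-shot merge above for the single-column case: normalise the candidate topics, cap them at the topic limit, and fold them into the unique-topics table without duplicating subtopics. The helper and example data are illustrative.

import pandas as pd

MAX_TOPICS = 120  # cap taken from the code above

def merge_zero_shot_topics(zero_shot: pd.DataFrame, existing: pd.DataFrame) -> pd.DataFrame:
    # Sketch: fold a one-column file of candidate topics into the unique-topics table
    topics = zero_shot.iloc[:, 0].astype(str).str.strip().str.lower().str.capitalize()
    topics = topics.iloc[:MAX_TOPICS]  # keep within the model's context budget
    zero_shot_df = pd.DataFrame({"General Topic": "", "Subtopic": topics, "Sentiment": ""})
    if existing.empty:
        return zero_shot_df
    return pd.concat([existing, zero_shot_df]).drop_duplicates("Subtopic")

candidates = pd.DataFrame({"Topic": ["bus frequency", "Cycle lanes"]})
existing = pd.DataFrame(columns=["General Topic", "Subtopic", "Sentiment"])
print(merge_zero_shot_topics(candidates, existing))
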
1170
 
 
1173
  # Format the summary prompt with the response table and topics
1174
  formatted_summary_prompt = add_existing_topics_prompt.format(response_table=normalised_simple_markdown_table, topics=unique_topics_markdown, consultation_context=context_textbox, column_name=chosen_cols)
1175
 
1176
+ if model_choice == "gemma_2b_it_local":
1177
+ # add_existing_topics_system_prompt = llama_system_prefix + add_existing_topics_system_prompt + llama_system_suffix
1178
+ # formatted_initial_table_prompt = llama_prefix + formatted_summary_prompt + llama_suffix
1179
+
1180
+ formatted_initial_table_prompt = llama_prefix + add_existing_topics_system_prompt + formatted_summary_prompt + llama_suffix
1181
+
1182
+
1183
  # Define the output file path for the formatted prompt
1184
  formatted_prompt_output_path = output_folder + file_name + "_full_prompt_" + model_choice_clean + "_temp_" + str(temperature) + ".txt"
1185
 
 
1275
  if prompt3: formatted_prompt3 = prompt3.format(response_table=normalised_simple_markdown_table)
1276
  else: formatted_prompt3 = prompt3
1277
 
1278
+ if model_choice == "gemma_2b_it_local":
1279
+ # system_prompt = llama_system_prefix + system_prompt + llama_system_suffix
1280
+ # formatted_initial_table_prompt = llama_prefix + formatted_initial_table_prompt + llama_suffix
1281
+ # formatted_prompt2 = llama_prefix + formatted_prompt2 + llama_suffix
1282
+ # formatted_prompt3 = llama_prefix + formatted_prompt3 + llama_suffix
1283
+
1284
+ formatted_initial_table_prompt = llama_prefix + system_prompt + formatted_initial_table_prompt + llama_suffix
1285
+ formatted_prompt2 = llama_prefix + system_prompt + formatted_prompt2 + llama_suffix
1286
+ formatted_prompt3 = llama_prefix + system_prompt + formatted_prompt3 + llama_suffix
1287
+
1288
+ batch_prompts = [formatted_initial_table_prompt, formatted_prompt2, formatted_prompt3][:number_of_prompts_used] # Adjust this list to send fewer requests
1289
 
1290
  whole_conversation = [system_prompt]
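
The local-model branch wraps prompts in Gemma's instruction format rather than sending a separate system message. A minimal sketch of that wrapping, using the same turn markers as above; the helper name is illustrative.

GEMMA_PREFIX = "<start_of_turn>user\n"
GEMMA_SUFFIX = "<end_of_turn>\n<start_of_turn>model\n"

def to_gemma_prompt(system_prompt: str, user_prompt: str) -> str:
    # Sketch: system text is prepended to the user turn for the local Gemma model
    return GEMMA_PREFIX + system_prompt + user_prompt + GEMMA_SUFFIX

print(to_gemma_prompt("You are a researcher analysing open text responses. ",
                      "Summarise the table below..."))
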
1291
 
 
1328
  try:
1329
  final_table_output_path = output_folder + batch_file_path_details + "_full_final_response_" + model_choice_clean + "_temp_" + str(temperature) + ".txt"
1330
 
1331
+ if "choices" in responses[-1]:
1332
+ with open(final_table_output_path, "w", encoding='utf-8', errors='replace') as f:
1333
+ f.write(responses[-1]["choices"][0]['text'])
1334
+ display_table = responses[-1]["choices"][0]['text']
1335
+
1336
+ else:
1337
+ with open(final_table_output_path, "w", encoding='utf-8', errors='replace') as f:
1338
+ f.write(responses[-1].text)
1339
+ display_table = responses[-1].text
1340
 
1341
  log_files_output_paths.append(final_table_output_path)
1342
 
1343
  except Exception as e:
1344
  print(e)
1345
+
 
1346
  new_topic_df = topic_table_df
1347
  new_reference_df = reference_df
1348
 
 
1421
 
1422
  return result_df
1423
 
 
1424
  def sample_reference_table_summaries(reference_df:pd.DataFrame,
1425
  unique_topics_df:pd.DataFrame,
1426
  random_seed:int,
 
1540
  print("Finished summary query")
1541
 
1542
  # Extract text from the `responses` list
1543
+ if "choices" in responses[-1]:
1544
+ response_texts = [resp["choices"][0]['text'] for resp in responses]
1545
+ else:
1546
+ response_texts = [resp.text for resp in responses]
1547
+
1548
  latest_response_text = response_texts[-1]
1549
 
1550
  #print("latest_response_text:", latest_response_text)
 
1646
  try:
1647
  response, conversation_history, metadata = summarise_output_topics_query(model_choice, in_api_key, temperature, formatted_summary_prompt, summarise_topic_descriptions_system_prompt)
1648
  summarised_output = response
1649
+ summarised_output = re.sub(r'\n{2,}', '\n', summarised_output) # Replace multiple line breaks with a single line break
1650
+ summarised_output = re.sub(r'^\n{1,}', '', summarised_output) # Remove one or more line breaks at the start
1651
  except Exception as e:
1652
  print(e)
1653
  summarised_output = ""
tools/prompts.py CHANGED
@@ -1,15 +1,17 @@
1
- system_prompt = """You are a researcher analysing responses from a public consultation. . The subject of this consultation is: {consultation_context}. You are analysing a single question from this consultation that is {column_name}."""
2
 
3
- initial_table_prompt = """The responses from the consultation are shown in the following table that contains two columns - Reference and Response:
4
- '{response_table}'
5
- Based on the above table, create a markdown table to summarise the consultation responses.
 
6
  In the first column identify general topics relevant to responses. Create as many general topics as you can.
7
  In the second column list subtopics relevant to responses. Make the subtopics as specific as possible and make sure they cover every issue mentioned.
8
  In the third column write the sentiment of the subtopic: Negative, Neutral, or Positive.
9
- In the fourth column, write a short summary of the subtopic based on relevant responses. Highlight specific issues that appear relevant responses.
10
- In the fifth column list the Response reference numbers of responses relevant to the Subtopic separated by commas.
 
11
 
12
- Do not add any other columns. Return the table in markdown format, and don't include any special characters in the table. Do not add any other text to your response."""
13
 
14
  prompt2 = ""
15
 
@@ -17,32 +19,51 @@ prompt3 = ""
17
 
18
  ## Adding existing topics to consultation responses
19
 
20
- add_existing_topics_system_prompt = """You are a researcher analysing responses from a public consultation. The subject of this consultation is: {consultation_context}. You are analysing a single question from this consultation that is {column_name}."""
21
-
22
- add_existing_topics_prompt = """Responses from a recent consultation are shown in the following table:
23
 
24
- '{response_table}'
 
25
 
26
- And below is a table of topics currently known to be relevant to this consultation:
 
27
 
28
- '{topics}'
29
-
30
- Your job is to assign responses from the Response column to existing general topics and subtopics, or to new topics if no existing topics are relevant.
31
- Create a new markdown table to summarise the consultation responses.
32
- In the first and second columns, assign responses to the General Topics and Subtopics from the Topics table if they are relevant. If you cannot find a relevant topic, add new General Topics and Subtopics to the table. Make the new Subtopics as specific as possible.
33
  In the third column, write the sentiment of the Subtopic: Negative, Neutral, or Positive.
34
- In the fourth column, a short summary of the Subtopic based on relevant responses. Highlight specific issues that appear in relevant responses.
35
- In the fifth column, a list of Response reference numbers relevant to the Subtopic separated by commas.
 
 
36
 
37
- Do not add any other columns. Exclude rows for topics that are not assigned to any response. Return the table in markdown format, and do not include any special characters in the table. Do not add any other text to your response."""
38
 
39
 
40
- summarise_topic_descriptions_system_prompt = """You are a researcher analysing responses from a public consultation."""
41
 
42
- summarise_topic_descriptions_prompt = """Below is a table with number of paragraphs related to consultation responses:
43
 
44
  '{summaries}'
45
 
46
- Your job is to make a consolidated summary of the above text. Return a summary up to two paragraphs long that includes as much detail as possible from the original text. Return only the summary and no other text.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- Summary:"""
 
 
 
 
 
1
+ system_prompt = """You are a researcher analysing responses from an open text dataset. You are analysing a single column from this dataset that is full of open text responses called {column_name}. The context of this analysis is: {consultation_context}. """
2
 
3
+ initial_table_prompt = """The open text data is shown in the following table that contains two columns, Reference and Response. Response table:
4
+ {response_table}
5
+
6
+ Your task is to create one new markdown table with the headings 'General Topic', 'Subtopic', 'Sentiment', 'Response References', and 'Summary'.
7
  In the first column identify general topics relevant to responses. Create as many general topics as you can.
8
  In the second column list subtopics relevant to responses. Make the subtopics as specific as possible and make sure they cover every issue mentioned.
9
  In the third column write the sentiment of the subtopic: Negative, Neutral, or Positive.
10
+ In the fourth column list each specific Response reference number that is relevant to the Subtopic, separated by commas. Do not write any other text in this column.
11
+ In the fifth and final column, write a short summary of the subtopic based on relevant responses. Highlight specific issues that appear in relevant responses.
12
+ Do not add any other columns. Do not repeat Subtopics with the same Sentiment. Return only one table in markdown format containing all relevant topics. Do not add any other text, thoughts, or notes to your response.
13
 
14
+ New table:"""
15
 
16
  prompt2 = ""
17
 
 
19
 
20
  ## Adding existing topics to consultation responses
21
 
22
+ add_existing_topics_system_prompt = system_prompt
 
 
23
 
24
+ add_existing_topics_prompt = """Responses are shown in the following Response table:
25
+ {response_table}
26
 
27
+ Topics known to be relevant to this dataset are shown in the following Topics table:
28
+ {topics}
29
 
30
+ Your task is to create one new markdown table, assigning responses from the Response table to existing topics, or to create new topics if no existing topics are relevant.
31
+ Create a new markdown table with the headings 'General Topic', 'Subtopic', 'Sentiment', 'Response References', and 'Summary'.
32
+ In the first and second columns, assign General Topics and Subtopics to Responses. Assign topics from the Topics table above if they are very relevant to the text of the Response. If you find a new topic that does not exist in the Topics table, add a new row to the new table. Make the General Topic and Subtopic as specific as possible.
 
 
33
  In the third column, write the sentiment of the Subtopic: Negative, Neutral, or Positive.
34
+ In the fourth column list each specific Response reference number that is relevant to the Subtopic, separated by commas. Do not write any other text in this column.
35
+ In the fifth and final column, write a short summary of the Subtopic based on relevant responses. Highlight specific issues that appear in relevant responses.
36
+ Do not add any other columns. Remove topics from the table that are not assigned to any response. Do not repeat Subtopics with the same Sentiment.
37
+ Return only one table in markdown format containing all relevant topics. Do not add any other text, thoughts, or notes to your response.
38
 
39
+ New table:"""
40
 
41
 
42
+ summarise_topic_descriptions_system_prompt = system_prompt
43
 
44
+ summarise_topic_descriptions_prompt = """Below is a table with number of paragraphs related to the data from the open text column:
45
 
46
  '{summaries}'
47
 
48
+ Your task is to make a consolidated summary of the above text. Return a summary up to two paragraphs long that includes as much detail as possible from the original text. Return only the summary and no other text.
49
+
50
+ Summary:"""
51
+
52
+
53
+ # example_instruction_prompt_llama3 = """<|start_header_id|>system<|end_header_id|>\n
54
+ # You are an AI assistant that follows instruction extremely well. Help as much as you can.<|eot_id|><|start_header_id|>user<|end_header_id|>\n
55
+ # Summarise the following text in less than {length} words: "{text}"\n
56
+ # Summary:<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"""
57
+
58
+ # example_instruction_prompt_phi3 = """<|user|>\n
59
+ # Answer the QUESTION using information from the following CONTENT. Respond with short answers that directly answer the question.\n
60
+ # CONTENT: {summaries}\n
61
+ # QUESTION: {question}\n
62
+ # Answer:<|end|>\n
63
+ # <|assistant|>"""
64
 
65
+ # example_instruction_prompt_gemma = """<start_of_turn>user
66
+ # Categorise the following text into only one of the following categories that seems most relevant: 'cat1', 'cat2', 'cat3', 'cat4'. Answer only with the choice of category. Do not add any other text. Do not explain your choice.
67
+ # Text: {text}<end_of_turn>
68
+ # <start_of_turn>model
69
+ # Category:"""