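"""Streamlit app for analyzing and comparing municipal Climate Action Plans.

Five tabs: single-plan summary generation, multi-plan QA against a shared
FAISS vector store, multi-plan QA across per-plan vector stores, a
retrieval-based plan comparison tool, and a long-context comparison backed
by Anthropic's Claude.
"""
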
import os
import re
import streamlit as st
from tempfile import NamedTemporaryFile
import anthropic

# Import necessary modules from LangChain
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Function to remove code block markers from the answer
def remove_code_blocks(text):
    """
    Removes code block markers from the answer text.

    Args:
        text (str): The text from which code block markers should be removed.

    Returns:
        str: The text without code block markers.
    """
    code_block_pattern = r"^```(?:\w+)?\n(.*?)\n```$"
    match = re.match(code_block_pattern, text, re.DOTALL)
    if match:
        return match.group(1).strip()
    else:
        return text

# Function to process PDF, run Q&A, and return results
def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
    """
    Processes a PDF file, runs Q&A, and returns the results.

    Args:
        api_key (str): OpenAI API key.
        uploaded_file: Uploaded PDF file.
        questions_path (str): Path to the questions file.
        prompt_path (str): Path to the system prompt file.
        display_placeholder: Streamlit placeholder for displaying results.

    Returns:
        list: List of QA results.
    """
    # Set the OpenAI API key
    os.environ["OPENAI_API_KEY"] = api_key

    # Save the uploaded PDF to a temporary file
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(uploaded_file.read())
        temp_pdf_path = temp_pdf.name

    # Load and split the PDF into documents
    loader = PyPDFLoader(temp_pdf_path)
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
    splits = text_splitter.split_documents(docs)

    # Create a vector store from the documents
    vectorstore = FAISS.from_documents(
        documents=splits,
        embedding=OpenAIEmbeddings(model="text-embedding-3-large")
    )
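    # k=10 chunks of up to 3,000 characters gives each question roughly 30k characters of context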
    retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

    # Load the system prompt
    if os.path.exists(prompt_path):
        with open(prompt_path, "r") as file:
            system_prompt = file.read()
    else:
        raise FileNotFoundError(f"The specified file was not found: {prompt_path}")

    # Create the prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Initialize the language model
    llm = ChatOpenAI(model="gpt-4o")

    # Create the question-answering chain
    question_answer_chain = create_stuff_documents_chain(
        llm, prompt, document_variable_name="context"
    )
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    # Load the questions
    if os.path.exists(questions_path):
        with open(questions_path, "r") as file:
            questions = [line.strip() for line in file.readlines() if line.strip()]
    else:
        raise FileNotFoundError(f"The specified file was not found: {questions_path}")

    # Process each question
    qa_results = []
    for question in questions:
        result = rag_chain.invoke({"input": question})
        answer = result["answer"]

        # Remove code block markers
        answer = remove_code_blocks(answer)

        qa_text = f"### Question: {question}\n**Answer:**\n{answer}\n"
        qa_results.append(qa_text)
        display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)

    # Clean up temporary PDF file
    os.remove(temp_pdf_path)

    return qa_results

# Function to perform multi-plan QA using an existing vector store
def process_multi_plan_qa(api_key, input_text, display_placeholder):
    """
    Performs multi-plan QA using an existing shared vector store.

    Args:
        api_key (str): OpenAI API key.
        input_text (str): The question to ask.
        display_placeholder: Streamlit placeholder for displaying results.
    """
    # Set the OpenAI API key
    os.environ["OPENAI_API_KEY"] = api_key

    # Load the existing vector store
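    # allow_dangerous_deserialization is required because FAISS stores are
    # pickled on disk; only load stores you created yourself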
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
    vector_store = FAISS.load_local(
        "Combined_Summary_Vectorstore",
        embeddings,
        allow_dangerous_deserialization=True
    )

    # Convert the vector store to a retriever
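    # A large k lets chunks from many different plans surface in the shared store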
    retriever = vector_store.as_retriever(search_kwargs={"k": 50})

    # Read the system prompt for multi-document QA
    prompt_path = "Prompts/multi_document_qa_system_prompt.md"
    if os.path.exists(prompt_path):
        with open(prompt_path, "r") as file:
            system_prompt = file.read()
    else:
        raise FileNotFoundError(f"The specified file was not found: {prompt_path}")

    # Create the prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Create the question-answering chain
    llm = ChatOpenAI(model="gpt-4o")
    question_answer_chain = create_stuff_documents_chain(
        llm, prompt, document_variable_name="context"
    )
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    # Process the input text
    result = rag_chain.invoke({"input": input_text})
    answer = result["answer"]

    # Display the answer
    display_placeholder.markdown(f"**Answer:**\n{answer}")

# Function to perform multi-plan QA using multiple individual vector stores
def process_multi_plan_qa_multi_vectorstore(api_key, input_text, display_placeholder):
    """
    Performs multi-plan QA using multiple individual vector stores.

    Args:
        api_key (str): OpenAI API key.
        input_text (str): The question to ask.
        display_placeholder: Streamlit placeholder for displaying results.
    """
    # Set the OpenAI API key
    os.environ["OPENAI_API_KEY"] = api_key

    # Directory containing individual vector stores
    vectorstore_directory = "Individual_Summary_Vectorstores"

    # List all vector store directories
    vectorstore_names = [
        d for d in os.listdir(vectorstore_directory)
        if os.path.isdir(os.path.join(vectorstore_directory, d))
    ]

    # Initialize a list to collect all retrieved chunks
    all_retrieved_chunks = []

    # Create the embedding model once and reuse it for every store
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

    # Process each vector store
    for vectorstore_name in vectorstore_names:
        vectorstore_path = os.path.join(vectorstore_directory, vectorstore_name)

        # Load the vector store
        vector_store = FAISS.load_local(
            vectorstore_path,
            embeddings,
            allow_dangerous_deserialization=True
        )

        # Convert the vector store to a retriever
        retriever = vector_store.as_retriever(search_kwargs={"k": 2})

        # Retrieve relevant chunks for the input text
        retrieved_chunks = retriever.invoke(input_text)
        all_retrieved_chunks.extend(retrieved_chunks)

    # Read the system prompt for multi-document QA
    prompt_path = "Prompts/multi_document_qa_system_prompt.md"
    if os.path.exists(prompt_path):
        with open(prompt_path, "r") as file:
            system_prompt = file.read()
    else:
        raise FileNotFoundError(f"The specified file was not found: {prompt_path}")

    # Create the prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Create the question-answering chain
    llm = ChatOpenAI(model="gpt-4o")
    question_answer_chain = create_stuff_documents_chain(
        llm, prompt, document_variable_name="context"
    )

    # Process the combined context
    result = question_answer_chain.invoke({
        "input": input_text,
        "context": all_retrieved_chunks
    })

    # Display the answer
    answer = result["answer"] if "answer" in result else result
    display_placeholder.markdown(f"**Answer:**\n{answer}")

def load_documents_from_pdf(file):
    """
    Loads documents from a PDF file.

    Args:
        file: Uploaded PDF file.

    Returns:
        list: List of documents.
    """
    # Check if the file is a PDF
    if not file.name.lower().endswith('.pdf'):
        raise ValueError("The uploaded file is not a PDF. Please upload a PDF file.")

    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(file.read())
        temp_pdf_path = temp_pdf.name

    loader = PyPDFLoader(temp_pdf_path)
    docs = loader.load()
    os.remove(temp_pdf_path)
    return docs

def load_vector_store_from_path(path):
    """
    Loads a vector store from a given path.

    Args:
        path (str): Path to the vector store.

    Returns:
        FAISS: Loaded vector store.
    """
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
    return FAISS.load_local(
        path,
        embeddings,
        allow_dangerous_deserialization=True
    )

# Function to compare documents via one-to-many query approach
def process_one_to_many_query(api_key, focus_input, comparison_inputs, input_text, display_placeholder):
    """
    Compares a focus document against multiple comparison documents using a one-to-many query approach.

    Args:
        api_key (str): OpenAI API key.
        focus_input: Focus document (uploaded file or path to vector store).
        comparison_inputs: List of comparison documents (uploaded files or paths to vector stores).
        input_text (str): The comparison question to ask.
        display_placeholder: Streamlit placeholder for displaying results.
    """
    # Set the OpenAI API key
    os.environ["OPENAI_API_KEY"] = api_key
    print(comparison_inputs)
    # Load focus documents or vector store
    if isinstance(focus_input, st.runtime.uploaded_file_manager.UploadedFile):
        # If focus_input is an uploaded PDF file
        focus_docs = load_documents_from_pdf(focus_input)
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
        focus_splits = text_splitter.split_documents(focus_docs)
        focus_vector_store = FAISS.from_documents(
            focus_splits,
            OpenAIEmbeddings(model="text-embedding-3-large")
        )
        focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
    elif isinstance(focus_input, str) and os.path.isdir(focus_input):
        # If focus_input is a path to a vector store
        focus_vector_store = load_vector_store_from_path(focus_input)
        focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
    else:
        raise ValueError("Invalid focus input type. Must be a PDF file or a path to a vector store.")

    # Retrieve relevant chunks from the focus document
    focus_docs = focus_retriever.invoke(input_text)

    # Initialize list to collect comparison chunks
    comparison_chunks = []
    for comparison_input in comparison_inputs:
        if isinstance(comparison_input, st.runtime.uploaded_file_manager.UploadedFile):
            # If comparison_input is an uploaded PDF file
            comparison_docs = load_documents_from_pdf(comparison_input)
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
            comparison_splits = text_splitter.split_documents(comparison_docs)
            comparison_vector_store = FAISS.from_documents(
                comparison_splits,
                OpenAIEmbeddings(model="text-embedding-3-large")
            )
            comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
        elif isinstance(comparison_input, str) and os.path.isdir(comparison_input):
            # If comparison_input is a path to a vector store
            comparison_vector_store = load_vector_store_from_path(comparison_input)
            comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
        else:
            raise ValueError("Invalid comparison input type. Must be a PDF file or a path to a vector store.")

        # Retrieve relevant chunks from the comparison document
        comparison_docs = comparison_retriever.invoke(input_text)
        comparison_chunks.extend(comparison_docs)

    # Construct the combined context
    combined_context = focus_docs + comparison_chunks

    # Read the system prompt
    prompt_path = "Prompts/comparison_prompt.md"
    if os.path.exists(prompt_path):
        with open(prompt_path, "r") as file:
            system_prompt = file.read()
    else:
        raise FileNotFoundError(f"The specified file was not found: {prompt_path}")

    # Create the prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}")
        ]
    )

    # Create the question-answering chain
    llm = ChatOpenAI(model="gpt-4o")
    question_answer_chain = create_stuff_documents_chain(
        llm,
        prompt,
        document_variable_name="context"
    )

    # Process the combined context
    result = question_answer_chain.invoke({
        "context": combined_context,
        "input": input_text
    })

    # Display the answer
    answer = result["answer"] if "answer" in result else result
    display_placeholder.markdown(f"**Answer:**\n{answer}")

# Function to list vector store documents
def list_vector_store_documents():
    """
    Lists available vector store documents.

    Returns:
        list: List of document names.
    """
    # Assuming documents are stored in the "Individual_All_Vectorstores" directory
    directory_path = "Individual_All_Vectorstores"
    if not os.path.exists(directory_path):
        raise FileNotFoundError(
            f"The directory '{directory_path}' does not exist. "
            "Run `create_and_save_individual_vector_stores()` to create it."
        )
    # List all available vector stores by document name
    documents = [
        f.replace("_vectorstore", "").replace("_", " ")
        for f in os.listdir(directory_path)
        if f.endswith("_vectorstore")
    ]
    return documents

# Function to compare plans using a long context model
def compare_with_long_context(api_key, anthropic_api_key, input_text, focus_plan_path, selected_summaries, display_placeholder):
    """
    Compares plans using a long context model.

    Args:
        api_key (str): OpenAI API key.
        anthropic_api_key (str): Anthropic API key.
        input_text (str): The comparison question to ask.
        focus_plan_path: Path to the focus plan or uploaded file.
        selected_summaries (list): List of selected summary documents.
        display_placeholder: Streamlit placeholder for displaying results.
    """
    # Set the API keys
    os.environ["OPENAI_API_KEY"] = api_key
    os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key

    # Load focus documents
    if isinstance(focus_plan_path, st.runtime.uploaded_file_manager.UploadedFile):
        # If focus_plan_path is an uploaded file
        focus_docs = load_documents_from_pdf(focus_plan_path)
    elif isinstance(focus_plan_path, str):
        # If focus_plan_path is a file path
        focus_loader = PyPDFLoader(focus_plan_path)
        focus_docs = focus_loader.load()
    else:
        raise ValueError("Invalid focus plan input type. Must be an uploaded file or a file path.")

    # Concatenate selected summary documents
    summaries_directory = "CAPS_Summaries"
    summaries_content = ""
    for filename in selected_summaries:
        # Map the display name back to the on-disk filename (" Summary" is stored as "_Summary")
        summary_filename = f"{filename.replace(' Summary', '_Summary')}.md"
        with open(os.path.join(summaries_directory, summary_filename), 'r') as file:
            summaries_content += file.read() + "\n\n"

    # Prepare the context
    focus_context = "\n\n".join([doc.page_content for doc in focus_docs])

    # Create the client and send the request. The legacy Text Completions API
    # requires the prompt to be wrapped in Human/Assistant turns.
    client = anthropic.Anthropic(api_key=anthropic_api_key)
    response = client.completions.create(
        model="claude-2",
        max_tokens_to_sample=1024,
        prompt=(
            f"{anthropic.HUMAN_PROMPT} {input_text}\n\n"
            f"Focus Document:\n{focus_context}\n\n"
            f"Summaries:\n{summaries_content}{anthropic.AI_PROMPT}"
        )
    )

    # Display the answer
    answer = response.completion
    display_placeholder.markdown(f"**Answer:**\n{answer}", unsafe_allow_html=True)

# Streamlit app layout with tabs
st.title("Climate Policy Analysis Tool")

# API Key Input
api_key = st.text_input("Enter your OpenAI API key:", type="password", key="openai_key")

# Create tabs
tab1, tab2, tab3, tab4, tab5 = st.tabs([
    "Summary Generation",
    "Multi-Plan QA (Shared Vectorstore)",
    "Multi-Plan QA (Multi-Vectorstore)",
    "Plan Comparison Tool",
    "Plan Comparison with Long Context Model"
])

# First tab: Summary Generation
with tab1:
    uploaded_file = st.file_uploader(
        "Upload a Climate Action Plan in PDF format",
        type="pdf",
        key="upload_file"
    )

    prompt_file_path = "Prompts/summary_tool_system_prompt.md"
    questions_file_path = "Prompts/summary_tool_questions.md"

    if st.button("Generate", key="generate_button"):
        if not api_key:
            st.warning("Please provide your OpenAI API key.")
        elif not uploaded_file:
            st.warning("Please upload a PDF file.")
        else:
            display_placeholder = st.empty()
            with st.spinner("Processing..."):
                try:
                    results = process_pdf(
                        api_key,
                        uploaded_file,
                        questions_file_path,
                        prompt_file_path,
                        display_placeholder
                    )
                    markdown_text = "\n".join(results)

                    # Use the uploaded file's name for the download file
                    base_name = os.path.splitext(uploaded_file.name)[0]
                    download_file_name = f"{base_name}_Summary.md"

                    st.download_button(
                        label="Download Results as Markdown",
                        data=markdown_text,
                        file_name=download_file_name,
                        mime="text/markdown",
                        key="download_button"
                    )
                except Exception as e:
                    st.error(f"An error occurred: {e}")

# Second tab: Multi-Plan QA (Shared Vectorstore)
with tab2:
    input_text = st.text_input("Ask a question:", key="multi_plan_input")
    if st.button("Ask", key="multi_plan_qa_button"):
        if not api_key:
            st.warning("Please provide your OpenAI API key.")
        elif not input_text:
            st.warning("Please enter a question.")
        else:
            display_placeholder2 = st.empty()
            with st.spinner("Processing..."):
                try:
                    process_multi_plan_qa(
                        api_key,
                        input_text,
                        display_placeholder2
                    )
                except Exception as e:
                    st.error(f"An error occurred: {e}")

# Third tab: Multi-Plan QA (Multi-Vectorstore)
with tab3:
    user_input = st.text_input("Ask a question:", key="multi_vectorstore_input")
    if st.button("Ask", key="multi_vectorstore_qa_button"):
        if not api_key:
            st.warning("Please provide your OpenAI API key.")
        elif not user_input:
            st.warning("Please enter a question.")
        else:
            display_placeholder3 = st.empty()
            with st.spinner("Processing..."):
                try:
                    process_multi_plan_qa_multi_vectorstore(
                        api_key,
                        user_input,
                        display_placeholder3
                    )
                except Exception as e:
                    st.error(f"An error occurred: {e}")

# Fourth tab: Plan Comparison Tool
with tab4:
    st.header("Plan Comparison Tool")

    # List of documents from vector stores
    vectorstore_documents = list_vector_store_documents()

    # Option to upload a new plan or select from existing vector stores
    focus_option = st.radio(
        "Choose a focus plan:",
        ("Select from existing vector stores", "Upload a new plan"),
        key="focus_option"
    )

    if focus_option == "Upload a new plan":
        focus_uploaded_file = st.file_uploader(
            "Upload a Climate Action Plan to compare",
            type="pdf",
            key="focus_upload"
        )
        if focus_uploaded_file is not None:
            # Directly use the uploaded file
            focus_input = focus_uploaded_file
        else:
            focus_input = None
    else:
        # Select a focus plan from existing vector stores
        selected_focus_plan = st.selectbox(
            "Select a focus plan:",
            vectorstore_documents,
            key="select_focus_plan"
        )
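        # Map the display name back to the on-disk store name (assumes underscores occur only in the "_Summary" suffix)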
        focus_input = os.path.join(
            "Individual_All_Vectorstores",
            f"{selected_focus_plan.replace(' Summary', '_Summary')}_vectorstore"
        )

    # Option to upload comparison documents or select from existing vector stores
    comparison_option = st.radio(
        "Choose comparison documents:",
        ("Select from existing vector stores", "Upload new documents"),
        key="comparison_option"
    )

    if comparison_option == "Upload new documents":
        comparison_files = st.file_uploader(
            "Upload comparison documents",
            type="pdf",
            accept_multiple_files=True,
            key="comparison_files"
        )
        comparison_inputs = comparison_files
    else:
        # Select comparison documents from existing vector stores
        selected_comparison_plans = st.multiselect(
            "Select comparison documents:",
            vectorstore_documents,
            key="select_comparison_plans"
        )
        comparison_inputs = [
            os.path.join(
                "Individual_All_Vectorstores",
                f"{doc.replace(' Summary', '_Summary')}_vectorstore"
            ) for doc in selected_comparison_plans
        ]

    input_text = st.text_input(
        "Ask a comparison question:",
        key="comparison_input"
    )

    if st.button("Compare", key="compare_button"):
        if not api_key:
            st.warning("Please provide your OpenAI API key.")
        elif not input_text:
            st.warning("Please enter a comparison question.")
        elif not focus_input:
            st.warning("Please provide a focus plan.")
        elif not comparison_inputs:
            st.warning("Please provide comparison documents.")
        else:
            display_placeholder4 = st.empty()
            with st.spinner("Processing..."):
                try:
                    # Call the process_one_to_many_query function
                    process_one_to_many_query(
                        api_key,
                        focus_input,
                        comparison_inputs,
                        input_text,
                        display_placeholder4
                    )
                except Exception as e:
                    st.error(f"An error occurred: {e}")

# Fifth tab: Plan Comparison with Long Context Model
with tab5:
    st.header("Plan Comparison with Long Context Model")

    # Anthropic API key input
    anthropic_api_key = st.text_input(
        "Enter your Anthropic API key:",
        type="password",
        key="anthropic_key"
    )

    # Option to upload a new plan or select from a list
    focus_option = st.radio(
        "Choose a focus plan:",
        ("Select from existing plans", "Upload a new plan"),
        key="focus_option_long_context"
    )

    if focus_option == "Upload a new plan":
        focus_uploaded_file = st.file_uploader(
            "Upload a Climate Action Plan to compare",
            type="pdf",
            key="focus_upload_long_context"
        )
        if focus_uploaded_file is not None:
            # Directly use the uploaded file
            focus_plan_path = focus_uploaded_file
        else:
            focus_plan_path = None
    else:
        # List of existing plans in CAPS
        plan_list = [f.replace(".pdf", "") for f in os.listdir("CAPS") if f.endswith('.pdf')]
        selected_focus_plan = st.selectbox(
            "Select a focus plan:",
            plan_list,
            key="select_focus_plan_long_context"
        )
        focus_plan_path = os.path.join("CAPS", f"{selected_focus_plan}.pdf")

    # List available summary documents for selection
    summaries_directory = "CAPS_Summaries"
    summary_files = [
        f.replace(".md", "").replace("_", " ")
        for f in os.listdir(summaries_directory) if f.endswith('.md')
    ]
    selected_summaries = st.multiselect(
        "Select summary documents for comparison:",
        summary_files,
        key="selected_summaries"
    )

    input_text = st.text_input(
        "Ask a comparison question:",
        key="comparison_input_long_context"
    )

    if st.button("Compare with Long Context", key="compare_button_long_context"):
        if not api_key:
            st.warning("Please provide your OpenAI API key.")
        elif not anthropic_api_key:
            st.warning("Please provide your Anthropic API key.")
        elif not input_text:
            st.warning("Please enter a comparison question.")
        elif not focus_plan_path:
            st.warning("Please provide a focus plan.")
        else:
            display_placeholder = st.empty()
            with st.spinner("Processing..."):
                try:
                    compare_with_long_context(
                        api_key,
                        anthropic_api_key,
                        input_text,
                        focus_plan_path,
                        selected_summaries,
                        display_placeholder
                    )
                except Exception as e:
                    st.error(f"An error occurred: {e}")