from config.prompts import prompts
import chainlit as cl


def get_sources(res, answer, stream=True, view_sources=False):
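    """Build the answer text and Chainlit source elements for display.

    Groups the retrieved documents in ``res["context"]`` by URL and page,
    merging the text of duplicate chunks. When ``stream`` is False the answer
    is prepended to the returned text; when ``view_sources`` is True the
    source links, per-source text/PDF elements, and metadata are appended.

    Returns a tuple of ``(full_answer, source_elements, source_dict)``.
    """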
    source_elements = []
    source_dict = {}  # Source data keyed by "<url>_<page>" so duplicate chunks are merged

    for idx, source in enumerate(res["context"]):
        source_metadata = source.metadata
        url = source_metadata.get("source", "N/A")
        score = source_metadata.get("score", "N/A")
        page = source_metadata.get("page", 1)

        lecture_tldr = source_metadata.get("tldr", "N/A")
        lecture_recording = source_metadata.get("lecture_recording", "N/A")
        suggested_readings = source_metadata.get("suggested_readings", "N/A")
        date = source_metadata.get("date", "N/A")

        source_type = source_metadata.get("source_type", "N/A")

        url_name = f"{url}_{page}"
        if url_name not in source_dict:
            source_dict[url_name] = {
                "text": source.page_content,
                "url": url,
                "score": score,
                "page": page,
                "lecture_tldr": lecture_tldr,
                "lecture_recording": lecture_recording,
                "suggested_readings": suggested_readings,
                "date": date,
                "source_type": source_type,
            }
        else:
            source_dict[url_name]["text"] += f"\n\n{source.page_content}"

    full_answer = ""  # Not to include the answer again if streaming

    if not stream:  # First, display the answer if not streaming
        full_answer = "**Answer:**\n"
        full_answer += answer

    if view_sources:
        # Append the source list after the answer, if any sources were retrieved
        if len(source_dict) == 0:
            full_answer += "\n\n**No sources found.**"
            return full_answer, source_elements, source_dict
        else:
            full_answer += "\n\n**Sources:**\n"
            for idx, (url_name, source_data) in enumerate(source_dict.items()):
                full_answer += f"\nSource {idx + 1} (Score: {source_data['score']}): {source_data['url']}\n"

                name = f"Source {idx + 1} Text\n"
                full_answer += name
                source_elements.append(
                    cl.Text(name=name, content=source_data["text"], display="side")
                )

                # Add a PDF element if the source is a PDF file
                if source_data["url"].lower().endswith(".pdf"):
                    name = f"Source {idx + 1} PDF\n"
                    full_answer += name
                    pdf_url = f"{source_data['url']}#page={source_data['page']+1}"
                    source_elements.append(
                        cl.Pdf(name=name, url=pdf_url, display="side")
                    )

            full_answer += "\n**Metadata:**\n"
            for idx, (url_name, source_data) in enumerate(source_dict.items()):
                full_answer += f"\nSource {idx + 1} Metadata:\n"
                source_elements.append(
                    cl.Text(
                        name=f"Source {idx + 1} Metadata",
                        content=f"Source: {source_data['url']}\n"
                        f"Page: {source_data['page']}\n"
                        f"Type: {source_data['source_type']}\n"
                        f"Date: {source_data['date']}\n"
                        f"TL;DR: {source_data['lecture_tldr']}\n"
                        f"Lecture Recording: {source_data['lecture_recording']}\n"
                        f"Suggested Readings: {source_data['suggested_readings']}\n",
                        display="side",
                    )
                )

    return full_answer, source_elements, source_dict


def get_prompt(config, prompt_type):
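    """Select a prompt template from ``prompts`` based on the LLM config.

    For ``prompt_type == "qa"``, picks the local (tiny_llama) or OpenAI
    template, with or without chat history (the OpenAI history prompt is
    further keyed by ``llm_style``). For ``prompt_type == "rephrase"``,
    returns the OpenAI rephrase prompt.
    """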
    llm_params = config["llm_params"]
    llm_loader = llm_params["llm_loader"]
    use_history = llm_params["use_history"]
    llm_style = llm_params["llm_style"].lower()

    if prompt_type == "qa":
        if llm_loader == "local_llm":
            if use_history:
                return prompts["tiny_llama"]["prompt_with_history"]
            else:
                return prompts["tiny_llama"]["prompt_no_history"]
        else:
            if use_history:
                return prompts["openai"]["prompt_with_history"][llm_style]
            else:
                return prompts["openai"]["prompt_no_history"]
    elif prompt_type == "rephrase":
        return prompts["openai"]["rephrase_prompt"]
    raise ValueError(f"Unknown prompt_type: {prompt_type}")


# TODO: Do this better
def get_history_chat_resume(steps, k, SYSTEM, LLM):
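    """Rebuild the conversation history from Chainlit steps on chat resume.

    Walks ``steps`` from newest to oldest, keeping user messages and
    assistant messages whose name matches ``LLM`` while skipping ``SYSTEM``
    steps, and stops once ``2 * k`` messages have been collected. The list
    is returned in chronological order.
    """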
    conversation_list = []
    count = 0
    for step in reversed(steps):
        if step["name"] not in [SYSTEM]:
            if step["type"] == "user_message":
                conversation_list.append(
                    {"type": "user_message", "content": step["output"]}
                )
                count += 1
            elif step["type"] == "assistant_message":
                if step["name"] == LLM:
                    conversation_list.append(
                        {"type": "ai_message", "content": step["output"]}
                    )
                    count += 1
            else:
                pass
                # raise ValueError("Invalid message type")
        # count += 1
        if count >= 2 * k:  # 2 * k to account for both user and assistant messages
            break
    conversation_list = conversation_list[::-1]
    return conversation_list


def get_history_setup_llm(memory_list):
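    """Normalize stored memory messages into a list of typed dicts.

    Accepts dicts or message objects (optionally via ``to_dict``), reads
    their ``type`` and ``content``, and maps them to
    ``{"type": "ai_message" | "user_message", "content": ...}``.
    Raises ``ValueError`` for any other message type.
    """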
    conversation_list = []
    for message in memory_list:
        message_dict = message.to_dict() if hasattr(message, "to_dict") else message

        # Check if the type attribute is present as a key or attribute
        message_type = (
            message_dict.get("type", None)
            if isinstance(message_dict, dict)
            else getattr(message, "type", None)
        )

        # Check if content is present as a key or attribute
        message_content = (
            message_dict.get("content", None)
            if isinstance(message_dict, dict)
            else getattr(message, "content", None)
        )

        if message_type in ["ai", "ai_message"]:
            conversation_list.append({"type": "ai_message", "content": message_content})
        elif message_type in ["human", "user_message"]:
            conversation_list.append(
                {"type": "user_message", "content": message_content}
            )
        else:
            raise ValueError("Invalid message type")

    return conversation_list


def get_last_config(steps):
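    """Intended to recover the last used config from ``steps``; not implemented yet."""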
    # TODO: Implement this function
    return None