andreped committed on
Commit
2392ba8
·
1 Parent(s): 1e53020

Refactoring

Browse files
Files changed (1) hide show
  1. knowledge_gpt/main.py +91 -82
knowledge_gpt/main.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import streamlit as st
2
 
3
  from knowledge_gpt.components.sidebar import sidebar
@@ -19,103 +21,110 @@ from knowledge_gpt.core.qa import query_folder
19
  from knowledge_gpt.core.utils import get_llm
20
 
21
 
22
- EMBEDDING = "openai"
23
- VECTOR_STORE = "faiss"
24
- MODEL_LIST = ["gpt-3.5-turbo", "gpt-4"]
25
-
26
- # Uncomment to enable debug mode
27
- # MODEL_LIST.insert(0, "debug")
28
-
29
- st.set_page_config(page_title="KnowledgeGPT", page_icon="📖", layout="wide")
30
- st.header("📖KnowledgeGPT")
31
-
32
- # Enable caching for expensive functions
33
- bootstrap_caching()
34
-
35
- sidebar()
36
-
37
- openai_api_key = st.session_state.get("OPENAI_API_KEY")
38
-
39
-
40
- if not openai_api_key:
41
- st.warning(
42
- "Enter your OpenAI API key in the sidebar. You can get a key at"
43
- " https://platform.openai.com/account/api-keys."
44
- )
45
-
46
-
47
- uploaded_file = st.file_uploader(
48
- "Upload a pdf, docx, or txt file",
49
- type=["pdf", "docx", "txt"],
50
- help="Scanned documents are not supported yet!",
51
- )
52
-
53
- model: str = st.selectbox("Model", options=MODEL_LIST) # type: ignore
54
-
55
- with st.expander("Advanced Options"):
56
- return_all_chunks = st.checkbox("Show all chunks retrieved from vector search")
57
- show_full_doc = st.checkbox("Show parsed contents of the document")
58
 
59
 
60
- if not uploaded_file:
61
- st.stop()
 
 
62
 
63
- try:
64
- file = read_file(uploaded_file)
65
- except Exception as e:
66
- display_file_read_error(e, file_name=uploaded_file.name)
67
 
68
- chunked_file = chunk_file(file, chunk_size=300, chunk_overlap=0)
 
69
 
70
- if not is_file_valid(file):
71
- st.stop()
72
 
 
73
 
74
- if not is_open_ai_key_valid(openai_api_key, model):
75
- st.stop()
76
 
 
 
 
 
 
77
 
78
- with st.spinner("Indexing document... This may take a while⏳"):
79
- folder_index = embed_files(
80
- files=[chunked_file],
81
- embedding=EMBEDDING if model != "debug" else "debug",
82
- vector_store=VECTOR_STORE if model != "debug" else "debug",
83
- openai_api_key=openai_api_key,
84
  )
85
 
86
- with st.form(key="qa_form"):
87
- query = st.text_area("Ask a question about the document")
88
- submit = st.form_submit_button("Submit")
89
-
90
 
91
- if show_full_doc:
92
- with st.expander("Document"):
93
- # Hack to get around st.markdown rendering LaTeX
94
- st.markdown(f"<p>{wrap_doc_in_html(file.docs)}</p>", unsafe_allow_html=True)
95
 
96
-
97
- if submit:
98
- if not is_query_valid(query):
99
  st.stop()
100
 
101
- # Output Columns
102
- answer_col, sources_col = st.columns(2)
 
 
103
 
104
- llm = get_llm(model=model, openai_api_key=openai_api_key, temperature=0)
105
- result = query_folder(
106
- folder_index=folder_index,
107
- query=query,
108
- return_all=return_all_chunks,
109
- llm=llm,
110
- )
111
 
112
- with answer_col:
113
- st.markdown("#### Answer")
114
- st.markdown(result.answer)
 
 
115
 
116
- with sources_col:
117
- st.markdown("#### Sources")
118
- for source in result.sources:
119
- st.markdown(source.page_content)
120
- st.markdown(source.metadata["source"])
121
- st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
  import streamlit as st
4
 
5
  from knowledge_gpt.components.sidebar import sidebar
 
21
  from knowledge_gpt.core.utils import get_llm
22
 
23
 
24
+ # add all secrets to environment variables
25
+ try:
26
+ for key, value in st.secrets.items():
27
+ os.environ[key] = value
28
+ except FileNotFoundError as e:
29
+ print(e)
30
+ print("./streamlit/secrets.toml not found. Assuming secrets are already available" "as environmental variables...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
 
33
+ def main():
34
+ EMBEDDING = "openai"
35
+ VECTOR_STORE = "faiss"
36
+ MODEL_LIST = ["gpt-3.5-turbo", "gpt-4"]
37
 
38
+ # Uncomment to enable debug mode
39
+ # MODEL_LIST.insert(0, "debug")
 
 
40
 
41
+ st.set_page_config(page_title="KnowledgeGPT", page_icon="📖", layout="wide")
42
+ st.header("📖KnowledgeGPT")
43
 
44
+ # Enable caching for expensive functions
45
+ bootstrap_caching()
46
 
47
+ sidebar()
48
 
49
+ openai_api_key = st.session_state.get("OPENAI_API_KEY")
 
50
 
51
+ if not openai_api_key:
52
+ st.warning(
53
+ "Enter your OpenAI API key in the sidebar. You can get a key at"
54
+ " https://platform.openai.com/account/api-keys."
55
+ )
56
 
57
+ uploaded_file = st.file_uploader(
58
+ "Upload a pdf, docx, or txt file",
59
+ type=["pdf", "docx", "txt"],
60
+ help="Scanned documents are not supported yet!",
 
 
61
  )
62
 
63
+ model: str = st.selectbox("Model", options=MODEL_LIST) # type: ignore
 
 
 
64
 
65
+ with st.expander("Advanced Options"):
66
+ return_all_chunks = st.checkbox("Show all chunks retrieved from vector search")
67
+ show_full_doc = st.checkbox("Show parsed contents of the document")
 
68
 
69
+ if not uploaded_file:
 
 
70
  st.stop()
71
 
72
+ try:
73
+ file = read_file(uploaded_file)
74
+ except Exception as e:
75
+ display_file_read_error(e, file_name=uploaded_file.name)
76
 
77
+ chunked_file = chunk_file(file, chunk_size=300, chunk_overlap=0)
 
 
 
 
 
 
78
 
79
+ if not is_file_valid(file):
80
+ st.stop()
81
+
82
+ if not is_open_ai_key_valid(openai_api_key, model):
83
+ st.stop()
84
 
85
+ with st.spinner("Indexing document... This may take a while⏳"):
86
+ folder_index = embed_files(
87
+ files=[chunked_file],
88
+ embedding=EMBEDDING if model != "debug" else "debug",
89
+ vector_store=VECTOR_STORE if model != "debug" else "debug",
90
+ openai_api_key=openai_api_key,
91
+ )
92
+
93
+ with st.form(key="qa_form"):
94
+ query = st.text_area("Ask a question about the document")
95
+ submit = st.form_submit_button("Submit")
96
+
97
+ if show_full_doc:
98
+ with st.expander("Document"):
99
+ # Hack to get around st.markdown rendering LaTeX
100
+ st.markdown(f"<p>{wrap_doc_in_html(file.docs)}</p>", unsafe_allow_html=True)
101
+
102
+ if submit:
103
+ if not is_query_valid(query):
104
+ st.stop()
105
+
106
+ # Output Columns
107
+ answer_col, sources_col = st.columns(2)
108
+
109
+ llm = get_llm(model=model, openai_api_key=openai_api_key, temperature=0)
110
+ result = query_folder(
111
+ folder_index=folder_index,
112
+ query=query,
113
+ return_all=return_all_chunks,
114
+ llm=llm,
115
+ )
116
+
117
+ with answer_col:
118
+ st.markdown("#### Answer")
119
+ st.markdown(result.answer)
120
+
121
+ with sources_col:
122
+ st.markdown("#### Sources")
123
+ for source in result.sources:
124
+ st.markdown(source.page_content)
125
+ st.markdown(source.metadata["source"])
126
+ st.markdown("---")
127
+
128
+
129
+ if __name__ == "__main__":
130
+ main()