mostafa-sh committed
Commit c603c49 · 1 Parent(s): 88b1620

initial commit to add main codes and files

.gitattributes CHANGED
@@ -32,4 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.xz filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
+ *.json filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ data/**/* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
+ __pycache__/
+ .devcontainer/
+ .streamlit/
+ .env
README.md CHANGED
@@ -1,12 +1,18 @@
  ---
- title: FEM R1
+ title: AI-U finite element method
  emoji: 📊
  colorFrom: green
  colorTo: blue
- sdk: gradio
- sdk_version: 5.38.2
+ sdk: streamlit
+ sdk_version: 1.43.2
  app_file: app.py
  pinned: false
+ short_description: AI-driven instructor for Finite Element Method (FEM) queries
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # AI U
+ This Space is an AI-powered instructor that answers questions on the Finite Element Method (FEM).
+
+ ## How to Use
+ 1. Enter your query in the input box.
+ 2. The assistant will respond in real time.
app.py ADDED
@@ -0,0 +1,402 @@
+ import os
+
+ from huggingface_hub import snapshot_download
+ import streamlit as st
+ from utils.help import get_intro, get_disclaimer
+ from utils.format import sec_to_time, fix_latex, get_youtube_embed
+ from utils.rag_utils import load_youtube_data, load_book_data, load_summary, embed_question_sentence_transformer, fixed_knn_retrieval, get_random_question
+ from utils.system_prompts import get_expert_system_prompt, get_synthesis_user_prompt, get_synthesis_system_prompt
+ from utils.openai_utils import embed_question_openai, openai_domain_specific_answer_generation, openai_context_integration
+ from utils.endpoint_utils import get_inference_endpoint_response, parse_thinking_response, get_custom_inference_endpoint_response
+
+ st.set_page_config(page_title="AI University")
+
+ st.markdown("""
+ <style>
+ .video-wrapper {
+     position: relative;
+     padding-bottom: 56.25%;
+     height: 0;
+ }
+ .video-wrapper iframe {
+     position: absolute;
+     top: 0;
+     left: 0;
+     width: 100%;
+     height: 100%;
+ }
+ </style>
+ """, unsafe_allow_html=True)
+
+ # ---------------------------------------
+ # paths
+ # ---------------------------------------
+ HOME = "."
+ data_dir = HOME + "/data"
+
+ private_data_dir = HOME + "/private_data"  # Relative path in your Space
+
+ # getting private data
+ os.makedirs(private_data_dir, exist_ok=True)
+ token = os.getenv("HF_API_KEY")
+ local_repo_path = snapshot_download(
+     repo_id="my-ai-university/data",
+     use_auth_token=token,
+     repo_type="dataset",
+     local_dir=private_data_dir,
+ )
+
+ adapter_path = HOME + "/LLaMA-TOMMI-1.0/"
+ base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+ # ---------------------------------------
+
+ st.title(":red[AI University] :gray[/] FEM")
+
+ st.markdown(get_intro(), unsafe_allow_html=True)
+ st.markdown(" ")
+ st.markdown(" ")
+
+ # Sidebar for settings
+ with st.sidebar:
+     st.header("Settings")
+
+     with st.expander('Embedding model', expanded=True):
+         embedding_model = st.selectbox("Choose content embedding model", [
+             "text-embedding-3-small",
+             "all-MiniLM-L6-v2",
+         ])
+         st.divider()
+         st.write('**Video lectures**')
+         if embedding_model == "all-MiniLM-L6-v2":
+             yt_token_choice = st.select_slider("Token per content", [128, 256], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="yt_token_len")
+         elif embedding_model == "text-embedding-3-small":
+             yt_token_choice = st.select_slider("Token per content", [256, 512, 1024], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="yt_token_len")
+         yt_chunk_tokens = yt_token_choice
+         yt_max_content = {128: 32, 256: 16, 512: 8, 1024: 4}[yt_chunk_tokens]
+         top_k_YT = st.slider("Number of content pieces to retrieve", 0, yt_max_content, 4, key="yt_token_num")
+         yt_overlap_tokens = yt_chunk_tokens // 4
+
+         st.divider()
+         st.write('**Textbook**')
+         show_textbook = False
+
+         if embedding_model == "all-MiniLM-L6-v2":
+             latex_token_choice = st.select_slider("Token per content", [128, 256], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="latex_token_len")
+         elif embedding_model == "text-embedding-3-small":
+             latex_token_choice = st.select_slider("Token per content", [128, 256, 512, 1024], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="latex_token_len")
+         latex_chunk_tokens = latex_token_choice
+         latex_max_content = {128: 32, 256: 16, 512: 8, 1024: 4}[latex_chunk_tokens]
+         top_k_Latex = st.slider("Number of content pieces to retrieve", 0, latex_max_content, 4, key="latex_token_num")
+         latex_overlap_tokens = 0
+
+     st.write(' ')
+     with st.expander('Expert model', expanded=True):
+         if 'activate_expert' in st.session_state:
+             st.session_state.activate_expert = st.toggle("Use expert model", value=st.session_state.activate_expert)
+         else:
+             st.session_state.activate_expert = st.toggle("Use expert model", value=True)
+
+         st.session_state.expert_model = st.selectbox(
+             "Choose the LLM model",
+             ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B", "gpt-4o-mini"],
+             index=0,  # Default to LLaMA-TOMMI-1.0-11B
+             key='a1model'
+         )
+
+         if st.session_state.expert_model in ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B"]:
+             expert_do_sample = st.toggle("Enable Sampling", value=False, key='expert_sample')
+
+             if expert_do_sample:
+                 expert_temperature = st.slider("Temperature", 0.0, 1.0, 0.2, key='expert_temp')
+                 expert_top_k = st.slider("Top K", 0, 100, 50, key='expert_top_k')
+                 expert_top_p = st.slider("Top P", 0.0, 1.0, 0.1, key='expert_top_p')
+             else:
+                 expert_num_beams = st.slider("Num Beams", 1, 4, 1, key='expert_num_beams')
+
+             expert_max_new_tokens = st.slider("Max New Tokens", 100, 2000, 500, step=50, key='expert_max_new_tokens')
+         else:
+             expert_api_temperature = st.slider("Temperature", 0.0, 1.0, 0.2, key='a1t')
+             expert_api_top_p = st.slider("Top P", 0.0, 1.0, 0.1, key='a1p')
+
+     with st.expander('Synthesis model', expanded=True):
+         st.session_state.synthesis_model = st.selectbox(
+             "Choose the LLM model",
+             ["DeepSeek-R1-0528-Qwen3-8B", "gpt-4o-mini", "gpt-4.1-mini"],
+             index=0,  # Default to DeepSeek-R1
+             key='a2model'
+         )
+
+         if st.session_state.synthesis_model == "DeepSeek-R1-0528-Qwen3-8B":
+             synthesis_deepseek_temperature = st.slider("Temperature", 0.0, 1.0, 0.2, key='synthesis_deepseek_temperature')
+             synthesis_deepseek_top_p = st.slider("Top P", 0.0, 1.0, 0.1, key='synthesis_deepseek_top_p')
+             synthesis_deepseek_max_tokens = st.slider("Max Tokens", 1000, 10000, 4000, step=100, key='synthesis_deepseek_max_tokens')
+
+         else:
+             synthesis_api_temperature = st.slider("Temperature", 0.0, 0.5, 0.3, help="Defines the randomness in the next token prediction. Lower: More predictable and focused. Higher: More adventurous and diverse.", key='a2t')
+             synthesis_api_top_p = st.slider("Top P", 0.1, 0.5, 0.3, help="Defines the range of token choices the model can consider in the next prediction. Lower: More focused and restricted to high-probability options. Higher: More creative, allowing consideration of less likely options.", key='a2p')
+
+ # Main content area
+ if "question" not in st.session_state:
+     st.session_state.question = ""
+
+ text_area_placeholder = st.empty()
+ question_help = "Including details or instructions improves the answer."
+ st.session_state.question = text_area_placeholder.text_area(
+     "**Enter your query about Finite Element Method**",
+     height=120,
+     value=st.session_state.question,
+     help=question_help
+ )
+
+ _, col1, col2, _ = st.columns([4, 2, 4, 3])
+ with col1:
+     submit_button_placeholder = st.empty()
+
+ with col2:
+     if st.button("🎲 Random Question"):
+         while True:
+             random_question = get_random_question(data_dir + "/questions.txt")
+             if random_question != st.session_state.question:
+                 break
+         st.session_state.question = random_question
+         text_area_placeholder.text_area(
+             "**Enter your query about Finite Element Method:**",
+             height=120,
+             value=st.session_state.question,
+             help=question_help
+         )
+
+ # Load YouTube and LaTeX data
+ text_data_YT, context_embeddings_YT = load_youtube_data(data_dir, embedding_model, yt_chunk_tokens, yt_overlap_tokens)
+ text_data_Latex, context_embeddings_Latex = load_book_data(private_data_dir, embedding_model, latex_chunk_tokens, latex_overlap_tokens)
+ summary = load_summary(data_dir + '/KG_FEM_summary.json')
+
+ # Initialize session state variables
+ if 'question_answered' not in st.session_state:
+     st.session_state.question_answered = False
+ if 'context_by_video' not in st.session_state:
+     st.session_state.context_by_video = {}
+ if 'context_by_section' not in st.session_state:
+     st.session_state.context_by_section = {}
+ if 'answer' not in st.session_state:
+     st.session_state.answer = ""
+ if 'thinking' not in st.session_state:
+     st.session_state.thinking = ""
+ if 'playing_video_id' not in st.session_state:
+     st.session_state.playing_video_id = None
+ if 'yt_context_for_display' not in st.session_state:
+     st.session_state.yt_context_for_display = ""
+ if 'latex_context_count' not in st.session_state:
+     st.session_state.latex_context_count = 0
+ if 'video_context_count' not in st.session_state:
+     st.session_state.video_context_count = 0
+
+
+ if submit_button_placeholder.button("AI Answer", type="primary"):
+     if st.session_state.question == "":
+         st.markdown("")
+         st.write("Please enter a query. :smirk:")
+         st.session_state.question_answered = False
+
+     else:
+         with st.spinner("Finding relevant contexts..."):
+             if embedding_model == "all-MiniLM-L6-v2":
+                 question_embedding = embed_question_sentence_transformer(st.session_state.question, model_name="all-MiniLM-L6-v2")
+             elif embedding_model == "text-embedding-3-small":
+                 question_embedding = embed_question_openai(st.session_state.question, embedding_model)
+
+             initial_max_k = int(0.1 * context_embeddings_YT.shape[0])
+             idx_YT = fixed_knn_retrieval(question_embedding, context_embeddings_YT, top_k=top_k_YT, min_k=0)
+             idx_Latex = fixed_knn_retrieval(question_embedding, context_embeddings_Latex, top_k=top_k_Latex, min_k=0)
+
+             relevant_contexts_YT = sorted([text_data_YT[i] for i in idx_YT], key=lambda x: x['order'])
+             relevant_contexts_Latex = sorted([text_data_Latex[i] for i in idx_Latex], key=lambda x: x['order'])
+
+             st.session_state.context_by_video = {}
+             for context_item in relevant_contexts_YT:
+                 video_id = context_item['video_id']
+                 if video_id not in st.session_state.context_by_video:
+                     st.session_state.context_by_video[video_id] = []
+                 st.session_state.context_by_video[video_id].append(context_item)
+             st.session_state.video_context_count = len(st.session_state.context_by_video)
+
+
+             st.session_state.context_by_section = {}
+             for context_item in relevant_contexts_Latex:
+                 section_id = context_item['section']
+                 if section_id not in st.session_state.context_by_section:
+                     st.session_state.context_by_section[section_id] = []
+                 st.session_state.context_by_section[section_id].append(context_item)
+
+             # Build context strings
+             yt_context_string = ''
+             for i, (video_id, contexts) in enumerate(st.session_state.context_by_video.items(), start=1):
+                 yt_context_string += f"--- Video {i}: {contexts[0]['title']} ---\n"
+                 for context_item in contexts:
+                     start_time = int(context_item['start'])
+                     yt_context_string += f"Timestamp {sec_to_time(start_time)}: {context_item['text']}\n\n"
+
+             latex_context_string = ''
+             if top_k_Latex > 0:
+                 for i, (section_id, contexts) in enumerate(st.session_state.context_by_section.items(), start=1):
+                     latex_context_string += f'--- Textbook Section {i} ({section_id}) ---\n'
+                     for context_item in contexts:
+                         latex_context_string += context_item['text'] + '\n\n'
+
+             context_for_llm = yt_context_string + latex_context_string
+             st.session_state.yt_context_for_display = fix_latex(yt_context_string)
+             st.session_state.latex_context_count = len(st.session_state.context_by_section)
+
+
+         with st.spinner("Answering the question..."):
+             if st.session_state.activate_expert:
+                 if st.session_state.expert_model in ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B"]:
+                     if st.session_state.expert_model == "LLaMA-TOMMI-1.0-11B":
+                         use_expert = True
+                     elif st.session_state.expert_model == "LLaMA-3.2-11B":
+                         use_expert = False
+
+                     messages = [
+                         {"role": "system", "content": get_expert_system_prompt()},
+                         {"role": "user", "content": st.session_state.question}
+                     ]
+
+                     expert_answer = get_custom_inference_endpoint_response(
+                         messages=messages,
+                         use_expert=use_expert,
+                         tokenizer_max_length=500,
+                         do_sample=expert_do_sample,
+                         temperature=expert_temperature if expert_do_sample else None,
+                         top_k=expert_top_k if expert_do_sample else None,
+                         top_p=expert_top_p if expert_do_sample else None,
+                         num_beams=expert_num_beams if not expert_do_sample else 1,
+                         max_new_tokens=expert_max_new_tokens
+                     )
+                 else:
+                     expert_answer = openai_domain_specific_answer_generation(
+                         get_expert_system_prompt(),
+                         st.session_state.question,
+                         model=st.session_state.expert_model,
+                         temperature=expert_api_temperature,
+                         top_p=expert_api_top_p
+                     )
+                 st.session_state.expert_answer = fix_latex(expert_answer)
+             else:
+                 st.session_state.expert_answer = 'No Expert Answer. Only use the context.'
+
+             if st.session_state.synthesis_model == "DeepSeek-R1-0528-Qwen3-8B":
+
+                 messages = [
+                     {"role": "system", "content": get_synthesis_system_prompt("Finite Element Method")},
+                     {"role": "user", "content": get_synthesis_user_prompt(st.session_state.question, st.session_state.expert_answer, context_for_llm)}
+                 ]
+
+                 raw_synthesis_answer = get_inference_endpoint_response(
+                     model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
+                     messages=messages,
+                     temperature=synthesis_deepseek_temperature,
+                     top_p=synthesis_deepseek_top_p,
+                     max_tokens=synthesis_deepseek_max_tokens
+                 )
+
+                 print(raw_synthesis_answer)
+                 thinking, synthesis_answer = parse_thinking_response(raw_synthesis_answer)
+                 st.session_state.thinking = thinking
+
+             else:
+                 synthesis_answer = openai_context_integration(
+                     get_synthesis_system_prompt("Finite Element Method"),
+                     st.session_state.question,
+                     st.session_state.expert_answer,
+                     context_for_llm,
+                     model=st.session_state.synthesis_model,
+                     temperature=synthesis_api_temperature,
+                     top_p=synthesis_api_top_p
+                 )
+
+             # quick check after getting the answer
+             if synthesis_answer.split()[0] == "NOT_ENOUGH_INFO":
+                 st.markdown("")
+                 st.markdown("#### Query", unsafe_allow_html=True)
+                 st.markdown(fix_latex(st.session_state.question))
+                 st.markdown("#### Answer")
+                 st.write(":smiling_face_with_tear:")
+                 st.markdown(synthesis_answer.split('NOT_ENOUGH_INFO')[1])
+                 st.divider()
+                 st.caption(get_disclaimer())
+                 st.session_state.question_answered = False
+                 st.stop()
+             else:
+                 st.session_state.answer = fix_latex(synthesis_answer)
+                 st.session_state.question_answered = True
+
+ if st.session_state.question_answered:
+     st.divider()
+     st.markdown("#### Query", unsafe_allow_html=True)
+     st.markdown(fix_latex(st.session_state.question))
+
+     # st.markdown(" ")
+     st.markdown("#### Inference and Reasoning")
+
+     # Expander for Initial Expert Answer
+     if st.session_state.activate_expert and 'expert_answer' in st.session_state:
+         with st.expander("Initial Expert Answer", expanded=False):
+             st.info(f"This is the initial answer from the expert model ({st.session_state.expert_model}), used as a starting point for the final synthesis.", icon="🧑‍🏫")
+             st.markdown(st.session_state.expert_answer)
+
+     # Expander for Retrieved Context
+     if 'yt_context_for_display' in st.session_state and st.session_state.yt_context_for_display:
+         with st.expander("Retrieved Context", expanded=False):
+             st.info("This is the raw context retrieved from the knowledge base to inform the final answer.", icon="📚")
+             if 'video_context_count' in st.session_state and st.session_state.video_context_count > 0:
+                 st.success(f"Found {st.session_state.video_context_count} relevant video transcript(s) containing retrieved content.", icon="📺")
+                 st.markdown(st.session_state.yt_context_for_display)
+             if 'latex_context_count' in st.session_state and st.session_state.latex_context_count > 0:
+                 st.info(f"Additionally, {st.session_state.latex_context_count} relevant sections were found in the textbook *The Finite Element Method: Linear Static and Dynamic Finite Element Analysis* by Thomas J. R. Hughes (2012).", icon="📚")
+
+     # Expander for Model's Thinking Process
+     if st.session_state.synthesis_model == "DeepSeek-R1-0528-Qwen3-8B" and 'thinking' in st.session_state and st.session_state.thinking:
+         with st.expander(":blue[**Model's Thinking Process**]", expanded=False):
+             st.info(f"This is the reasoning from the synthesis model ({st.session_state.synthesis_model}) used to synthesize the final answer.", icon="🤔")
+             st.markdown(st.session_state.thinking)
+
+     # st.markdown("---")
+     st.markdown("#### Answer")
+     st.markdown(st.session_state.answer)
+     st.markdown(" ")
+
+     if top_k_YT > 0:
+         st.markdown("#### Retrieved content in lecture videos")
+         for i, (video_id, contexts) in enumerate(st.session_state.context_by_video.items(), start=1):
+             with st.container(border=True):
+                 st.markdown(f"**Video {i} | {contexts[0]['title']}**")
+                 video_placeholder = st.empty()
+                 video_placeholder.markdown(get_youtube_embed(video_id, 0, 0), unsafe_allow_html=True)
+                 st.markdown('')
+                 with st.container(border=False):
+                     st.markdown("Retrieved Times")
+                     cols = st.columns([1 for _ in range(len(contexts))] + [9 - len(contexts)])
+                     for j, context_item in enumerate(contexts):
+                         start_time = int(context_item['start'])
+                         label = sec_to_time(start_time)
+                         if cols[j].button(label, key=f"{video_id}_{start_time}"):
+                             if st.session_state.playing_video_id is not None:
+                                 st.session_state.playing_video_id = None
+                             video_placeholder.empty()
+                             video_placeholder.markdown(get_youtube_embed(video_id, start_time, 1), unsafe_allow_html=True)
+                             st.session_state.playing_video_id = video_id
+
+                 with st.expander("Video Summary", expanded=False):
+                     st.markdown(summary[video_id])
+
+     if show_textbook and top_k_Latex > 0:
+         st.markdown("#### Retrieved content in textbook", help="The Finite Element Method: Linear Static and Dynamic Finite Element Analysis")
+         for i, (section_id, contexts) in enumerate(st.session_state.context_by_section.items(), start=1):
+             st.markdown(f"**Section {i} | {section_id}**")
+             for context_item in contexts:
+                 st.markdown(context_item['text'])
+             st.divider()
+
+     st.markdown(" ")
+     st.divider()
+     st.caption(get_disclaimer())
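
For orientation, the retrieval-and-synthesis flow that app.py wires into the Streamlit widgets can be exercised headlessly. This is a minimal sketch, not part of the commit: it assumes the utils modules above are importable, the data files added below are present, and the example question is purely illustrative.

```python
# Minimal sketch of app.py's RAG flow without the Streamlit UI (assumptions noted above).
from utils.rag_utils import load_youtube_data, embed_question_sentence_transformer, fixed_knn_retrieval
from utils.system_prompts import get_synthesis_system_prompt, get_synthesis_user_prompt

question = "What is the weak form of the 1D elliptic PDE?"  # illustrative

# 1. Embed the question and retrieve the closest transcript chunks.
chunks, embeddings = load_youtube_data("./data", "all-MiniLM-L6-v2", 256, 64)
idx = fixed_knn_retrieval(embed_question_sentence_transformer(question), embeddings, top_k=4, min_k=0)
context = "\n\n".join(chunks[i]["text"] for i in idx)

# 2. Build the same synthesis messages app.py sends to its chat backend.
messages = [
    {"role": "system", "content": get_synthesis_system_prompt("Finite Element Method")},
    {"role": "user", "content": get_synthesis_user_prompt(question, "No Expert Answer. Only use the context.", context)},
]
```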
data/KG_FEM_summary.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a9179743956afab296c02eec5b6b82f8bc49a449721c46ddd9634d7c4be4053a
+ size 203883
data/questions.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d23dbdf124c14dfb1c2224708711b4c892b5874f800e7e06240261f229468d61
+ size 456242
data/yt_embedding_space_all-MiniLM-L6-v2_tpc128_o32.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:25393a46b720884076694fc10f9edb80a67003e452a9187077c69c632f2d45dd
+ size 36670448
data/yt_embedding_space_all-MiniLM-L6-v2_tpc256_o64.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb839d14084e8c305100359eabaefb517b83c9673368cb09b0da23673ce05df3
+ size 17898177
data/yt_embedding_space_text-embedding-3-small_tpc1024_o256.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b0bb1fc517dfa8eeea1f7545bec577556e7bd170ec885cdf25eab3f5d665d2ba
+ size 17109772
data/yt_embedding_space_text-embedding-3-small_tpc256_o64.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6f69373b4df1d9bef5a64a68d898c596fee6007c26a7ac6ff58d56f786c93d60
+ size 62427532
data/yt_embedding_space_text-embedding-3-small_tpc512_o128.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e1cc71a26650538650196102342e9f48242eedf5ed2bbf1fb4c2299691edd6c6
+ size 31637503
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ numpy==1.26.3
+ openai==1.57.0
+ sentence-transformers==2.7.0
+ streamlit==1.43.2
+ python-dotenv==1.0.1
utils/endpoint_utils.py ADDED
@@ -0,0 +1,145 @@
+ import requests
+ import time
+ import os
+ import re
+ import json
+
+ from huggingface_hub import InferenceClient
+
+ # Helper function to parse the response
+ def parse_thinking_response(response_text):
+     """
+     Parses a model's response to separate the thinking process
+     from the final answer.
+     """
+     match = re.search(r"<think>(.*?)</think>(.*)", response_text, re.DOTALL)
+     if match:
+         thinking = match.group(1).strip()
+         final_answer = match.group(2).strip()
+         return thinking, final_answer
+     else:
+         return None, response_text.strip()
+
+
+ def get_inference_endpoint_response(
+     model,
+     messages,
+     temperature,
+     top_p,
+     max_tokens
+ ):
+     """
+     Serverless API (Pay-as-you-go)
+     """
+     client = InferenceClient(
+         provider="auto",
+         api_key=os.getenv("HF_API_KEY")
+     )
+
+     completion = client.chat.completions.create(
+         model=model,
+         messages=messages,
+         temperature=temperature,
+         top_p=top_p,
+         max_tokens=max_tokens
+     )
+
+     # Get the raw response content
+     raw_response = completion.choices[0].message.content
+
+     return raw_response
+
+
+ def get_custom_inference_endpoint_response(
+     messages: list,
+     use_expert: bool = True,
+     tokenizer_max_length: int = 512,
+     do_sample: bool = False,
+     temperature: float = 0.6,
+     top_k: int = 50,
+     top_p: float = 0.95,
+     num_beams: int = 1,
+     max_new_tokens: int = 1024,
+     **kwargs  # To catch any other unused arguments
+ ):
+     """
+     Contacts a custom Hugging Face inference endpoint with retry logic.
+     This function is tailored to a custom EndpointHandler that expects a specific
+     payload structure: {"inputs": {"messages": [...], "settings": {...}}}.
+     """
+     endpoint_url = os.getenv("HF_ENDPOINT_URL")
+     hf_endpoint_token = os.getenv("HF_ENDPOINT_TOKEN")
+
+     if not endpoint_url or not hf_endpoint_token:
+         return "Error: HF_ENDPOINT_URL and HF_ENDPOINT_TOKEN environment variables must be set."
+
+     headers = {
+         "Authorization": f"Bearer {hf_endpoint_token}",
+         "Content-Type": "application/json"
+     }
+
+     # --- Payload structure for the custom endpoint handler ---
+     # This handler expects a 'settings' dictionary nested inside 'inputs'.
+     settings = {
+         "use_expert": use_expert,
+         "tokenizer_max_length": tokenizer_max_length,
+         "do_sample": do_sample,
+         "temperature": temperature,
+         "top_k": top_k,
+         "top_p": top_p,
+         "num_beams": num_beams,
+         "max_new_tokens": max_new_tokens,
+     }
+
+     # The server-side EndpointHandler is designed to handle parameter logic,
+     # so we send all parameters from the client.
+
+     # The final payload must match the nested structure the custom handler expects.
+     payload = {
+         "inputs": {
+             "messages": messages,
+             "settings": settings
+         }
+     }
+
+     # --- Retry logic ---
+     max_retries = 5
+     wait_time = 30  # seconds to wait between retries
+
+     for attempt in range(max_retries):
+         print(f"Attempting to contact endpoint, attempt {attempt + 1}/{max_retries}...")
+         # Log the exact payload being sent for easier debugging
+         print(f"Payload: {json.dumps(payload, indent=2)}")
+         try:
+             response = requests.post(endpoint_url, headers=headers, json=payload)
+
+             # Raise an exception for bad status codes (4xx or 5xx)
+             response.raise_for_status()
+
+             result = response.json()
+             print(f"Success! Response: {result}")
+
+             # The custom handler returns a dictionary with a 'response' key.
+             # This parsing logic correctly extracts it.
+             return result.get('response', 'Error: "response" key not found in the result.')
+
+         except requests.exceptions.HTTPError as errh:
+             # Handle the specific 503 error raised while the model is loading
+             if errh.response.status_code == 503 and attempt < max_retries - 1:
+                 print(f"Service Unavailable (503). Endpoint may be starting up. Retrying in {wait_time} seconds...")
+                 time.sleep(wait_time)
+             else:
+                 error_message = f"HTTP Error: {errh}\nResponse: {errh.response.text}"
+                 print(error_message)
+                 return error_message
+         except requests.exceptions.RequestException as err:
+             error_message = f"Request Error: {err}"
+             print(error_message)
+             return error_message
+         except json.JSONDecodeError:
+             error_message = f"JSON Decode Error: Failed to parse response from server.\nResponse Text: {response.text}"
+             print(error_message)
+             return error_message
+
+
+     return "Error: Failed to get a response after multiple retries."
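
A quick illustration of how `parse_thinking_response` behaves (hypothetical strings, not part of the commit): DeepSeek-R1-style models wrap their reasoning in `<think>` tags, which the helper splits off, while responses without the tags pass through unchanged.

```python
from utils.endpoint_utils import parse_thinking_response

raw = "<think>Recall the weak form derivation.</think>The weak form is obtained by..."
thinking, answer = parse_thinking_response(raw)
# thinking == "Recall the weak form derivation."
# answer   == "The weak form is obtained by..."

# No <think> block: the whole text is treated as the final answer.
assert parse_thinking_response("plain answer") == (None, "plain answer")
```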
utils/format.py ADDED
@@ -0,0 +1,20 @@
+ import re
+
+ def sec_to_time(start_time):
+     return f"{start_time // 60:02}:{start_time % 60:02}"
+
+ def fix_latex(text):
+     text = re.sub(r"\\\(", r"$", text)
+     text = re.sub(r"\\\)", r"$", text)
+     text = re.sub(r"\\\[", r"$$", text)
+     text = re.sub(r"\\\]", r"$$", text)
+     return text
+
+ def get_youtube_embed(video_id, start_time=0, autoplay=0):
+     embed_code = f'''
+     <div class="video-wrapper">
+         <iframe src="https://www.youtube.com/embed/{video_id}?start={start_time}&autoplay={autoplay}&rel=0"
+                 frameborder="0" allowfullscreen></iframe>
+     </div>
+     '''
+     return embed_code
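
A short usage sketch for these helpers (illustrative values): `sec_to_time` renders seconds as MM:SS, and `fix_latex` rewrites `\( \)` / `\[ \]` delimiters into the `$` / `$$` forms that Streamlit's markdown renderer understands.

```python
from utils.format import sec_to_time, fix_latex

print(sec_to_time(754))   # -> "12:34"
print(fix_latex(r"\(x=0\) is the min of \[ \sigma(x) = x^2 \]"))
# -> "$x=0$ is the min of $$ \sigma(x) = x^2 $$"
```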
utils/help.py ADDED
@@ -0,0 +1,26 @@
+
+ def get_intro():
+     return """
+     Welcome to <span style='color:red'><a href='https://my-ai-university.com/' target='_blank' style='text-decoration: none; color: red;'>AI University</a></span> — an AI-powered platform designed to address scientific course queries, dynamically adapting to instructors' teaching styles and students' learning needs.
+     This prototype showcases the capabilities of the <span style='color:red'><a href='https://github.com/my-ai-university' target='_blank' style='text-decoration: none; color: red;'>AI University platform</a></span> by providing expert answers to queries related to a graduate-level <span style='color:red'><a href='https://www.youtube.com/playlist?list=PLJhG_d-Sp_JHKVRhfTgDqbic_4MHpltXZ' target='_blank' style='text-decoration: none; color: red;'>Finite Element Method (FEM)</a></span> course.
+     """
+
+ def get_disclaimer():
+     return """
+
+     :gray[AI University is developed at the University of Southern California by Mostafa Shojaei, Rahul Gulati, Benjamin Jasperson, Shangshang Wang, Simone Cimolato, Dangli Cao, Willie Neiswanger, and Krishna Garikipati.]
+
+     :gray[**Resources:**]
+     [AI University](https://my-ai-university.com/),
+     &nbsp; [ArXiv](https://arxiv.org/abs/2504.08846),
+     &nbsp; [GitHub](https://github.com/my-ai-university/finite-element-method),
+     &nbsp; [HuggingFace](https://huggingface.co/my-ai-university),
+     &nbsp; [WandB](https://wandb.ai/my-ai-university/finite-element-method)
+
+     :gray[**Main Data Sources:**]
+     [Introduction to Finite Element Methods (FEM) by Prof. Krishna Garikipati](https://www.youtube.com/playlist?list=PLJhG_d-Sp_JHKVRhfTgDqbic_4MHpltXZ),
+     &nbsp; [The Finite Element Method: Linear Static and Dynamic Finite Element Analysis by Thomas J. R. Hughes](https://www.google.com/books/edition/_/cHH2n_qBK0IC?hl=en).
+
+     :gray[**Disclaimer and Copyright Notice:**] :gray[1. AI-Generated Responses: Answers are generated using AI and, while thorough, may not always be 100% accurate. Please verify the information independently. 2. Content Ownership: All video content and lecture material referenced belong to their original creators. We encourage users to view the original material on verified platforms to ensure authenticity and accuracy. 3. Educational Fair Use: This tool is intended solely for educational purposes and operates under the principles of fair use. It is not authorized for commercial applications.]
+     """
+
utils/openai_utils.py ADDED
@@ -0,0 +1,82 @@
+ import os
+ from openai import OpenAI
+
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ #--------------------------------------------------------
+ # Initialize OpenAI client
+ #--------------------------------------------------------
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+ def embed_question_openai(texts, model="text-embedding-3-small"):
+     response = client.embeddings.create(
+         input=texts,
+         model=model
+     )
+     return response.data[0].embedding
+
+
+ def openai_domain_specific_answer_generation(system_prompt, question, model="gpt-4o-mini", temperature=0.3, top_p=0.1):
+
+     prompt = f"""
+     Question:
+     {question}
+
+     Answer (provide a precise, domain-specific response):
+     """
+
+     response = client.chat.completions.create(
+         model=model,
+         messages=[
+             {
+                 "role": "system",
+                 "content": system_prompt
+             },
+             {
+                 "role": "user",
+                 "content": prompt
+             }
+         ],
+         temperature=temperature,  # Set low for deterministic and precise responses.
+         top_p=top_p,  # Focus on high-probability outputs to ensure accuracy.
+         frequency_penalty=0.1,  # Reduce repetition of technical terms.
+         presence_penalty=0.0  # Prevent introduction of unrelated ideas.
+     )
+
+     return response.choices[0].message.content
+
+ def openai_context_integration(system_prompt, query, expert_answer, retrieved_context, model="gpt-4o-mini", temperature=0.3, top_p=0.3):
+
+     prompt = f"""
+     Question:
+     {query}
+
+     Direct Answer:
+     {expert_answer}
+
+     Retrieved Context:
+     {retrieved_context}
+
+     Final Answer:
+     """
+
+     response = client.chat.completions.create(
+         model=model,
+         messages=[
+             {
+                 "role": "system",
+                 "content": system_prompt
+             },
+             {
+                 "role": "user",
+                 "content": prompt
+             }
+         ],
+         temperature=temperature,  # Maintain some flexibility for smooth blending.
+         top_p=top_p,  # Prioritize high-probability outputs to stay focused on the inputs.
+         frequency_penalty=0.1,  # Allow necessary repetition for clarity.
+         presence_penalty=0.0  # Neutral to avoid introducing unrelated ideas.
+     )
+
+     return response.choices[0].message.content
utils/rag_utils.py ADDED
@@ -0,0 +1,57 @@
+ import json
+ import numpy as np
+ import random
+ import streamlit as st
+ from sentence_transformers import SentenceTransformer
+
+ @st.cache_resource
+ def load_youtube_data(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
+     embedding_space_file_name = f'{base_path}/yt_embedding_space_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
+     with open(embedding_space_file_name, 'r') as json_file:
+         loaded_data = json.load(json_file)
+
+     embedding_space = np.array(loaded_data['embedding_space'])
+     return loaded_data['chunks'], embedding_space
+
+ @st.cache_resource
+ def load_book_data(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
+     embedding_space_file_name = f'{base_path}/latex_embedding_space_by_sections_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
+     with open(embedding_space_file_name, 'r') as json_file:
+         loaded_data = json.load(json_file)
+
+     embedding_space = np.array(loaded_data['embedding_space'])
+     return loaded_data['chunks'], embedding_space
+
+ @st.cache_resource
+ def load_summary(file_path):
+     with open(file_path, 'r') as file:
+         transcripts = json.load(file)
+     return transcripts
+
+ def embed_question_sentence_transformer(texts, model_name="sentence-transformers/all-MiniLM-L6-v2"):
+     model = SentenceTransformer(model_name)
+     embeddings = model.encode(texts)
+
+     return embeddings.tolist()
+
+ def fixed_knn_retrieval(question_embedding, context_embeddings, top_k=5, min_k=1):
+
+     question_embedding = np.array(question_embedding)
+
+     # Normalize
+     question_embedding = question_embedding / np.linalg.norm(question_embedding)
+     context_embeddings = context_embeddings / np.linalg.norm(context_embeddings, axis=1, keepdims=True)
+
+     # Calculate cosine similarities between the question embedding and all context embeddings.
+     similarities = np.dot(context_embeddings, question_embedding)
+     # Sort the similarities in descending order and get the corresponding indices.
+     sorted_indices = np.argsort(similarities)[::-1]
+     # Select the top_k most similar contexts, ensuring at least min_k contexts are selected.
+     selected_indices = sorted_indices[:max(top_k, min_k)].tolist()
+     return selected_indices
+
+
+ def get_random_question(text_file):
+     with open(text_file, "r") as file:
+         questions = [line.strip() for line in file]
+     return random.choice(questions)
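
To make the retrieval step concrete, here is a toy run of `fixed_knn_retrieval` with hand-made vectors (illustrative only): both sides are L2-normalized, so the dot product is cosine similarity, and the returned indices are ordered from most to least similar.

```python
import numpy as np
from utils.rag_utils import fixed_knn_retrieval

context_embeddings = np.array([
    [1.0, 0.0],   # index 0: along x
    [0.0, 1.0],   # index 1: along y
    [1.0, 1.0],   # index 2: diagonal
])
question_embedding = [1.0, 0.1]  # nearly aligned with index 0

print(fixed_knn_retrieval(question_embedding, context_embeddings, top_k=2, min_k=1))
# -> [0, 2]: index 0 is the best match, the diagonal vector second
```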
utils/system_prompts.py ADDED
@@ -0,0 +1,66 @@
+ def get_expert_system_prompt():
+     # system_prompt = f"""
+     # You are a highly specialized assistant for the subject Finite Element Method (FEM). Provide a direct and focused answer to the following question based on your specialized training.
+     # """
+     system_prompt = (
+         "You are an AI professor for a Finite Element Method (FEM) course. "
+         "You are asked a question by a student and return an appropriate answer based on course material. "
+         "Your response focuses on FEM fundamentals, theories, and applications as presented in the course. "
+         "Use standard LaTeX notation when replying with mathematical notation."
+     )
+     return system_prompt
+
+ def get_synthesis_user_prompt(question, direct_answer, context):
+
+     return f"""
+     Question:
+     {question}
+
+     Direct Answer:
+     {direct_answer}
+
+     Retrieved Context:
+     {context}
+
+     Final Answer:
+     """
+
+
+ def get_synthesis_system_prompt(subject_matter="Finite Element Method (FEM)"):
+     """
+     Returns the system prompt for the synthesis model.
+     This version incorporates the user's detailed instructions for guided synthesis.
+     """
+     system_prompt = f"""
+     You are an AI teaching assistant for a {subject_matter} course. Your task is to synthesize a final, high-quality answer to the student's **Question** by intelligently integrating two sources: a preliminary **Direct Answer** and the official **Retrieved Context** from the course materials.
+
+     By synthesizing we mean that your final answer must always be grounded **exclusively** in the provided **Direct Answer** and **Retrieved Context**. Therefore, never use any external knowledge, including your existing knowledge.
+
+     IMPORTANT INITIAL CHECK: Analyze the provided **Question**, **Direct Answer**, and **Retrieved Context**.
+
+     If the **Direct Answer** AND the **Retrieved Context** together lack sufficient information to answer the **Question**, respond EXACTLY as follows and then STOP:
+     "NOT_ENOUGH_INFO The provided context doesn't contain enough information to fully answer this question. You may want to increase the number of relevant context passages or adjust the options and try again."
+
+     Else continue with the remaining guidelines.
+
+     Guidelines:
+     1. Your primary synthesizing goal is to use the **Retrieved Context** to validate, improve, and expand upon the **Direct Answer**.
+         a. If the **Direct Answer** is accurate and relevant, use it as the foundation for your response. Your task is then to enrich it by weaving in specific details, examples, and citations from the **Retrieved Context** to create a more comprehensive and well-supported answer.
+         b. If the **Direct Answer** is poor, inaccurate, or irrelevant, you should rely more heavily on the **Retrieved Context** to construct the correct answer from the ground up.
+
+     2. Referencing:
+         a. Always cite your sources by referencing the video number and the given time in brackets and **bold** (e.g., [**Video 3, time 03:14**]) after each piece of information you use in your answer.
+         b. You may cite multiple references if they discuss the same content (e.g., [**Video 3, time 03:14; Video 1, time 12:04**]). However, try to reference them separately if they cover different aspects of the answer.
+
+     3. Style and Formatting:
+         a. Provide the answer in markdown format. Any LaTeX formatting should be converted to an equivalent markdown format.
+         b. Do not use any titles, sections, or subsections. Use mainly paragraphs. Bold text, items, and bullet points if it helps.
+         c. Symbols and equations within the text MUST be placed between $ and $, e.g., $x=0$ is the min of $\sigma(x)=x^2$.
+         d. For equations between paragraphs, use \\n\\n$ and $\\n\\n. For example, in the following equation: \\n\\n$ E = mc^2 $\\n\\n, note $c$ as the speed of light. Remove any equation number/tags in the raw data.
+
+     4. Use technical language appropriate for a {subject_matter} course, but be prepared to explain complex terms if asked.
+
+     5. If the question involves calculations, show your work step-by-step, citing the relevant formulas or methods from the context.
+     """
+     return system_prompt
+