mostafa-sh committed
Commit c603c49 · 1 Parent(s): 88b1620

initial commit to add main codes and files

.gitattributes CHANGED
@@ -32,4 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.xz filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
+ *.json filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ data/**/* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
+ __pycache__/
+ .devcontainer/
+ .streamlit/
+ .env
README.md CHANGED
@@ -1,12 +1,18 @@
  ---
- title: FEM R1
+ title: AI-U finite element method
  emoji: 📊
  colorFrom: green
  colorTo: blue
- sdk: gradio
- sdk_version: 5.38.2
+ sdk: streamlit
+ sdk_version: 1.43.2
  app_file: app.py
  pinned: false
+ short_description: AI-driven instructor for Finite Element Method (FEM) queries
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # AI U
+ This Space is an AI-powered instructor that answers questions on the Finite Element Method (FEM).
+
+ ## How to Use
+ 1. Enter your query in the input box.
+ 2. The assistant will respond in real time.
app.py ADDED
@@ -0,0 +1,402 @@
+ import os
+
+ from huggingface_hub import snapshot_download
+ import streamlit as st
+ from utils.help import get_intro, get_disclaimer
+ from utils.format import sec_to_time, fix_latex, get_youtube_embed
+ from utils.rag_utils import load_youtube_data, load_book_data, load_summary, embed_question_sentence_transformer, fixed_knn_retrieval, get_random_question
+ from utils.system_prompts import get_expert_system_prompt, get_synthesis_user_prompt, get_synthesis_system_prompt
+ from utils.openai_utils import embed_question_openai, openai_domain_specific_answer_generation, openai_context_integration
+ from utils.endpoint_utils import get_inference_endpoint_response, parse_thinking_response, get_custom_inference_endpoint_response
+
+ st.set_page_config(page_title="AI University")
+
+ st.markdown("""
+ <style>
+ .video-wrapper {
+     position: relative;
+     padding-bottom: 56.25%;
+     height: 0;
+ }
+ .video-wrapper iframe {
+     position: absolute;
+     top: 0;
+     left: 0;
+     width: 100%;
+     height: 100%;
+ }
+ </style>
+ """, unsafe_allow_html=True)
+
+ # ---------------------------------------
+ # paths
+ # ---------------------------------------
+ HOME = "."
+ data_dir = HOME + "/data"
+
+ private_data_dir = HOME + "/private_data"  # Relative path in your Space
+
+ # getting private data
+ os.makedirs(private_data_dir, exist_ok=True)
+ token = os.getenv("HF_API_KEY")
+ local_repo_path = snapshot_download(
+     repo_id="my-ai-university/data",
+     use_auth_token=token,
+     repo_type="dataset",
+     local_dir=private_data_dir,
+ )
+
+ adapter_path = HOME + "/LLaMA-TOMMI-1.0/"
+ base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+ # ---------------------------------------
+
+ st.title(":red[AI University] :gray[/] FEM")
+
+ st.markdown(get_intro(), unsafe_allow_html=True)
+ st.markdown(" ")
+ st.markdown(" ")
+
+ # Sidebar for settings
+ with st.sidebar:
+     st.header("Settings")
+
+     with st.expander('Embedding model', expanded=True):
+         embedding_model = st.selectbox("Choose content embedding model", [
+             "text-embedding-3-small",
+             "all-MiniLM-L6-v2",
+         ])
+         st.divider()
+         st.write('**Video lectures**')
+         if embedding_model == "all-MiniLM-L6-v2":
+             yt_token_choice = st.select_slider("Token per content", [128, 256], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="yt_token_len")
+         elif embedding_model == "text-embedding-3-small":
+             yt_token_choice = st.select_slider("Token per content", [256, 512, 1024], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="yt_token_len")
+         yt_chunk_tokens = yt_token_choice
+         yt_max_content = {128: 32, 256: 16, 512: 8, 1024: 4}[yt_chunk_tokens]
+         top_k_YT = st.slider("Number of content pieces to retrieve", 0, yt_max_content, 4, key="yt_token_num")
+         yt_overlap_tokens = yt_chunk_tokens // 4
+
+         st.divider()
+         st.write('**Textbook**')
+         show_textbook = False
+
+         if embedding_model == "all-MiniLM-L6-v2":
+             latex_token_choice = st.select_slider("Token per content", [128, 256], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="latex_token_len")
+         elif embedding_model == "text-embedding-3-small":
+             latex_token_choice = st.select_slider("Token per content", [128, 256, 512, 1024], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="latex_token_len")
+         latex_chunk_tokens = latex_token_choice
+         latex_max_content = {128: 32, 256: 16, 512: 8, 1024: 4}[latex_chunk_tokens]
+         top_k_Latex = st.slider("Number of content pieces to retrieve", 0, latex_max_content, 4, key="latex_token_num")
+         latex_overlap_tokens = 0
+
+     st.write(' ')
+     with st.expander('Expert model', expanded=True):
+         if 'activate_expert' in st.session_state:
+             st.session_state.activate_expert = st.toggle("Use expert model", value=st.session_state.activate_expert)
+         else:
+             st.session_state.activate_expert = st.toggle("Use expert model", value=True)
+
+         st.session_state.expert_model = st.selectbox(
+             "Choose the LLM model",
+             ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B", "gpt-4o-mini"],
+             index=0,  # Default to LLaMA-TOMMI-1.0-11B
+             key='a1model'
+         )
+
+         if st.session_state.expert_model in ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B"]:
+             expert_do_sample = st.toggle("Enable Sampling", value=False, key='expert_sample')
+
+             if expert_do_sample:
+                 expert_temperature = st.slider("Temperature", 0.0, 1.0, 0.2, key='expert_temp')
+                 expert_top_k = st.slider("Top K", 0, 100, 50, key='expert_top_k')
+                 expert_top_p = st.slider("Top P", 0.0, 1.0, 0.1, key='expert_top_p')
+             else:
+                 expert_num_beams = st.slider("Num Beams", 1, 4, 1, key='expert_num_beams')
+
+             expert_max_new_tokens = st.slider("Max New Tokens", 100, 2000, 500, step=50, key='expert_max_new_tokens')
+         else:
+             expert_api_temperature = st.slider("Temperature", 0.0, 1.0, 0.2, key='a1t')
+             expert_api_top_p = st.slider("Top P", 0.0, 1.0, 0.1, key='a1p')
+
+     with st.expander('Synthesis model', expanded=True):
+         st.session_state.synthesis_model = st.selectbox(
+             "Choose the LLM model",
+             ["DeepSeek-R1-0528-Qwen3-8B", "gpt-4o-mini", "gpt-4.1-mini"],
+             index=0,  # Default to DeepSeek-R1
+             key='a2model'
+         )
+
+         if st.session_state.synthesis_model == "DeepSeek-R1-0528-Qwen3-8B":
+             synthesis_deepseek_temperature = st.slider("Temperature", 0.0, 1.0, 0.2, key='synthesis_deepseek_temperature')
+             synthesis_deepseek_top_p = st.slider("Top P", 0.0, 1.0, 0.1, key='synthesis_deepseek_top_p')
+             synthesis_deepseek_max_tokens = st.slider("Max Tokens", 1000, 10000, 4000, step=100, key='synthesis_deepseek_max_tokens')
+
+         else:
+             synthesis_api_temperature = st.slider("Temperature", 0.0, 0.5, 0.3, help="Defines the randomness in the next token prediction. Lower: More predictable and focused. Higher: More adventurous and diverse.", key='a2t')
+             synthesis_api_top_p = st.slider("Top P", 0.1, 0.5, 0.3, help="Defines the range of token choices the model can consider in the next prediction. Lower: More focused and restricted to high-probability options. Higher: More creative, allowing consideration of less likely options.", key='a2p')
+
+ # Main content area
+ if "question" not in st.session_state:
+     st.session_state.question = ""
+
+ text_area_placeholder = st.empty()
+ question_help = "Including details or instructions improves the answer."
+ st.session_state.question = text_area_placeholder.text_area(
+     "**Enter your query about Finite Element Method**",
+     height=120,
+     value=st.session_state.question,
+     help=question_help
+ )
+
+ _, col1, col2, _ = st.columns([4, 2, 4, 3])
+ with col1:
+     submit_button_placeholder = st.empty()
+
+ with col2:
+     if st.button("🎲 Random Question"):
+         while True:
+             random_question = get_random_question(data_dir + "/questions.txt")
+             if random_question != st.session_state.question:
+                 break
+         st.session_state.question = random_question
+         text_area_placeholder.text_area(
+             "**Enter your query about Finite Element Method:**",
+             height=120,
+             value=st.session_state.question,
+             help=question_help
+         )
+
+ # Load YouTube and LaTeX data
+ text_data_YT, context_embeddings_YT = load_youtube_data(data_dir, embedding_model, yt_chunk_tokens, yt_overlap_tokens)
+ text_data_Latex, context_embeddings_Latex = load_book_data(private_data_dir, embedding_model, latex_chunk_tokens, latex_overlap_tokens)
+ summary = load_summary(data_dir + '/KG_FEM_summary.json')
+
+ # Initialize session state variables
+ if 'question_answered' not in st.session_state:
+     st.session_state.question_answered = False
+ if 'context_by_video' not in st.session_state:
+     st.session_state.context_by_video = {}
+ if 'context_by_section' not in st.session_state:
+     st.session_state.context_by_section = {}
+ if 'answer' not in st.session_state:
+     st.session_state.answer = ""
+ if 'thinking' not in st.session_state:
+     st.session_state.thinking = ""
+ if 'playing_video_id' not in st.session_state:
+     st.session_state.playing_video_id = None
+ if 'yt_context_for_display' not in st.session_state:
+     st.session_state.yt_context_for_display = ""
+ if 'latex_context_count' not in st.session_state:
+     st.session_state.latex_context_count = 0
+ if 'video_context_count' not in st.session_state:
+     st.session_state.video_context_count = 0
+
+
+ if submit_button_placeholder.button("AI Answer", type="primary"):
+     if st.session_state.question == "":
+         st.markdown("")
+         st.write("Please enter a query. :smirk:")
+         st.session_state.question_answered = False
+
+     else:
+         with st.spinner("Finding relevant contexts..."):
+             if embedding_model == "all-MiniLM-L6-v2":
+                 question_embedding = embed_question_sentence_transformer(st.session_state.question, model_name="all-MiniLM-L6-v2")
+             elif embedding_model == "text-embedding-3-small":
+                 question_embedding = embed_question_openai(st.session_state.question, embedding_model)
+
+             initial_max_k = int(0.1 * context_embeddings_YT.shape[0])
+             idx_YT = fixed_knn_retrieval(question_embedding, context_embeddings_YT, top_k=top_k_YT, min_k=0)
+             idx_Latex = fixed_knn_retrieval(question_embedding, context_embeddings_Latex, top_k=top_k_Latex, min_k=0)
+
+             relevant_contexts_YT = sorted([text_data_YT[i] for i in idx_YT], key=lambda x: x['order'])
+             relevant_contexts_Latex = sorted([text_data_Latex[i] for i in idx_Latex], key=lambda x: x['order'])
+
+             st.session_state.context_by_video = {}
+             for context_item in relevant_contexts_YT:
+                 video_id = context_item['video_id']
+                 if video_id not in st.session_state.context_by_video:
+                     st.session_state.context_by_video[video_id] = []
+                 st.session_state.context_by_video[video_id].append(context_item)
+             st.session_state.video_context_count = len(st.session_state.context_by_video)
+
+
+             st.session_state.context_by_section = {}
+             for context_item in relevant_contexts_Latex:
+                 section_id = context_item['section']
+                 if section_id not in st.session_state.context_by_section:
+                     st.session_state.context_by_section[section_id] = []
+                 st.session_state.context_by_section[section_id].append(context_item)
+
+             # Build context strings
+             yt_context_string = ''
+             for i, (video_id, contexts) in enumerate(st.session_state.context_by_video.items(), start=1):
+                 yt_context_string += f"--- Video {i}: {contexts[0]['title']} ---\n"
+                 for context_item in contexts:
+                     start_time = int(context_item['start'])
+                     yt_context_string += f"Timestamp {sec_to_time(start_time)}: {context_item['text']}\n\n"
+
+             latex_context_string = ''
+             if top_k_Latex > 0:
+                 for i, (section_id, contexts) in enumerate(st.session_state.context_by_section.items(), start=1):
+                     latex_context_string += f'--- Textbook Section {i} ({section_id}) ---\n'
+                     for context_item in contexts:
+                         latex_context_string += context_item['text'] + '\n\n'
+
+             context_for_llm = yt_context_string + latex_context_string
+             st.session_state.yt_context_for_display = fix_latex(yt_context_string)
+             st.session_state.latex_context_count = len(st.session_state.context_by_section)
+
+
+         with st.spinner("Answering the question..."):
+             if st.session_state.activate_expert:
+                 if st.session_state.expert_model in ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B"]:
+                     if st.session_state.expert_model == "LLaMA-TOMMI-1.0-11B":
+                         use_expert = True
+                     elif st.session_state.expert_model == "LLaMA-3.2-11B":
+                         use_expert = False
+
+                     messages = [
+                         {"role": "system", "content": get_expert_system_prompt()},
+                         {"role": "user", "content": st.session_state.question}
+                     ]
+
+                     expert_answer = get_custom_inference_endpoint_response(
+                         messages=messages,
+                         use_expert=use_expert,
+                         tokenizer_max_length=500,
+                         do_sample=expert_do_sample,
+                         temperature=expert_temperature if expert_do_sample else None,
+                         top_k=expert_top_k if expert_do_sample else None,
+                         top_p=expert_top_p if expert_do_sample else None,
+                         num_beams=expert_num_beams if not expert_do_sample else 1,
+                         max_new_tokens=expert_max_new_tokens
+                     )
+                 else:
+                     expert_answer = openai_domain_specific_answer_generation(
+                         get_expert_system_prompt(),
+                         st.session_state.question,
+                         model=st.session_state.expert_model,
+                         temperature=expert_api_temperature,
+                         top_p=expert_api_top_p
+                     )
+                 st.session_state.expert_answer = fix_latex(expert_answer)
+             else:
+                 st.session_state.expert_answer = 'No Expert Answer. Only use the context.'
+
+             if st.session_state.synthesis_model == "DeepSeek-R1-0528-Qwen3-8B":
+
+                 messages = [
+                     {"role": "system", "content": get_synthesis_system_prompt("Finite Element Method")},
+                     {"role": "user", "content": get_synthesis_user_prompt(st.session_state.question, st.session_state.expert_answer, context_for_llm)}
+                 ]
+
+                 raw_synthesis_answer = get_inference_endpoint_response(
+                     model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
+                     messages=messages,
+                     temperature=synthesis_deepseek_temperature,
+                     top_p=synthesis_deepseek_top_p,
+                     max_tokens=synthesis_deepseek_max_tokens
+                 )
+
+                 print(raw_synthesis_answer)
+                 thinking, synthesis_answer = parse_thinking_response(raw_synthesis_answer)
+                 st.session_state.thinking = thinking
+
+             else:
+                 synthesis_answer = openai_context_integration(
+                     get_synthesis_system_prompt("Finite Element Method"),
+                     st.session_state.question,
+                     st.session_state.expert_answer,
+                     context_for_llm,
+                     model=st.session_state.synthesis_model,
+                     temperature=synthesis_api_temperature,
+                     top_p=synthesis_api_top_p
+                 )
+
+             # quick check after getting the answer
+             if synthesis_answer.split()[0] == "NOT_ENOUGH_INFO":
+                 st.markdown("")
+                 st.markdown("#### Query", unsafe_allow_html=True)
+                 st.markdown(fix_latex(st.session_state.question))
+                 st.markdown("#### Answer")
+                 st.write(":smiling_face_with_tear:")
+                 st.markdown(synthesis_answer.split('NOT_ENOUGH_INFO')[1])
+                 st.divider()
+                 st.caption(get_disclaimer())
+                 st.session_state.question_answered = False
+                 st.stop()
+             else:
+                 st.session_state.answer = fix_latex(synthesis_answer)
+                 st.session_state.question_answered = True
+
+ if st.session_state.question_answered:
+     st.divider()
+     st.markdown("#### Query", unsafe_allow_html=True)
+     st.markdown(fix_latex(st.session_state.question))
+
+     # st.markdown(" ")
+     st.markdown("#### Inference and Reasoning")
+
+     # Expander for Initial Expert Answer
+     if st.session_state.activate_expert and 'expert_answer' in st.session_state:
+         with st.expander("Initial Expert Answer", expanded=False):
+             st.info(f"This is the initial answer from the expert model ({st.session_state.expert_model}), used as a starting point for the final synthesis.", icon="🧑‍🏫")
+             st.markdown(st.session_state.expert_answer)
+
+     # Expander for Retrieved Context
+     if 'yt_context_for_display' in st.session_state and st.session_state.yt_context_for_display:
+         with st.expander("Retrieved Context", expanded=False):
+             st.info("This is the raw context retrieved from the knowledge base to inform the final answer.", icon="📚")
+             if 'video_context_count' in st.session_state and st.session_state.video_context_count > 0:
+                 st.success(f"Found {st.session_state.video_context_count} relevant video transcript(s) containing retrieved content.", icon="📺")
+                 st.markdown(st.session_state.yt_context_for_display)
+             if 'latex_context_count' in st.session_state and st.session_state.latex_context_count > 0:
+                 st.info(f"Additionally, {st.session_state.latex_context_count} relevant sections were found in the textbook *The Finite Element Method: Linear Static and Dynamic Finite Element Analysis* by Thomas J. R. Hughes (2012).", icon="📚")
+
+     # Expander for Model's Thinking Process
+     if st.session_state.synthesis_model == "DeepSeek-R1-0528-Qwen3-8B" and 'thinking' in st.session_state and st.session_state.thinking:
+         with st.expander(":blue[**Model's Thinking Process**]", expanded=False):
+             st.info(f"This is the reasoning from the synthesis model ({st.session_state.synthesis_model}) used to synthesize the final answer.", icon="🤔")
+             st.markdown(st.session_state.thinking)
+
+     # st.markdown("---")
+     st.markdown("#### Answer")
+     st.markdown(st.session_state.answer)
+     st.markdown(" ")
+
+     if top_k_YT > 0:
+         st.markdown("#### Retrieved content in lecture videos")
+         for i, (video_id, contexts) in enumerate(st.session_state.context_by_video.items(), start=1):
+             with st.container(border=True):
+                 st.markdown(f"**Video {i} | {contexts[0]['title']}**")
+                 video_placeholder = st.empty()
+                 video_placeholder.markdown(get_youtube_embed(video_id, 0, 0), unsafe_allow_html=True)
+                 st.markdown('')
+                 with st.container(border=False):
+                     st.markdown("Retrieved Times")
+                     cols = st.columns([1 for _ in range(len(contexts))] + [9 - len(contexts)])
+                     for j, context_item in enumerate(contexts):
+                         start_time = int(context_item['start'])
+                         label = sec_to_time(start_time)
+                         if cols[j].button(label, key=f"{video_id}_{start_time}"):
+                             if st.session_state.playing_video_id is not None:
+                                 st.session_state.playing_video_id = None
+                             video_placeholder.empty()
+                             video_placeholder.markdown(get_youtube_embed(video_id, start_time, 1), unsafe_allow_html=True)
+                             st.session_state.playing_video_id = video_id
+
+                 with st.expander("Video Summary", expanded=False):
+                     st.markdown(summary[video_id])
+
+     if show_textbook and top_k_Latex > 0:
+         st.markdown("#### Retrieved content in textbook", help="The Finite Element Method: Linear Static and Dynamic Finite Element Analysis")
+         for i, (section_id, contexts) in enumerate(st.session_state.context_by_section.items(), start=1):
+             st.markdown(f"**Section {i} | {section_id}**")
+             for context_item in contexts:
+                 st.markdown(context_item['text'])
+             st.divider()
+
+     st.markdown(" ")
+     st.divider()
+     st.caption(get_disclaimer())
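
For orientation, the retrieval-and-synthesis flow that app.py wires into the Streamlit widgets can be exercised headlessly. This is a minimal sketch, not part of the commit: it assumes the utils modules above are importable, the data files added below are present, and the example question is purely illustrative.

```python
# Minimal sketch of app.py's RAG flow without the Streamlit UI (assumptions noted above).
from utils.rag_utils import load_youtube_data, embed_question_sentence_transformer, fixed_knn_retrieval
from utils.system_prompts import get_synthesis_system_prompt, get_synthesis_user_prompt

question = "What is the weak form of the 1D elliptic PDE?"  # illustrative

# 1. Embed the question and retrieve the closest transcript chunks.
chunks, embeddings = load_youtube_data("./data", "all-MiniLM-L6-v2", 256, 64)
idx = fixed_knn_retrieval(embed_question_sentence_transformer(question), embeddings, top_k=4, min_k=0)
context = "\n\n".join(chunks[i]["text"] for i in idx)

# 2. Build the same synthesis messages app.py sends to its chat backend.
messages = [
    {"role": "system", "content": get_synthesis_system_prompt("Finite Element Method")},
    {"role": "user", "content": get_synthesis_user_prompt(question, "No Expert Answer. Only use the context.", context)},
]
```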
data/KG_FEM_summary.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a9179743956afab296c02eec5b6b82f8bc49a449721c46ddd9634d7c4be4053a
+ size 203883
data/questions.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d23dbdf124c14dfb1c2224708711b4c892b5874f800e7e06240261f229468d61
+ size 456242
data/yt_embedding_space_all-MiniLM-L6-v2_tpc128_o32.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:25393a46b720884076694fc10f9edb80a67003e452a9187077c69c632f2d45dd
+ size 36670448
data/yt_embedding_space_all-MiniLM-L6-v2_tpc256_o64.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb839d14084e8c305100359eabaefb517b83c9673368cb09b0da23673ce05df3
+ size 17898177
data/yt_embedding_space_text-embedding-3-small_tpc1024_o256.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b0bb1fc517dfa8eeea1f7545bec577556e7bd170ec885cdf25eab3f5d665d2ba
+ size 17109772
data/yt_embedding_space_text-embedding-3-small_tpc256_o64.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6f69373b4df1d9bef5a64a68d898c596fee6007c26a7ac6ff58d56f786c93d60
+ size 62427532
data/yt_embedding_space_text-embedding-3-small_tpc512_o128.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e1cc71a26650538650196102342e9f48242eedf5ed2bbf1fb4c2299691edd6c6
+ size 31637503
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ numpy==1.26.3
+ openai==1.57.0
+ sentence-transformers==2.7.0
+ streamlit==1.43.2
+ python-dotenv==1.0.1
utils/endpoint_utils.py ADDED
@@ -0,0 +1,145 @@
+ import requests
+ import time
+ import os
+ import re
+ import json
+
+ from huggingface_hub import InferenceClient
+
+ # Helper function to parse the response
+ def parse_thinking_response(response_text):
+     """
+     Parses a model's response to separate the thinking process
+     from the final answer.
+     """
+     match = re.search(r"<think>(.*?)</think>(.*)", response_text, re.DOTALL)
+     if match:
+         thinking = match.group(1).strip()
+         final_answer = match.group(2).strip()
+         return thinking, final_answer
+     else:
+         return None, response_text.strip()
+
+
+ def get_inference_endpoint_response(
+     model,
+     messages,
+     temperature,
+     top_p,
+     max_tokens
+ ):
+     """
+     Serverless API (Pay-as-you-go)
+     """
+     client = InferenceClient(
+         provider="auto",
+         api_key=os.getenv("HF_API_KEY")
+     )
+
+     completion = client.chat.completions.create(
+         model=model,
+         messages=messages,
+         temperature=temperature,
+         top_p=top_p,
+         max_tokens=max_tokens
+     )
+
+     # Get the raw response content
+     raw_response = completion.choices[0].message.content
+
+     return raw_response
+
+
+ def get_custom_inference_endpoint_response(
+     messages: list,
+     use_expert: bool = True,
+     tokenizer_max_length: int = 512,
+     do_sample: bool = False,
+     temperature: float = 0.6,
+     top_k: int = 50,
+     top_p: float = 0.95,
+     num_beams: int = 1,
+     max_new_tokens: int = 1024,
+     **kwargs  # To catch any other unused arguments
+ ):
+     """
+     Contacts a custom Hugging Face inference endpoint with retry logic.
+     This function is tailored to a custom EndpointHandler that expects a specific
+     payload structure: {"inputs": {"messages": [...], "settings": {...}}}.
+     """
+     endpoint_url = os.getenv("HF_ENDPOINT_URL")
+     hf_endpoint_token = os.getenv("HF_ENDPOINT_TOKEN")
+
+     if not endpoint_url or not hf_endpoint_token:
+         return "Error: HF_ENDPOINT_URL and HF_ENDPOINT_TOKEN environment variables must be set."
+
+     headers = {
+         "Authorization": f"Bearer {hf_endpoint_token}",
+         "Content-Type": "application/json"
+     }
+
+     # --- Payload structure for the custom endpoint handler ---
+     # This handler expects a 'settings' dictionary nested inside 'inputs'.
+     settings = {
+         "use_expert": use_expert,
+         "tokenizer_max_length": tokenizer_max_length,
+         "do_sample": do_sample,
+         "temperature": temperature,
+         "top_k": top_k,
+         "top_p": top_p,
+         "num_beams": num_beams,
+         "max_new_tokens": max_new_tokens,
+     }
+
+     # The server-side EndpointHandler is designed to handle parameter logic,
+     # so we send all parameters from the client.
+
+     # The final payload must match the nested structure the custom handler expects.
+     payload = {
+         "inputs": {
+             "messages": messages,
+             "settings": settings
+         }
+     }
+
+     # --- Retry logic ---
+     max_retries = 5
+     wait_time = 30  # seconds to wait between retries
+
+     for attempt in range(max_retries):
+         print(f"Attempting to contact endpoint, attempt {attempt + 1}/{max_retries}...")
+         # Log the exact payload being sent for easier debugging
+         print(f"Payload: {json.dumps(payload, indent=2)}")
+         try:
+             response = requests.post(endpoint_url, headers=headers, json=payload)
+
+             # Raise an exception for bad status codes (4xx or 5xx)
+             response.raise_for_status()
+
+             result = response.json()
+             print(f"Success! Response: {result}")
+
+             # The custom handler returns a dictionary with a 'response' key.
+             # This parsing logic correctly extracts it.
+             return result.get('response', 'Error: "response" key not found in the result.')
+
+         except requests.exceptions.HTTPError as errh:
+             # Handle the specific 503 error raised while the model is loading
+             if errh.response.status_code == 503 and attempt < max_retries - 1:
+                 print(f"Service Unavailable (503). Endpoint may be starting up. Retrying in {wait_time} seconds...")
+                 time.sleep(wait_time)
+             else:
+                 error_message = f"HTTP Error: {errh}\nResponse: {errh.response.text}"
+                 print(error_message)
+                 return error_message
+         except requests.exceptions.RequestException as err:
+             error_message = f"Request Error: {err}"
+             print(error_message)
+             return error_message
+         except json.JSONDecodeError:
+             error_message = f"JSON Decode Error: Failed to parse response from server.\nResponse Text: {response.text}"
+             print(error_message)
+             return error_message
+
+
+     return "Error: Failed to get a response after multiple retries."
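
A quick illustration of how `parse_thinking_response` behaves (hypothetical strings, not part of the commit): DeepSeek-R1-style models wrap their reasoning in `<think>` tags, which the helper splits off, while responses without the tags pass through unchanged.

```python
from utils.endpoint_utils import parse_thinking_response

raw = "<think>Recall the weak form derivation.</think>The weak form is obtained by..."
thinking, answer = parse_thinking_response(raw)
# thinking == "Recall the weak form derivation."
# answer   == "The weak form is obtained by..."

# No <think> block: the whole text is treated as the final answer.
assert parse_thinking_response("plain answer") == (None, "plain answer")
```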
utils/format.py ADDED
@@ -0,0 +1,20 @@
+ import re
+
+ def sec_to_time(start_time):
+     return f"{start_time // 60:02}:{start_time % 60:02}"
+
+ def fix_latex(text):
+     text = re.sub(r"\\\(", r"$", text)
+     text = re.sub(r"\\\)", r"$", text)
+     text = re.sub(r"\\\[", r"$$", text)
+     text = re.sub(r"\\\]", r"$$", text)
+     return text
+
+ def get_youtube_embed(video_id, start_time=0, autoplay=0):
+     embed_code = f'''
+     <div class="video-wrapper">
+         <iframe src="https://www.youtube.com/embed/{video_id}?start={start_time}&autoplay={autoplay}&rel=0"
+                 frameborder="0" allowfullscreen></iframe>
+     </div>
+     '''
+     return embed_code
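
A short usage sketch for these helpers (illustrative values): `sec_to_time` renders seconds as MM:SS, and `fix_latex` rewrites `\( \)` / `\[ \]` delimiters into the `$` / `$$` forms that Streamlit's markdown renderer understands.

```python
from utils.format import sec_to_time, fix_latex

print(sec_to_time(754))   # -> "12:34"
print(fix_latex(r"\(x=0\) is the min of \[ \sigma(x) = x^2 \]"))
# -> "$x=0$ is the min of $$ \sigma(x) = x^2 $$"
```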
utils/help.py ADDED
@@ -0,0 +1,26 @@
+
+ def get_intro():
+     return """
+     Welcome to <span style='color:red'><a href='https://my-ai-university.com/' target='_blank' style='text-decoration: none; color: red;'>AI University</a></span> — an AI-powered platform designed to address scientific course queries, dynamically adapting to instructors' teaching styles and students' learning needs.
+     This prototype showcases the capabilities of the <span style='color:red'><a href='https://github.com/my-ai-university' target='_blank' style='text-decoration: none; color: red;'>AI University platform</a></span> by providing expert answers to queries related to a graduate-level <span style='color:red'><a href='https://www.youtube.com/playlist?list=PLJhG_d-Sp_JHKVRhfTgDqbic_4MHpltXZ' target='_blank' style='text-decoration: none; color: red;'>Finite Element Method (FEM)</a></span> course.
+     """
+
+ def get_disclaimer():
+     return """
+
+     :gray[AI University is developed at the University of Southern California by Mostafa Shojaei, Rahul Gulati, Benjamin Jasperson, Shangshang Wang, Simone Cimolato, Dangli Cao, Willie Neiswanger, and Krishna Garikipati.]
+
+     :gray[**Resources:**]
+     [AI University](https://my-ai-university.com/),
+     &nbsp; [ArXiv](https://arxiv.org/abs/2504.08846),
+     &nbsp; [GitHub](https://github.com/my-ai-university/finite-element-method),
+     &nbsp; [HuggingFace](https://huggingface.co/my-ai-university),
+     &nbsp; [WandB](https://wandb.ai/my-ai-university/finite-element-method)
+
+     :gray[**Main Data Sources:**]
+     [Introduction to Finite Element Methods (FEM) by Prof. Krishna Garikipati](https://www.youtube.com/playlist?list=PLJhG_d-Sp_JHKVRhfTgDqbic_4MHpltXZ),
+     &nbsp; [The Finite Element Method: Linear Static and Dynamic Finite Element Analysis by Thomas J. R. Hughes](https://www.google.com/books/edition/_/cHH2n_qBK0IC?hl=en).
+
+     :gray[**Disclaimer and Copyright Notice:**] :gray[1. AI-Generated Responses: Answers are generated using AI and, while thorough, may not always be 100% accurate. Please verify the information independently. 2. Content Ownership: All video content and lecture material referenced belong to their original creators. We encourage users to view the original material on verified platforms to ensure authenticity and accuracy. 3. Educational Fair Use: This tool is intended solely for educational purposes and operates under the principles of fair use. It is not authorized for commercial applications.]
+     """
+
utils/openai_utils.py ADDED
@@ -0,0 +1,82 @@
+ import os
+ from openai import OpenAI
+
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ #--------------------------------------------------------
+ # Initialize OpenAI client
+ #--------------------------------------------------------
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+ def embed_question_openai(texts, model="text-embedding-3-small"):
+     response = client.embeddings.create(
+         input=texts,
+         model=model
+     )
+     return response.data[0].embedding
+
+
+ def openai_domain_specific_answer_generation(system_prompt, question, model="gpt-4o-mini", temperature=0.3, top_p=0.1):
+
+     prompt = f"""
+     Question:
+     {question}
+
+     Answer (provide a precise, domain-specific response):
+     """
+
+     response = client.chat.completions.create(
+         model=model,
+         messages=[
+             {
+                 "role": "system",
+                 "content": system_prompt
+             },
+             {
+                 "role": "user",
+                 "content": prompt
+             }
+         ],
+         temperature=temperature,  # Set low for deterministic and precise responses.
+         top_p=top_p,  # Focus on high-probability outputs to ensure accuracy.
+         frequency_penalty=0.1,  # Reduce repetition of technical terms.
+         presence_penalty=0.0  # Prevent introduction of unrelated ideas.
+     )
+
+     return response.choices[0].message.content
+
+ def openai_context_integration(system_prompt, query, expert_answer, retrieved_context, model="gpt-4o-mini", temperature=0.3, top_p=0.3):
+
+     prompt = f"""
+     Question:
+     {query}
+
+     Direct Answer:
+     {expert_answer}
+
+     Retrieved Context:
+     {retrieved_context}
+
+     Final Answer:
+     """
+
+     response = client.chat.completions.create(
+         model=model,
+         messages=[
+             {
+                 "role": "system",
+                 "content": system_prompt
+             },
+             {
+                 "role": "user",
+                 "content": prompt
+             }
+         ],
+         temperature=temperature,  # Maintain some flexibility for smooth blending.
+         top_p=top_p,  # Prioritize high-probability outputs to stay focused on the inputs.
+         frequency_penalty=0.1,  # Allow necessary repetition for clarity.
+         presence_penalty=0.0  # Neutral to avoid introducing unrelated ideas.
+     )
+
+     return response.choices[0].message.content
utils/rag_utils.py ADDED
@@ -0,0 +1,57 @@
+ import json
+ import numpy as np
+ import random
+ import streamlit as st
+ from sentence_transformers import SentenceTransformer
+
+ @st.cache_resource
+ def load_youtube_data(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
+     embedding_space_file_name = f'{base_path}/yt_embedding_space_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
+     with open(embedding_space_file_name, 'r') as json_file:
+         loaded_data = json.load(json_file)
+
+     embedding_space = np.array(loaded_data['embedding_space'])
+     return loaded_data['chunks'], embedding_space
+
+ @st.cache_resource
+ def load_book_data(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
+     embedding_space_file_name = f'{base_path}/latex_embedding_space_by_sections_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
+     with open(embedding_space_file_name, 'r') as json_file:
+         loaded_data = json.load(json_file)
+
+     embedding_space = np.array(loaded_data['embedding_space'])
+     return loaded_data['chunks'], embedding_space
+
+ @st.cache_resource
+ def load_summary(file_path):
+     with open(file_path, 'r') as file:
+         transcripts = json.load(file)
+     return transcripts
+
+ def embed_question_sentence_transformer(texts, model_name="sentence-transformers/all-MiniLM-L6-v2"):
+     model = SentenceTransformer(model_name)
+     embeddings = model.encode(texts)
+
+     return embeddings.tolist()
+
+ def fixed_knn_retrieval(question_embedding, context_embeddings, top_k=5, min_k=1):
+
+     question_embedding = np.array(question_embedding)
+
+     # Normalize
+     question_embedding = question_embedding / np.linalg.norm(question_embedding)
+     context_embeddings = context_embeddings / np.linalg.norm(context_embeddings, axis=1, keepdims=True)
+
+     # Calculate cosine similarities between the question embedding and all context embeddings.
+     similarities = np.dot(context_embeddings, question_embedding)
+     # Sort the similarities in descending order and get the corresponding indices.
+     sorted_indices = np.argsort(similarities)[::-1]
+     # Select the top_k most similar contexts, ensuring at least min_k contexts are selected.
+     selected_indices = sorted_indices[:max(top_k, min_k)].tolist()
+     return selected_indices
+
+
+ def get_random_question(text_file):
+     with open(text_file, "r") as file:
+         questions = [line.strip() for line in file]
+     return random.choice(questions)
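
To make the retrieval step concrete, here is a toy run of `fixed_knn_retrieval` with hand-made vectors (illustrative only): both sides are L2-normalized, so the dot product is cosine similarity, and the returned indices are ordered from most to least similar.

```python
import numpy as np
from utils.rag_utils import fixed_knn_retrieval

context_embeddings = np.array([
    [1.0, 0.0],   # index 0: along x
    [0.0, 1.0],   # index 1: along y
    [1.0, 1.0],   # index 2: diagonal
])
question_embedding = [1.0, 0.1]  # nearly aligned with index 0

print(fixed_knn_retrieval(question_embedding, context_embeddings, top_k=2, min_k=1))
# -> [0, 2]: index 0 is the best match, the diagonal vector second
```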
utils/system_prompts.py ADDED
@@ -0,0 +1,66 @@
+ def get_expert_system_prompt():
+     # system_prompt = f"""
+     # You are a highly specialized assistant for the subject Finite Element Method (FEM). Provide a direct and focused answer to the following question based on your specialized training.
+     # """
+     system_prompt = (
+         "You are an AI professor for a Finite Element Method (FEM) course. "
+         "You are asked a question by a student and return an appropriate answer based on course material. "
+         "Your response focuses on FEM fundamentals, theories, and applications as presented in the course. "
+         "Use standard LaTeX notation when replying with mathematical notation."
+     )
+     return system_prompt
+
+ def get_synthesis_user_prompt(question, direct_answer, context):
+
+     return f"""
+     Question:
+     {question}
+
+     Direct Answer:
+     {direct_answer}
+
+     Retrieved Context:
+     {context}
+
+     Final Answer:
+     """
+
+
+ def get_synthesis_system_prompt(subject_matter="Finite Element Method (FEM)"):
+     """
+     Returns the system prompt for the synthesis model.
+     This version incorporates the user's detailed instructions for guided synthesis.
+     """
+     system_prompt = f"""
+     You are an AI teaching assistant for a {subject_matter} course. Your task is to synthesize a final, high-quality answer to the student's **Question** by intelligently integrating two sources: a preliminary **Direct Answer** and the official **Retrieved Context** from the course materials.
+
+     By synthesizing we mean that your final answer must always be grounded **exclusively** in the provided **Direct Answer** and **Retrieved Context**. Therefore, never use any external knowledge, including your existing knowledge.
+
+     IMPORTANT INITIAL CHECK: Analyze the provided **Question**, **Direct Answer**, and **Retrieved Context**.
+
+     If the **Direct Answer** AND the **Retrieved Context** together lack sufficient information to answer the **Question**, respond EXACTLY as follows and then STOP:
+     "NOT_ENOUGH_INFO The provided context doesn't contain enough information to fully answer this question. You may want to increase the number of relevant context passages or adjust the options and try again."
+
+     Else continue with the remaining guidelines.
+
+     Guidelines:
+     1. Your primary synthesizing goal is to use the **Retrieved Context** to validate, improve, and expand upon the **Direct Answer**.
+         a. If the **Direct Answer** is accurate and relevant, use it as the foundation for your response. Your task is then to enrich it by weaving in specific details, examples, and citations from the **Retrieved Context** to create a more comprehensive and well-supported answer.
+         b. If the **Direct Answer** is poor, inaccurate, or irrelevant, you should rely more heavily on the **Retrieved Context** to construct the correct answer from the ground up.
+
+     2. Referencing:
+         a. Always cite your sources by referencing the video number and the given time in brackets and **bold** (e.g., [**Video 3, time 03:14**]) after each piece of information you use in your answer.
+         b. You may cite multiple references if they discuss the same content (e.g., [**Video 3, time 03:14; Video 1, time 12:04**]). However, try to reference them separately if they cover different aspects of the answer.
+
+     3. Style and Formatting:
+         a. Provide the answer in markdown format. Any LaTeX formatting should be converted to an equivalent markdown format.
+         b. Do not use any titles, sections, or subsections. Use mainly paragraphs. Bold text, items, and bullet points if it helps.
+         c. Symbols and equations within the text MUST be placed between $ and $, e.g., $x=0$ is the min of $\sigma(x)=x^2$.
+         d. For equations between paragraphs, use \\n\\n$ and $\\n\\n. For example, in the following equation: \\n\\n$ E = mc^2 $\\n\\n, note $c$ as the speed of light. Remove any equation number/tags in the raw data.
+
+     4. Use technical language appropriate for a {subject_matter} course, but be prepared to explain complex terms if asked.
+
+     5. If the question involves calculations, show your work step-by-step, citing the relevant formulas or methods from the context.
+     """
+     return system_prompt
+