Spaces:

awacke1
/

CodeCompetitionClaudeVsGPT

Sleeping

App Files Files Community

awacke1 committed on Dec 19, 2024

Commit

7938082

verified ·

1 Parent(s): 30a0d44

Update app.py

Browse files

Files changed (1) hide show

app.py +128 -215

app.py CHANGED Viewed

@@ -16,6 +16,15 @@ import requests
 from collections import defaultdict
 from audio_recorder_streamlit import audio_recorder
 import streamlit.components.v1 as components
 # Initialize session state
 if 'search_history' not in st.session_state:
@@ -30,6 +39,10 @@ if 'search_columns' not in st.session_state:
     st.session_state['search_columns'] = []
 if 'initial_search_done' not in st.session_state:
     st.session_state['initial_search_done'] = False
 class VideoSearch:
     def __init__(self):
@@ -37,11 +50,10 @@ class VideoSearch:
         self.load_dataset()
     def fetch_dataset_rows(self):
-        """Fetch dataset from Hugging Face API"""
         try:
             url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
             response = requests.get(url, timeout=30)
             if response.status_code == 200:
                 data = response.json()
                 if 'rows' in data:
@@ -63,8 +75,7 @@ class VideoSearch:
                                                         if col not in ['video_embed', 'description_embed', 'audio_embed']]
                     return df
             return self.load_example_data()
-        except Exception as e:
             return self.load_example_data()
     def prepare_features(self):
@@ -88,7 +99,7 @@ class VideoSearch:
                     if data:
                         embeddings[col] = np.array(data)
-                except Exception as e:
                     continue
             # Set main embeddings for search
@@ -102,14 +113,13 @@ class VideoSearch:
             else:
                 self.text_embeds = self.video_embeds
-        except Exception as e:
             # Fallback to random embeddings
             num_rows = len(self.dataset)
             self.video_embeds = np.random.randn(num_rows, 384)
             self.text_embeds = np.random.randn(num_rows, 384)
     def load_example_data(self):
-        """Load example data as fallback"""
         example_data = [
             {
                 "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
@@ -129,27 +139,22 @@ class VideoSearch:
         self.prepare_features()
     def search(self, query, column=None, top_k=20):
-        """Search videos using query with column filtering"""
-        # Semantic search
         query_embedding = self.text_model.encode([query])[0]
         video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
         text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]
         combined_sims = 0.5 * video_sims + 0.5 * text_sims
-        # Column-specific text search if specified
-        if column and column in self.dataset.columns:
             mask = self.dataset[column].astype(str).str.contains(query, case=False)
-            combined_sims[~mask] *= 0.5  # Reduce scores for non-matching rows
-        # Get top results
         top_k = min(top_k, 100)
         top_indices = np.argsort(combined_sims)[-top_k:][::-1]
         results = []
         for idx in top_indices:
-            result = {
-                'relevance_score': float(combined_sims[idx])
-            }
             for col in self.dataset.columns:
                 if col not in ['video_embed', 'description_embed', 'audio_embed']:
                     result[col] = self.dataset.iloc[idx][col]
@@ -157,127 +162,19 @@ class VideoSearch:
         return results
-def main():
-    st.title("🎥 Video Search with Speech Recognition")
-    # Initialize search
-    search = VideoSearch()
-    # Create tabs
-    tab1, tab2, tab3 = st.tabs(["🔍 Search", "🎙️ Voice Input", "📂 Files"])
-    with tab1:
-        st.subheader("Search Videos")
-        # Search interface
-        col1, col2 = st.columns([3, 1])
-        with col1:
-            query = st.text_input("Enter your search query:", value="ancient" if not st.session_state['initial_search_done'] else "")
-        with col2:
-            search_column = st.selectbox("Search in field:",
-                                       ["All Fields"] + st.session_state['search_columns'])
-        col3, col4 = st.columns(2)
-        with col3:
-            num_results = st.slider("Number of results:", 1, 100, 20)
-        with col4:
-            search_button = st.button("🔍 Search")
-        # Process search
-        if (search_button or not st.session_state['initial_search_done']) and query:
-            st.session_state['initial_search_done'] = True
-            selected_column = None if search_column == "All Fields" else search_column
-            results = search.search(query, selected_column, num_results)
-            st.session_state['search_history'].append({
-                'query': query,
-                'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                'results': results[:5]  # Store only top 5 for history
-            })
-            for i, result in enumerate(results, 1):
-                with st.expander(f"Result {i}: {result['description'][:100]}...",
-                               expanded=i==1):
-                    cols = st.columns([2, 1])
-                    with cols[0]:
-                        st.markdown("**Description:**")
-                        st.write(result['description'])
-                        st.markdown(f"**Time Range:** {result['start_time']}s - {result['end_time']}s")
-                        st.markdown(f"**Views:** {result['views']:,}")
-                    with cols[1]:
-                        st.markdown(f"**Relevance Score:** {result['relevance_score']:.2%}")
-                        if result.get('youtube_id'):
-                            st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
-                        if st.button(f"🔊 Audio Summary", key=f"audio_{i}"):
-                            summary = f"Video summary: {result['description'][:200]}"
-                            audio_file = asyncio.run(generate_speech(summary))
-                            if audio_file:
-                                st.audio(audio_file)
-                                if os.path.exists(audio_file):
-                                    os.remove(audio_file)
-    with tab2:
-        st.subheader("Voice Input")
-        col1, col2 = st.columns(2)
-        with col1:
-            st.write("🎙️ Speech Recognition")
-            voice_input = speech_component()
-            if voice_input and voice_input != st.session_state['last_voice_input']:
-                st.session_state['last_voice_input'] = voice_input
-                st.markdown("**Transcribed Text:**")
-                st.write(voice_input)
-                if st.button("🔍 Search"):
-                    results = search.search(voice_input, None, num_results)
-                    for i, result in enumerate(results, 1):
-                        with st.expander(f"Result {i}", expanded=i==1):
-                            st.write(result['description'])
-                            if result.get('youtube_id'):
-                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
-        with col2:
-            st.write("🎵 Audio Recording")
-            audio_bytes = audio_recorder()
-            if audio_bytes:
-                audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
-                with open(audio_path, "wb") as f:
-                    f.write(audio_bytes)
-                st.success("Audio recorded successfully!")
-                if os.path.exists(audio_path):
-                    os.remove(audio_path)
-    with tab3:
-        show_file_manager()
-    # Sidebar
-    with st.sidebar:
-        st.subheader("⚙️ Settings & History")
-        if st.button("🗑️ Clear History"):
-            st.session_state['search_history'] = []
-            st.rerun()
-        st.markdown("### Recent Searches")
-        for entry in reversed(st.session_state['search_history'][-5:]):
-            with st.expander(f"{entry['timestamp']}: {entry['query']}"):
-                for i, result in enumerate(entry['results'], 1):
-                    st.write(f"{i}. {result['description'][:100]}...")
-        st.markdown("### Voice Settings")
-        st.selectbox("TTS Voice:",
-                    ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
-                    key="tts_voice")
-async def generate_speech(text, voice="en-US-AriaNeural"):
-    """Generate speech using Edge TTS"""
     if not text.strip():
         return None
     try:
-        communicate = edge_tts.Communicate(text, voice)
         audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
         await communicate.save(audio_file)
         return audio_file
@@ -285,11 +182,19 @@ async def generate_speech(text, voice="en-US-AriaNeural"):
         st.error(f"Error generating speech: {e}")
         return None
 def show_file_manager():
     """Display file manager interface"""
     st.subheader("📂 File Manager")
-    # File operations
     col1, col2 = st.columns(2)
     with col1:
         uploaded_file = st.file_uploader("Upload File", type=['txt', 'md', 'mp3'])
@@ -297,16 +202,15 @@ def show_file_manager():
             with open(uploaded_file.name, "wb") as f:
                 f.write(uploaded_file.getvalue())
             st.success(f"Uploaded: {uploaded_file.name}")
-            st.rerun()
     with col2:
         if st.button("🗑 Clear All Files"):
             for f in glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3"):
                 os.remove(f)
             st.success("All files cleared!")
-            st.rerun()
-    # Show existing files
     files = glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3")
     if files:
         st.write("### Existing Files")
@@ -315,52 +219,54 @@ def show_file_manager():
                 if f.endswith('.mp3'):
                     st.audio(f)
                 else:
-                    with open(f, 'r') as file:
                         st.text_area("Content", file.read(), height=100)
                 if st.button(f"Delete {os.path.basename(f)}", key=f"del_{f}"):
                     os.remove(f)
-                    st.rerun()
-@st.cache_data(ttl=3600)
-def load_file_list():
-    """Cache file listing"""
-    return glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3")
-@st.cache_resource
-def get_speech_model():
-    """Cache speech model initialization"""
-    return edge_tts.Communicate
-async def generate_speech(text, voice="en-US-AriaNeural"):
-    """Generate speech using Edge TTS with cached model"""
-    if not text.strip():
-        return None
-    try:
-        communicate = get_speech_model()(text, voice)
-        audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
-        await communicate.save(audio_file)
-        return audio_file
-    except Exception as e:
-        st.error(f"Error generating speech: {e}")
-        return None
 def main():
-    st.title("🎥 Video Search with Speech Recognition")
-    # Initialize search with cached model
     search = VideoSearch()
     # Create tabs
-    tab1, tab2, tab3 = st.tabs(["🔍 Search", "🎙️ Voice Input", "📂 Files"])
     with tab1:
         st.subheader("Search Videos")
-        # Search interface
         col1, col2 = st.columns([3, 1])
         with col1:
             query = st.text_input("Enter your search query:",
-                                value="ancient" if not st.session_state['initial_search_done'] else "")
         with col2:
             search_column = st.selectbox("Search in field:",
                                        ["All Fields"] + st.session_state['search_columns'])
@@ -370,8 +276,7 @@ def main():
             num_results = st.slider("Number of results:", 1, 100, 20)
         with col4:
             search_button = st.button("🔍 Search")
-        # Process search
         if (search_button or not st.session_state['initial_search_done']) and query:
             st.session_state['initial_search_done'] = True
             selected_column = None if search_column == "All Fields" else search_column
@@ -381,12 +286,11 @@ def main():
             st.session_state['search_history'].append({
                 'query': query,
                 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                'results': results[:5]  # Store only top 5 for history
             })
             for i, result in enumerate(results, 1):
-                with st.expander(f"Result {i}: {result['description'][:100]}...",
-                               expanded=i==1):
                     cols = st.columns([2, 1])
                     with cols[0]:
                         st.markdown("**Description:**")
@@ -404,68 +308,77 @@ def main():
                             audio_file = asyncio.run(generate_speech(summary))
                             if audio_file:
                                 st.audio(audio_file)
-                                if os.path.exists(audio_file):
-                                    os.remove(audio_file)
     with tab2:
         st.subheader("Voice Input")
-        col1, col2 = st.columns(2)
-        with col1:
-            st.write("🎙️ Speech Recognition")
-        with col2:
-            st.write("🎵 Audio Recording")
-            audio_bytes = audio_recorder()
-            if audio_bytes:
-                audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
-                with open(audio_path, "wb") as f:
-                    f.write(audio_bytes)
-                st.success("Audio recorded successfully!")
-                if os.path.exists(audio_path):
-                    os.remove(audio_path)
     with tab3:
         show_file_manager()
     # Sidebar
     with st.sidebar:
         st.subheader("⚙️ Settings & History")
         if st.button("🗑️ Clear History"):
             st.session_state['search_history'] = []
-            st.rerun()
         st.markdown("### Recent Searches")
         for entry in reversed(st.session_state['search_history'][-5:]):
             with st.expander(f"{entry['timestamp']}: {entry['query']}"):
                 for i, result in enumerate(entry['results'], 1):
                     st.write(f"{i}. {result['description'][:100]}...")
         st.markdown("### Voice Settings")
-        st.selectbox("TTS Voice:",["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],key="tts_voice")
-    with col2:
-        if st.button("🗑 Clear All Files"):
-            for f in glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3"):
-                os.remove(f)
-            st.success("All files cleared!")
-    # Show existing files
-    files = glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3")
-    if files:
-        st.write("### Existing Files")
-        for f in files:
-            with st.expander(f"📄 {os.path.basename(f)}"):
-                if f.endswith('.mp3'):
-                    st.audio(f)
-                else:
-                    with open(f, 'r') as file:
-                        st.text_area("Content", file.read(), height=100)
-                if st.button(f"Delete {os.path.basename(f)}", key=f"del_{f}"):
-                    os.remove(f)
-                    st.rerun()
 if __name__ == "__main__":
-    main()

 from collections import defaultdict
 from audio_recorder_streamlit import audio_recorder
 import streamlit.components.v1 as components
+import openai
+from dotenv import load_dotenv
+# Load environment
+load_dotenv()
+openai.api_key = os.getenv('OPENAI_API_KEY')
+# Voice-feature dependencies; openai.Audio.transcribe (used below) exists only in openai<1.0:
+# pip install edge-tts "openai<1.0" python-dotenv audio-recorder-streamlit
 # Initialize session state
 if 'search_history' not in st.session_state:
     st.session_state['search_columns'] = []
 if 'initial_search_done' not in st.session_state:
     st.session_state['initial_search_done'] = False
+if 'tts_voice' not in st.session_state:
+    st.session_state['tts_voice'] = "en-US-AriaNeural"
+if 'arxiv_last_query' not in st.session_state:
+    st.session_state['arxiv_last_query'] = ""
 class VideoSearch:
     def __init__(self):
         self.load_dataset()
     def fetch_dataset_rows(self):
+        """Fetch dataset from HF API"""
         try:
             url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
             response = requests.get(url, timeout=30)
             if response.status_code == 200:
                 data = response.json()
                 if 'rows' in data:
                                                         if col not in ['video_embed', 'description_embed', 'audio_embed']]
                     return df
             return self.load_example_data()
+        except Exception:
             return self.load_example_data()
     def prepare_features(self):
                     if data:
                         embeddings[col] = np.array(data)
+                except:
                     continue
             # Set main embeddings for search
             else:
                 self.text_embeds = self.video_embeds
+        except Exception:
             # Fallback to random embeddings
             num_rows = len(self.dataset)
             self.video_embeds = np.random.randn(num_rows, 384)
             self.text_embeds = np.random.randn(num_rows, 384)
     def load_example_data(self):
         example_data = [
             {
                 "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
         self.prepare_features()
     def search(self, query, column=None, top_k=20):
         query_embedding = self.text_model.encode([query])[0]
         video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
         text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]
         combined_sims = 0.5 * video_sims + 0.5 * text_sims
+        # Column filtering
+        if column and column in self.dataset.columns and column != "All Fields":
             mask = self.dataset[column].astype(str).str.contains(query, case=False)
+            combined_sims[~mask] *= 0.5
         top_k = min(top_k, 100)
         top_indices = np.argsort(combined_sims)[-top_k:][::-1]
         results = []
         for idx in top_indices:
+            result = {'relevance_score': float(combined_sims[idx])}
             for col in self.dataset.columns:
                 if col not in ['video_embed', 'description_embed', 'audio_embed']:
                     result[col] = self.dataset.iloc[idx][col]
         return results
+# Use edge_tts for TTS
+@st.cache_resource
+def get_speech_model():
+    """Cache speech model initialization."""
+    return edge_tts.Communicate
+async def generate_speech(text, voice=None):
     if not text.strip():
         return None
+    if not voice:
+        voice = st.session_state['tts_voice']
     try:
+        communicate = get_speech_model()(text, voice)
         audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
         await communicate.save(audio_file)
         return audio_file
         st.error(f"Error generating speech: {e}")
         return None
+def transcribe_audio(audio_path):
+    """Transcribe audio using Whisper."""
+    try:
+        with open(audio_path, "rb") as f:
+            transcription = openai.Audio.transcribe("whisper-1", f)
+        return transcription["text"].strip()
+    except Exception as e:
+        st.error(f"Error transcribing audio: {e}")
+        return ""
 def show_file_manager():
     """Display file manager interface"""
     st.subheader("📂 File Manager")
     col1, col2 = st.columns(2)
     with col1:
         uploaded_file = st.file_uploader("Upload File", type=['txt', 'md', 'mp3'])
             with open(uploaded_file.name, "wb") as f:
                 f.write(uploaded_file.getvalue())
             st.success(f"Uploaded: {uploaded_file.name}")
+            st.experimental_rerun()
     with col2:
         if st.button("🗑 Clear All Files"):
             for f in glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3"):
                 os.remove(f)
             st.success("All files cleared!")
+            st.experimental_rerun()
     files = glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3")
     if files:
         st.write("### Existing Files")
                 if f.endswith('.mp3'):
                     st.audio(f)
                 else:
+                    with open(f, 'r', encoding='utf-8') as file:
                         st.text_area("Content", file.read(), height=100)
                 if st.button(f"Delete {os.path.basename(f)}", key=f"del_{f}"):
                     os.remove(f)
+                    st.experimental_rerun()
+##########################
+# Arxiv Integration      #
+##########################
+# perform_ai_lookup below is a placeholder: it returns a canned answer instead of querying arXiv.
+# Replace its body with the real perform_ai_lookup logic from your second app,
+# and ensure the arXiv RAG model endpoint that logic calls is available.
+# The extended_refs, titles_summary and full_audio flags are accepted but not used yet.
+def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False):
+    # Placeholder: In your real code, you'll call your Arxiv RAG endpoint and get results.
+    # Here we just simulate a response.
+    mock_answer = f"This is a mock Arxiv response for query: {q}.\nReferences:\n[Paper 1] Example Title"
+    st.markdown(f"**Arxiv Search Results for '{q}':**\n\n{mock_answer}")
+    if vocal_summary:
+        audio_file = asyncio.run(generate_speech("This is a spoken summary of Arxiv results."))
+        if audio_file:
+            st.audio(audio_file)
+    # Add any other logic: extended_refs, titles_summary, etc.
+    return mock_answer
+############################
+# Main App Layout & Logic  #
+############################
 def main():
+    st.title("🎥 Video & Arxiv Search with Voice")
+    # Initialize search class
     search = VideoSearch()
     # Create tabs
+    tab1, tab2, tab3, tab4 = st.tabs(["🔍 Search", "🎙️ Voice Input", "📚 Arxiv", "📂 Files"])
+    # ---- Tab 1: Video Search ----
     with tab1:
         st.subheader("Search Videos")
         col1, col2 = st.columns([3, 1])
         with col1:
             query = st.text_input("Enter your search query:",
+                                  value="ancient" if not st.session_state['initial_search_done'] else "")
         with col2:
             search_column = st.selectbox("Search in field:",
                                        ["All Fields"] + st.session_state['search_columns'])
             num_results = st.slider("Number of results:", 1, 100, 20)
         with col4:
             search_button = st.button("🔍 Search")
         if (search_button or not st.session_state['initial_search_done']) and query:
             st.session_state['initial_search_done'] = True
             selected_column = None if search_column == "All Fields" else search_column
             st.session_state['search_history'].append({
                 'query': query,
                 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                'results': results[:5]
             })
             for i, result in enumerate(results, 1):
+                with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=(i==1)):
                     cols = st.columns([2, 1])
                     with cols[0]:
                         st.markdown("**Description:**")
                             audio_file = asyncio.run(generate_speech(summary))
                             if audio_file:
                                 st.audio(audio_file)
+                                # Optionally delete after playing:
+                                # if os.path.exists(audio_file):
+                                #    os.remove(audio_file)
+    # ---- Tab 2: Voice Input ----
     with tab2:
         st.subheader("Voice Input")
+        st.write("🎙️ Record your voice and automatically transcribe to text:")
+        audio_bytes = audio_recorder()
+        if audio_bytes:
+            # Save the recorded audio for transcription
+            audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
+            with open(audio_path, "wb") as f:
+                f.write(audio_bytes)
+            st.success("Audio recorded successfully!")
+            # Transcribe using Whisper
+            voice_query = transcribe_audio(audio_path)
+            if voice_query:
+                st.markdown("**Transcribed Text:**")
+                st.write(voice_query)
+                st.session_state['last_voice_input'] = voice_query
+                if st.button("🔍 Search from Voice"):
+                    results = search.search(voice_query, None, 20)
+                    for i, result in enumerate(results, 1):
+                        with st.expander(f"Result {i}", expanded=(i==1)):
+                            st.write(result['description'])
+                            if result.get('youtube_id'):
+                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
+            # Clean up
+            if os.path.exists(audio_path):
+                os.remove(audio_path)
+    # ---- Tab 3: Arxiv Search ----
     with tab3:
+        st.subheader("Arxiv Search")
+        q = st.text_input("Enter your Arxiv search query:", value=st.session_state['arxiv_last_query'])
+        vocal_summary = st.checkbox("🎙 Short Audio Summary", value=True)
+        extended_refs = st.checkbox("📜 Extended References", value=False)
+        titles_summary = st.checkbox("🔖 Titles Only", value=True)
+        full_audio = st.checkbox("📚 Full Audio Results", value=False)
+        if st.button("🔍 Arxiv Search"):
+            st.session_state['arxiv_last_query'] = q
+            perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
+                              titles_summary=titles_summary, full_audio=full_audio)
+    # ---- Tab 4: File Manager ----
+    with tab4:
         show_file_manager()
     # Sidebar
     with st.sidebar:
         st.subheader("⚙️ Settings & History")
         if st.button("🗑️ Clear History"):
             st.session_state['search_history'] = []
+            st.experimental_rerun()
         st.markdown("### Recent Searches")
         for entry in reversed(st.session_state['search_history'][-5:]):
             with st.expander(f"{entry['timestamp']}: {entry['query']}"):
                 for i, result in enumerate(entry['results'], 1):
                     st.write(f"{i}. {result['description'][:100]}...")
         st.markdown("### Voice Settings")
+        st.selectbox("TTS Voice:",
+                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
+                     key="tts_voice")
 if __name__ == "__main__":
+    main()