Spaces:

HEHEBOIOG
/

NewsInferno

Sleeping

App Files Files Community

HEHEBOIOG committed on Dec 9, 2024

Commit

6c15522

verified ·

1 Parent(s): cf3027b

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -141

app.py CHANGED Viewed

@@ -7,28 +7,21 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain_groq import ChatGroq
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings
 from langchain.memory import ConversationBufferMemory
-from langchain.chains import ConversationalRetrievalChain
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer
 import tavily
-# Evaluation Metrics Libraries
-from rouge_score import rouge_scorer
-from nltk.translate.bleu_score import sentence_bleu
-from nltk.tokenize import word_tokenize
-from sklearn.metrics.pairwise import cosine_similarity
-from textstat import flesch_reading_ease, flesch_kincaid_grade
 class AdvancedRAGChatbot:
     def __init__(self,
                  tavily_api_key: str,
                  embedding_model: str = "BAAI/bge-large-en-v1.5",
                  llm_model: str = "llama-3.3-70b-versatile",
                  temperature: float = 0.7):
-        """Initialize the Advanced RAG Chatbot with Enhanced Metrics"""
         os.environ["TAVILY_API_KEY"] = tavily_api_key
-        # Tavily Client
         self.tavily_client = tavily.TavilyClient(tavily_api_key)
         # NLP Components
@@ -37,69 +30,44 @@ class AdvancedRAGChatbot:
         self.sentiment_analyzer = pipeline("sentiment-analysis")
         self.ner_pipeline = pipeline("ner", aggregation_strategy="simple")
-        # Evaluation Components
-        self.rouge_scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
         # Language Model Configuration
         self.llm = self._configure_llm(llm_model, temperature)
         # Conversation Memory
         self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-    def _calculate_comprehensive_metrics(self, query: str, response: str, web_sources: List[Dict]) -> Dict[str, Any]:
-        """Calculate comprehensive evaluation metrics"""
-        metrics = {}
-        # Readability Metrics
-        metrics['flesch_reading_ease'] = flesch_reading_ease(response)
-        metrics['flesch_kincaid_grade'] = flesch_kincaid_grade(response)
-        # Length Metrics
-        metrics['query_length'] = len(word_tokenize(query))
-        metrics['response_length'] = len(word_tokenize(response))
-        # BLEU Score (compared against web sources)
-        reference_texts = [word_tokenize(source.get('content', '')) for source in web_sources]
-        candidate_tokens = word_tokenize(response)
-        bleu_scores = []
-        for ref in reference_texts:
-            try:
-                bleu_score = sentence_bleu([ref], candidate_tokens)
-                bleu_scores.append(bleu_score)
-            except Exception:
-                pass
-        metrics['average_bleu_score'] = np.mean(bleu_scores) if bleu_scores else 0.0
-        # ROUGE Scores
-        reference_text = ' '.join([source.get('content', '') for source in web_sources])
-        rouge_scores = self.rouge_scorer.score(reference_text, response)
-        metrics['rouge_scores'] = {
-            'rouge1': rouge_scores['rouge1'].fmeasure,
-            'rouge2': rouge_scores['rouge2'].fmeasure,
-            'rougeL': rouge_scores['rougeL'].fmeasure
-        }
-        # Semantic Similarity
         try:
-            web_source_embeddings = self.semantic_model.encode([source.get('content', '') for source in web_sources])
-            response_embedding = self.semantic_model.encode([response])[0]
-            semantic_similarities = cosine_similarity([response_embedding], web_source_embeddings)[0]
-            metrics['semantic_similarity'] = {
-                'mean': np.mean(semantic_similarities),
-                'max': np.max(semantic_similarities),
-                'min': np.min(semantic_similarities)
-            }
         except Exception as e:
-            st.warning(f"Semantic similarity calculation error: {e}")
-            metrics['semantic_similarity'] = {'mean': 0, 'max': 0, 'min': 0}
-        return metrics
     def process_query(self, query: str) -> Dict[str, Any]:
-        """Process the user query with comprehensive evaluation"""
         # Web Search
         web_results = self._tavily_web_search(query)
@@ -134,91 +102,109 @@ class AdvancedRAGChatbot:
         # Generate Response
         response = self.llm.invoke(full_prompt)
-        response_content = response.content
-        # Calculate Comprehensive Metrics
-        evaluation_metrics = self._calculate_comprehensive_metrics(
-            query,
-            response_content,
-            web_results
-        )
         return {
-            "response": response_content,
             "web_sources": web_results,
             "semantic_similarity": semantic_score.tolist(),
             "sentiment": sentiment_result,
-            "named_entities": entities,
-            "evaluation_metrics": evaluation_metrics
         }
 def main():
-    # [Previous main function code remains the same]
-    # Add a new section to display comprehensive metrics
-    with col2:
-        st.header("Response & Metrics")
-        if submit_button and user_input:
-            with st.spinner("Searching web and processing query..."):
-                try:
-                    response = chatbot.process_query(user_input)
-                    # Existing response display code...
-                    # Comprehensive Metrics Display
-                    st.markdown("### 📊 Comprehensive Evaluation Metrics")
-                    # Readability Metrics
-                    col_read1, col_read2 = st.columns(2)
-                    with col_read1:
-                        st.metric(
-                            "Flesch Reading Ease",
-                            f"{response['evaluation_metrics']['flesch_reading_ease']:.2f}",
-                            help="Higher scores indicate easier readability"
-                        )
-                    with col_read2:
-                        st.metric(
-                            "Flesch-Kincaid Grade",
-                            f"{response['evaluation_metrics']['flesch_kincaid_grade']:.2f}",
-                            help="US grade level required to understand the text"
-                        )
-                    # Length and BLEU Metrics
-                    col_len1, col_len2, col_len3 = st.columns(3)
-                    with col_len1:
-                        st.metric("Query Length", response['evaluation_metrics']['query_length'])
-                    with col_len2:
-                        st.metric("Response Length", response['evaluation_metrics']['response_length'])
-                    with col_len3:
-                        st.metric(
-                            "BLEU Score",
-                            f"{response['evaluation_metrics']['average_bleu_score']:.4f}",
-                            help="Measures similarity to reference texts"
-                        )
-                    # ROUGE Scores
-                    st.markdown("#### 📈 ROUGE Scores")
-                    rouge_metrics = response['evaluation_metrics']['rouge_scores']
-                    col_rouge1, col_rouge2, col_rouge3 = st.columns(3)
-                    with col_rouge1:
-                        st.metric("ROUGE-1", f"{rouge_metrics['rouge1']:.4f}")
-                    with col_rouge2:
-                        st.metric("ROUGE-2", f"{rouge_metrics['rouge2']:.4f}")
-                    with col_rouge3:
-                        st.metric("ROUGE-L", f"{rouge_metrics['rougeL']:.4f}")
-                    # Semantic Similarity
-                    st.markdown("#### 🔍 Semantic Similarity")
-                    sem_sim = response['evaluation_metrics']['semantic_similarity']
-                    col_sem1, col_sem2, col_sem3 = st.columns(3)
-                    with col_sem1:
-                        st.metric("Mean Similarity", f"{sem_sim['mean']:.4f}")
-                    with col_sem2:
-                        st.metric("Max Similarity", f"{sem_sim['max']:.4f}")
-                    with col_sem3:
-                        st.metric("Min Similarity", f"{sem_sim['min']:.4f}")
-                except Exception as e:
-                    st.error(f"An error occurred: {e}")
 if __name__ == "__main__":
     main()

 from langchain_groq import ChatGroq
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings
 from langchain.memory import ConversationBufferMemory
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer
 import tavily
 class AdvancedRAGChatbot:
     def __init__(self,
                  tavily_api_key: str,
                  embedding_model: str = "BAAI/bge-large-en-v1.5",
                  llm_model: str = "llama-3.3-70b-versatile",
                  temperature: float = 0.7):
+        """Initialize the Advanced RAG Chatbot with Tavily web search integration"""
+        # Set the Tavily API key as an environment variable
         os.environ["TAVILY_API_KEY"] = tavily_api_key
+        # Correct Tavily Client initialization
         self.tavily_client = tavily.TavilyClient(tavily_api_key)
         # NLP Components
         self.sentiment_analyzer = pipeline("sentiment-analysis")
         self.ner_pipeline = pipeline("ner", aggregation_strategy="simple")
         # Language Model Configuration
         self.llm = self._configure_llm(llm_model, temperature)
         # Conversation Memory
         self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+    def _configure_embeddings(self, model_name: str):
+        """Build a HuggingFaceBgeEmbeddings wrapper for the given model.
+
+        Args:
+            model_name: Model identifier forwarded to HuggingFaceBgeEmbeddings.
+
+        Returns:
+            A HuggingFaceBgeEmbeddings instance created with encode_kwargs that
+            request normalized embeddings and a progress bar.
+        """
+        # Forwarded unchanged as encode_kwargs to the embeddings wrapper.
+        encode_kwargs = {'normalize_embeddings': True, 'show_progress_bar': True}
+        return HuggingFaceBgeEmbeddings(model_name=model_name, encode_kwargs=encode_kwargs)
+    def _configure_llm(self, model_name: str, temperature: float):
+        """Create the ChatGroq chat model used to generate answers.
+
+        Args:
+            model_name: Model identifier passed to ChatGroq.
+            temperature: Sampling temperature passed to ChatGroq.
+
+        Returns:
+            A ChatGroq instance configured with max_tokens=4096 and
+            streaming=True.
+        """
+        return ChatGroq(
+            model_name=model_name,
+            temperature=temperature,
+            max_tokens=4096,
+            streaming=True
+        )
+    def _tavily_web_search(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
+        """Run a Tavily search for ``query`` and return its result entries.
+
+        Args:
+            query: Search text sent to the Tavily client.
+            max_results: Value forwarded as the search's ``max_results``.
+
+        Returns:
+            The value stored under 'results' in the search response, or an
+            empty list when that key is absent or when the search raises.
+            Failures are reported through ``st.error`` instead of propagating.
+        """
         try:
+            search_result = self.tavily_client.search(
+                query=query,
+                max_results=max_results,
+                search_depth="advanced",
+                include_domains=[],
+                exclude_domains=[],
+                include_answer=True
+            )
+            # Only the 'results' entry is returned; the rest of the response is dropped here.
+            return search_result.get('results', [])
         except Exception as e:
+            # Best-effort: surface the failure in the UI and fall back to no sources.
+            st.error(f"Tavily Search Error: {e}")
+            return []
     def process_query(self, query: str) -> Dict[str, Any]:
+        """Process the user query with web search and NLP techniques"""
         # Web Search
         web_results = self._tavily_web_search(query)
         # Generate Response
         response = self.llm.invoke(full_prompt)
         return {
+            "response": response.content,
             "web_sources": web_results,
             "semantic_similarity": semantic_score.tolist(),
             "sentiment": sentiment_result,
+            "named_entities": entities
         }
 def main():
+    """Run the Streamlit front end for the web-search chatbot.
+
+    Configures the page, reads the Tavily API key from the 'TAVILY_API_KEY'
+    environment variable (warning and stopping the script when it is unset),
+    renders the sidebar settings, builds an AdvancedRAGChatbot from them, and,
+    once the user submits a non-empty query, displays the answer, sentiment,
+    detected entities and web sources returned by ``process_query``. Any
+    exception raised while processing is shown with ``st.error``.
+    """
+    # Page Configuration
+    st.set_page_config(
+        page_title="Web-Powered RAG Chatbot",
+        page_icon="🌐",
+        layout="wide",
+        initial_sidebar_state="expanded"
+    )
+    # Retrieve Tavily API Key from Environment Variable
+    tavily_api_key = os.getenv("TAVILY_API_KEY")
+    if not tavily_api_key:
+        st.warning("Tavily API Key is missing. Please set the 'TAVILY_API_KEY' environment variable.")
+        st.stop()
+    # Sidebar Configuration
+    with st.sidebar:
+        st.header("🔧 Chatbot Settings")
+        st.markdown("Customize your AI assistant's behavior")
+        # Model Configuration
+        embedding_model = st.selectbox(
+            "Embedding Model",
+            ["BAAI/bge-large-en-v1.5", "sentence-transformers/all-MiniLM-L6-v2"]
+        )
+        temperature = st.slider("Creativity Level", 0.0, 1.0, 0.7, help="Higher values make responses more creative")
+        # Additional Controls
+        st.divider()
+        st.info("Powered by Tavily Web Search")
+    # Initialize Chatbot
+    chatbot = AdvancedRAGChatbot(
+        tavily_api_key=tavily_api_key,
+        embedding_model=embedding_model,
+        temperature=temperature
+    )
+    # Main Chat Interface
+    st.title("🌐 Web-Powered RAG Chatbot")
+    # Chat input with placeholder
+    user_input = st.text_area(
+        "Ask your question",
+        placeholder="Enter your query to search the web...",
+        height=250
+    )
+    # Submit button
+    submit_button = st.button("Search & Analyze", type="primary")
+    # Response container
+    if submit_button and user_input:
+        with st.spinner("Searching web and processing query..."):
+            try:
+                response = chatbot.process_query(user_input)
+                # Bot Response
+                st.markdown("#### AI's Answer")
+                st.write(response['response'])
+                # Sentiment Analysis
+                # NOTE(review): assumes response['sentiment'] is a dict with
+                # 'label' and 'score' keys - confirm against process_query.
+                st.markdown("#### Sentiment Analysis")
+                sentiment = response['sentiment']
+                st.metric(
+                    label="Sentiment",
+                    value=sentiment['label'],
+                    delta=f"{sentiment['score']:.2%}"
+                )
+                # Named Entities
+                st.markdown("#### Detected Entities")
+                if response['named_entities']:
+                    for entity in response['named_entities']:
+                        word = entity.get('word', 'Unknown')
+                        # The NER pipeline is created with aggregation_strategy="simple",
+                        # which reports the label under 'entity_group'; the previously
+                        # used keys are kept as fallbacks so other shapes still work.
+                        fallback_type = entity.get('entity_type', entity.get('entity', 'Unknown Type'))
+                        entity_type = entity.get('entity_group', fallback_type)
+                        st.text(f"{word} ({entity_type})")
+                else:
+                    st.info("No entities detected")
+                # Web Sources
+                if response['web_sources']:
+                    st.markdown("#### Web Sources")
+                    for i, source in enumerate(response['web_sources'], 1):
+                        with st.expander(f"Source {i}: {source.get('title', 'Untitled')}"):
+                            st.write(source.get('content', 'No content available'))
+                            if source.get('url'):
+                                st.markdown(f"[Original Source]({source['url']})")
+            except Exception as e:
+                # Top-level UI boundary: show the failure instead of crashing the app.
+                st.error(f"An error occurred: {e}")
+    else:
+        st.info("Enter a query to search the web and get an AI-powered response")
 if __name__ == "__main__":
     main()