sango07 committed on
Commit
afb3c1e
·
verified ·
1 Parent(s): a3e3740

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -35
app.py CHANGED
@@ -1,49 +1,134 @@
1
  import streamlit as st
 
 
2
  from evaluation_module import RAGEvaluator
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  # Initialize evaluator
5
  evaluator = RAGEvaluator()
6
 
7
- st.title("RAG System Evaluation Dashboard")
8
-
9
- st.write("## Input Data")
10
 
11
- # Pre-filled input fields for testing
12
- question = st.text_input("Question", "What are the causes of climate change?")
13
- context = st.text_area("Reference Context (top 'k' documents)", """
 
14
  Climate change is caused by a variety of factors, including natural processes and human activities. Human activities, such as burning fossil fuels, deforestation, and industrial processes, release greenhouse gases into the atmosphere. These gases trap heat from the sun, causing the Earth's temperature to rise. Natural processes, such as volcanic eruptions and variations in solar radiation, also play a role in climate change.
15
- """)
16
- generated_output = st.text_area("LLM Generated Output", """
 
 
 
17
  Climate change is primarily caused by human activities that release greenhouse gases into the atmosphere. These activities include burning fossil fuels for energy, deforestation, and various industrial processes. The increase in greenhouse gases, such as carbon dioxide and methane, traps more heat in the Earth's atmosphere, leading to a rise in global temperatures. Natural factors, like volcanic activity and changes in solar radiation, can also contribute to climate change, but their impact is relatively minor compared to human activities.
18
- """)
19
 
20
- if st.button("Evaluate"):
 
21
  if question and context and generated_output:
22
- st.write("### Evaluation Results")
23
-
24
  # Perform evaluations
25
  metrics = evaluator.evaluate_all(generated_output, context)
26
-
27
- # Display metrics with explanations
28
- st.write(f"**BLEU Score**: {metrics['BLEU']}")
29
- st.write("BLEU measures the overlap between the generated output and reference text based on n-grams. Higher scores indicate better match.")
30
-
31
- st.write(f"**ROUGE-1 Score**: {metrics['ROUGE-1']}")
32
- st.write("ROUGE-1 measures the overlap of unigrams between the generated output and reference text. Higher scores indicate better match.")
33
-
34
- st.write(f"**BERT Precision**: {metrics['BERT P']}")
35
- st.write(f"**BERT Recall**: {metrics['BERT R']}")
36
- st.write(f"**BERT F1 Score**: {metrics['BERT F1']}")
37
- st.write("BERTScore evaluates the semantic similarity between the generated output and reference text using BERT embeddings.")
38
-
39
- st.write(f"**Perplexity**: {metrics['Perplexity']}")
40
- st.write("Perplexity measures how well a language model predicts the text. Lower values indicate better fluency and coherence.")
41
-
42
- st.write(f"**Diversity**: {metrics['Diversity']}")
43
- st.write("Diversity measures the uniqueness of bigrams in the generated output. Higher values indicate more diverse and varied output.")
44
-
45
- st.write(f"**Racial Bias**: {metrics['Racial Bias']}")
46
- st.write("Racial Bias score indicates the presence of biased language in the generated output. Higher scores indicate more bias.")
47
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  else:
49
- st.write("Please provide all inputs to evaluate.")
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import plotly.graph_objs as go
4
  from evaluation_module import RAGEvaluator
5
 
6
+ # Page configuration
7
+ st.set_page_config(
8
+ page_title="RAG Evaluation Dashboard",
9
+ page_icon="🔍",
10
+ layout="wide"
11
+ )
12
+
13
+ # Custom CSS for improved styling
14
+ st.markdown("""
15
+ <style>
16
+ .main-header {
17
+ color: #2C3E50;
18
+ font-weight: bold;
19
+ text-align: center;
20
+ padding-bottom: 20px;
21
+ }
22
+ .metric-card {
23
+ background-color: #F0F4F8;
24
+ border-radius: 10px;
25
+ padding: 15px;
26
+ margin-bottom: 15px;
27
+ box-shadow: 0 4px 6px rgba(0,0,0,0.1);
28
+ }
29
+ .stTextArea, .stTextInput {
30
+ background-color: #FFFFFF;
31
+ border-radius: 8px;
32
+ }
33
+ .stButton>button {
34
+ background-color: #3498DB;
35
+ color: white;
36
+ font-weight: bold;
37
+ border: none;
38
+ border-radius: 8px;
39
+ padding: 10px 20px;
40
+ }
41
+ .stButton>button:hover {
42
+ background-color: #2980B9;
43
+ }
44
+ </style>
45
+ """, unsafe_allow_html=True)
46
+
47
+ # Title
48
+ st.markdown("<h1 class='main-header'>🔍 RAG System Evaluation Dashboard</h1>", unsafe_allow_html=True)
49
+
50
  # Initialize evaluator
51
  evaluator = RAGEvaluator()
52
 
53
+ # Input Section
54
+ col1, col2 = st.columns(2)
 
55
 
56
+ with col1:
57
+ st.markdown("### 📝 Input Details")
58
+ question = st.text_input("Question", "What are the causes of climate change?", key="question_input")
59
+ context = st.text_area("Reference Context", """
60
  Climate change is caused by a variety of factors, including natural processes and human activities. Human activities, such as burning fossil fuels, deforestation, and industrial processes, release greenhouse gases into the atmosphere. These gases trap heat from the sun, causing the Earth's temperature to rise. Natural processes, such as volcanic eruptions and variations in solar radiation, also play a role in climate change.
61
+ """, height=200, key="context_input")
62
+
63
+ with col2:
64
+ st.markdown("### 💬 Generated Output")
65
+ generated_output = st.text_area("LLM Response", """
66
  Climate change is primarily caused by human activities that release greenhouse gases into the atmosphere. These activities include burning fossil fuels for energy, deforestation, and various industrial processes. The increase in greenhouse gases, such as carbon dioxide and methane, traps more heat in the Earth's atmosphere, leading to a rise in global temperatures. Natural factors, like volcanic activity and changes in solar radiation, can also contribute to climate change, but their impact is relatively minor compared to human activities.
67
+ """, height=200, key="output_input")
68
 
69
+ # Evaluation Button
70
+ if st.button("Run Evaluation", key="eval_button"):
71
  if question and context and generated_output:
 
 
72
  # Perform evaluations
73
  metrics = evaluator.evaluate_all(generated_output, context)
74
+
75
+ # Metrics Display
76
+ st.markdown("### 📊 Evaluation Metrics")
77
+
78
+ # Create columns for metrics
79
+ metric_cols = st.columns(4)
80
+
81
+ # Metrics with explanatory tooltips
82
+ metrics_info = {
83
+ "BLEU": "Measures n-gram overlap between generated and reference text",
84
+ "ROUGE-1": "Unigram overlap between generated and reference text",
85
+ "BERT F1": "Semantic similarity using BERT embeddings",
86
+ "Perplexity": "Lower values indicate better language model prediction",
87
+ "Diversity": "Higher values suggest more unique output",
88
+ "Racial Bias": "Indicates potential biased language presence"
89
+ }
90
+
91
+ # Display metrics in a grid
92
+ for i, (metric_name, metric_value) in enumerate(metrics.items()):
93
+ with metric_cols[i % 4]:
94
+ st.markdown(f"""
95
+ <div class='metric-card'>
96
+ <h4>{metric_name}</h4>
97
+ <p style='font-size:24px; color:#2980B9; font-weight:bold;'>{metric_value:.4f}</p>
98
+ <small>{metrics_info.get(metric_name, "")}</small>
99
+ </div>
100
+ """, unsafe_allow_html=True)
101
+
102
+ # Visualization of Metrics
103
+ st.markdown("### 📈 Metrics Visualization")
104
+
105
+ # Create a bar chart of metrics
106
+ metric_df = pd.DataFrame.from_dict(metrics, orient='index', columns=['Value'])
107
+ metric_df = metric_df.reset_index().rename(columns={'index':'Metric'})
108
+
109
+ fig = go.Figure(data=[
110
+ go.Bar(
111
+ x=metric_df['Metric'],
112
+ y=metric_df['Value'],
113
+ marker_color='#3498DB',
114
+ text=[f'{val:.4f}' for val in metric_df['Value']],
115
+ textposition='auto'
116
+ )
117
+ ])
118
+ fig.update_layout(
119
+ title='RAG Evaluation Metrics',
120
+ xaxis_title='Metrics',
121
+ yaxis_title='Score',
122
+ template='plotly_white'
123
+ )
124
+
125
+ st.plotly_chart(fig, use_container_width=True)
126
+
127
  else:
128
+ st.error("Please provide all inputs to evaluate.")
129
+
130
+ # Footer
131
+ st.markdown("""
132
+ ---
133
+ *RAG Evaluation Dashboard - Powered by Advanced NLP Metrics*
134
+ """)