Spaces:

holistic-ai
/

explainbility_benchmark

Sleeping

Zekun Wu committed on May 17, 2024

Commit

be02b8f

1 Parent(s): 4a24dd6

update

Files changed (1) hide show

pages/5_Conversation_Evaluation.py CHANGED Viewed

@@ -90,7 +90,10 @@ else:
     st.write('### Conversation')
     if conversation:
-        st.write(conversation)
     else:
         st.write('No conversation entered yet.')
@@ -105,7 +108,7 @@ else:
             eval = evaluator(model_name)
             scores = eval.evaluate_conversation(conversation, context)
             st.write('### Scores')
-            details = write_evaluation_commentary(scores)
             df = pd.DataFrame(details)
             st.write(df)

     st.write('### Conversation')
     if conversation:
+        for exchange in conversation:
+            role = exchange['role'].capitalize()
+            content = exchange['content']
+            st.markdown(f"**{role}:** {content}")
     else:
         st.write('No conversation entered yet.')
             eval = evaluator(model_name)
             scores = eval.evaluate_conversation(conversation, context)
             st.write('### Scores')
+            details = write_evaluation_commentary(scores["aggregate_scores"])
             df = pd.DataFrame(details)
             st.write(df)