kiyer committed on
Commit 01799cd · verified · 1 Parent(s): f08a02e

add feedback form link

Files changed (1)
  1. app.py +55 -23
app.py CHANGED
@@ -15,6 +15,7 @@ from datasets import load_dataset, load_from_disk
 from collections import Counter
 
 import yaml, json, requests, sys, os, time
+import urllib.parse
 import concurrent.futures
 
 from langchain import hub
@@ -498,13 +499,25 @@ def guess_question_type(query: str):
     return gen_client.invoke(messages).content
 
 class OverallConsensusEvaluation(BaseModel):
-    consensus: Literal["Strong Agreement", "Moderate Agreement", "Weak Agreement", "No Clear Consensus", "Weak Disagreement", "Moderate Disagreement", "Strong Disagreement"] = Field(
+    rewritten_statement: str = Field(
+        ...,
+        description="The query rewritten as a statement if it was initially a question"
+    )
+    consensus: Literal[
+        "Strong Agreement Between Abstracts and Query",
+        "Moderate Agreement Between Abstracts and Query",
+        "Weak Agreement Between Abstracts and Query",
+        "No Clear Agreement/Disagreement Between Abstracts and Query",
+        "Weak Disagreement Between Abstracts and Query",
+        "Moderate Disagreement Between Abstracts and Query",
+        "Strong Disagreement Between Abstracts and Query"
+    ] = Field(
         ...,
-        description="The overall level of consensus between the query and the abstracts"
+        description="The overall level of consensus between the rewritten statement and the abstracts"
     )
     explanation: str = Field(
         ...,
-        description="A detailed explanation of the consensus evaluation"
+        description="A detailed explanation of the consensus evaluation (maximum six sentences)"
     )
     relevance_score: float = Field(
         ...,
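Note on the hunk above: because consensus is typed as a Literal, Pydantic rejects any value that is not one of the seven exact phrases, so a reply that still uses the old short labels ("Strong Agreement", etc.) fails validation rather than slipping through. A minimal, self-contained sketch of that behaviour; the class below is a trimmed illustrative stand-in for OverallConsensusEvaluation, not the app's code:

```python
from typing import Literal
from pydantic import BaseModel, Field, ValidationError

# Trimmed illustrative stand-in for OverallConsensusEvaluation (fewer consensus options).
class ConsensusSketch(BaseModel):
    rewritten_statement: str = Field(..., description="Query rewritten as a statement")
    consensus: Literal[
        "Strong Agreement Between Abstracts and Query",
        "No Clear Agreement/Disagreement Between Abstracts and Query",
        "Strong Disagreement Between Abstracts and Query",
    ] = Field(..., description="Overall level of consensus")
    explanation: str = Field(..., description="At most six sentences")
    relevance_score: float = Field(..., description="Relevance between 0 and 1")

# The exact phrase is accepted.
ok = ConsensusSketch(
    rewritten_statement="Example statement.",
    consensus="Strong Agreement Between Abstracts and Query",
    explanation="Illustrative explanation.",
    relevance_score=0.9,
)
print(ok.consensus)

# The old short label is rejected by the Literal constraint.
try:
    ConsensusSketch(
        rewritten_statement="Example statement.",
        consensus="Strong Agreement",
        explanation="Illustrative explanation.",
        relevance_score=0.9,
    )
except ValidationError as err:
    print("rejected:", len(err.errors()), "error(s)")
```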
@@ -514,25 +527,31 @@ class OverallConsensusEvaluation(BaseModel):
     )
 
 def evaluate_overall_consensus(query: str, abstracts: List[str]) -> OverallConsensusEvaluation:
-    """
-    Evaluates the overall consensus of the abstracts in relation to the query in a single LLM call.
-    """
     prompt = f"""
     Query: {query}
-
-    You will be provided with {len(abstracts)} scientific abstracts. Your task is to:
-    1. Evaluate the overall consensus between the query and the abstracts.
-    2. Provide a detailed explanation of your consensus evaluation.
-    3. Assign an overall relevance score from 0 to 1, where 0 means completely irrelevant and 1 means highly relevant.
-
-    For the consensus evaluation, use one of the following levels:
-    Strong Agreement, Moderate Agreement, Weak Agreement, No Clear Consensus, Weak Disagreement, Moderate Disagreement, Strong Disagreement
-
+    You will be provided with {len(abstracts)} scientific abstracts. Your task is to do the following:
+    1. If the provided query is a question, rewrite it as a statement. This statement does not have to be true. Output this as 'Rewritten Statement:'.
+    2. Evaluate the overall consensus between the rewritten statement and the abstracts using one of the following levels:
+        - Strong Agreement Between Abstracts and Query
+        - Moderate Agreement Between Abstracts and Query
+        - Weak Agreement Between Abstracts and Query
+        - No Clear Agreement/Disagreement Between Abstracts and Query
+        - Weak Disagreement Between Abstracts and Query
+        - Moderate Disagreement Between Abstracts and Query
+        - Strong Disagreement Between Abstracts and Query
+    Output this as 'Consensus:'
+    3. Provide a detailed explanation of your consensus evaluation in maximum six sentences. Output this as 'Explanation:'
+    4. Assign a relevance score as a float between 0 to 1, where:
+        - 1.0: Perfect match in content and quality
+        - 0.8-0.9: Excellent, with minor differences
+        - 0.6-0.7: Good, captures main points but misses some details
+        - 0.4-0.5: Fair, partially relevant but significant gaps
+        - 0.2-0.3: Poor, major inaccuracies or omissions
+        - 0.0-0.1: Completely irrelevant or incorrect
+    Output this as 'Relevance Score:'
     Here are the abstracts:
-
     {' '.join([f"Abstract {i+1}: {abstract}" for i, abstract in enumerate(abstracts)])}
-
-    Provide your evaluation in a structured format.
+    Provide your evaluation in the structured format described above.
     """
 
     response = st.session_state.consensus_client.chat.completions.create(
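Note on the hunk above: the rewritten prompt asks for plainly labelled sections (Rewritten Statement:, Consensus:, Explanation:, Relevance Score:). The hunk does not show how the completion is turned back into an OverallConsensusEvaluation; purely as an illustration, a reply that follows those labels could be split into the model's fields with a small parser along these lines (hypothetical helper, not part of app.py):

```python
import re

LABELS = ("Rewritten Statement", "Consensus", "Explanation", "Relevance Score")

def _grab(label: str, text: str) -> str:
    # Capture everything after "<label>:" up to the next label or the end of the reply.
    stop = "|".join(LABELS)
    match = re.search(rf"{label}:\s*(.*?)(?=\n\s*(?:{stop}):|$)", text, flags=re.S)
    return match.group(1).strip() if match else ""

def parse_labeled_reply(text: str) -> dict:
    return {
        "rewritten_statement": _grab("Rewritten Statement", text),
        "consensus": _grab("Consensus", text),
        "explanation": _grab("Explanation", text),
        "relevance_score": float(_grab("Relevance Score", text) or 0.0),
    }

reply = (
    "Rewritten Statement: Example statement.\n"
    "Consensus: Moderate Agreement Between Abstracts and Query\n"
    "Explanation: Most abstracts support the statement.\n"
    "Relevance Score: 0.7"
)
print(parse_labeled_reply(reply)["consensus"])
```

The resulting dict uses the same field names as OverallConsensusEvaluation, so it could be passed straight to that model for validation.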
@@ -574,7 +593,7 @@ def make_embedding_plot(papers_df, consensus_answer):
     alphas = np.ones((len(plt_indices),)) * 0.9
     alphas[outlier_flag] = 0.5
 
-    fig = plt.figure(figsize=(9*2.,12*2.))
+    fig = plt.figure(figsize=(9*1.8,12*1.8))
     plt.scatter(xax,yax, s=1, alpha=0.01, c='k')
 
     clkws = np.load('kw_tags.npz')
@@ -669,16 +688,29 @@ if st.session_state.get('runpfdr'):
             }
 
             @st.fragment()
-            def download_op(data):
+            def download_op(data, prefill_data):
                 json_string = json.dumps(data)
                 st.download_button(
                     label='Download output',
                     file_name="pathfinder_data.json",
                     mime="application/json",
-                    data=json_string,)
+                    data=json_string,
+                    use_container_width=True)
+
+                encoded_data = urllib.parse.urlencode(prefill_data)
+                prefilled_url = f"{form_url}?{encoded_data}"
+                st.link_button('Feedback: Help make pathfinder better!',
+                               prefilled_url,
+                               use_container_width=True)
+
+
+            form_url = "https://docs.google.com/forms/d/e/1FAIpQLScaPKbW1fiwksX-UewovCLwx6EArl7bxbVmdWMDBs_0Ct3i6g/viewform"
+            prefill_data = {
+                "entry.1224637570": query,  # Replace with your actual field ID
+                "entry.872565685": answer_text,  # Replace with your actual field ID
+            }
 
-            # with st.sidebar:
-            download_op(session_vars)
+            download_op(session_vars, prefill_data)
 
 else:
     st.info("Use the sidebar to tweak the search parameters to get better results.")
 