Spaces: Running on CPU Upgrade
add feedback form link
app.py CHANGED
@@ -15,6 +15,7 @@ from datasets import load_dataset, load_from_disk
 from collections import Counter
 
 import yaml, json, requests, sys, os, time
+import urllib.parse
 import concurrent.futures
 
 from langchain import hub
@@ -498,13 +499,25 @@ def guess_question_type(query: str):
     return gen_client.invoke(messages).content
 
 class OverallConsensusEvaluation(BaseModel):
-    consensus: Literal[…] = Field(
+    rewritten_statement: str = Field(
         ...,
-        description="The …
+        description="The query rewritten as a statement if it was initially a question"
+    )
+    consensus: Literal[
+        "Strong Agreement Between Abstracts and Query",
+        "Moderate Agreement Between Abstracts and Query",
+        "Weak Agreement Between Abstracts and Query",
+        "No Clear Agreement/Disagreement Between Abstracts and Query",
+        "Weak Disagreement Between Abstracts and Query",
+        "Moderate Disagreement Between Abstracts and Query",
+        "Strong Disagreement Between Abstracts and Query"
+    ] = Field(
+        ...,
+        description="The overall level of consensus between the rewritten statement and the abstracts"
     )
     explanation: str = Field(
         ...,
-        description="A detailed explanation of the consensus evaluation"
+        description="A detailed explanation of the consensus evaluation (maximum six sentences)"
     )
     relevance_score: float = Field(
         ...,
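This hunk swaps the old single-line `consensus` field for a seven-level `Literal` and adds a `rewritten_statement` field. The point of the `Literal` is that a model response whose `consensus` string drifts off the rubric fails Pydantic validation instead of passing through silently. A minimal, self-contained sketch of that behaviour (abbreviated to two of the seven levels; the full set is in the hunk above):

```python
# Minimal sketch of the schema pattern above; field names match the diff,
# but the Literal is abbreviated to two levels for brevity.
from typing import Literal
from pydantic import BaseModel, Field, ValidationError

class OverallConsensusEvaluation(BaseModel):
    rewritten_statement: str = Field(
        ..., description="The query rewritten as a statement if it was initially a question"
    )
    consensus: Literal[
        "Strong Agreement Between Abstracts and Query",
        "Strong Disagreement Between Abstracts and Query",
    ] = Field(..., description="Two of the seven levels, abbreviated for this sketch")
    explanation: str = Field(..., description="Explanation, maximum six sentences")
    relevance_score: float = Field(..., description="Relevance in [0, 1]")

# A response whose `consensus` is not one of the Literal values fails
# validation rather than silently propagating:
try:
    OverallConsensusEvaluation(
        rewritten_statement="JWST has detected galaxies at z > 10.",
        consensus="Mild Agreement",  # not in the Literal set -> rejected
        explanation="...",
        relevance_score=0.9,
    )
except ValidationError as e:
    print(e)  # reports that `consensus` must be one of the allowed strings
```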
@@ -514,25 +527,31 @@ class OverallConsensusEvaluation(BaseModel):
     )
 
 def evaluate_overall_consensus(query: str, abstracts: List[str]) -> OverallConsensusEvaluation:
-    """
-    Evaluates the overall consensus of the abstracts in relation to the query in a single LLM call.
-    """
     prompt = f"""
     Query: {query}
-    …
+    You will be provided with {len(abstracts)} scientific abstracts. Your task is to do the following:
+    1. If the provided query is a question, rewrite it as a statement. This statement does not have to be true. Output this as 'Rewritten Statement:'.
+    2. Evaluate the overall consensus between the rewritten statement and the abstracts using one of the following levels:
+    - Strong Agreement Between Abstracts and Query
+    - Moderate Agreement Between Abstracts and Query
+    - Weak Agreement Between Abstracts and Query
+    - No Clear Agreement/Disagreement Between Abstracts and Query
+    - Weak Disagreement Between Abstracts and Query
+    - Moderate Disagreement Between Abstracts and Query
+    - Strong Disagreement Between Abstracts and Query
+    Output this as 'Consensus:'
+    3. Provide a detailed explanation of your consensus evaluation in maximum six sentences. Output this as 'Explanation:'
+    4. Assign a relevance score as a float between 0 to 1, where:
+    - 1.0: Perfect match in content and quality
+    - 0.8-0.9: Excellent, with minor differences
+    - 0.6-0.7: Good, captures main points but misses some details
+    - 0.4-0.5: Fair, partially relevant but significant gaps
+    - 0.2-0.3: Poor, major inaccuracies or omissions
+    - 0.0-0.1: Completely irrelevant or incorrect
+    Output this as 'Relevance Score:'
     Here are the abstracts:
-    …
     {' '.join([f"Abstract {i+1}: {abstract}" for i, abstract in enumerate(abstracts)])}
-    …
-    Provide your evaluation in a structured format.
+    Provide your evaluation in the structured format described above.
     """
 
     response = st.session_state.consensus_client.chat.completions.create(
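The hunk ends just as `st.session_state.consensus_client.chat.completions.create(` opens, so the call's arguments are not visible here. For illustration only, here is one way such a client could return an `OverallConsensusEvaluation` directly, using the openai SDK's structured-output helper; whether app.py wires it this way is an assumption, as are the model name and client setup:

```python
# Hedged sketch, not the app's confirmed wiring: parse the completion straight
# into the Pydantic schema (OverallConsensusEvaluation as defined above).
from typing import List
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

def evaluate_overall_consensus(query: str, abstracts: List[str]) -> OverallConsensusEvaluation:
    prompt = f"Query: {query}\n..."  # the full rubric prompt from the hunk above
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini",                          # placeholder model name
        messages=[{"role": "user", "content": prompt}],
        response_format=OverallConsensusEvaluation,   # schema-constrained output
    )
    return completion.choices[0].message.parsed       # an OverallConsensusEvaluation
```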
@@ -574,7 +593,7 @@ def make_embedding_plot(papers_df, consensus_answer):
     alphas = np.ones((len(plt_indices),)) * 0.9
     alphas[outlier_flag] = 0.5
 
-    fig = plt.figure(figsize=(9*…
+    fig = plt.figure(figsize=(9*1.8,12*1.8))
     plt.scatter(xax,yax, s=1, alpha=0.01, c='k')
 
     clkws = np.load('kw_tags.npz')
@@ -669,16 +688,29 @@ if st.session_state.get('runpfdr'):
     }
 
     @st.fragment()
-    def download_op(data):
+    def download_op(data, prefill_data):
         json_string = json.dumps(data)
         st.download_button(
             label='Download output',
             file_name="pathfinder_data.json",
             mime="application/json",
-            data=json_string,
+            data=json_string,
+            use_container_width=True)
+
+        encoded_data = urllib.parse.urlencode(prefill_data)
+        prefilled_url = f"{form_url}?{encoded_data}"
+        st.link_button('Feedback: Help make pathfinder better!',
+                       prefilled_url,
+                       use_container_width=True)
+
+
+    form_url = "https://docs.google.com/forms/d/e/1FAIpQLScaPKbW1fiwksX-UewovCLwx6EArl7bxbVmdWMDBs_0Ct3i6g/viewform"
+    prefill_data = {
+        "entry.1224637570": query, # Replace with your actual field ID
+        "entry.872565685": answer_text, # Replace with your actual field ID
+    }
 
-    …
-    download_op(session_vars)
+    download_op(session_vars, prefill_data)
 
 else:
     st.info("Use the sidebar to tweak the search parameters to get better results.")
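The feedback button relies on Google Forms' prefill mechanism: each form field has a stable `entry.<id>` name, and answers supplied as query parameters on the `viewform` URL appear pre-filled when the form opens. That is why the first hunk adds `import urllib.parse`: `urlencode` percent-escapes the free-text query and answer so they survive inside a URL. The `@st.fragment()` decorator keeps clicks on the download button from rerunning the whole Streamlit script. A standalone sketch with stand-in values (the entry IDs are the ones committed above; the query/answer strings are invented for illustration):

```python
# Standalone sketch of the prefilled-form URL. The entry IDs come from the
# diff; the query/answer values are invented placeholders.
import urllib.parse

form_url = ("https://docs.google.com/forms/d/e/"
            "1FAIpQLScaPKbW1fiwksX-UewovCLwx6EArl7bxbVmdWMDBs_0Ct3i6g/viewform")
prefill_data = {
    "entry.1224637570": "what is the stellar mass function?",       # placeholder query
    "entry.872565685": "The stellar mass function describes ...",   # placeholder answer
}

# urlencode percent-escapes spaces, '?', '&', etc., so free text survives the URL.
prefilled_url = f"{form_url}?{urllib.parse.urlencode(prefill_data)}"
print(prefilled_url)
# .../viewform?entry.1224637570=what+is+the+stellar+mass+function%3F&entry.872565685=...
```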