Spaces:
Running
Running
Update model.py
Browse files
model.py
CHANGED
@@ -865,10 +865,13 @@ def parse_multi_sample_llm_output(raw_response: str, output_format_str):
|
|
865 |
metadata_list = {}
|
866 |
explanation_lines = []
|
867 |
output_answers = raw_response.split("\n")[0].split(", ")
|
|
|
868 |
if ". " in raw_response.split("\n")[1] and len(raw_response.split("\n")[1].split(". ")) > 1:
|
869 |
explanation_lines = [x for x in raw_response.split(". ")[1] if x.strip()]
|
|
|
870 |
else:
|
871 |
explanation_lines = [x for x in raw_response.split("\n")[1:] if x.strip()]
|
|
|
872 |
output_formats = output_format_str.split(", ")
|
873 |
explain = ""
|
874 |
# assign output format to its output answer and explanation
|
@@ -1209,16 +1212,33 @@ def query_document_info(query_word, alternative_query_word, metadata, master_str
|
|
1209 |
# f"Output Format: {output_format_str}"
|
1210 |
# )
|
1211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1212 |
prompt_for_llm = (
|
1213 |
f"{prompt_instruction_prefix}"
|
1214 |
-
f"Given the following text snippets, analyze the entity/concept {rag_query_phrase}
|
1215 |
-
f"
|
1216 |
-
f"
|
1217 |
-
f"
|
1218 |
-
f"
|
|
|
|
|
|
|
|
|
1219 |
f"Provide only {output_format_str}. "
|
1220 |
-
f"If any information is not explicitly present
|
1221 |
-
f"
|
1222 |
f"\n\nText Snippets:\n{context_for_llm}\n\n"
|
1223 |
f"Output Format: {output_format_str}"
|
1224 |
)
|
|
|
865 |
metadata_list = {}
|
866 |
explanation_lines = []
|
867 |
output_answers = raw_response.split("\n")[0].split(", ")
|
868 |
+
print("raw explanation line: raw_response.split("\n")[1]")
|
869 |
if ". " in raw_response.split("\n")[1] and len(raw_response.split("\n")[1].split(". ")) > 1:
|
870 |
explanation_lines = [x for x in raw_response.split(". ")[1] if x.strip()]
|
871 |
+
print("explain line split by dot: ", explanation_lines)
|
872 |
else:
|
873 |
explanation_lines = [x for x in raw_response.split("\n")[1:] if x.strip()]
|
874 |
+
print("explain line split by new line: ", explanation_lines)
|
875 |
output_formats = output_format_str.split(", ")
|
876 |
explain = ""
|
877 |
# assign output format to its output answer and explanation
|
|
|
1212 |
# f"Output Format: {output_format_str}"
|
1213 |
# )
|
1214 |
|
1215 |
+
# prompt_for_llm = (
|
1216 |
+
# f"{prompt_instruction_prefix}"
|
1217 |
+
# f"Given the following text snippets, analyze the entity/concept {rag_query_phrase} or the mitochondrial DNA sample in {organism} if these specific identifiers are not explicitly found. "
|
1218 |
+
# f"Identify its primary associated country/geographic location. "
|
1219 |
+
# f"Also, determine if the genetic sample or individual mentioned is from a 'modern' (present-day living individual) "
|
1220 |
+
# f"or 'ancient' (e.g., prehistoric remains, archaeological sample) source. "
|
1221 |
+
# f"If the text does not mention whether the sample is ancient or modern, assume the sample is modern unless otherwise explicitly described as ancient or archaeological. "
|
1222 |
+
# f"Provide only {output_format_str}. "
|
1223 |
+
# f"If any information is not explicitly present in the provided text snippets, state 'unknown' for that specific piece of information. "
|
1224 |
+
# f"If the {explain_list} is not 'unknown', write 1 sentence after the output explaining how you inferred it from the text (one sentence for each)."
|
1225 |
+
# f"\n\nText Snippets:\n{context_for_llm}\n\n"
|
1226 |
+
# f"Output Format: {output_format_str}"
|
1227 |
+
# )
|
1228 |
prompt_for_llm = (
|
1229 |
f"{prompt_instruction_prefix}"
|
1230 |
+
f"Given the following text snippets, analyze the entity/concept {rag_query_phrase} "
|
1231 |
+
f"or the mitochondrial DNA sample in {organism} if these identifiers are not explicitly found. "
|
1232 |
+
f"Identify its **primary associated geographic location**, preferring the most specific available: "
|
1233 |
+
f"first try to determine the exact country; if no country is explicitly mentioned, then provide "
|
1234 |
+
f"the next most specific region, continent, island, or other clear geographic area mentioned. "
|
1235 |
+
f"If no geographic clues at all are present, state 'unknown' for location. "
|
1236 |
+
f"Also, determine if the genetic sample is from a 'modern' (present-day living individual) "
|
1237 |
+
f"or 'ancient' (prehistoric/archaeological) source. "
|
1238 |
+
f"If the text does not specify ancient or archaeological context, assume 'modern'. "
|
1239 |
f"Provide only {output_format_str}. "
|
1240 |
+
f"If any information is not explicitly present, use the fallback rules above before defaulting to 'unknown'. "
|
1241 |
+
f"For each non-'unknown' field in {explain_list}, write one sentence explaining how it was inferred from the text (one sentence for each)."
|
1242 |
f"\n\nText Snippets:\n{context_for_llm}\n\n"
|
1243 |
f"Output Format: {output_format_str}"
|
1244 |
)
|