victormiller
committed on
Commit
•
f44dec9
1
Parent(s):
aa54686
Update curated.py
Browse files- curated.py +5 -5
curated.py
CHANGED
@@ -535,7 +535,7 @@ def get_se_data(data_source: str = "StackExchange", doc_id: int = 3, target: str
|
|
535 |
target=target,
|
536 |
)
|
537 |
|
538 |
-
se_examples = DV2("data/curated_samples/stackexchange_raw.json", "data/curated_samples/stackexchange_extract.json",
|
539 |
|
540 |
def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str = "foo"):
|
541 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -558,7 +558,7 @@ def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str
|
|
558 |
target=target,
|
559 |
)
|
560 |
|
561 |
-
phil_examples = DV("data/curated_samples/philpapers_raw.json",
|
562 |
|
563 |
def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
|
564 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -582,7 +582,7 @@ def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo
|
|
582 |
target=target,
|
583 |
)
|
584 |
|
585 |
-
arx_examples =
|
586 |
|
587 |
def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "foo"):
|
588 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -652,7 +652,7 @@ def get_pubmed_data(data_source: str = "Pubmed", doc_id: int = 3, target: str =
|
|
652 |
target=target,
|
653 |
)
|
654 |
|
655 |
-
pubmed_examples =
|
656 |
|
657 |
def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "foo"):
|
658 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -676,7 +676,7 @@ def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "f
|
|
676 |
target=target,
|
677 |
)
|
678 |
|
679 |
-
dmm_examples = DV("data/curated_samples/
|
680 |
|
681 |
def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo"):
|
682 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
535 |
target=target,
|
536 |
)
|
537 |
|
538 |
+
se_examples = DV2("data/curated_samples/stackexchange_raw.json", "data/curated_samples/stackexchange_extract.json", 3)
|
539 |
|
540 |
def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str = "foo"):
|
541 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
558 |
target=target,
|
559 |
)
|
560 |
|
561 |
+
phil_examples = DV("data/curated_samples/philpapers_raw.json", 2, "PhilPapers")
|
562 |
|
563 |
def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
|
564 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
582 |
target=target,
|
583 |
)
|
584 |
|
585 |
+
arx_examples = DV2("data/curated_samples/arxiv_raw.json", "data/curated_samples/arxiv_extract.json", 3)
|
586 |
|
587 |
def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "foo"):
|
588 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
652 |
target=target,
|
653 |
)
|
654 |
|
655 |
+
pubmed_examples = DV2("data/curated_samples/pubmed_raw.json", "data/curated_samples/pubmed_extract.json", 3)
|
656 |
|
657 |
def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "foo"):
|
658 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
676 |
target=target,
|
677 |
)
|
678 |
|
679 |
+
dmm_examples = DV("data/curated_samples/dm_maths_raw.json", "data/curated_samples/dm_maths_extract.json", 3)
|
680 |
|
681 |
def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo"):
|
682 |
doc_id = max(0, min(int(doc_id), 9))
|