dolphinium committed on
Commit
4a63df8
·
1 Parent(s): bbcff60

format document

Browse files
Files changed (1) hide show
  1. solr_metadata.py +711 -706
solr_metadata.py CHANGED
@@ -161,7 +161,7 @@ field_metadata = [
161
  "example_values": [50, 120.5, 176.157, 1000],
162
  "definition": "The total value of a financial deal, in millions of USD. This is the primary numeric field for financial aggregations (sum, avg, etc.). To use this, you must also filter for news that has a deal value, e.g., 'total_deal_value_in_million:[0 TO *]'."
163
  },
164
-
165
  # NEW 'deal' CORE METADATA
166
  {
167
  "core_name": "deal",
@@ -370,7 +370,8 @@ field_metadata = [
370
  "core_name": "compound",
371
  "field_name": "molecule_id",
372
  "type": "string",
373
- "example_values": ["mol_12345", "mol_67890"], # Placeholder, not in provided doc examples directly.
 
374
  "definition": "Unique identifier for the molecule associated with the compound."
375
  },
376
  {
@@ -447,7 +448,8 @@ field_metadata = [
447
  "core_name": "compound",
448
  "field_name": "owner_company_name",
449
  "type": "string (multi-valued, exact match, for faceting)",
450
- "example_values": ["AstraZeneca Plc"], # Placeholder, based on common patterns
 
451
  "definition": "The canonical, standardized name(s) of the owner company of the compound. Use for faceting on owner companies."
452
  },
453
  {
@@ -496,14 +498,16 @@ field_metadata = [
496
  "core_name": "compound",
497
  "field_name": "mechanism",
498
  "type": "string (multi-valued, specific, for faceting)",
499
- "example_values": ["Beta-1 Adrenergic Receptor Antagonist"], # Placeholder
 
500
  "definition": "The specific mechanism of action of the compound. Use for very specific mechanism queries or faceting."
501
  },
502
  {
503
  "core_name": "compound",
504
  "field_name": "mechanism_s",
505
  "type": "string (multi-valued, for searching)",
506
- "example_values": ["beta blocker", "adrenergic receptor antagonist"], # Placeholder
 
507
  "definition": "Broader, multi-valued mechanisms of action and their synonyms. **Use this field for broad mechanism searches** in the `query` parameter."
508
  },
509
  {
@@ -607,7 +611,8 @@ field_metadata = [
607
  {
608
  "core_name": "compound",
609
  "field_name": "is_orphan",
610
- "type": "string (boolean)", # Solr typically stores booleans as strings or numbers [14, 15, 21, 23]
 
611
  "example_values": ["yes", "no"],
612
  "definition": "Indicates if the compound has received orphan drug designation. Orphan drugs are intended for rare diseases that affect a small population, making them less profitable to develop without incentives. [2, 4, 7, 9, 10]"
613
  },
@@ -640,706 +645,706 @@ field_metadata = [
640
  "definition": "The logarithm of the octanol-water partition coefficient, a measure of a compound's lipophilicity (fat-liking) or hydrophilicity (water-liking). Higher values indicate more lipophilicity. [1, 3, 5, 6, 8]"
641
  },
642
  {
643
- "core_name": "company",
644
- "field_name": "US_state",
645
- "type": "string",
646
- "example_values": ["California", "Massachusetts", "New York"],
647
- "definition": "The US state(s) where the company operates or has a significant presence."
648
- },
649
- {
650
- "core_name": "company",
651
- "field_name": "acquired_by",
652
- "type": "string",
653
- "example_values": ["Pfizer Inc.", "Johnson & Johnson"],
654
- "definition": "Names of companies that have acquired this company."
655
- },
656
- {
657
- "core_name": "company",
658
- "field_name": "address",
659
- "type": "string",
660
- "example_values": ["123 Biotech Ave, Boston, MA 02110", "456 Pharma Rd, San Francisco, CA 94107"],
661
- "definition": "Physical address(es) of the company."
662
- },
663
- {
664
- "core_name": "company",
665
- "field_name": "city",
666
- "type": "string",
667
- "example_values": ["Boston", "San Diego", "Basel"],
668
- "definition": "City/cities where the company is located or has a significant presence."
669
- },
670
- {
671
- "core_name": "company",
672
- "field_name": "company_IPO_date",
673
- "type": "string",
674
- "example_values": ["2005-10-15", "2018-03-22"],
675
- "definition": "Date(s) of the company's Initial Public Offering (IPO) in YYYY-MM-DD format."
676
- },
677
- {
678
- "core_name": "company",
679
- "field_name": "company_US_state",
680
- "type": "string",
681
- "example_values": ["California", "Massachusetts"],
682
- "definition": "The primary US state of the company."
683
- },
684
- {
685
- "core_name": "company",
686
- "field_name": "company_business_model",
687
- "type": "string",
688
- "example_values": ["Drug Discovery", "Drug Development", "Contract Manufacturing", "Diagnostics"],
689
- "definition": "Description(s) of the company's business model(s)."
690
- },
691
- {
692
- "core_name": "company",
693
- "field_name": "company_city",
694
- "type": "string",
695
- "example_values": ["Cambridge", "Princeton"],
696
- "definition": "The primary city of the company."
697
- },
698
- {
699
- "core_name": "company",
700
- "field_name": "company_development_capabilities",
701
- "type": "string",
702
- "example_values": ["Preclinical Research", "Clinical Trials Management", "CMC Development", "Regulatory Affairs"],
703
- "definition": "Capabilities of the company in drug/product development."
704
- },
705
- {
706
- "core_name": "company",
707
- "field_name": "company_gross_profit",
708
- "type": "float",
709
- "example_values": [1500.5, 750.25],
710
- "definition": "Gross profit(s) of the company, possibly over different periods (in millions USD)."
711
- },
712
- {
713
- "core_name": "company",
714
- "field_name": "company_highest_phase",
715
- "type": "string",
716
- "example_values": ["Marketed", "Phase 3", "Preclinical"],
717
- "definition": "The highest development phase achieved by any of the company's products/molecules."
718
- },
719
- {
720
- "core_name": "company",
721
- "field_name": "company_is_active",
722
- "type": "string",
723
- "example_values": ["Yes", "No"],
724
- "definition": "Indicates if the company is currently active ('Yes' or 'No')."
725
- },
726
- {
727
- "core_name": "company",
728
- "field_name": "company_liabilities",
729
- "type": "float",
730
- "example_values": [800.75, 400.0],
731
- "definition": "Total liabilities of the company, possibly over different periods (in millions USD)."
732
- },
733
- {
734
- "core_name": "company",
735
- "field_name": "company_location_facility_type",
736
- "type": "string",
737
- "example_values": ["R&D Center", "Manufacturing Plant", "Headquarters", "Clinical Site"],
738
- "definition": "Type of facilities associated with the company's locations (e.g., 'manufacturing', 'R&D')."
739
- },
740
- {
741
- "core_name": "company",
742
- "field_name": "company_main_territory",
743
- "type": "text_ci",
744
- "example_values": ["North America", "Europe", "Asia-Pacific"],
745
- "definition": "Main geographical territories where the company operates or focuses."
746
- },
747
- {
748
- "core_name": "company",
749
- "field_name": "company_market_cap",
750
- "type": "float",
751
- "example_values": [50000.0, 12500.75],
752
- "definition": "The market capitalization of the company (in millions USD)."
753
- },
754
- {
755
- "core_name": "company",
756
- "field_name": "company_name",
757
- "type": "string",
758
- "example_values": ["BioGen Innovations", "TheraWorks Pharmaceuticals", "MediTech Solutions"],
759
- "definition": "The official name of the company."
760
- },
761
- {
762
- "core_name": "company",
763
- "field_name": "company_net_income",
764
- "type": "float",
765
- "example_values": [250.1, -50.0],
766
- "definition": "Net income(s) of the company, possibly over different periods (in millions USD)."
767
- },
768
- {
769
- "core_name": "company",
770
- "field_name": "company_number_of_employees",
771
- "type": "int",
772
- "example_values": [500, 12000, 75],
773
- "definition": "Number of employees in the company, possibly over different periods."
774
- },
775
- {
776
- "core_name": "company",
777
- "field_name": "company_operating_income",
778
- "type": "float",
779
- "example_values": [300.0, 80.5],
780
- "definition": "Operating income(s) of the company, possibly over different periods (in millions USD)."
781
- },
782
- {
783
- "core_name": "company",
784
- "field_name": "company_private_public",
785
- "type": "string",
786
- "example_values": ["Public", "Private"],
787
- "definition": "Indicates if the company is 'Private' or 'Public'."
788
- },
789
- {
790
- "core_name": "company",
791
- "field_name": "company_r_d",
792
- "type": "float",
793
- "example_values": [120.0, 45.7],
794
- "definition": "Research and Development (R&D) expenditure of the company, possibly over different periods (in millions USD)."
795
- },
796
- {
797
- "core_name": "company",
798
- "field_name": "company_revenue",
799
- "type": "float",
800
- "example_values": [2500.0, 1100.2],
801
- "definition": "Total revenue(s) of the company, possibly over different periods (in millions USD)."
802
- },
803
- {
804
- "core_name": "company",
805
- "field_name": "company_role",
806
- "type": "string",
807
- "example_values": ["Developer", "Manufacturer", "Distributor", "Contract Research Organization (CRO)"],
808
- "definition": "Role(s) of the company (e.g., 'developer', 'manufacturer')."
809
- },
810
- {
811
- "core_name": "company",
812
- "field_name": "company_sales",
813
- "type": "float",
814
- "example_values": [2300.0, 950.0],
815
- "definition": "Sales figures of the company, possibly over different periods (in millions USD)."
816
- },
817
- {
818
- "core_name": "company",
819
- "field_name": "company_sales_ga_expenses",
820
- "type": "float",
821
- "example_values": [500.0, 210.5],
822
- "definition": "Sales, General, and Administrative (SG&A) expenses of the company (in millions USD)."
823
- },
824
- {
825
- "core_name": "company",
826
- "field_name": "company_summary",
827
- "type": "string",
828
- "example_values": ["A leading biotechnology company focused on oncology.", "Specializes in developing advanced drug delivery systems."],
829
- "definition": "A summary or description of the company."
830
- },
831
- {
832
- "core_name": "company",
833
- "field_name": "company_synonym",
834
- "type": "string",
835
- "example_values": ["BioGen Inc.", "TheraWorks Pharma"],
836
- "definition": "Alternative names or synonyms for the company."
837
- },
838
- {
839
- "core_name": "company",
840
- "field_name": "company_territory",
841
- "type": "text_ci",
842
- "example_values": ["United States", "Germany", "Japan", "Global"],
843
- "definition": "Geographical territories where the company operates."
844
- },
845
- {
846
- "core_name": "company",
847
- "field_name": "company_territory_hierarchy",
848
- "type": "hierarchy",
849
- "example_values": ["continent/North America/country/United States", "continent/Europe/country/Germany"],
850
- "definition": "Hierarchical representation of the company's operating territories."
851
- },
852
- {
853
- "core_name": "company",
854
- "field_name": "company_territory_s",
855
- "type": "string",
856
- "example_values": ["United States", "Germany", "Japan"],
857
- "definition": "String representation of the company's operating territories (likely for faceted search)."
858
- },
859
- {
860
- "core_name": "company",
861
- "field_name": "company_total_assets",
862
- "type": "float",
863
- "example_values": [3000.0, 1500.0],
864
- "definition": "Total assets of the company, possibly over different periods (in millions USD)."
865
- },
866
- {
867
- "core_name": "company",
868
- "field_name": "company_total_current_assets",
869
- "type": "float",
870
- "example_values": [1200.0, 600.0],
871
- "definition": "Total current assets of the company, possibly over different periods (in millions USD)."
872
- },
873
- {
874
- "core_name": "company",
875
- "field_name": "company_total_current_liabilities",
876
- "type": "float",
877
- "example_values": [400.0, 200.0],
878
- "definition": "Total current liabilities of the company, possibly over different periods (in millions USD)."
879
- },
880
- {
881
- "core_name": "company",
882
- "field_name": "company_total_equity",
883
- "type": "float",
884
- "example_values": [2600.0, 1300.0],
885
- "definition": "Total equity of the company, possibly over different periods (in millions USD)."
886
- },
887
- {
888
- "core_name": "company",
889
- "field_name": "company_website",
890
- "type": "string",
891
- "example_values": ["https://www.biogeninnovations.com", "https://www.theraworks.com"],
892
- "definition": "The official website URL of the company."
893
- },
894
- {
895
- "core_name": "company",
896
- "field_name": "company_year_founded",
897
- "type": "string",
898
- "example_values": ["1998", "2010"],
899
- "definition": "The year the company was founded."
900
- },
901
- {
902
- "core_name": "company",
903
- "field_name": "conjugate_molecule_type_1",
904
- "type": "text_ci",
905
- "example_values": ["Antibody", "Polymer"],
906
- "definition": "Type of the first conjugate molecule associated with the company's products."
907
- },
908
- {
909
- "core_name": "company",
910
- "field_name": "conjugate_molecule_type_2",
911
- "type": "text_ci",
912
- "example_values": ["Small Molecule", "Peptide"],
913
- "definition": "Type of the second conjugate molecule associated with the company's products."
914
- },
915
- {
916
- "core_name": "company",
917
- "field_name": "conjugate_molecule_types",
918
- "type": "text_ci",
919
- "example_values": ["Antibody-Drug Conjugate", "Polymer-Drug Conjugate"],
920
- "definition": "Overall types of conjugate molecules associated with the company's products."
921
- },
922
- {
923
- "core_name": "company",
924
- "field_name": "corporate_history",
925
- "type": "string",
926
- "example_values": ["Founded in 1998, went public in 2005.", "Acquired by Pfizer in 2020."],
927
- "definition": "Historical information about the company's corporate activities."
928
- },
929
- {
930
- "core_name": "company",
931
- "field_name": "cp_moment_updated",
932
- "type": "string",
933
- "example_values": ["2023-01-15T10:30:00Z", "2024-05-20T14:00:00Z"],
934
- "definition": "Timestamp(s) when the company's profile was last updated in ISO 8601 format."
935
- },
936
- {
937
- "core_name": "company",
938
- "field_name": "drug_delivery_branch",
939
- "type": "string",
940
- "example_values": ["Oral Delivery", "Injectable Delivery", "Transdermal Patches"],
941
- "definition": "Branch(es) of drug delivery technologies related to the company's products."
942
- },
943
- {
944
- "core_name": "company",
945
- "field_name": "drug_delivery_branch_hierarchy",
946
- "type": "hierarchy",
947
- "example_values": ["route/Oral/branch/Oral Delivery", "route/Parenteral/branch/Injectable Delivery"],
948
- "definition": "Hierarchical representation of drug delivery branches."
949
- },
950
- {
951
- "core_name": "company",
952
- "field_name": "drug_delivery_branch_s",
953
- "type": "string",
954
- "example_values": ["Oral Delivery", "Injectable Delivery"],
955
- "definition": "String representation of drug delivery branches (likely for faceted search)."
956
- },
957
- {
958
- "core_name": "company",
959
- "field_name": "drug_delivery_technology",
960
- "type": "string",
961
- "example_values": ["Nanoparticle Delivery", "Liposomal Encapsulation", "Microneedle Systems"],
962
- "definition": "Specific drug delivery technologies used by the company."
963
- },
964
- {
965
- "core_name": "company",
966
- "field_name": "drug_delivery_technology_categories",
967
- "type": "string",
968
- "example_values": ["Advanced Formulations", "Targeted Delivery"],
969
- "definition": "Categories of drug delivery technologies."
970
- },
971
- {
972
- "core_name": "company",
973
- "field_name": "drug_delivery_technology_hierarchy",
974
- "type": "hierarchy",
975
- "example_values": ["category/Advanced Formulations/technology/Nanoparticle Delivery"],
976
- "definition": "Hierarchical representation of drug delivery technologies."
977
- },
978
- {
979
- "core_name": "company",
980
- "field_name": "drug_delivery_technology_names",
981
- "type": "string",
982
- "example_values": ["NanoCaps", "LipoSphere", "MicroDerm"],
983
- "definition": "Names of drug delivery technologies."
984
- },
985
- {
986
- "core_name": "company",
987
- "field_name": "drug_delivery_technology_s",
988
- "type": "string",
989
- "example_values": ["nanoparticle", "liposomal", "microneedle"],
990
- "definition": "String representation of drug delivery technologies (likely for faceted search)."
991
- },
992
- {
993
- "core_name": "company",
994
- "field_name": "efficiency_income_employee",
995
- "type": "float",
996
- "example_values": [50000.0, 75000.0],
997
- "definition": "Efficiency metric: income per employee (in USD)."
998
- },
999
- {
1000
- "core_name": "company",
1001
- "field_name": "efficiency_revenue_employee",
1002
- "type": "float",
1003
- "example_values": [250000.0, 180000.0],
1004
- "definition": "Efficiency metric: revenue per employee (in USD)."
1005
- },
1006
- {
1007
- "core_name": "company",
1008
- "field_name": "efficiency_total_asset_turnover",
1009
- "type": "float",
1010
- "example_values": [0.8, 1.2],
1011
- "definition": "Efficiency metric: total asset turnover."
1012
- },
1013
- {
1014
- "core_name": "company",
1015
- "field_name": "email",
1016
- "type": "string",
1017
- "example_values": ["[email protected]", "[email protected]"],
1018
- "definition": "Email address(es) for the company."
1019
- },
1020
- {
1021
- "core_name": "company",
1022
- "field_name": "facility_state",
1023
- "type": "string",
1024
- "example_values": ["Maryland", "North Carolina"],
1025
- "definition": "State(s) where company facilities are located."
1026
- },
1027
- {
1028
- "core_name": "company",
1029
- "field_name": "facility_territory",
1030
- "type": "text_ci",
1031
- "example_values": ["United States", "Ireland"],
1032
- "definition": "Geographical territories where company facilities are located."
1033
- },
1034
- {
1035
- "core_name": "company",
1036
- "field_name": "facility_territory_id",
1037
- "type": "text_ci",
1038
- "example_values": ["USA", "IRL"],
1039
- "definition": "ID(s) for the geographical territories of company facilities."
1040
- },
1041
- {
1042
- "core_name": "company",
1043
- "field_name": "fax",
1044
- "type": "string",
1045
- "example_values": ["+1-123-456-7890", "+44-20-1234-5678"],
1046
- "definition": "Fax number(s) for the company."
1047
- },
1048
- {
1049
- "core_name": "company",
1050
- "field_name": "founder",
1051
- "type": "string",
1052
- "example_values": ["Dr. Jane Doe", "John Smith"],
1053
- "definition": "The founder(s) of the company."
1054
- },
1055
- {
1056
- "core_name": "company",
1057
- "field_name": "generic_or_innovator",
1058
- "type": "string",
1059
- "example_values": ["Innovator", "Generic"],
1060
- "definition": "Indicates if the company primarily develops 'Generic' or 'Innovator' products."
1061
- },
1062
- {
1063
- "core_name": "company",
1064
- "field_name": "highest_phase",
1065
- "type": "string",
1066
- "example_values": ["Marketed", "Phase 3"],
1067
- "definition": "The highest development phase achieved by any of the company's products/molecules."
1068
- },
1069
- {
1070
- "core_name": "company",
1071
- "field_name": "id",
1072
- "type": "string",
1073
- "example_values": ["COMP_0001", "COMP_0002", "COMP_0003"],
1074
- "definition": "Unique identifier for the company."
1075
- },
1076
- {
1077
- "core_name": "company",
1078
- "field_name": "investor_company",
1079
- "type": "string",
1080
- "example_values": ["BlackRock", "Sequoia Capital"],
1081
- "definition": "Names of companies that have invested in this company."
1082
- },
1083
- {
1084
- "core_name": "company",
1085
- "field_name": "investors",
1086
- "type": "string",
1087
- "example_values": ["VentureCorp Capital", "Angel Investors Network"],
1088
- "definition": "Names of individual or institutional investors in the company."
1089
- },
1090
- {
1091
- "core_name": "company",
1092
- "field_name": "last_update_time",
1093
- "type": "date",
1094
- "example_values": ["2023-11-01T08:00:00Z", "2024-01-25T16:45:00Z"],
1095
- "definition": "Timestamp(s) of the last update to the company's record in ISO 8601 format."
1096
- },
1097
- {
1098
- "core_name": "company",
1099
- "field_name": "last_update_user",
1100
- "type": "string",
1101
- "example_values": ["admin_user_1", "data_editor_2"],
1102
- "definition": "User(s) who last updated the company's record."
1103
- },
1104
- {
1105
- "core_name": "company",
1106
- "field_name": "liquidity_data_current_ratio_mrq",
1107
- "type": "float",
1108
- "example_values": [1.8, 2.5],
1109
- "definition": "Liquidity data: current ratio for the most recent quarter (MRQ)."
1110
- },
1111
- {
1112
- "core_name": "company",
1113
- "field_name": "liquidity_data_current_ratio_yoy",
1114
- "type": "float",
1115
- "example_values": [1.9, 2.3],
1116
- "definition": "Liquidity data: current ratio year-over-year (YOY)."
1117
- },
1118
- {
1119
- "core_name": "company",
1120
- "field_name": "liquidity_data_quick_ratio_mrq",
1121
- "type": "float",
1122
- "example_values": [1.2, 1.7],
1123
- "definition": "Liquidity data: quick ratio for the most recent quarter (MRQ)."
1124
- },
1125
- {
1126
- "core_name": "company",
1127
- "field_name": "liquidity_data_quick_ratio_yoy",
1128
- "type": "float",
1129
- "example_values": [1.3, 1.6],
1130
- "definition": "Liquidity data: quick ratio year-over-year (YOY)."
1131
- },
1132
- {
1133
- "core_name": "company",
1134
- "field_name": "major_business_model",
1135
- "type": "string",
1136
- "example_values": ["Biopharmaceutical Development", "Medical Device Manufacturing"],
1137
- "definition": "Major business model(s) of the company."
1138
- },
1139
- {
1140
- "core_name": "company",
1141
- "field_name": "marketing_status",
1142
- "type": "text_ci",
1143
- "example_values": ["Marketed", "Pre-Market", "Discontinued"],
1144
- "definition": "Current marketing status of the company's products (e.g., 'pre-market', 'on-market')."
1145
- },
1146
- {
1147
- "core_name": "company",
1148
- "field_name": "mechanism_type",
1149
- "type": "string",
1150
- "example_values": ["Kinase Inhibitor", "Monoclonal Antibody", "Gene Therapy"],
1151
- "definition": "Mechanism of action type(s) for molecules/products associated with the company."
1152
- },
1153
- {
1154
- "core_name": "company",
1155
- "field_name": "mechanism_type_hierarchy",
1156
- "type": "hierarchy",
1157
- "example_values": ["type/Kinase Inhibitor", "type/Monoclonal Antibody"],
1158
- "definition": "Hierarchical representation of mechanism types."
1159
- },
1160
- {
1161
- "core_name": "company",
1162
- "field_name": "molecule_api_group",
1163
- "type": "string",
1164
- "example_values": ["Small Molecules", "Biologics", "Gene Therapies"],
1165
- "definition": "API (Active Pharmaceutical Ingredient) group(s) of molecules associated with the company."
1166
- },
1167
- {
1168
- "core_name": "company",
1169
- "field_name": "molecule_name",
1170
- "type": "string",
1171
- "example_values": ["DrugX-200", "mAb-Y101", "GeneEdit-Z"],
1172
- "definition": "Names of molecules associated with the company."
1173
- },
1174
 
1175
- {
1176
- "core_name": "company",
1177
- "field_name": "molecule_type",
1178
- "type": "string",
1179
- "example_values": ["Small Molecule", "Monoclonal Antibody", "CRISPR-Cas9"],
1180
- "definition": "Type(s) of molecules associated with the company."
1181
- },
1182
- {
1183
- "core_name": "company",
1184
- "field_name": "original_financial_rd",
1185
- "type": "string",
1186
- "example_values": ["$120,000,000", "€45M"],
1187
- "definition": "Original raw R&D financial data (as string)."
1188
- },
1189
- {
1190
- "core_name": "company",
1191
- "field_name": "original_financial_total_revenue",
1192
- "type": "string",
1193
- "example_values": ["$2.5 Billion", "£1.1B"],
1194
- "definition": "Original raw total revenue financial data (as string)."
1195
- },
1196
- {
1197
- "core_name": "company",
1198
- "field_name": "owner_company_private_public",
1199
- "type": "string",
1200
- "example_values": ["Public", "Private"],
1201
- "definition": "Indicates if the owner company is 'Private' or 'Public'."
1202
- },
1203
- {
1204
- "core_name": "company",
1205
- "field_name": "phase",
1206
- "type": "string",
1207
- "example_values": ["Phase 1", "Phase 2", "Preclinical", "Marketed"],
1208
- "definition": "Development phase(s) of products/molecules associated with the company."
1209
- },
1210
- {
1211
- "core_name": "company",
1212
- "field_name": "product_name",
1213
- "type": "string",
1214
- "example_values": ["OncoTherapy-A", "CardioGuard-XL", "NeuroBoost-System"],
1215
- "definition": "Names of products developed or marketed by the company."
1216
- },
1217
- {
1218
- "core_name": "company",
1219
- "field_name": "product_synonym",
1220
- "type": "string",
1221
- "example_values": ["OncoTA", "CGXL"],
1222
- "definition": "Synonyms for products associated with the company."
1223
- },
1224
- {
1225
- "core_name": "company",
1226
- "field_name": "profitability_data_operating_margin_ttm",
1227
- "type": "float",
1228
- "example_values": [0.15, 0.22],
1229
- "definition": "Profitability data: operating margin for the trailing twelve months (TTM)."
1230
- },
1231
- {
1232
- "core_name": "company",
1233
- "field_name": "profitability_data_profit_margin",
1234
- "type": "float",
1235
- "example_values": [0.10, 0.18],
1236
- "definition": "Profitability data: overall profit margin."
1237
- },
1238
- {
1239
- "core_name": "company",
1240
- "field_name": "profitability_data_return_on_assets_ttm",
1241
- "type": "float",
1242
- "example_values": [0.08, 0.14],
1243
- "definition": "Profitability data: return on assets for the trailing twelve months (TTM)."
1244
- },
1245
- {
1246
- "core_name": "company",
1247
- "field_name": "profitability_data_return_on_equity_ttm",
1248
- "type": "float",
1249
- "example_values": [0.12, 0.20],
1250
- "definition": "Profitability data: return on equity for the trailing twelve months (TTM)."
1251
- },
1252
- {
1253
- "core_name": "company",
1254
- "field_name": "route",
1255
- "type": "string",
1256
- "example_values": ["Oral", "Intravenous", "Topical"],
1257
- "definition": "Route(s) of administration for products/molecules associated with the company."
1258
- },
1259
- {
1260
- "core_name": "company",
1261
- "field_name": "route_branch",
1262
- "type": "string",
1263
- "example_values": ["Oral Solid Dosage", "Injectable Solutions"],
1264
- "definition": "Branch(es) of route of administration."
1265
- },
1266
- {
1267
- "core_name": "company",
1268
- "field_name": "route_branch_hierarchy",
1269
- "type": "hierarchy",
1270
- "example_values": ["route/Oral/branch/Oral Solid Dosage"],
1271
- "definition": "Hierarchical representation of route of administration branches."
1272
- },
1273
- {
1274
- "core_name": "company",
1275
- "field_name": "route_branch_s",
1276
- "type": "string",
1277
- "example_values": ["Oral Solid Dosage", "Injectable Solutions"],
1278
- "definition": "String representation of route of administration branches (likely for faceted search)."
1279
- },
1280
- {
1281
- "core_name": "company",
1282
- "field_name": "route_s",
1283
- "type": "text_ci",
1284
- "example_values": ["oral", "injection", "topical"],
1285
- "definition": "String representation of routes of administration (likely for faceted search)."
1286
- },
1287
- {
1288
- "core_name": "company",
1289
- "field_name": "subsidiary",
1290
- "type": "string",
1291
- "example_values": ["BioGen Labs", "TheraWorks Diagnostics"],
1292
- "definition": "Names of subsidiaries of this company."
1293
- },
1294
- {
1295
- "core_name": "company",
1296
- "field_name": "subsidiary_of_company",
1297
- "type": "string",
1298
- "example_values": ["Global Pharma Corp", "United Healthcare Inc."],
1299
- "definition": "Names of parent companies if this company is a subsidiary."
1300
- },
1301
- {
1302
- "core_name": "company",
1303
- "field_name": "telephone",
1304
- "type": "string",
1305
- "example_values": ["+1-555-123-4567", "+49-30-9876-5432"],
1306
- "definition": "Telephone number(s) for the company."
1307
- },
1308
- {
1309
- "core_name": "company",
1310
- "field_name": "therapeutic_category",
1311
- "type": "string",
1312
- "example_values": ["Oncology", "Cardiology", "Neurology"],
1313
- "definition": "Therapeutic category/categories of products/molecules associated with the company."
1314
- },
1315
- {
1316
- "core_name": "company",
1317
- "field_name": "therapeutic_category_hierarchy",
1318
- "type": "hierarchy",
1319
- "example_values": ["disease/Cancer/category/Oncology", "disease/Cardiovascular/category/Cardiology"],
1320
- "definition": "Hierarchical representation of therapeutic categories."
1321
- },
1322
- {
1323
- "core_name": "company",
1324
- "field_name": "therapeutic_category_s",
1325
- "type": "text_ci",
1326
- "example_values": ["cancer", "heart disease", "nervous system disorders"],
1327
- "definition": "String representation of therapeutic categories (likely for faceted search)."
1328
- },
1329
- {
1330
- "core_name": "company",
1331
- "field_name": "therapeutic_group_id",
1332
- "type": "string",
1333
- "example_values": ["THERA_ONC_01", "THERA_CARD_02"],
1334
- "definition": "ID(s) for the therapeutic group(s) of products/molecules associated with the company."
1335
- },
1336
- {
1337
- "core_name": "company",
1338
- "field_name": "type",
1339
- "type": "string",
1340
- "example_values": ["Biotech", "Pharmaceutical", "CRO", "Medical Device Company"],
1341
- "definition": "Type(s) of the company (e.g., 'Biotech', 'Pharma')."
1342
- }
1343
  ]
1344
 
1345
 
@@ -1353,4 +1358,4 @@ def format_metadata_for_prompt(core_name="news"):
1353
  formatted_string += f" - **Type**: {field['type']}\n"
1354
  formatted_string += f" - **Definition**: {field['definition']}\n"
1355
  formatted_string += f" - **Examples**: {', '.join(map(str, field['example_values']))}\n\n"
1356
- return formatted_string
 
161
  "example_values": [50, 120.5, 176.157, 1000],
162
  "definition": "The total value of a financial deal, in millions of USD. This is the primary numeric field for financial aggregations (sum, avg, etc.). To use this, you must also filter for news that has a deal value, e.g., 'total_deal_value_in_million:[0 TO *]'."
163
  },
164
+
165
  # NEW 'deal' CORE METADATA
166
  {
167
  "core_name": "deal",
 
370
  "core_name": "compound",
371
  "field_name": "molecule_id",
372
  "type": "string",
373
+ # Placeholder, not in provided doc examples directly.
374
+ "example_values": ["mol_12345", "mol_67890"],
375
  "definition": "Unique identifier for the molecule associated with the compound."
376
  },
377
  {
 
448
  "core_name": "compound",
449
  "field_name": "owner_company_name",
450
  "type": "string (multi-valued, exact match, for faceting)",
451
+ # Placeholder, based on common patterns
452
+ "example_values": ["AstraZeneca Plc"],
453
  "definition": "The canonical, standardized name(s) of the owner company of the compound. Use for faceting on owner companies."
454
  },
455
  {
 
498
  "core_name": "compound",
499
  "field_name": "mechanism",
500
  "type": "string (multi-valued, specific, for faceting)",
501
+ # Placeholder
502
+ "example_values": ["Beta-1 Adrenergic Receptor Antagonist"],
503
  "definition": "The specific mechanism of action of the compound. Use for very specific mechanism queries or faceting."
504
  },
505
  {
506
  "core_name": "compound",
507
  "field_name": "mechanism_s",
508
  "type": "string (multi-valued, for searching)",
509
+ # Placeholder
510
+ "example_values": ["beta blocker", "adrenergic receptor antagonist"],
511
  "definition": "Broader, multi-valued mechanisms of action and their synonyms. **Use this field for broad mechanism searches** in the `query` parameter."
512
  },
513
  {
 
611
  {
612
  "core_name": "compound",
613
  "field_name": "is_orphan",
614
+ # Boolean-like flag stored as a "yes"/"no" string (see example_values), not as a native boolean.
615
+ "type": "string (boolean)",
616
  "example_values": ["yes", "no"],
617
  "definition": "Indicates if the compound has received orphan drug designation. Orphan drugs are intended for rare diseases that affect a small population, making them less profitable to develop without incentives. [2, 4, 7, 9, 10]"
618
  },
 
645
  "definition": "The logarithm of the octanol-water partition coefficient, a measure of a compound's lipophilicity (fat-liking) or hydrophilicity (water-liking). Higher values indicate more lipophilicity. [1, 3, 5, 6, 8]"
646
  },
647
  {
648
+ "core_name": "company",
649
+ "field_name": "US_state",
650
+ "type": "string",
651
+ "example_values": ["California", "Massachusetts", "New York"],
652
+ "definition": "The US state(s) where the company operates or has a significant presence."
653
+ },
654
+ {
655
+ "core_name": "company",
656
+ "field_name": "acquired_by",
657
+ "type": "string",
658
+ "example_values": ["Pfizer Inc.", "Johnson & Johnson"],
659
+ "definition": "Names of companies that have acquired this company."
660
+ },
661
+ {
662
+ "core_name": "company",
663
+ "field_name": "address",
664
+ "type": "string",
665
+ "example_values": ["123 Biotech Ave, Boston, MA 02110", "456 Pharma Rd, San Francisco, CA 94107"],
666
+ "definition": "Physical address(es) of the company."
667
+ },
668
+ {
669
+ "core_name": "company",
670
+ "field_name": "city",
671
+ "type": "string",
672
+ "example_values": ["Boston", "San Diego", "Basel"],
673
+ "definition": "City/cities where the company is located or has a significant presence."
674
+ },
675
+ {
676
+ "core_name": "company",
677
+ "field_name": "company_IPO_date",
678
+ "type": "string",
679
+ "example_values": ["2005-10-15", "2018-03-22"],
680
+ "definition": "Date(s) of the company's Initial Public Offering (IPO) in YYYY-MM-DD format."
681
+ },
682
+ {
683
+ "core_name": "company",
684
+ "field_name": "company_US_state",
685
+ "type": "string",
686
+ "example_values": ["California", "Massachusetts"],
687
+ "definition": "The primary US state of the company."
688
+ },
689
+ {
690
+ "core_name": "company",
691
+ "field_name": "company_business_model",
692
+ "type": "string",
693
+ "example_values": ["Drug Discovery", "Drug Development", "Contract Manufacturing", "Diagnostics"],
694
+ "definition": "Description(s) of the company's business model(s)."
695
+ },
696
+ {
697
+ "core_name": "company",
698
+ "field_name": "company_city",
699
+ "type": "string",
700
+ "example_values": ["Cambridge", "Princeton"],
701
+ "definition": "The primary city of the company."
702
+ },
703
+ {
704
+ "core_name": "company",
705
+ "field_name": "company_development_capabilities",
706
+ "type": "string",
707
+ "example_values": ["Preclinical Research", "Clinical Trials Management", "CMC Development", "Regulatory Affairs"],
708
+ "definition": "Capabilities of the company in drug/product development."
709
+ },
710
+ {
711
+ "core_name": "company",
712
+ "field_name": "company_gross_profit",
713
+ "type": "float",
714
+ "example_values": [1500.5, 750.25],
715
+ "definition": "Gross profit(s) of the company, possibly over different periods (in millions USD)."
716
+ },
717
+ {
718
+ "core_name": "company",
719
+ "field_name": "company_highest_phase",
720
+ "type": "string",
721
+ "example_values": ["Marketed", "Phase 3", "Preclinical"],
722
+ "definition": "The highest development phase achieved by any of the company's products/molecules."
723
+ },
724
+ {
725
+ "core_name": "company",
726
+ "field_name": "company_is_active",
727
+ "type": "string",
728
+ "example_values": ["Yes", "No"],
729
+ "definition": "Indicates if the company is currently active ('Yes' or 'No')."
730
+ },
731
+ {
732
+ "core_name": "company",
733
+ "field_name": "company_liabilities",
734
+ "type": "float",
735
+ "example_values": [800.75, 400.0],
736
+ "definition": "Total liabilities of the company, possibly over different periods (in millions USD)."
737
+ },
738
+ {
739
+ "core_name": "company",
740
+ "field_name": "company_location_facility_type",
741
+ "type": "string",
742
+ "example_values": ["R&D Center", "Manufacturing Plant", "Headquarters", "Clinical Site"],
743
+ "definition": "Type of facilities associated with the company's locations (e.g., 'manufacturing', 'R&D')."
744
+ },
745
+ {
746
+ "core_name": "company",
747
+ "field_name": "company_main_territory",
748
+ "type": "text_ci",
749
+ "example_values": ["North America", "Europe", "Asia-Pacific"],
750
+ "definition": "Main geographical territories where the company operates or focuses."
751
+ },
752
+ {
753
+ "core_name": "company",
754
+ "field_name": "company_market_cap",
755
+ "type": "float",
756
+ "example_values": [50000.0, 12500.75],
757
+ "definition": "The market capitalization of the company (in millions USD)."
758
+ },
759
+ {
760
+ "core_name": "company",
761
+ "field_name": "company_name",
762
+ "type": "string",
763
+ "example_values": ["BioGen Innovations", "TheraWorks Pharmaceuticals", "MediTech Solutions"],
764
+ "definition": "The official name of the company."
765
+ },
766
+ {
767
+ "core_name": "company",
768
+ "field_name": "company_net_income",
769
+ "type": "float",
770
+ "example_values": [250.1, -50.0],
771
+ "definition": "Net income(s) of the company, possibly over different periods (in millions USD)."
772
+ },
773
+ {
774
+ "core_name": "company",
775
+ "field_name": "company_number_of_employees",
776
+ "type": "int",
777
+ "example_values": [500, 12000, 75],
778
+ "definition": "Number of employees in the company, possibly over different periods."
779
+ },
780
+ {
781
+ "core_name": "company",
782
+ "field_name": "company_operating_income",
783
+ "type": "float",
784
+ "example_values": [300.0, 80.5],
785
+ "definition": "Operating income(s) of the company, possibly over different periods (in millions USD)."
786
+ },
787
+ {
788
+ "core_name": "company",
789
+ "field_name": "company_private_public",
790
+ "type": "string",
791
+ "example_values": ["Public", "Private"],
792
+ "definition": "Indicates if the company is 'Private' or 'Public'."
793
+ },
794
+ {
795
+ "core_name": "company",
796
+ "field_name": "company_r_d",
797
+ "type": "float",
798
+ "example_values": [120.0, 45.7],
799
+ "definition": "Research and Development (R&D) expenditure of the company, possibly over different periods (in millions USD)."
800
+ },
801
+ {
802
+ "core_name": "company",
803
+ "field_name": "company_revenue",
804
+ "type": "float",
805
+ "example_values": [2500.0, 1100.2],
806
+ "definition": "Total revenue(s) of the company, possibly over different periods (in millions USD)."
807
+ },
808
+ {
809
+ "core_name": "company",
810
+ "field_name": "company_role",
811
+ "type": "string",
812
+ "example_values": ["Developer", "Manufacturer", "Distributor", "Contract Research Organization (CRO)"],
813
+ "definition": "Role(s) of the company (e.g., 'developer', 'manufacturer')."
814
+ },
815
+ {
816
+ "core_name": "company",
817
+ "field_name": "company_sales",
818
+ "type": "float",
819
+ "example_values": [2300.0, 950.0],
820
+ "definition": "Sales figures of the company, possibly over different periods (in millions USD)."
821
+ },
822
+ {
823
+ "core_name": "company",
824
+ "field_name": "company_sales_ga_expenses",
825
+ "type": "float",
826
+ "example_values": [500.0, 210.5],
827
+ "definition": "Sales, General, and Administrative (SG&A) expenses of the company (in millions USD)."
828
+ },
829
+ {
830
+ "core_name": "company",
831
+ "field_name": "company_summary",
832
+ "type": "string",
833
+ "example_values": ["A leading biotechnology company focused on oncology.", "Specializes in developing advanced drug delivery systems."],
834
+ "definition": "A summary or description of the company."
835
+ },
836
+ {
837
+ "core_name": "company",
838
+ "field_name": "company_synonym",
839
+ "type": "string",
840
+ "example_values": ["BioGen Inc.", "TheraWorks Pharma"],
841
+ "definition": "Alternative names or synonyms for the company."
842
+ },
843
+ {
844
+ "core_name": "company",
845
+ "field_name": "company_territory",
846
+ "type": "text_ci",
847
+ "example_values": ["United States", "Germany", "Japan", "Global"],
848
+ "definition": "Geographical territories where the company operates."
849
+ },
850
+ {
851
+ "core_name": "company",
852
+ "field_name": "company_territory_hierarchy",
853
+ "type": "hierarchy",
854
+ "example_values": ["continent/North America/country/United States", "continent/Europe/country/Germany"],
855
+ "definition": "Hierarchical representation of the company's operating territories."
856
+ },
857
+ {
858
+ "core_name": "company",
859
+ "field_name": "company_territory_s",
860
+ "type": "string",
861
+ "example_values": ["United States", "Germany", "Japan"],
862
+ "definition": "String representation of the company's operating territories (likely for faceted search)."
863
+ },
864
+ {
865
+ "core_name": "company",
866
+ "field_name": "company_total_assets",
867
+ "type": "float",
868
+ "example_values": [3000.0, 1500.0],
869
+ "definition": "Total assets of the company, possibly over different periods (in millions USD)."
870
+ },
871
+ {
872
+ "core_name": "company",
873
+ "field_name": "company_total_current_assets",
874
+ "type": "float",
875
+ "example_values": [1200.0, 600.0],
876
+ "definition": "Total current assets of the company, possibly over different periods (in millions USD)."
877
+ },
878
+ {
879
+ "core_name": "company",
880
+ "field_name": "company_total_current_liabilities",
881
+ "type": "float",
882
+ "example_values": [400.0, 200.0],
883
+ "definition": "Total current liabilities of the company, possibly over different periods (in millions USD)."
884
+ },
885
+ {
886
+ "core_name": "company",
887
+ "field_name": "company_total_equity",
888
+ "type": "float",
889
+ "example_values": [2600.0, 1300.0],
890
+ "definition": "Total equity of the company, possibly over different periods (in millions USD)."
891
+ },
892
+ {
893
+ "core_name": "company",
894
+ "field_name": "company_website",
895
+ "type": "string",
896
+ "example_values": ["https://www.biogeninnovations.com", "https://www.theraworks.com"],
897
+ "definition": "The official website URL of the company."
898
+ },
899
+ {
900
+ "core_name": "company",
901
+ "field_name": "company_year_founded",
902
+ "type": "string",
903
+ "example_values": ["1998", "2010"],
904
+ "definition": "The year the company was founded."
905
+ },
906
+ {
907
+ "core_name": "company",
908
+ "field_name": "conjugate_molecule_type_1",
909
+ "type": "text_ci",
910
+ "example_values": ["Antibody", "Polymer"],
911
+ "definition": "Type of the first conjugate molecule associated with the company's products."
912
+ },
913
+ {
914
+ "core_name": "company",
915
+ "field_name": "conjugate_molecule_type_2",
916
+ "type": "text_ci",
917
+ "example_values": ["Small Molecule", "Peptide"],
918
+ "definition": "Type of the second conjugate molecule associated with the company's products."
919
+ },
920
+ {
921
+ "core_name": "company",
922
+ "field_name": "conjugate_molecule_types",
923
+ "type": "text_ci",
924
+ "example_values": ["Antibody-Drug Conjugate", "Polymer-Drug Conjugate"],
925
+ "definition": "Overall types of conjugate molecules associated with the company's products."
926
+ },
927
+ {
928
+ "core_name": "company",
929
+ "field_name": "corporate_history",
930
+ "type": "string",
931
+ "example_values": ["Founded in 1998, went public in 2005.", "Acquired by Pfizer in 2020."],
932
+ "definition": "Historical information about the company's corporate activities."
933
+ },
934
+ {
935
+ "core_name": "company",
936
+ "field_name": "cp_moment_updated",
937
+ "type": "string",
938
+ "example_values": ["2023-01-15T10:30:00Z", "2024-05-20T14:00:00Z"],
939
+ "definition": "Timestamp(s) when the company's profile was last updated in ISO 8601 format."
940
+ },
941
+ {
942
+ "core_name": "company",
943
+ "field_name": "drug_delivery_branch",
944
+ "type": "string",
945
+ "example_values": ["Oral Delivery", "Injectable Delivery", "Transdermal Patches"],
946
+ "definition": "Branch(es) of drug delivery technologies related to the company's products."
947
+ },
948
+ {
949
+ "core_name": "company",
950
+ "field_name": "drug_delivery_branch_hierarchy",
951
+ "type": "hierarchy",
952
+ "example_values": ["route/Oral/branch/Oral Delivery", "route/Parenteral/branch/Injectable Delivery"],
953
+ "definition": "Hierarchical representation of drug delivery branches."
954
+ },
955
+ {
956
+ "core_name": "company",
957
+ "field_name": "drug_delivery_branch_s",
958
+ "type": "string",
959
+ "example_values": ["Oral Delivery", "Injectable Delivery"],
960
+ "definition": "String representation of drug delivery branches (likely for faceted search)."
961
+ },
962
+ {
963
+ "core_name": "company",
964
+ "field_name": "drug_delivery_technology",
965
+ "type": "string",
966
+ "example_values": ["Nanoparticle Delivery", "Liposomal Encapsulation", "Microneedle Systems"],
967
+ "definition": "Specific drug delivery technologies used by the company."
968
+ },
969
+ {
970
+ "core_name": "company",
971
+ "field_name": "drug_delivery_technology_categories",
972
+ "type": "string",
973
+ "example_values": ["Advanced Formulations", "Targeted Delivery"],
974
+ "definition": "Categories of drug delivery technologies."
975
+ },
976
+ {
977
+ "core_name": "company",
978
+ "field_name": "drug_delivery_technology_hierarchy",
979
+ "type": "hierarchy",
980
+ "example_values": ["category/Advanced Formulations/technology/Nanoparticle Delivery"],
981
+ "definition": "Hierarchical representation of drug delivery technologies."
982
+ },
983
+ {
984
+ "core_name": "company",
985
+ "field_name": "drug_delivery_technology_names",
986
+ "type": "string",
987
+ "example_values": ["NanoCaps", "LipoSphere", "MicroDerm"],
988
+ "definition": "Names of drug delivery technologies."
989
+ },
990
+ {
991
+ "core_name": "company",
992
+ "field_name": "drug_delivery_technology_s",
993
+ "type": "string",
994
+ "example_values": ["nanoparticle", "liposomal", "microneedle"],
995
+ "definition": "String representation of drug delivery technologies (likely for faceted search)."
996
+ },
997
+ {
998
+ "core_name": "company",
999
+ "field_name": "efficiency_income_employee",
1000
+ "type": "float",
1001
+ "example_values": [50000.0, 75000.0],
1002
+ "definition": "Efficiency metric: income per employee (in USD)."
1003
+ },
1004
+ {
1005
+ "core_name": "company",
1006
+ "field_name": "efficiency_revenue_employee",
1007
+ "type": "float",
1008
+ "example_values": [250000.0, 180000.0],
1009
+ "definition": "Efficiency metric: revenue per employee (in USD)."
1010
+ },
1011
+ {
1012
+ "core_name": "company",
1013
+ "field_name": "efficiency_total_asset_turnover",
1014
+ "type": "float",
1015
+ "example_values": [0.8, 1.2],
1016
+ "definition": "Efficiency metric: total asset turnover."
1017
+ },
1018
+ {
1019
+ "core_name": "company",
1020
+ "field_name": "email",
1021
+ "type": "string",
1022
+ "example_values": ["info@biogeninnovations.com", "contact@theraworks.com"],
1023
+ "definition": "Email address(es) for the company."
1024
+ },
1025
+ {
1026
+ "core_name": "company",
1027
+ "field_name": "facility_state",
1028
+ "type": "string",
1029
+ "example_values": ["Maryland", "North Carolina"],
1030
+ "definition": "State(s) where company facilities are located."
1031
+ },
1032
+ {
1033
+ "core_name": "company",
1034
+ "field_name": "facility_territory",
1035
+ "type": "text_ci",
1036
+ "example_values": ["United States", "Ireland"],
1037
+ "definition": "Geographical territories where company facilities are located."
1038
+ },
1039
+ {
1040
+ "core_name": "company",
1041
+ "field_name": "facility_territory_id",
1042
+ "type": "text_ci",
1043
+ "example_values": ["USA", "IRL"],
1044
+ "definition": "ID(s) for the geographical territories of company facilities."
1045
+ },
1046
+ {
1047
+ "core_name": "company",
1048
+ "field_name": "fax",
1049
+ "type": "string",
1050
+ "example_values": ["+1-123-456-7890", "+44-20-1234-5678"],
1051
+ "definition": "Fax number(s) for the company."
1052
+ },
1053
+ {
1054
+ "core_name": "company",
1055
+ "field_name": "founder",
1056
+ "type": "string",
1057
+ "example_values": ["Dr. Jane Doe", "John Smith"],
1058
+ "definition": "The founder(s) of the company."
1059
+ },
1060
+ {
1061
+ "core_name": "company",
1062
+ "field_name": "generic_or_innovator",
1063
+ "type": "string",
1064
+ "example_values": ["Innovator", "Generic"],
1065
+ "definition": "Indicates if the company primarily develops 'Generic' or 'Innovator' products."
1066
+ },
1067
+ {
1068
+ "core_name": "company",
1069
+ "field_name": "highest_phase",
1070
+ "type": "string",
1071
+ "example_values": ["Marketed", "Phase 3"],
1072
+ "definition": "The highest development phase achieved by any of the company's products/molecules."
1073
+ },
1074
+ {
1075
+ "core_name": "company",
1076
+ "field_name": "id",
1077
+ "type": "string",
1078
+ "example_values": ["COMP_0001", "COMP_0002", "COMP_0003"],
1079
+ "definition": "Unique identifier for the company."
1080
+ },
1081
+ {
1082
+ "core_name": "company",
1083
+ "field_name": "investor_company",
1084
+ "type": "string",
1085
+ "example_values": ["BlackRock", "Sequoia Capital"],
1086
+ "definition": "Names of companies that have invested in this company."
1087
+ },
1088
+ {
1089
+ "core_name": "company",
1090
+ "field_name": "investors",
1091
+ "type": "string",
1092
+ "example_values": ["VentureCorp Capital", "Angel Investors Network"],
1093
+ "definition": "Names of individual or institutional investors in the company."
1094
+ },
1095
+ {
1096
+ "core_name": "company",
1097
+ "field_name": "last_update_time",
1098
+ "type": "date",
1099
+ "example_values": ["2023-11-01T08:00:00Z", "2024-01-25T16:45:00Z"],
1100
+ "definition": "Timestamp(s) of the last update to the company's record in ISO 8601 format."
1101
+ },
1102
+ {
1103
+ "core_name": "company",
1104
+ "field_name": "last_update_user",
1105
+ "type": "string",
1106
+ "example_values": ["admin_user_1", "data_editor_2"],
1107
+ "definition": "User(s) who last updated the company's record."
1108
+ },
1109
+ {
1110
+ "core_name": "company",
1111
+ "field_name": "liquidity_data_current_ratio_mrq",
1112
+ "type": "float",
1113
+ "example_values": [1.8, 2.5],
1114
+ "definition": "Liquidity data: current ratio for the most recent quarter (MRQ)."
1115
+ },
1116
+ {
1117
+ "core_name": "company",
1118
+ "field_name": "liquidity_data_current_ratio_yoy",
1119
+ "type": "float",
1120
+ "example_values": [1.9, 2.3],
1121
+ "definition": "Liquidity data: current ratio year-over-year (YOY)."
1122
+ },
1123
+ {
1124
+ "core_name": "company",
1125
+ "field_name": "liquidity_data_quick_ratio_mrq",
1126
+ "type": "float",
1127
+ "example_values": [1.2, 1.7],
1128
+ "definition": "Liquidity data: quick ratio for the most recent quarter (MRQ)."
1129
+ },
1130
+ {
1131
+ "core_name": "company",
1132
+ "field_name": "liquidity_data_quick_ratio_yoy",
1133
+ "type": "float",
1134
+ "example_values": [1.3, 1.6],
1135
+ "definition": "Liquidity data: quick ratio year-over-year (YOY)."
1136
+ },
1137
+ {
1138
+ "core_name": "company",
1139
+ "field_name": "major_business_model",
1140
+ "type": "string",
1141
+ "example_values": ["Biopharmaceutical Development", "Medical Device Manufacturing"],
1142
+ "definition": "Major business model(s) of the company."
1143
+ },
1144
+ {
1145
+ "core_name": "company",
1146
+ "field_name": "marketing_status",
1147
+ "type": "text_ci",
1148
+ "example_values": ["Marketed", "Pre-Market", "Discontinued"],
1149
+ "definition": "Current marketing status of the company's products (e.g., 'pre-market', 'on-market')."
1150
+ },
1151
+ {
1152
+ "core_name": "company",
1153
+ "field_name": "mechanism_type",
1154
+ "type": "string",
1155
+ "example_values": ["Kinase Inhibitor", "Monoclonal Antibody", "Gene Therapy"],
1156
+ "definition": "Mechanism of action type(s) for molecules/products associated with the company."
1157
+ },
1158
+ {
1159
+ "core_name": "company",
1160
+ "field_name": "mechanism_type_hierarchy",
1161
+ "type": "hierarchy",
1162
+ "example_values": ["type/Kinase Inhibitor", "type/Monoclonal Antibody"],
1163
+ "definition": "Hierarchical representation of mechanism types."
1164
+ },
1165
+ {
1166
+ "core_name": "company",
1167
+ "field_name": "molecule_api_group",
1168
+ "type": "string",
1169
+ "example_values": ["Small Molecules", "Biologics", "Gene Therapies"],
1170
+ "definition": "API (Active Pharmaceutical Ingredient) group(s) of molecules associated with the company."
1171
+ },
1172
+ {
1173
+ "core_name": "company",
1174
+ "field_name": "molecule_name",
1175
+ "type": "string",
1176
+ "example_values": ["DrugX-200", "mAb-Y101", "GeneEdit-Z"],
1177
+ "definition": "Names of molecules associated with the company."
1178
+ },
1179
 
1180
+ {
1181
+ "core_name": "company",
1182
+ "field_name": "molecule_type",
1183
+ "type": "string",
1184
+ "example_values": ["Small Molecule", "Monoclonal Antibody", "CRISPR-Cas9"],
1185
+ "definition": "Type(s) of molecules associated with the company."
1186
+ },
1187
+ {
1188
+ "core_name": "company",
1189
+ "field_name": "original_financial_rd",
1190
+ "type": "string",
1191
+ "example_values": ["$120,000,000", "€45M"],
1192
+ "definition": "Original raw R&D financial data (as string)."
1193
+ },
1194
+ {
1195
+ "core_name": "company",
1196
+ "field_name": "original_financial_total_revenue",
1197
+ "type": "string",
1198
+ "example_values": ["$2.5 Billion", "£1.1B"],
1199
+ "definition": "Original raw total revenue financial data (as string)."
1200
+ },
1201
+ {
1202
+ "core_name": "company",
1203
+ "field_name": "owner_company_private_public",
1204
+ "type": "string",
1205
+ "example_values": ["Public", "Private"],
1206
+ "definition": "Indicates if the owner company is 'Private' or 'Public'."
1207
+ },
1208
+ {
1209
+ "core_name": "company",
1210
+ "field_name": "phase",
1211
+ "type": "string",
1212
+ "example_values": ["Phase 1", "Phase 2", "Preclinical", "Marketed"],
1213
+ "definition": "Development phase(s) of products/molecules associated with the company."
1214
+ },
1215
+ {
1216
+ "core_name": "company",
1217
+ "field_name": "product_name",
1218
+ "type": "string",
1219
+ "example_values": ["OncoTherapy-A", "CardioGuard-XL", "NeuroBoost-System"],
1220
+ "definition": "Names of products developed or marketed by the company."
1221
+ },
1222
+ {
1223
+ "core_name": "company",
1224
+ "field_name": "product_synonym",
1225
+ "type": "string",
1226
+ "example_values": ["OncoTA", "CGXL"],
1227
+ "definition": "Synonyms for products associated with the company."
1228
+ },
1229
+ {
1230
+ "core_name": "company",
1231
+ "field_name": "profitability_data_operating_margin_ttm",
1232
+ "type": "float",
1233
+ "example_values": [0.15, 0.22],
1234
+ "definition": "Profitability data: operating margin for the trailing twelve months (TTM)."
1235
+ },
1236
+ {
1237
+ "core_name": "company",
1238
+ "field_name": "profitability_data_profit_margin",
1239
+ "type": "float",
1240
+ "example_values": [0.10, 0.18],
1241
+ "definition": "Profitability data: overall profit margin."
1242
+ },
1243
+ {
1244
+ "core_name": "company",
1245
+ "field_name": "profitability_data_return_on_assets_ttm",
1246
+ "type": "float",
1247
+ "example_values": [0.08, 0.14],
1248
+ "definition": "Profitability data: return on assets for the trailing twelve months (TTM)."
1249
+ },
1250
+ {
1251
+ "core_name": "company",
1252
+ "field_name": "profitability_data_return_on_equity_ttm",
1253
+ "type": "float",
1254
+ "example_values": [0.12, 0.20],
1255
+ "definition": "Profitability data: return on equity for the trailing twelve months (TTM)."
1256
+ },
1257
+ {
1258
+ "core_name": "company",
1259
+ "field_name": "route",
1260
+ "type": "string",
1261
+ "example_values": ["Oral", "Intravenous", "Topical"],
1262
+ "definition": "Route(s) of administration for products/molecules associated with the company."
1263
+ },
1264
+ {
1265
+ "core_name": "company",
1266
+ "field_name": "route_branch",
1267
+ "type": "string",
1268
+ "example_values": ["Oral Solid Dosage", "Injectable Solutions"],
1269
+ "definition": "Branch(es) of route of administration."
1270
+ },
1271
+ {
1272
+ "core_name": "company",
1273
+ "field_name": "route_branch_hierarchy",
1274
+ "type": "hierarchy",
1275
+ "example_values": ["route/Oral/branch/Oral Solid Dosage"],
1276
+ "definition": "Hierarchical representation of route of administration branches."
1277
+ },
1278
+ {
1279
+ "core_name": "company",
1280
+ "field_name": "route_branch_s",
1281
+ "type": "string",
1282
+ "example_values": ["Oral Solid Dosage", "Injectable Solutions"],
1283
+ "definition": "String representation of route of administration branches (likely for faceted search)."
1284
+ },
1285
+ {
1286
+ "core_name": "company",
1287
+ "field_name": "route_s",
1288
+ "type": "text_ci",
1289
+ "example_values": ["oral", "injection", "topical"],
1290
+ "definition": "String representation of routes of administration (likely for faceted search)."
1291
+ },
1292
+ {
1293
+ "core_name": "company",
1294
+ "field_name": "subsidiary",
1295
+ "type": "string",
1296
+ "example_values": ["BioGen Labs", "TheraWorks Diagnostics"],
1297
+ "definition": "Names of subsidiaries of this company."
1298
+ },
1299
+ {
1300
+ "core_name": "company",
1301
+ "field_name": "subsidiary_of_company",
1302
+ "type": "string",
1303
+ "example_values": ["Global Pharma Corp", "United Healthcare Inc."],
1304
+ "definition": "Names of parent companies if this company is a subsidiary."
1305
+ },
1306
+ {
1307
+ "core_name": "company",
1308
+ "field_name": "telephone",
1309
+ "type": "string",
1310
+ "example_values": ["+1-555-123-4567", "+49-30-9876-5432"],
1311
+ "definition": "Telephone number(s) for the company."
1312
+ },
1313
+ {
1314
+ "core_name": "company",
1315
+ "field_name": "therapeutic_category",
1316
+ "type": "string",
1317
+ "example_values": ["Oncology", "Cardiology", "Neurology"],
1318
+ "definition": "Therapeutic category/categories of products/molecules associated with the company."
1319
+ },
1320
+ {
1321
+ "core_name": "company",
1322
+ "field_name": "therapeutic_category_hierarchy",
1323
+ "type": "hierarchy",
1324
+ "example_values": ["disease/Cancer/category/Oncology", "disease/Cardiovascular/category/Cardiology"],
1325
+ "definition": "Hierarchical representation of therapeutic categories."
1326
+ },
1327
+ {
1328
+ "core_name": "company",
1329
+ "field_name": "therapeutic_category_s",
1330
+ "type": "text_ci",
1331
+ "example_values": ["cancer", "heart disease", "nervous system disorders"],
1332
+ "definition": "String representation of therapeutic categories (likely for faceted search)."
1333
+ },
1334
+ {
1335
+ "core_name": "company",
1336
+ "field_name": "therapeutic_group_id",
1337
+ "type": "string",
1338
+ "example_values": ["THERA_ONC_01", "THERA_CARD_02"],
1339
+ "definition": "ID(s) for the therapeutic group(s) of products/molecules associated with the company."
1340
+ },
1341
+ {
1342
+ "core_name": "company",
1343
+ "field_name": "type",
1344
+ "type": "string",
1345
+ "example_values": ["Biotech", "Pharmaceutical", "CRO", "Medical Device Company"],
1346
+ "definition": "Type(s) of the company (e.g., 'Biotech', 'Pharma')."
1347
+ }
1348
  ]
1349
 
1350
 
 
1358
  formatted_string += f" - **Type**: {field['type']}\n"
1359
  formatted_string += f" - **Definition**: {field['definition']}\n"
1360
  formatted_string += f" - **Examples**: {', '.join(map(str, field['example_values']))}\n\n"
1361
+ return formatted_string