dolphinium committed on
Commit
c934aa2
·
1 Parent(s): 43c6bd6

add compound metadata

Browse files
Files changed (1) hide show
  1. solr_metadata.py +280 -0
solr_metadata.py CHANGED
@@ -358,6 +358,286 @@ field_metadata = [
358
  "type": "string (multi-valued, categorical)",
359
  "example_values": ["Small Molecules", "Biologics", "Nucleic Acids", "Peptides"],
360
  "definition": "High-level classification of the drug's molecular type involved in the deal."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  }
362
  ]
363
 
 
358
  "type": "string (multi-valued, categorical)",
359
  "example_values": ["Small Molecules", "Biologics", "Nucleic Acids", "Peptides"],
360
  "definition": "High-level classification of the drug's molecular type involved in the deal."
361
+ },
362
+ {
363
+ "core_name": "compound",
364
+ "field_name": "id",
365
+ "type": "string",
366
+ "example_values": ["519-571", "35518_child"],
367
+ "definition": "Unique identifier for the compound document."
368
+ },
369
+ {
370
+ "core_name": "compound",
371
+ "field_name": "molecule_id",
372
+ "type": "string",
373
+ "example_values": ["mol_12345", "mol_67890"], # Placeholder, not in provided doc examples directly.
374
+ "definition": "Unique identifier for the molecule associated with the compound."
375
+ },
376
+ {
377
+ "core_name": "compound",
378
+ "field_name": "product_name",
379
+ "type": "string (exact match, for faceting)",
380
+ "example_values": ["Toprol-XL Tablets"],
381
+ "definition": "The specific, full trade name of the product. **Use this field for `terms` faceting** on products."
382
+ },
383
+ {
384
+ "core_name": "compound",
385
+ "field_name": "product_name_s",
386
+ "type": "string (multi-valued, for searching)",
387
+ "example_values": ["Toprol-XL Tablets", "Toprol Extended Release Tablets"],
388
+ "definition": "A field containing all known trade names and synonyms for a product. **Use this field for all `query` parameter searches involving a product name** to ensure comprehensive results."
389
+ },
390
+ {
391
+ "core_name": "compound",
392
+ "field_name": "product_synonym",
393
+ "type": "string (multi-valued)",
394
+ "example_values": ["Toprol Extended Release Tablets", "metoprolol succinate ER Tablets"],
395
+ "definition": "Synonyms or alternative names for the product."
396
+ },
397
+ {
398
+ "core_name": "compound",
399
+ "field_name": "molecule_name",
400
+ "type": "string (exact match, for faceting)",
401
+ "example_values": ["metoprolol succinate"],
402
+ "definition": "The generic, non-proprietary name of the active molecule. **Use this field for `terms` faceting** on molecules."
403
+ },
404
+ {
405
+ "core_name": "compound",
406
+ "field_name": "molecule_name_s",
407
+ "type": "string (multi-valued, for searching)",
408
+ "example_values": ["metoprolol succinate", "Métoprolol, Succinate de"],
409
+ "definition": "A field with all known generic names and synonyms for a molecule. **Use this field for all `query` parameter searches** involving a molecule name."
410
+ },
411
+ {
412
+ "core_name": "compound",
413
+ "field_name": "molecule_synonym",
414
+ "type": "string (multi-valued)",
415
+ "example_values": ["Métoprolol, Succinate de", "Metoprololi Succinas"],
416
+ "definition": "Synonyms or alternative names for the molecule."
417
+ },
418
+ {
419
+ "core_name": "compound",
420
+ "field_name": "molecule_type",
421
+ "type": "string (categorical)",
422
+ "example_values": ["small molecule", "biologic", "nucleic acid"],
423
+ "definition": "High-level classification of the compound's molecular type."
424
+ },
425
+ {
426
+ "core_name": "compound",
427
+ "field_name": "company_name",
428
+ "type": "string (multi-valued, exact match, for faceting)",
429
+ "example_values": ["AstraZeneca Plc", "Recordati S.p.A."],
430
+ "definition": "The canonical, standardized names of companies associated with the compound. **Use this field for `terms` faceting** to group results by unique companies."
431
+ },
432
+ {
433
+ "core_name": "compound",
434
+ "field_name": "company_name_s",
435
+ "type": "string (multi-valued, for searching)",
436
+ "example_values": ["AstraZeneca Plc", "New American Therapeutics, Inc."],
437
+ "definition": "A field containing all known names and synonyms for companies associated with the compound. **Use this field for all `query` parameter searches involving a company name**."
438
+ },
439
+ {
440
+ "core_name": "compound",
441
+ "field_name": "company_role",
442
+ "type": "string (multi-valued, categorical)",
443
+ "example_values": ["owner", "partner", "manufacturer"],
444
+ "definition": "The role of the company in relation to the compound (e.g., owner, partner, manufacturer)."
445
+ },
446
+ {
447
+ "core_name": "compound",
448
+ "field_name": "owner_company_name",
449
+ "type": "string (multi-valued, exact match, for faceting)",
450
+ "example_values": ["AstraZeneca Plc"], # Placeholder, based on common patterns
451
+ "definition": "The canonical, standardized name(s) of the owner company of the compound. Use for faceting on owner companies."
452
+ },
453
+ {
454
+ "core_name": "compound",
455
+ "field_name": "overall_status",
456
+ "type": "string (multi-valued, categorical)",
457
+ "example_values": ["Marketed", "Completed", "Terminated"],
458
+ "definition": "The overall development status of the compound (e.g., Marketed, Completed, Terminated)."
459
+ },
460
+ {
461
+ "core_name": "compound",
462
+ "field_name": "phase",
463
+ "type": "string (multi-valued, categorical)",
464
+ "example_values": ["Marketed", "Phase 1", "Pre Clinical"],
465
+ "definition": "The current or most advanced clinical or developmental stage of the compound. Essential for queries about clinical trial phases."
466
+ },
467
+ {
468
+ "core_name": "compound",
469
+ "field_name": "highest_phase_molecule",
470
+ "type": "string (multi-valued, categorical)",
471
+ "example_values": ["Marketed", "Phase 3"],
472
+ "definition": "The highest development stage a molecule associated with this compound has ever reached."
473
+ },
474
+ {
475
+ "core_name": "compound",
476
+ "field_name": "therapeutic_category",
477
+ "type": "string (multi-valued, specific, for faceting)",
478
+ "example_values": ["Angina Pectoris", "Heart Failure, Other"],
479
+ "definition": "The specific disease or therapeutic area being targeted by the compound. Use for very specific disease queries or faceting."
480
+ },
481
+ {
482
+ "core_name": "compound",
483
+ "field_name": "therapeutic_category_s",
484
+ "type": "string (multi-valued, for searching)",
485
+ "example_values": ["cardiovascular diseases", "heart diseases", "angina pectoris"],
486
+ "definition": "Broader, multi-valued therapeutic categories and their synonyms. **Use this field for broad category searches** in the `query` parameter."
487
+ },
488
+ {
489
+ "core_name": "compound",
490
+ "field_name": "therapeutic_category_hierarchy",
491
+ "type": "string (multi-valued, hierarchical)",
492
+ "example_values": ["Cardiovascular Diseases|Heart Diseases|Coronary Artery Disease|Angina Pectoris"],
493
+ "definition": "The hierarchical classification of the therapeutic area (e.g., 'Category|Subcategory|Specific Area'). Useful for hierarchical faceting or broader path-based searches."
494
+ },
495
+ {
496
+ "core_name": "compound",
497
+ "field_name": "mechanism",
498
+ "type": "string (multi-valued, specific, for faceting)",
499
+ "example_values": ["Beta-1 Adrenergic Receptor Antagonist"], # Placeholder
500
+ "definition": "The specific mechanism of action of the compound. Use for very specific mechanism queries or faceting."
501
+ },
502
+ {
503
+ "core_name": "compound",
504
+ "field_name": "mechanism_s",
505
+ "type": "string (multi-valued, for searching)",
506
+ "example_values": ["beta blocker", "adrenergic receptor antagonist"], # Placeholder
507
+ "definition": "Broader, multi-valued mechanisms of action and their synonyms. **Use this field for broad mechanism searches** in the `query` parameter."
508
+ },
509
+ {
510
+ "core_name": "compound",
511
+ "field_name": "mechanism_type",
512
+ "type": "string (multi-valued, categorical)",
513
+ "example_values": ["Cardioprotectants", "Antihypertensive Agents", "Beta-1 Blockers"],
514
+ "definition": "High-level classification of the mechanism type."
515
+ },
516
+ {
517
+ "core_name": "compound",
518
+ "field_name": "mechanism_type_hierarchy",
519
+ "type": "string (multi-valued, hierarchical)",
520
+ "example_values": ["Tissue Protectants|Cardioprotectants", "Adrenergic Receptor Modulators|Beta Adrenergic Receptor Modulators|Beta Blockers|Beta-1 Blockers"],
521
+ "definition": "The hierarchical classification of the mechanism type."
522
+ },
523
+ {
524
+ "core_name": "compound",
525
+ "field_name": "target_name",
526
+ "type": "string (multi-valued, exact match, for faceting)",
527
+ "example_values": ["Beta1 Adrenergic"],
528
+ "definition": "The specific name of the biological target(s) of the compound. **Use this field for `terms` faceting** on targets."
529
+ },
530
+ {
531
+ "core_name": "compound",
532
+ "field_name": "target_name_s",
533
+ "type": "string (multi-valued, for searching)",
534
+ "example_values": ["Beta1 Adrenergic", "Beta-1 Adrenergic Receptor", "ADRB1"],
535
+ "definition": "A field with all known names and synonyms for the biological target(s). **Use this field for all `query` parameter searches** involving a target name."
536
+ },
537
+ {
538
+ "core_name": "compound",
539
+ "field_name": "target_type",
540
+ "type": "string (multi-valued, categorical)",
541
+ "example_values": ["protein", "biochemical process/pathway", "other"],
542
+ "definition": "The type of biological target (e.g., protein, pathway)."
543
+ },
544
+ {
545
+ "core_name": "compound",
546
+ "field_name": "route",
547
+ "type": "string (multi-valued, categorical)",
548
+ "example_values": ["ORAL", "INJECTION", "TOPICAL"],
549
+ "definition": "The primary route of administration for the compound. Good for faceting on exact routes."
550
+ },
551
+ {
552
+ "core_name": "compound",
553
+ "field_name": "route_s",
554
+ "type": "string (multi-valued, for searching)",
555
+ "example_values": ["oral", "parenteral", "injection"],
556
+ "definition": "Broader, multi-valued routes of administration and their synonyms. **Use this field for broad route searches** in the `query` parameter."
557
+ },
558
+ {
559
+ "core_name": "compound",
560
+ "field_name": "route_branch_hierarchy",
561
+ "type": "string (multi-valued, hierarchical)",
562
+ "example_values": ["ORAL"],
563
+ "definition": "The hierarchical classification of the route of administration."
564
+ },
565
+ {
566
+ "core_name": "compound",
567
+ "field_name": "form_name",
568
+ "type": "string (multi-valued, categorical)",
569
+ "example_values": ["ORAL|Oral Tablet", "INJECTION|Solution"],
570
+ "definition": "The pharmaceutical form of the compound, often combined with the route (e.g., 'Route|Form')."
571
+ },
572
+ {
573
+ "core_name": "compound",
574
+ "field_name": "drug_delivery_technology",
575
+ "type": "string (multi-valued, specific, for faceting)",
576
+ "example_values": ["Conventional Melt Tablets", "Oral Matrix MR", "Bioadhesion"],
577
+ "definition": "Specific categories of drug delivery technology employed for the compound. **Use this field for `terms` faceting** on specific delivery technologies."
578
+ },
579
+ {
580
+ "core_name": "compound",
581
+ "field_name": "drug_delivery_branch_s",
582
+ "type": "string (multi-valued, for searching)",
583
+ "example_values": ["oral", "oral modified release", "oral extended release"],
584
+ "definition": "The method of drug administration technology and its broader search-friendly terms. **Use this for `query` parameter searches about drug delivery technologies**."
585
+ },
586
+ {
587
+ "core_name": "compound",
588
+ "field_name": "earliest_approval_date",
589
+ "type": "date",
590
+ "example_values": ["1986-06-19T00:00:00Z"],
591
+ "definition": "The earliest known approval date for the compound in ISO 8601 format. Use for precise date range queries."
592
+ },
593
+ {
594
+ "core_name": "compound",
595
+ "field_name": "approval_year",
596
+ "type": "number (multi-valued, year)",
597
+ "example_values": [1992, 2000, 2001],
598
+ "definition": "The 4-digit year(s) of approval for the compound. Use for queries involving whole years."
599
+ },
600
+ {
601
+ "core_name": "compound",
602
+ "field_name": "compound_territory",
603
+ "type": "string (multi-valued, hierarchical)",
604
+ "example_values": ["United States of America", "Americas Northern", "Europe", "World"],
605
+ "definition": "The geographic territories where the compound is approved or marketed. It is hierarchical. Use for filtering by location."
606
+ },
607
+ {
608
+ "core_name": "compound",
609
+ "field_name": "is_orphan",
610
+ "type": "string (boolean)", # Boolean-like flag represented as a string in this core (see example_values); a native Solr boolean field is not assumed
611
+ "example_values": ["yes", "no"],
612
+ "definition": "Indicates if the compound has received orphan drug designation. Orphan drugs are intended for rare diseases that affect a small population, making them less profitable to develop without incentives."
613
+ },
614
+ {
615
+ "core_name": "compound",
616
+ "field_name": "is_prodrug",
617
+ "type": "string (boolean)",
618
+ "example_values": ["yes", "no"],
619
+ "definition": "Indicates if the compound is a prodrug (an inactive compound that is metabolized in the body to an active drug)."
620
+ },
621
+ {
622
+ "core_name": "compound",
623
+ "field_name": "black_box",
624
+ "type": "string (boolean)",
625
+ "example_values": ["yes", "no"],
626
+ "definition": "Indicates if the compound carries a 'Black Box Warning' (the strongest warning by the FDA that a drug carries significant risks)."
627
+ },
628
+ {
629
+ "core_name": "compound",
630
+ "field_name": "molecular_weight",
631
+ "type": "number (metric)",
632
+ "example_values": [652.816, 300.25],
633
+ "definition": "The molecular weight of the compound in Daltons. Use for numerical range queries."
634
+ },
635
+ {
636
+ "core_name": "compound",
637
+ "field_name": "logP",
638
+ "type": "number (metric)",
639
+ "example_values": [2.5, -0.7, 4.1],
640
+ "definition": "The logarithm of the octanol-water partition coefficient, a measure of a compound's lipophilicity (fat-liking) or hydrophilicity (water-liking). Higher values indicate more lipophilicity."
641
  }
642
  ]
643