dolphinium
committed on
Commit
·
c934aa2
1
Parent(s):
43c6bd6
add compound metadata
Browse files- solr_metadata.py +280 -0
solr_metadata.py
CHANGED
@@ -358,6 +358,286 @@ field_metadata = [
|
|
358 |
"type": "string (multi-valued, categorical)",
|
359 |
"example_values": ["Small Molecules", "Biologics", "Nucleic Acids", "Peptides"],
|
360 |
"definition": "High-level classification of the drug's molecular type involved in the deal."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
}
|
362 |
]
|
363 |
|
|
|
358 |
"type": "string (multi-valued, categorical)",
|
359 |
"example_values": ["Small Molecules", "Biologics", "Nucleic Acids", "Peptides"],
|
360 |
"definition": "High-level classification of the drug's molecular type involved in the deal."
|
361 |
+
},
|
362 |
+
{
|
363 |
+
"core_name": "compound",
|
364 |
+
"field_name": "id",
|
365 |
+
"type": "string",
|
366 |
+
"example_values": ["519-571", "35518_child"],
|
367 |
+
"definition": "Unique identifier for the compound document."
|
368 |
+
},
|
369 |
+
{
|
370 |
+
"core_name": "compound",
|
371 |
+
"field_name": "molecule_id",
|
372 |
+
"type": "string",
|
373 |
+
"example_values": ["mol_12345", "mol_67890"], # Placeholder, not in provided doc examples directly.
|
374 |
+
"definition": "Unique identifier for the molecule associated with the compound."
|
375 |
+
},
|
376 |
+
{
|
377 |
+
"core_name": "compound",
|
378 |
+
"field_name": "product_name",
|
379 |
+
"type": "string (exact match, for faceting)",
|
380 |
+
"example_values": ["Toprol-XL Tablets"],
|
381 |
+
"definition": "The specific, full trade name of the product. **Use this field for `terms` faceting** on products."
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"core_name": "compound",
|
385 |
+
"field_name": "product_name_s",
|
386 |
+
"type": "string (multi-valued, for searching)",
|
387 |
+
"example_values": ["Toprol-XL Tablets", "Toprol Extended Release Tablets"],
|
388 |
+
"definition": "A field containing all known trade names and synonyms for a product. **Use this field for all `query` parameter searches involving a product name** to ensure comprehensive results."
|
389 |
+
},
|
390 |
+
{
|
391 |
+
"core_name": "compound",
|
392 |
+
"field_name": "product_synonym",
|
393 |
+
"type": "string (multi-valued)",
|
394 |
+
"example_values": ["Toprol Extended Release Tablets", "metoprolol succinate ER Tablets"],
|
395 |
+
"definition": "Synonyms or alternative names for the product."
|
396 |
+
},
|
397 |
+
{
|
398 |
+
"core_name": "compound",
|
399 |
+
"field_name": "molecule_name",
|
400 |
+
"type": "string (exact match, for faceting)",
|
401 |
+
"example_values": ["metoprolol succinate"],
|
402 |
+
"definition": "The generic, non-proprietary name of the active molecule. **Use this field for `terms` faceting** on molecules."
|
403 |
+
},
|
404 |
+
{
|
405 |
+
"core_name": "compound",
|
406 |
+
"field_name": "molecule_name_s",
|
407 |
+
"type": "string (multi-valued, for searching)",
|
408 |
+
"example_values": ["metoprolol succinate", "Métoprolol, Succinate de"],
|
409 |
+
"definition": "A field with all known generic names and synonyms for a molecule. **Use this field for all `query` parameter searches** involving a molecule name."
|
410 |
+
},
|
411 |
+
{
|
412 |
+
"core_name": "compound",
|
413 |
+
"field_name": "molecule_synonym",
|
414 |
+
"type": "string (multi-valued)",
|
415 |
+
"example_values": ["Métoprolol, Succinate de", "Metoprololi Succinas"],
|
416 |
+
"definition": "Synonyms or alternative names for the molecule."
|
417 |
+
},
|
418 |
+
{
|
419 |
+
"core_name": "compound",
|
420 |
+
"field_name": "molecule_type",
|
421 |
+
"type": "string (categorical)",
|
422 |
+
"example_values": ["small molecule", "biologic", "nucleic acid"],
|
423 |
+
"definition": "High-level classification of the compound's molecular type."
|
424 |
+
},
|
425 |
+
{
|
426 |
+
"core_name": "compound",
|
427 |
+
"field_name": "company_name",
|
428 |
+
"type": "string (multi-valued, exact match, for faceting)",
|
429 |
+
"example_values": ["AstraZeneca Plc", "Recordati S.p.A."],
|
430 |
+
"definition": "The canonical, standardized names of companies associated with the compound. **Use this field for `terms` faceting** to group results by unique companies."
|
431 |
+
},
|
432 |
+
{
|
433 |
+
"core_name": "compound",
|
434 |
+
"field_name": "company_name_s",
|
435 |
+
"type": "string (multi-valued, for searching)",
|
436 |
+
"example_values": ["AstraZeneca Plc", "New American Therapeutics, Inc."],
|
437 |
+
"definition": "A field containing all known names and synonyms for companies associated with the compound. **Use this field for all `query` parameter searches involving a company name**."
|
438 |
+
},
|
439 |
+
{
|
440 |
+
"core_name": "compound",
|
441 |
+
"field_name": "company_role",
|
442 |
+
"type": "string (multi-valued, categorical)",
|
443 |
+
"example_values": ["owner", "partner", "manufacturer"],
|
444 |
+
"definition": "The role of the company in relation to the compound (e.g., owner, partner, manufacturer)."
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"core_name": "compound",
|
448 |
+
"field_name": "owner_company_name",
|
449 |
+
"type": "string (multi-valued, exact match, for faceting)",
|
450 |
+
"example_values": ["AstraZeneca Plc"], # Placeholder, based on common patterns
|
451 |
+
"definition": "The canonical, standardized name(s) of the owner company of the compound. Use for faceting on owner companies."
|
452 |
+
},
|
453 |
+
{
|
454 |
+
"core_name": "compound",
|
455 |
+
"field_name": "overall_status",
|
456 |
+
"type": "string (multi-valued, categorical)",
|
457 |
+
"example_values": ["Marketed", "Completed", "Terminated"],
|
458 |
+
"definition": "The overall development status of the compound (e.g., Marketed, Completed, Terminated)."
|
459 |
+
},
|
460 |
+
{
|
461 |
+
"core_name": "compound",
|
462 |
+
"field_name": "phase",
|
463 |
+
"type": "string (multi-valued, categorical)",
|
464 |
+
"example_values": ["Marketed", "Phase 1", "Pre Clinical"],
|
465 |
+
"definition": "The current or most advanced clinical or developmental stage of the compound. Essential for queries about clinical trial phases."
|
466 |
+
},
|
467 |
+
{
|
468 |
+
"core_name": "compound",
|
469 |
+
"field_name": "highest_phase_molecule",
|
470 |
+
"type": "string (multi-valued, categorical)",
|
471 |
+
"example_values": ["Marketed", "Phase 3"],
|
472 |
+
"definition": "The highest development stage a molecule associated with this compound has ever reached."
|
473 |
+
},
|
474 |
+
{
|
475 |
+
"core_name": "compound",
|
476 |
+
"field_name": "therapeutic_category",
|
477 |
+
"type": "string (multi-valued, specific, for faceting)",
|
478 |
+
"example_values": ["Angina Pectoris", "Heart Failure, Other"],
|
479 |
+
"definition": "The specific disease or therapeutic area being targeted by the compound. Use for very specific disease queries or faceting."
|
480 |
+
},
|
481 |
+
{
|
482 |
+
"core_name": "compound",
|
483 |
+
"field_name": "therapeutic_category_s",
|
484 |
+
"type": "string (multi-valued, for searching)",
|
485 |
+
"example_values": ["cardiovascular diseases", "heart diseases", "angina pectoris"],
|
486 |
+
"definition": "Broader, multi-valued therapeutic categories and their synonyms. **Use this field for broad category searches** in the `query` parameter."
|
487 |
+
},
|
488 |
+
{
|
489 |
+
"core_name": "compound",
|
490 |
+
"field_name": "therapeutic_category_hierarchy",
|
491 |
+
"type": "string (multi-valued, hierarchical)",
|
492 |
+
"example_values": ["Cardiovascular Diseases|Heart Diseases|Coronary Artery Disease|Angina Pectoris"],
|
493 |
+
"definition": "The hierarchical classification of the therapeutic area (e.g., 'Category|Subcategory|Specific Area'). Useful for hierarchical faceting or broader path-based searches."
|
494 |
+
},
|
495 |
+
{
|
496 |
+
"core_name": "compound",
|
497 |
+
"field_name": "mechanism",
|
498 |
+
"type": "string (multi-valued, specific, for faceting)",
|
499 |
+
"example_values": ["Beta-1 Adrenergic Receptor Antagonist"], # Placeholder
|
500 |
+
"definition": "The specific mechanism of action of the compound. Use for very specific mechanism queries or faceting."
|
501 |
+
},
|
502 |
+
{
|
503 |
+
"core_name": "compound",
|
504 |
+
"field_name": "mechanism_s",
|
505 |
+
"type": "string (multi-valued, for searching)",
|
506 |
+
"example_values": ["beta blocker", "adrenergic receptor antagonist"], # Placeholder
|
507 |
+
"definition": "Broader, multi-valued mechanisms of action and their synonyms. **Use this field for broad mechanism searches** in the `query` parameter."
|
508 |
+
},
|
509 |
+
{
|
510 |
+
"core_name": "compound",
|
511 |
+
"field_name": "mechanism_type",
|
512 |
+
"type": "string (multi-valued, categorical)",
|
513 |
+
"example_values": ["Cardioprotectants", "Antihypertensive Agents", "Beta-1 Blockers"],
|
514 |
+
"definition": "High-level classification of the mechanism type."
|
515 |
+
},
|
516 |
+
{
|
517 |
+
"core_name": "compound",
|
518 |
+
"field_name": "mechanism_type_hierarchy",
|
519 |
+
"type": "string (multi-valued, hierarchical)",
|
520 |
+
"example_values": ["Tissue Protectants|Cardioprotectants", "Adrenergic Receptor Modulators|Beta Adrenergic Receptor Modulators|Beta Blockers|Beta-1 Blockers"],
|
521 |
+
"definition": "The hierarchical classification of the mechanism type."
|
522 |
+
},
|
523 |
+
{
|
524 |
+
"core_name": "compound",
|
525 |
+
"field_name": "target_name",
|
526 |
+
"type": "string (multi-valued, exact match, for faceting)",
|
527 |
+
"example_values": ["Beta1 Adrenergic"],
|
528 |
+
"definition": "The specific name of the biological target(s) of the compound. **Use this field for `terms` faceting** on targets."
|
529 |
+
},
|
530 |
+
{
|
531 |
+
"core_name": "compound",
|
532 |
+
"field_name": "target_name_s",
|
533 |
+
"type": "string (multi-valued, for searching)",
|
534 |
+
"example_values": ["Beta1 Adrenergic", "Beta-1 Adrenergic Receptor", "ADRB1"],
|
535 |
+
"definition": "A field with all known names and synonyms for the biological target(s). **Use this field for all `query` parameter searches** involving a target name."
|
536 |
+
},
|
537 |
+
{
|
538 |
+
"core_name": "compound",
|
539 |
+
"field_name": "target_type",
|
540 |
+
"type": "string (multi-valued, categorical)",
|
541 |
+
"example_values": ["protein", "biochemical process/pathway", "other"],
|
542 |
+
"definition": "The type of biological target (e.g., protein, pathway)."
|
543 |
+
},
|
544 |
+
{
|
545 |
+
"core_name": "compound",
|
546 |
+
"field_name": "route",
|
547 |
+
"type": "string (multi-valued, categorical)",
|
548 |
+
"example_values": ["ORAL", "INJECTION", "TOPICAL"],
|
549 |
+
"definition": "The primary route of administration for the compound. Good for faceting on exact routes."
|
550 |
+
},
|
551 |
+
{
|
552 |
+
"core_name": "compound",
|
553 |
+
"field_name": "route_s",
|
554 |
+
"type": "string (multi-valued, for searching)",
|
555 |
+
"example_values": ["oral", "parenteral", "injection"],
|
556 |
+
"definition": "Broader, multi-valued routes of administration and their synonyms. **Use this field for broad route searches** in the `query` parameter."
|
557 |
+
},
|
558 |
+
{
|
559 |
+
"core_name": "compound",
|
560 |
+
"field_name": "route_branch_hierarchy",
|
561 |
+
"type": "string (multi-valued, hierarchical)",
|
562 |
+
"example_values": ["ORAL"],
|
563 |
+
"definition": "The hierarchical classification of the route of administration."
|
564 |
+
},
|
565 |
+
{
|
566 |
+
"core_name": "compound",
|
567 |
+
"field_name": "form_name",
|
568 |
+
"type": "string (multi-valued, categorical)",
|
569 |
+
"example_values": ["ORAL|Oral Tablet", "INJECTION|Solution"],
|
570 |
+
"definition": "The pharmaceutical form of the compound, often combined with the route (e.g., 'Route|Form')."
|
571 |
+
},
|
572 |
+
{
|
573 |
+
"core_name": "compound",
|
574 |
+
"field_name": "drug_delivery_technology",
|
575 |
+
"type": "string (multi-valued, specific, for faceting)",
|
576 |
+
"example_values": ["Conventional Melt Tablets", "Oral Matrix MR", "Bioadhesion"],
|
577 |
+
"definition": "Specific categories of drug delivery technology employed for the compound. **Use this field for `terms` faceting** on specific delivery technologies."
|
578 |
+
},
|
579 |
+
{
|
580 |
+
"core_name": "compound",
|
581 |
+
"field_name": "drug_delivery_branch_s",
|
582 |
+
"type": "string (multi-valued, for searching)",
|
583 |
+
"example_values": ["oral", "oral modified release", "oral extended release"],
|
584 |
+
"definition": "The method of drug administration technology and its broader search-friendly terms. **Use this for `query` parameter searches about drug delivery technologies**."
|
585 |
+
},
|
586 |
+
{
|
587 |
+
"core_name": "compound",
|
588 |
+
"field_name": "earliest_approval_date",
|
589 |
+
"type": "date",
|
590 |
+
"example_values": ["1986-06-19T00:00:00Z"],
|
591 |
+
"definition": "The earliest known approval date for the compound in ISO 8601 format. Use for precise date range queries."
|
592 |
+
},
|
593 |
+
{
|
594 |
+
"core_name": "compound",
|
595 |
+
"field_name": "approval_year",
|
596 |
+
"type": "number (multi-valued, year)",
|
597 |
+
"example_values": [1992, 2000, 2001],
|
598 |
+
"definition": "The 4-digit year(s) of approval for the compound. Use for queries involving whole years."
|
599 |
+
},
|
600 |
+
{
|
601 |
+
"core_name": "compound",
|
602 |
+
"field_name": "compound_territory",
|
603 |
+
"type": "string (multi-valued, hierarchical)",
|
604 |
+
"example_values": ["United States of America", "Americas Northern", "Europe", "World"],
|
605 |
+
"definition": "The geographic territories where the compound is approved or marketed. It is hierarchical. Use for filtering by location."
|
606 |
+
},
|
607 |
+
{
|
608 |
+
"core_name": "compound",
|
609 |
+
"field_name": "is_orphan",
|
610 |
+
"type": "string (boolean)", # Boolean flag represented as "yes"/"no" string values (see example_values).
|
611 |
+
"example_values": ["yes", "no"],
|
612 |
+
"definition": "Indicates if the compound has received orphan drug designation. Orphan drugs are intended for rare diseases that affect a small population, making them less profitable to develop without incentives."
|
613 |
+
},
|
614 |
+
{
|
615 |
+
"core_name": "compound",
|
616 |
+
"field_name": "is_prodrug",
|
617 |
+
"type": "string (boolean)",
|
618 |
+
"example_values": ["yes", "no"],
|
619 |
+
"definition": "Indicates if the compound is a prodrug (an inactive compound that is metabolized in the body to an active drug)."
|
620 |
+
},
|
621 |
+
{
|
622 |
+
"core_name": "compound",
|
623 |
+
"field_name": "black_box",
|
624 |
+
"type": "string (boolean)",
|
625 |
+
"example_values": ["yes", "no"],
|
626 |
+
"definition": "Indicates if the compound carries a 'Black Box Warning' (the strongest warning by the FDA that a drug carries significant risks)."
|
627 |
+
},
|
628 |
+
{
|
629 |
+
"core_name": "compound",
|
630 |
+
"field_name": "molecular_weight",
|
631 |
+
"type": "number (metric)",
|
632 |
+
"example_values": [652.816, 300.25],
|
633 |
+
"definition": "The molecular weight of the compound in Daltons. Use for numerical range queries."
|
634 |
+
},
|
635 |
+
{
|
636 |
+
"core_name": "compound",
|
637 |
+
"field_name": "logP",
|
638 |
+
"type": "number (metric)",
|
639 |
+
"example_values": [2.5, -0.7, 4.1],
|
640 |
+
"definition": "The logarithm of the octanol-water partition coefficient, a measure of a compound's lipophilicity (fat-liking) or hydrophilicity (water-liking). Higher values indicate more lipophilicity."
|
641 |
}
|
642 |
]
|
643 |
|