From 4aa3d84a12d56c2f0eb3ead1aebb0f0bc4f9992b Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Tue, 7 Jun 2022 18:43:16 +0200
Subject: [PATCH 01/20] add _TAGS

---
 bigbio/biodatasets/an_em/an_em.py                    |  3 ++-
 bigbio/biodatasets/anat_em/anat_em.py                |  3 ++-
 bigbio/biodatasets/ask_a_patient/ask_a_patient.py    |  3 ++-
 bigbio/biodatasets/bc5cdr/bc5cdr.py                  |  3 ++-
 bigbio/biodatasets/bc7_litcovid/bc7_litcovid.py      |  3 ++-
 bigbio/biodatasets/bio_sim_verb/bio_sim_verb.py      |  3 ++-
 bigbio/biodatasets/bio_simlex/bio_simlex.py          |  3 ++-
 .../bioasq_2021_mesinesp/bioasq_2021_mesinesp.py     |  3 ++-
 bigbio/biodatasets/bioasq_task_b/bioasq_task_b.py    |  3 ++-
 .../bioasq_task_c_2017/bioasq_task_c_2017.py         |  3 ++-
 bigbio/biodatasets/bioinfer/bioinfer.py              |  3 ++-
 .../biology_how_why_corpus/biology_how_why_corpus.py |  3 ++-
 bigbio/biodatasets/biomrc/biomrc.py                  |  3 ++-
 .../bionlp_shared_task_2009.py                       |  7 ++++++-
 .../bionlp_st_2011_epi/bionlp_st_2011_epi.py         |  3 ++-
 .../bionlp_st_2011_ge/bionlp_st_2011_ge.py           |  3 ++-
 .../bionlp_st_2011_id/bionlp_st_2011_id.py           | 10 +++++++++-
 .../bionlp_st_2011_rel/bionlp_st_2011_rel.py         |  3 ++-
 .../bionlp_st_2013_cg/bionlp_st_2013_cg.py           | 12 +++++++++++-
 .../bionlp_st_2013_ge/bionlp_st_2013_ge.py           |  3 ++-
 .../bionlp_st_2013_gro/bionlp_st_2013_gro.py         |  3 ++-
 .../bionlp_st_2013_pc/bionlp_st_2013_pc.py           |  3 ++-
 .../bionlp_st_2019_bb/bionlp_st_2019_bb.py           |  3 ++-
 bigbio/biodatasets/biored/biored.py                  |  3 ++-
 bigbio/biodatasets/biorelex/biorelex.py              |  3 ++-
 bigbio/biodatasets/bioscope/bioscope.py              |  3 ++-
 bigbio/biodatasets/biosses/biosses.py                |  3 ++-
 bigbio/biodatasets/cadec/cadec.py                    |  3 ++-
 bigbio/biodatasets/cantemist/cantemist.py            |  3 ++-
 bigbio/biodatasets/cas/cas.py                        |  3 ++-
 bigbio/biodatasets/cellfinder/cellfinder.py          |  3 ++-
 bigbio/biodatasets/chebi_nactem/chebi_nactem.py      |  3 ++-
 bigbio/biodatasets/chemdner/chemdner.py              |  3 ++-
 bigbio/biodatasets/chemprot/chemprot.py              |  3 ++-
 bigbio/biodatasets/chia/chia.py                      |  3 ++-
 .../citation_gia_test_collection.py                  |  3 ++-
 bigbio/biodatasets/codiesp/codiesp.py                |  3 ++-
 bigbio/biodatasets/cord_ner/cord_ner.py              |  3 ++-
 bigbio/biodatasets/ctebmsp/ctebmsp.py                |  3 ++-
 bigbio/biodatasets/ddi_corpus/ddi_corpus.py          |  3 ++-
 .../biodatasets/diann_iber_eval/diann_iber_eval.py   |  3 ++-
 bigbio/biodatasets/distemist/distemist.py            |  3 ++-
 bigbio/biodatasets/ebm_pico/ebm_pico.py              |  3 ++-
 bigbio/biodatasets/ehr_rel/ehr_rel.py                |  3 ++-
 bigbio/biodatasets/essai/essai.py                    |  3 ++-
 bigbio/biodatasets/euadr/euadr.py                    |  3 ++-
 .../evidence_inference/evidence_inference.py         |  3 ++-
 bigbio/biodatasets/gad/gad.py                        |  3 ++-
 bigbio/biodatasets/genetag/genetag.py                |  3 ++-
 .../genia_ptm_event_corpus/genia_ptm_event_corpus.py |  3 ++-
 .../genia_relation_corpus/genia_relation_corpus.py   |  3 ++-
 .../genia_term_corpus/genia_term_corpus.py           |  3 ++-
 bigbio/biodatasets/geokhoj_v1/geokhoj_v1.py          |  3 ++-
 bigbio/biodatasets/gnormplus/gnormplus.py            |  3 ++-
 .../hallmarks_of_cancer/hallmarks_of_cancer.py       |  3 ++-
 bigbio/biodatasets/hprd50/hprd50.py                  |  3 ++-
 bigbio/biodatasets/iepa/iepa.py                      |  3 ++-
 bigbio/biodatasets/jnlpba/jnlpba.py                  |  3 ++-
 bigbio/biodatasets/linnaeus/linnaeus.py              |  3 ++-
 bigbio/biodatasets/lll/lll.py                        |  1 +
 bigbio/biodatasets/mantra_gsc/mantra_gsc.py          |  3 ++-
 bigbio/biodatasets/mayosrs/mayosrs.py                |  3 ++-
 bigbio/biodatasets/med_qa/med_qa.py                  |  3 ++-
 bigbio/biodatasets/medal/medal.py                    |  3 ++-
 bigbio/biodatasets/meddialog/meddialog.py            |  3 ++-
 bigbio/biodatasets/meddocan/meddocan.py              |  3 ++-
 bigbio/biodatasets/medhop/medhop.py                  |  3 ++-
 bigbio/biodatasets/medical_data/medical_data.py      |  3 ++-
 bigbio/biodatasets/mediqa_nli/mediqa_nli.py          |  3 ++-
 bigbio/biodatasets/mediqa_qa/mediqa_qa.py            |  3 ++-
 bigbio/biodatasets/mediqa_rqe/mediqa_rqe.py          |  3 ++-
 bigbio/biodatasets/medmentions/medmentions.py        |  3 ++-
 bigbio/biodatasets/mednli/mednli.py                  |  3 ++-
 bigbio/biodatasets/meqsum/meqsum.py                  |  3 ++-
 bigbio/biodatasets/minimayosrs/minimayosrs.py        |  3 ++-
 bigbio/biodatasets/mirna/mirna.py                    |  3 ++-
 bigbio/biodatasets/mlee/mlee.py                      |  3 ++-
 bigbio/biodatasets/mqp/mqp.py                        |  3 ++-
 bigbio/biodatasets/msh_wsd/msh_wsd.py                |  3 ++-
 bigbio/biodatasets/muchmore/muchmore.py              |  3 ++-
 bigbio/biodatasets/multi_xscience/multi_xscience.py  |  3 ++-
 .../biodatasets/mutation_finder/mutation_finder.py   |  3 ++-
 bigbio/biodatasets/n2c2_2006_deid/n2c2_2006_deid.py  |  3 ++-
 .../n2c2_2006_smokers/n2c2_2006_smokers.py           |  3 ++-
 bigbio/biodatasets/n2c2_2008/n2c2_2008.py            |  3 ++-
 bigbio/biodatasets/n2c2_2009/n2c2_2009.py            |  3 ++-
 bigbio/biodatasets/n2c2_2010/n2c2_2010.py            |  3 ++-
 bigbio/biodatasets/n2c2_2011/n2c2_2011.py            |  3 ++-
 bigbio/biodatasets/n2c2_2014_deid/n2c2_2014_deid.py  |  3 ++-
 .../n2c2_2014_risk_factors/n2c2_2014_risk_factors.py |  3 ++-
 .../biodatasets/n2c2_2018_track1/n2c2_2018_track1.py |  3 ++-
 .../biodatasets/n2c2_2018_track2/n2c2_2018_track2.py |  3 ++-
 bigbio/biodatasets/nagel/nagel.py                    |  3 ++-
 bigbio/biodatasets/ncbi_disease/ncbi_disease.py      |  3 ++-
 bigbio/biodatasets/nlm_gene/nlm_gene.py              |  3 ++-
 bigbio/biodatasets/nlm_wsd/nlm_wsd.py                |  3 ++-
 bigbio/biodatasets/nlmchem/nlmchem.py                |  3 ++-
 .../biodatasets/ntcir_13_medweb/ntcir_13_medweb.py   |  3 ++-
 bigbio/biodatasets/osiris/osiris.py                  |  3 ++-
 bigbio/biodatasets/paramed/paramed.py                |  3 ++-
 bigbio/biodatasets/pcr/pcr.py                        |  3 ++-
 bigbio/biodatasets/pdr/pdr.py                        |  3 ++-
 bigbio/biodatasets/pharmaconer/pharmaconer.py        |  3 ++-
 bigbio/biodatasets/pho_ner/pho_ner.py                |  3 ++-
 .../biodatasets/pico_extraction/pico_extraction.py   |  3 ++-
 bigbio/biodatasets/pmc_patients/pmc_patients.py      |  3 ++-
 bigbio/biodatasets/progene/progene.py                |  3 ++-
 bigbio/biodatasets/psytar/psytar.py                  |  3 ++-
 bigbio/biodatasets/pubhealth/pubhealth.py            |  3 ++-
 bigbio/biodatasets/pubmed_qa/pubmed_qa.py            |  1 +
 .../biodatasets/pubtator_central/pubtator_central.py |  3 ++-
 bigbio/biodatasets/quaero/quaero.py                  |  3 ++-
 bigbio/biodatasets/scai_chemical/scai_chemical.py    |  3 ++-
 bigbio/biodatasets/scai_disease/scai_disease.py      |  3 ++-
 bigbio/biodatasets/scicite/scicite.py                |  3 ++-
 bigbio/biodatasets/scielo/scielo.py                  |  3 ++-
 bigbio/biodatasets/scifact/scifact.py                |  3 ++-
 bigbio/biodatasets/sciq/sciq.py                      |  3 ++-
 bigbio/biodatasets/scitail/scitail.py                |  3 ++-
 bigbio/biodatasets/seth_corpus/seth_corpus.py        |  3 ++-
 bigbio/biodatasets/spl_adr_200db/spl_adr_200db.py    |  3 ++-
 .../swedish_medical_ner/swedish_medical_ner.py       |  3 ++-
 bigbio/biodatasets/thomas2011/thomas2011.py          |  3 ++-
 bigbio/biodatasets/tmvar_v1/tmvar_v1.py              |  3 ++-
 bigbio/biodatasets/tmvar_v2/tmvar_v2.py              |  3 ++-
 bigbio/biodatasets/tmvar_v3/tmvar_v3.py              |  3 ++-
 bigbio/biodatasets/twadrl/twadrl.py                  |  3 ++-
 bigbio/biodatasets/umnsrs/umnsrs.py                  |  3 ++-
 bigbio/biodatasets/verspoor_2013/verspoor_2013.py    |  3 ++-
 129 files changed, 276 insertions(+), 127 deletions(-)

diff --git a/bigbio/biodatasets/an_em/an_em.py b/bigbio/biodatasets/an_em/an_em.py
index f3460349a..4d956684a 100644
--- a/bigbio/biodatasets/an_em/an_em.py
+++ b/bigbio/biodatasets/an_em/an_em.py
@@ -29,9 +29,10 @@
 import bigbio.utils.parsing as parse
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = [Tags.ANATOMY]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/anat_em/anat_em.py b/bigbio/biodatasets/anat_em/anat_em.py
index c74125c24..c58f6fb19 100644
--- a/bigbio/biodatasets/anat_em/anat_em.py
+++ b/bigbio/biodatasets/anat_em/anat_em.py
@@ -27,9 +27,10 @@
 import bigbio.utils.parsing as parsing
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = [Tags.ANATOMY]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/ask_a_patient/ask_a_patient.py b/bigbio/biodatasets/ask_a_patient/ask_a_patient.py
index dee74515d..0b4eeffe4 100644
--- a/bigbio/biodatasets/ask_a_patient/ask_a_patient.py
+++ b/bigbio/biodatasets/ask_a_patient/ask_a_patient.py
@@ -21,11 +21,12 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "ask_a_patient"
 
+_TAGS = [Tags.SOCIAL_MEDIA]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bc5cdr/bc5cdr.py b/bigbio/biodatasets/bc5cdr/bc5cdr.py
index 47af693c0..5e729b270 100644
--- a/bigbio/biodatasets/bc5cdr/bc5cdr.py
+++ b/bigbio/biodatasets/bc5cdr/bc5cdr.py
@@ -31,10 +31,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
+_TAGS = [Tags.DISEASE, Tags.CHEMICAL, Tags.CHEMICAL_DISEASE_RELATION, Tags.MESH]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bc7_litcovid/bc7_litcovid.py b/bigbio/biodatasets/bc7_litcovid/bc7_litcovid.py
index 2e9ca9e95..50543a186 100644
--- a/bigbio/biodatasets/bc7_litcovid/bc7_litcovid.py
+++ b/bigbio/biodatasets/bc7_litcovid/bc7_litcovid.py
@@ -20,9 +20,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = [Tags.COVID]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bio_sim_verb/bio_sim_verb.py b/bigbio/biodatasets/bio_sim_verb/bio_sim_verb.py
index 05db39fd8..afab00599 100644
--- a/bigbio/biodatasets/bio_sim_verb/bio_sim_verb.py
+++ b/bigbio/biodatasets/bio_sim_verb/bio_sim_verb.py
@@ -27,10 +27,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 # TODO: Add BibTeX citation
+_TAGS = [Tags.LEXICAL]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bio_simlex/bio_simlex.py b/bigbio/biodatasets/bio_simlex/bio_simlex.py
index 6b8fc6f8b..2a9ceceaa 100644
--- a/bigbio/biodatasets/bio_simlex/bio_simlex.py
+++ b/bigbio/biodatasets/bio_simlex/bio_simlex.py
@@ -27,10 +27,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 # TODO: Add BibTeX citation
+_TAGS = [Tags.LEXICAL]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bioasq_2021_mesinesp/bioasq_2021_mesinesp.py b/bigbio/biodatasets/bioasq_2021_mesinesp/bioasq_2021_mesinesp.py
index 4672c3f53..680de353c 100644
--- a/bigbio/biodatasets/bioasq_2021_mesinesp/bioasq_2021_mesinesp.py
+++ b/bigbio/biodatasets/bioasq_2021_mesinesp/bioasq_2021_mesinesp.py
@@ -51,9 +51,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = [Tags.DECS]
 _LANGUAGES = [Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/bioasq_task_b/bioasq_task_b.py b/bigbio/biodatasets/bioasq_task_b/bioasq_task_b.py
index f5668647c..b17ed3828 100644
--- a/bigbio/biodatasets/bioasq_task_b/bioasq_task_b.py
+++ b/bigbio/biodatasets/bioasq_task_b/bioasq_task_b.py
@@ -32,9 +32,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/bigbio/biodatasets/bioasq_task_c_2017/bioasq_task_c_2017.py b/bigbio/biodatasets/bioasq_task_c_2017/bioasq_task_c_2017.py
index 8012f3800..2aeeb7292 100644
--- a/bigbio/biodatasets/bioasq_task_c_2017/bioasq_task_c_2017.py
+++ b/bigbio/biodatasets/bioasq_task_c_2017/bioasq_task_c_2017.py
@@ -23,9 +23,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = [Tags.GRANT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/bigbio/biodatasets/bioinfer/bioinfer.py b/bigbio/biodatasets/bioinfer/bioinfer.py
index 8a71bbf59..dd1a7cfdd 100644
--- a/bigbio/biodatasets/bioinfer/bioinfer.py
+++ b/bigbio/biodatasets/bioinfer/bioinfer.py
@@ -25,9 +25,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = [Tags.PPI]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/biology_how_why_corpus/biology_how_why_corpus.py b/bigbio/biodatasets/biology_how_why_corpus/biology_how_why_corpus.py
index 751172900..41e8cca74 100644
--- a/bigbio/biodatasets/biology_how_why_corpus/biology_how_why_corpus.py
+++ b/bigbio/biodatasets/biology_how_why_corpus/biology_how_why_corpus.py
@@ -30,9 +30,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = [Tags.QA_HOW, Tags.QA_WHY]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/biomrc/biomrc.py b/bigbio/biodatasets/biomrc/biomrc.py
index a80f0955c..df849298a 100644
--- a/bigbio/biodatasets/biomrc/biomrc.py
+++ b/bigbio/biodatasets/biomrc/biomrc.py
@@ -31,9 +31,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = [Tags.QA_MULTIPLE_CHOICE, Tags.MRC, Tags.QA_CLOZE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bionlp_shared_task_2009/bionlp_shared_task_2009.py b/bigbio/biodatasets/bionlp_shared_task_2009/bionlp_shared_task_2009.py
index 1f32a25d6..4e3303137 100644
--- a/bigbio/biodatasets/bionlp_shared_task_2009/bionlp_shared_task_2009.py
+++ b/bigbio/biodatasets/bionlp_shared_task_2009/bionlp_shared_task_2009.py
@@ -21,10 +21,15 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import brat_parse_to_bigbio_kb, parse_brat_file
 
+# http://www.wikicfp.com/cfp/servlet/event.showcfp?eventid=4605&copyownerid=320
+# Task 1. Event detection and characterization
+# Task 2. Event argument recognition
+# Task 3. Recognition of negations and speculations
+_TAGS = [Tags.PPI, Tags.NEGATION, Tags.SPECULATION, Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bionlp_st_2011_epi/bionlp_st_2011_epi.py b/bigbio/biodatasets/bionlp_st_2011_epi/bionlp_st_2011_epi.py
index 4c2d5991b..7a6ea0ab1 100644
--- a/bigbio/biodatasets/bionlp_st_2011_epi/bionlp_st_2011_epi.py
+++ b/bigbio/biodatasets/bionlp_st_2011_epi/bionlp_st_2011_epi.py
@@ -21,13 +21,14 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "bionlp_st_2011_epi"
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = [Tags.EPIGENETICS, Tags.NEGATION, Tags.SPECULATION, Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bionlp_st_2011_ge/bionlp_st_2011_ge.py b/bigbio/biodatasets/bionlp_st_2011_ge/bionlp_st_2011_ge.py
index 112c03a4a..3eab0c715 100644
--- a/bigbio/biodatasets/bionlp_st_2011_ge/bionlp_st_2011_ge.py
+++ b/bigbio/biodatasets/bionlp_st_2011_ge/bionlp_st_2011_ge.py
@@ -20,13 +20,14 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "bionlp_st_2011_ge"
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = [Tags.NEGATION, Tags.SPECULATION, Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bionlp_st_2011_id/bionlp_st_2011_id.py b/bigbio/biodatasets/bionlp_st_2011_id/bionlp_st_2011_id.py
index 1d640ac3f..c5e0734d5 100644
--- a/bigbio/biodatasets/bionlp_st_2011_id/bionlp_st_2011_id.py
+++ b/bigbio/biodatasets/bionlp_st_2011_id/bionlp_st_2011_id.py
@@ -20,13 +20,21 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "bionlp_st_2011_id"
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = [
+    Tags.DISEASE,
+    Tags.GENE,
+    Tags.CHEMICAL,
+    Tags.ORGANISM,
+    Tags.SPECULATION,
+    Tags.NEGATION,
+]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bionlp_st_2011_rel/bionlp_st_2011_rel.py b/bigbio/biodatasets/bionlp_st_2011_rel/bionlp_st_2011_rel.py
index d6539fbb1..92a9c3b27 100644
--- a/bigbio/biodatasets/bionlp_st_2011_rel/bionlp_st_2011_rel.py
+++ b/bigbio/biodatasets/bionlp_st_2011_rel/bionlp_st_2011_rel.py
@@ -20,13 +20,14 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "bionlp_st_2011_rel"
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = [Tags.PART_OF, Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bionlp_st_2013_cg/bionlp_st_2013_cg.py b/bigbio/biodatasets/bionlp_st_2013_cg/bionlp_st_2013_cg.py
index f99326ec4..a72d0386d 100644
--- a/bigbio/biodatasets/bionlp_st_2013_cg/bionlp_st_2013_cg.py
+++ b/bigbio/biodatasets/bionlp_st_2013_cg/bionlp_st_2013_cg.py
@@ -20,12 +20,22 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "bionlp_st_2013_cg"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = [
+    Tags.DISEASE,
+    Tags.CANCER,
+    Tags.TISSUE,
+    Tags.ORGANISM,
+    Tags.CELL,
+    Tags.GENE,
+    Tags.CHEMICAL,
+    Tags.PATHWAY,
+]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bionlp_st_2013_ge/bionlp_st_2013_ge.py b/bigbio/biodatasets/bionlp_st_2013_ge/bionlp_st_2013_ge.py
index 93dfa58f3..74a76bdea 100644
--- a/bigbio/biodatasets/bionlp_st_2013_ge/bionlp_st_2013_ge.py
+++ b/bigbio/biodatasets/bionlp_st_2013_ge/bionlp_st_2013_ge.py
@@ -20,13 +20,14 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "bionlp_st_2013_ge"
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = [Tags.NEGATION, Tags.SPECULATION, Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bionlp_st_2013_gro/bionlp_st_2013_gro.py b/bigbio/biodatasets/bionlp_st_2013_gro/bionlp_st_2013_gro.py
index 277dfcec3..1241b22c5 100644
--- a/bigbio/biodatasets/bionlp_st_2013_gro/bionlp_st_2013_gro.py
+++ b/bigbio/biodatasets/bionlp_st_2013_gro/bionlp_st_2013_gro.py
@@ -21,13 +21,14 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "bionlp_st_2013_gro"
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = [Tags.GENE, Tags.ORGANISM, Tags.CELL, Tags.TISSUE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bionlp_st_2013_pc/bionlp_st_2013_pc.py b/bigbio/biodatasets/bionlp_st_2013_pc/bionlp_st_2013_pc.py
index 69fd79f90..f685ff3ea 100644
--- a/bigbio/biodatasets/bionlp_st_2013_pc/bionlp_st_2013_pc.py
+++ b/bigbio/biodatasets/bionlp_st_2013_pc/bionlp_st_2013_pc.py
@@ -20,12 +20,13 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "bionlp_st_2013_pc"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = [Tags.GENE, Tags.CHEMICAL, Tags.PATHWAY, Tags.NEGATION, Tags.SPECULATION]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bionlp_st_2019_bb/bionlp_st_2019_bb.py b/bigbio/biodatasets/bionlp_st_2019_bb/bionlp_st_2019_bb.py
index 026c83374..8d464b851 100644
--- a/bigbio/biodatasets/bionlp_st_2019_bb/bionlp_st_2019_bb.py
+++ b/bigbio/biodatasets/bionlp_st_2019_bb/bionlp_st_2019_bb.py
@@ -20,13 +20,14 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "bionlp_st_2019_bb"
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = [Tags.ORGANISM]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/biored/biored.py b/bigbio/biodatasets/biored/biored.py
index 250ce8374..b45bdacd1 100644
--- a/bigbio/biodatasets/biored/biored.py
+++ b/bigbio/biodatasets/biored/biored.py
@@ -26,10 +26,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 # TODO: Add BibTeX citation
+_TAGS = [Tags.GENE, Tags.DISEASE, Tags.CHEMICAL, Tags.VARIANT, Tags.PPI]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/biorelex/biorelex.py b/bigbio/biodatasets/biorelex/biorelex.py
index f6dac279a..1b1d2a129 100644
--- a/bigbio/biodatasets/biorelex/biorelex.py
+++ b/bigbio/biodatasets/biorelex/biorelex.py
@@ -35,10 +35,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 # TODO: Add BibTeX citation
+_TAGS = [Tags.GENE, Tags.CHEMICAL, Tags.VARIANT, Tags.NEGATION, Tags.SPECULATION]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bioscope/bioscope.py b/bigbio/biodatasets/bioscope/bioscope.py
index 5af2077a2..9e7d2e222 100644
--- a/bigbio/biodatasets/bioscope/bioscope.py
+++ b/bigbio/biodatasets/bioscope/bioscope.py
@@ -35,9 +35,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = [Tags.NEGATION, Tags.SPECULATION]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/biosses/biosses.py b/bigbio/biodatasets/biosses/biosses.py
index 059a03065..a55a313c2 100644
--- a/bigbio/biodatasets/biosses/biosses.py
+++ b/bigbio/biodatasets/biosses/biosses.py
@@ -28,11 +28,12 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "biosses"
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/cadec/cadec.py b/bigbio/biodatasets/cadec/cadec.py
index 3eb3f6da3..13784fd6f 100644
--- a/bigbio/biodatasets/cadec/cadec.py
+++ b/bigbio/biodatasets/cadec/cadec.py
@@ -35,9 +35,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/cantemist/cantemist.py b/bigbio/biodatasets/cantemist/cantemist.py
index 6a140d2a3..9d0c9d897 100644
--- a/bigbio/biodatasets/cantemist/cantemist.py
+++ b/bigbio/biodatasets/cantemist/cantemist.py
@@ -31,9 +31,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/cas/cas.py b/bigbio/biodatasets/cas/cas.py
index d563be29c..6c421ca86 100644
--- a/bigbio/biodatasets/cas/cas.py
+++ b/bigbio/biodatasets/cas/cas.py
@@ -6,9 +6,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.FR]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/cellfinder/cellfinder.py b/bigbio/biodatasets/cellfinder/cellfinder.py
index 935a919c0..9987ee5f6 100644
--- a/bigbio/biodatasets/cellfinder/cellfinder.py
+++ b/bigbio/biodatasets/cellfinder/cellfinder.py
@@ -28,9 +28,10 @@
 import bigbio.utils.parsing as parsing
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/chebi_nactem/chebi_nactem.py b/bigbio/biodatasets/chebi_nactem/chebi_nactem.py
index c6e96c6f6..b7edd94f5 100644
--- a/bigbio/biodatasets/chebi_nactem/chebi_nactem.py
+++ b/bigbio/biodatasets/chebi_nactem/chebi_nactem.py
@@ -21,10 +21,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import parse_brat_file
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/chemdner/chemdner.py b/bigbio/biodatasets/chemdner/chemdner.py
index 7b0b974f1..c1ec7c880 100644
--- a/bigbio/biodatasets/chemdner/chemdner.py
+++ b/bigbio/biodatasets/chemdner/chemdner.py
@@ -22,10 +22,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/chemprot/chemprot.py b/bigbio/biodatasets/chemprot/chemprot.py
index 620a1a449..c91d5aa81 100644
--- a/bigbio/biodatasets/chemprot/chemprot.py
+++ b/bigbio/biodatasets/chemprot/chemprot.py
@@ -25,9 +25,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/chia/chia.py b/bigbio/biodatasets/chia/chia.py
index 2328a4599..cc1b3d7ab 100644
--- a/bigbio/biodatasets/chia/chia.py
+++ b/bigbio/biodatasets/chia/chia.py
@@ -25,9 +25,10 @@
 import bigbio.utils.parsing as parsing
 import bigbio.utils.schemas as schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/citation_gia_test_collection/citation_gia_test_collection.py b/bigbio/biodatasets/citation_gia_test_collection/citation_gia_test_collection.py
index 0713a87f1..63efad002 100644
--- a/bigbio/biodatasets/citation_gia_test_collection/citation_gia_test_collection.py
+++ b/bigbio/biodatasets/citation_gia_test_collection/citation_gia_test_collection.py
@@ -24,9 +24,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/codiesp/codiesp.py b/bigbio/biodatasets/codiesp/codiesp.py
index 1cede6227..aea9c7860 100644
--- a/bigbio/biodatasets/codiesp/codiesp.py
+++ b/bigbio/biodatasets/codiesp/codiesp.py
@@ -35,9 +35,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/cord_ner/cord_ner.py b/bigbio/biodatasets/cord_ner/cord_ner.py
index 8724cf64f..38f956daa 100644
--- a/bigbio/biodatasets/cord_ner/cord_ner.py
+++ b/bigbio/biodatasets/cord_ner/cord_ner.py
@@ -25,9 +25,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/ctebmsp/ctebmsp.py b/bigbio/biodatasets/ctebmsp/ctebmsp.py
index 92ca3519d..42c23ef23 100644
--- a/bigbio/biodatasets/ctebmsp/ctebmsp.py
+++ b/bigbio/biodatasets/ctebmsp/ctebmsp.py
@@ -31,9 +31,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.ES]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/ddi_corpus/ddi_corpus.py b/bigbio/biodatasets/ddi_corpus/ddi_corpus.py
index 4d8fb8937..970cdbb6b 100644
--- a/bigbio/biodatasets/ddi_corpus/ddi_corpus.py
+++ b/bigbio/biodatasets/ddi_corpus/ddi_corpus.py
@@ -27,9 +27,10 @@
 import bigbio.utils.parsing as parsing
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/diann_iber_eval/diann_iber_eval.py b/bigbio/biodatasets/diann_iber_eval/diann_iber_eval.py
index a9f4a9279..9ae958463 100644
--- a/bigbio/biodatasets/diann_iber_eval/diann_iber_eval.py
+++ b/bigbio/biodatasets/diann_iber_eval/diann_iber_eval.py
@@ -27,9 +27,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN, Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/distemist/distemist.py b/bigbio/biodatasets/distemist/distemist.py
index b9dfaf5d0..798c568a0 100644
--- a/bigbio/biodatasets/distemist/distemist.py
+++ b/bigbio/biodatasets/distemist/distemist.py
@@ -21,9 +21,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/ebm_pico/ebm_pico.py b/bigbio/biodatasets/ebm_pico/ebm_pico.py
index 5e7078e06..f20a3379d 100644
--- a/bigbio/biodatasets/ebm_pico/ebm_pico.py
+++ b/bigbio/biodatasets/ebm_pico/ebm_pico.py
@@ -26,9 +26,10 @@
 import datasets
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/ehr_rel/ehr_rel.py b/bigbio/biodatasets/ehr_rel/ehr_rel.py
index 90235ee4a..2ad2f965a 100644
--- a/bigbio/biodatasets/ehr_rel/ehr_rel.py
+++ b/bigbio/biodatasets/ehr_rel/ehr_rel.py
@@ -28,9 +28,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/essai/essai.py b/bigbio/biodatasets/essai/essai.py
index 275aa115c..289055a63 100644
--- a/bigbio/biodatasets/essai/essai.py
+++ b/bigbio/biodatasets/essai/essai.py
@@ -6,9 +6,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.FR]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/euadr/euadr.py b/bigbio/biodatasets/euadr/euadr.py
index 35b276646..e68a1feb1 100644
--- a/bigbio/biodatasets/euadr/euadr.py
+++ b/bigbio/biodatasets/euadr/euadr.py
@@ -4,9 +4,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/evidence_inference/evidence_inference.py b/bigbio/biodatasets/evidence_inference/evidence_inference.py
index 83fd2ca74..e21ce4f47 100644
--- a/bigbio/biodatasets/evidence_inference/evidence_inference.py
+++ b/bigbio/biodatasets/evidence_inference/evidence_inference.py
@@ -32,9 +32,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/gad/gad.py b/bigbio/biodatasets/gad/gad.py
index 4a9286cea..d12e7b5b2 100644
--- a/bigbio/biodatasets/gad/gad.py
+++ b/bigbio/biodatasets/gad/gad.py
@@ -6,13 +6,14 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "gad"
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _LOCAL = False
 _CITATION = """\
diff --git a/bigbio/biodatasets/genetag/genetag.py b/bigbio/biodatasets/genetag/genetag.py
index e53b49185..bfe13bf53 100644
--- a/bigbio/biodatasets/genetag/genetag.py
+++ b/bigbio/biodatasets/genetag/genetag.py
@@ -29,9 +29,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/genia_ptm_event_corpus/genia_ptm_event_corpus.py b/bigbio/biodatasets/genia_ptm_event_corpus/genia_ptm_event_corpus.py
index 0e3f25369..ed8741663 100644
--- a/bigbio/biodatasets/genia_ptm_event_corpus/genia_ptm_event_corpus.py
+++ b/bigbio/biodatasets/genia_ptm_event_corpus/genia_ptm_event_corpus.py
@@ -29,9 +29,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _LOCAL = False
 _CITATION = """\
diff --git a/bigbio/biodatasets/genia_relation_corpus/genia_relation_corpus.py b/bigbio/biodatasets/genia_relation_corpus/genia_relation_corpus.py
index f0a730590..81c833687 100644
--- a/bigbio/biodatasets/genia_relation_corpus/genia_relation_corpus.py
+++ b/bigbio/biodatasets/genia_relation_corpus/genia_relation_corpus.py
@@ -31,9 +31,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/genia_term_corpus/genia_term_corpus.py b/bigbio/biodatasets/genia_term_corpus/genia_term_corpus.py
index 0ae321ceb..7516e830d 100644
--- a/bigbio/biodatasets/genia_term_corpus/genia_term_corpus.py
+++ b/bigbio/biodatasets/genia_term_corpus/genia_term_corpus.py
@@ -28,9 +28,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/geokhoj_v1/geokhoj_v1.py b/bigbio/biodatasets/geokhoj_v1/geokhoj_v1.py
index 1954035f8..5b37531a3 100644
--- a/bigbio/biodatasets/geokhoj_v1/geokhoj_v1.py
+++ b/bigbio/biodatasets/geokhoj_v1/geokhoj_v1.py
@@ -28,9 +28,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/gnormplus/gnormplus.py b/bigbio/biodatasets/gnormplus/gnormplus.py
index 7fd0e750c..28d16d360 100644
--- a/bigbio/biodatasets/gnormplus/gnormplus.py
+++ b/bigbio/biodatasets/gnormplus/gnormplus.py
@@ -23,10 +23,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/hallmarks_of_cancer/hallmarks_of_cancer.py b/bigbio/biodatasets/hallmarks_of_cancer/hallmarks_of_cancer.py
index 83d19030f..73439fe04 100644
--- a/bigbio/biodatasets/hallmarks_of_cancer/hallmarks_of_cancer.py
+++ b/bigbio/biodatasets/hallmarks_of_cancer/hallmarks_of_cancer.py
@@ -18,9 +18,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/hprd50/hprd50.py b/bigbio/biodatasets/hprd50/hprd50.py
index 91b18470e..63de60017 100644
--- a/bigbio/biodatasets/hprd50/hprd50.py
+++ b/bigbio/biodatasets/hprd50/hprd50.py
@@ -38,10 +38,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 # TODO: Add BibTeX citation
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/iepa/iepa.py b/bigbio/biodatasets/iepa/iepa.py
index 5efffd9f6..157893562 100644
--- a/bigbio/biodatasets/iepa/iepa.py
+++ b/bigbio/biodatasets/iepa/iepa.py
@@ -30,9 +30,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/jnlpba/jnlpba.py b/bigbio/biodatasets/jnlpba/jnlpba.py
index d163c385e..a10a42981 100644
--- a/bigbio/biodatasets/jnlpba/jnlpba.py
+++ b/bigbio/biodatasets/jnlpba/jnlpba.py
@@ -26,9 +26,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/linnaeus/linnaeus.py b/bigbio/biodatasets/linnaeus/linnaeus.py
index 14c1b6ef4..4a079d1d9 100644
--- a/bigbio/biodatasets/linnaeus/linnaeus.py
+++ b/bigbio/biodatasets/linnaeus/linnaeus.py
@@ -32,9 +32,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/lll/lll.py b/bigbio/biodatasets/lll/lll.py
index 34259f128..ccc4eca80 100644
--- a/bigbio/biodatasets/lll/lll.py
+++ b/bigbio/biodatasets/lll/lll.py
@@ -39,6 +39,7 @@
 from bigbio.utils.constants import BigBioValues, Lang, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/mantra_gsc/mantra_gsc.py b/bigbio/biodatasets/mantra_gsc/mantra_gsc.py
index e014f006d..0db20bd50 100644
--- a/bigbio/biodatasets/mantra_gsc/mantra_gsc.py
+++ b/bigbio/biodatasets/mantra_gsc/mantra_gsc.py
@@ -22,9 +22,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN, Lang.FR, Lang.DE, Lang.NL, Lang.ES]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/mayosrs/mayosrs.py b/bigbio/biodatasets/mayosrs/mayosrs.py
index 033a93b89..160a66668 100644
--- a/bigbio/biodatasets/mayosrs/mayosrs.py
+++ b/bigbio/biodatasets/mayosrs/mayosrs.py
@@ -25,9 +25,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/med_qa/med_qa.py b/bigbio/biodatasets/med_qa/med_qa.py
index e83b70e90..4cdbc1d96 100644
--- a/bigbio/biodatasets/med_qa/med_qa.py
+++ b/bigbio/biodatasets/med_qa/med_qa.py
@@ -29,9 +29,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/medal/medal.py b/bigbio/biodatasets/medal/medal.py
index 2766f97b7..03df40fc3 100644
--- a/bigbio/biodatasets/medal/medal.py
+++ b/bigbio/biodatasets/medal/medal.py
@@ -26,11 +26,12 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 logger = datasets.logging.get_logger(__name__)
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/meddialog/meddialog.py b/bigbio/biodatasets/meddialog/meddialog.py
index 90e77e55c..4d0e95b64 100644
--- a/bigbio/biodatasets/meddialog/meddialog.py
+++ b/bigbio/biodatasets/meddialog/meddialog.py
@@ -20,11 +20,12 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "meddialog"
 
+_TAGS = []
 _LANGUAGES = [Lang.EN, Lang.ZH]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/meddocan/meddocan.py b/bigbio/biodatasets/meddocan/meddocan.py
index a2e66d64b..e1fb393d1 100644
--- a/bigbio/biodatasets/meddocan/meddocan.py
+++ b/bigbio/biodatasets/meddocan/meddocan.py
@@ -29,9 +29,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/medhop/medhop.py b/bigbio/biodatasets/medhop/medhop.py
index 1b6012a7c..96c926399 100644
--- a/bigbio/biodatasets/medhop/medhop.py
+++ b/bigbio/biodatasets/medhop/medhop.py
@@ -20,9 +20,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/medical_data/medical_data.py b/bigbio/biodatasets/medical_data/medical_data.py
index 986324525..80ddfdef7 100644
--- a/bigbio/biodatasets/medical_data/medical_data.py
+++ b/bigbio/biodatasets/medical_data/medical_data.py
@@ -21,9 +21,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _LOCAL = True
 _CITATION = """\
diff --git a/bigbio/biodatasets/mediqa_nli/mediqa_nli.py b/bigbio/biodatasets/mediqa_nli/mediqa_nli.py
index 3b82f39fa..153df024a 100644
--- a/bigbio/biodatasets/mediqa_nli/mediqa_nli.py
+++ b/bigbio/biodatasets/mediqa_nli/mediqa_nli.py
@@ -44,9 +44,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/mediqa_qa/mediqa_qa.py b/bigbio/biodatasets/mediqa_qa/mediqa_qa.py
index 0e85d9268..1c26254e7 100644
--- a/bigbio/biodatasets/mediqa_qa/mediqa_qa.py
+++ b/bigbio/biodatasets/mediqa_qa/mediqa_qa.py
@@ -25,9 +25,10 @@
 import bigbio.utils.parsing as parsing
 import bigbio.utils.schemas as schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/mediqa_rqe/mediqa_rqe.py b/bigbio/biodatasets/mediqa_rqe/mediqa_rqe.py
index 9b9fe79ed..ad61f5313 100644
--- a/bigbio/biodatasets/mediqa_rqe/mediqa_rqe.py
+++ b/bigbio/biodatasets/mediqa_rqe/mediqa_rqe.py
@@ -25,9 +25,10 @@
 import bigbio.utils.parsing as parsing
 import bigbio.utils.schemas as schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/medmentions/medmentions.py b/bigbio/biodatasets/medmentions/medmentions.py
index a1e8e2d96..a1322f7e1 100644
--- a/bigbio/biodatasets/medmentions/medmentions.py
+++ b/bigbio/biodatasets/medmentions/medmentions.py
@@ -43,9 +43,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/mednli/mednli.py b/bigbio/biodatasets/mednli/mednli.py
index 5e6c8cace..4488852fd 100644
--- a/bigbio/biodatasets/mednli/mednli.py
+++ b/bigbio/biodatasets/mednli/mednli.py
@@ -42,9 +42,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/meqsum/meqsum.py b/bigbio/biodatasets/meqsum/meqsum.py
index 684877dd6..21fe7f58c 100644
--- a/bigbio/biodatasets/meqsum/meqsum.py
+++ b/bigbio/biodatasets/meqsum/meqsum.py
@@ -30,9 +30,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/minimayosrs/minimayosrs.py b/bigbio/biodatasets/minimayosrs/minimayosrs.py
index 1169fa673..f8f095bbe 100644
--- a/bigbio/biodatasets/minimayosrs/minimayosrs.py
+++ b/bigbio/biodatasets/minimayosrs/minimayosrs.py
@@ -25,9 +25,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/mirna/mirna.py b/bigbio/biodatasets/mirna/mirna.py
index 2b128f216..aa7e72793 100644
--- a/bigbio/biodatasets/mirna/mirna.py
+++ b/bigbio/biodatasets/mirna/mirna.py
@@ -19,9 +19,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/mlee/mlee.py b/bigbio/biodatasets/mlee/mlee.py
index d4e3db091..478079624 100644
--- a/bigbio/biodatasets/mlee/mlee.py
+++ b/bigbio/biodatasets/mlee/mlee.py
@@ -25,13 +25,14 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "mlee"
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/mqp/mqp.py b/bigbio/biodatasets/mqp/mqp.py
index 6adf36a95..f1d47b1f3 100644
--- a/bigbio/biodatasets/mqp/mqp.py
+++ b/bigbio/biodatasets/mqp/mqp.py
@@ -26,9 +26,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/msh_wsd/msh_wsd.py b/bigbio/biodatasets/msh_wsd/msh_wsd.py
index 59525ce36..2195106ac 100644
--- a/bigbio/biodatasets/msh_wsd/msh_wsd.py
+++ b/bigbio/biodatasets/msh_wsd/msh_wsd.py
@@ -40,9 +40,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/bigbio/biodatasets/muchmore/muchmore.py b/bigbio/biodatasets/muchmore/muchmore.py
index f744477fb..3ae9d047d 100644
--- a/bigbio/biodatasets/muchmore/muchmore.py
+++ b/bigbio/biodatasets/muchmore/muchmore.py
@@ -73,9 +73,10 @@
 # Buitelaar, Paul / Declerck, Thierry / Sacaleanu, Bogdan / Vintar, Spela / Raileanu, Diana / Crispi, Claudia: A Multi-Layered, XML-Based Approach to the Integration of Linguistic and Semantic Annotations. In: Proceedings of EACL 2003 Workshop on Language Technology and the Semantic Web (NLPXML’03), Budapest, Hungary, April 2003.
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN, Lang.DE]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/multi_xscience/multi_xscience.py b/bigbio/biodatasets/multi_xscience/multi_xscience.py
index ab8c55b6f..6be1347be 100644
--- a/bigbio/biodatasets/multi_xscience/multi_xscience.py
+++ b/bigbio/biodatasets/multi_xscience/multi_xscience.py
@@ -21,9 +21,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/mutation_finder/mutation_finder.py b/bigbio/biodatasets/mutation_finder/mutation_finder.py
index 277d5db4e..5dc113a7c 100644
--- a/bigbio/biodatasets/mutation_finder/mutation_finder.py
+++ b/bigbio/biodatasets/mutation_finder/mutation_finder.py
@@ -20,9 +20,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/n2c2_2006_deid/n2c2_2006_deid.py b/bigbio/biodatasets/n2c2_2006_deid/n2c2_2006_deid.py
index f3cac12f6..9144f25f0 100644
--- a/bigbio/biodatasets/n2c2_2006_deid/n2c2_2006_deid.py
+++ b/bigbio/biodatasets/n2c2_2006_deid/n2c2_2006_deid.py
@@ -65,12 +65,13 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "n2c2_2006"
 
 # https://academic.oup.com/jamia/article/14/5/550/720189
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2006_smokers/n2c2_2006_smokers.py b/bigbio/biodatasets/n2c2_2006_smokers/n2c2_2006_smokers.py
index 688400469..6e0fc9209 100644
--- a/bigbio/biodatasets/n2c2_2006_smokers/n2c2_2006_smokers.py
+++ b/bigbio/biodatasets/n2c2_2006_smokers/n2c2_2006_smokers.py
@@ -63,12 +63,13 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "n2c2_2006"
 
 # https://academic.oup.com/jamia/article/15/1/14/779738
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2008/n2c2_2008.py b/bigbio/biodatasets/n2c2_2008/n2c2_2008.py
index 0167def08..4b3054ac1 100644
--- a/bigbio/biodatasets/n2c2_2008/n2c2_2008.py
+++ b/bigbio/biodatasets/n2c2_2008/n2c2_2008.py
@@ -71,12 +71,13 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "n2c2_2008"
 
 # https://academic.oup.com/jamia/article/16/4/561/766997
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2009/n2c2_2009.py b/bigbio/biodatasets/n2c2_2009/n2c2_2009.py
index 3d9328a99..88f1e60c5 100644
--- a/bigbio/biodatasets/n2c2_2009/n2c2_2009.py
+++ b/bigbio/biodatasets/n2c2_2009/n2c2_2009.py
@@ -57,9 +57,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2010/n2c2_2010.py b/bigbio/biodatasets/n2c2_2010/n2c2_2010.py
index 277081cf5..549ac121a 100644
--- a/bigbio/biodatasets/n2c2_2010/n2c2_2010.py
+++ b/bigbio/biodatasets/n2c2_2010/n2c2_2010.py
@@ -52,9 +52,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2011/n2c2_2011.py b/bigbio/biodatasets/n2c2_2011/n2c2_2011.py
index 44328533a..67fc5e684 100644
--- a/bigbio/biodatasets/n2c2_2011/n2c2_2011.py
+++ b/bigbio/biodatasets/n2c2_2011/n2c2_2011.py
@@ -72,12 +72,13 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "n2c2_2011"
 
 # https://academic.oup.com/jamia/article/19/5/786/716138
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2014_deid/n2c2_2014_deid.py b/bigbio/biodatasets/n2c2_2014_deid/n2c2_2014_deid.py
index 1e3992a19..75f972cb8 100644
--- a/bigbio/biodatasets/n2c2_2014_deid/n2c2_2014_deid.py
+++ b/bigbio/biodatasets/n2c2_2014_deid/n2c2_2014_deid.py
@@ -59,9 +59,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _LOCAL = True
 _CITATION = """\
diff --git a/bigbio/biodatasets/n2c2_2014_risk_factors/n2c2_2014_risk_factors.py b/bigbio/biodatasets/n2c2_2014_risk_factors/n2c2_2014_risk_factors.py
index 524a48fca..fec27a82b 100644
--- a/bigbio/biodatasets/n2c2_2014_risk_factors/n2c2_2014_risk_factors.py
+++ b/bigbio/biodatasets/n2c2_2014_risk_factors/n2c2_2014_risk_factors.py
@@ -59,9 +59,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _LOCAL = True
 _CITATION = """\
diff --git a/bigbio/biodatasets/n2c2_2018_track1/n2c2_2018_track1.py b/bigbio/biodatasets/n2c2_2018_track1/n2c2_2018_track1.py
index 27d0f5ae9..59411a293 100644
--- a/bigbio/biodatasets/n2c2_2018_track1/n2c2_2018_track1.py
+++ b/bigbio/biodatasets/n2c2_2018_track1/n2c2_2018_track1.py
@@ -43,9 +43,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2018_track2/n2c2_2018_track2.py b/bigbio/biodatasets/n2c2_2018_track2/n2c2_2018_track2.py
index ff26a9ebe..13ddc19b1 100644
--- a/bigbio/biodatasets/n2c2_2018_track2/n2c2_2018_track2.py
+++ b/bigbio/biodatasets/n2c2_2018_track2/n2c2_2018_track2.py
@@ -46,9 +46,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/nagel/nagel.py b/bigbio/biodatasets/nagel/nagel.py
index 260224c62..fd8a05f68 100644
--- a/bigbio/biodatasets/nagel/nagel.py
+++ b/bigbio/biodatasets/nagel/nagel.py
@@ -23,9 +23,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/ncbi_disease/ncbi_disease.py b/bigbio/biodatasets/ncbi_disease/ncbi_disease.py
index 4d85e9ac2..1efee20e5 100644
--- a/bigbio/biodatasets/ncbi_disease/ncbi_disease.py
+++ b/bigbio/biodatasets/ncbi_disease/ncbi_disease.py
@@ -26,9 +26,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/nlm_gene/nlm_gene.py b/bigbio/biodatasets/nlm_gene/nlm_gene.py
index 2d7e1a4bb..1a6c0e06f 100644
--- a/bigbio/biodatasets/nlm_gene/nlm_gene.py
+++ b/bigbio/biodatasets/nlm_gene/nlm_gene.py
@@ -22,10 +22,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/nlm_wsd/nlm_wsd.py b/bigbio/biodatasets/nlm_wsd/nlm_wsd.py
index 01620230d..7437d8df2 100644
--- a/bigbio/biodatasets/nlm_wsd/nlm_wsd.py
+++ b/bigbio/biodatasets/nlm_wsd/nlm_wsd.py
@@ -53,9 +53,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/bigbio/biodatasets/nlmchem/nlmchem.py b/bigbio/biodatasets/nlmchem/nlmchem.py
index ec83fe2ea..e816e3788 100644
--- a/bigbio/biodatasets/nlmchem/nlmchem.py
+++ b/bigbio/biodatasets/nlmchem/nlmchem.py
@@ -22,10 +22,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py b/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py
index 7066df6e4..ff8734739 100644
--- a/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py
+++ b/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py
@@ -63,9 +63,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN, Lang.ZH, Lang.JA]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/osiris/osiris.py b/bigbio/biodatasets/osiris/osiris.py
index 3929ca5d9..b83262563 100644
--- a/bigbio/biodatasets/osiris/osiris.py
+++ b/bigbio/biodatasets/osiris/osiris.py
@@ -24,9 +24,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/paramed/paramed.py b/bigbio/biodatasets/paramed/paramed.py
index 6791791e0..50966a93a 100644
--- a/bigbio/biodatasets/paramed/paramed.py
+++ b/bigbio/biodatasets/paramed/paramed.py
@@ -26,12 +26,13 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 logger = datasets.logging.get_logger(__name__)
 
 
+_TAGS = []
 _LANGUAGES = [Lang.EN, Lang.ZH]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/pcr/pcr.py b/bigbio/biodatasets/pcr/pcr.py
index e2e10566a..28e3987e9 100644
--- a/bigbio/biodatasets/pcr/pcr.py
+++ b/bigbio/biodatasets/pcr/pcr.py
@@ -25,9 +25,10 @@
 import bigbio.utils.parsing as parsing
 import bigbio.utils.schemas as schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/pdr/pdr.py b/bigbio/biodatasets/pdr/pdr.py
index a41255e69..1c7bb9f77 100644
--- a/bigbio/biodatasets/pdr/pdr.py
+++ b/bigbio/biodatasets/pdr/pdr.py
@@ -28,9 +28,10 @@
 import bigbio.utils.parsing as parsing
 import bigbio.utils.schemas as schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/pharmaconer/pharmaconer.py b/bigbio/biodatasets/pharmaconer/pharmaconer.py
index 61a28ab9f..f20fd87f1 100644
--- a/bigbio/biodatasets/pharmaconer/pharmaconer.py
+++ b/bigbio/biodatasets/pharmaconer/pharmaconer.py
@@ -31,9 +31,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/pho_ner/pho_ner.py b/bigbio/biodatasets/pho_ner/pho_ner.py
index 28f8829a2..4ae3852d1 100644
--- a/bigbio/biodatasets/pho_ner/pho_ner.py
+++ b/bigbio/biodatasets/pho_ner/pho_ner.py
@@ -20,9 +20,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
+_TAGS = []
 _LANGUAGES = [Lang.VI]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/pico_extraction/pico_extraction.py b/bigbio/biodatasets/pico_extraction/pico_extraction.py
index b05092615..ab4c36f25 100644
--- a/bigbio/biodatasets/pico_extraction/pico_extraction.py
+++ b/bigbio/biodatasets/pico_extraction/pico_extraction.py
@@ -27,9 +27,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/pmc_patients/pmc_patients.py b/bigbio/biodatasets/pmc_patients/pmc_patients.py
index b12a79ae2..05823f401 100644
--- a/bigbio/biodatasets/pmc_patients/pmc_patients.py
+++ b/bigbio/biodatasets/pmc_patients/pmc_patients.py
@@ -27,9 +27,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/progene/progene.py b/bigbio/biodatasets/progene/progene.py
index 49aec1dbd..f1ce6223a 100644
--- a/bigbio/biodatasets/progene/progene.py
+++ b/bigbio/biodatasets/progene/progene.py
@@ -22,9 +22,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/psytar/psytar.py b/bigbio/biodatasets/psytar/psytar.py
index 61a16aa61..e0931739a 100644
--- a/bigbio/biodatasets/psytar/psytar.py
+++ b/bigbio/biodatasets/psytar/psytar.py
@@ -51,9 +51,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/pubhealth/pubhealth.py b/bigbio/biodatasets/pubhealth/pubhealth.py
index 63c411bfb..5320c16e6 100644
--- a/bigbio/biodatasets/pubhealth/pubhealth.py
+++ b/bigbio/biodatasets/pubhealth/pubhealth.py
@@ -26,11 +26,12 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 logger = datasets.utils.logging.get_logger(__name__)
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/pubmed_qa/pubmed_qa.py b/bigbio/biodatasets/pubmed_qa/pubmed_qa.py
index c0e0228f2..4bdf15062 100644
--- a/bigbio/biodatasets/pubmed_qa/pubmed_qa.py
+++ b/bigbio/biodatasets/pubmed_qa/pubmed_qa.py
@@ -30,6 +30,7 @@
 from bigbio.utils.constants import BigBioValues, Lang, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/pubtator_central/pubtator_central.py b/bigbio/biodatasets/pubtator_central/pubtator_central.py
index 972000e64..50048a96f 100644
--- a/bigbio/biodatasets/pubtator_central/pubtator_central.py
+++ b/bigbio/biodatasets/pubtator_central/pubtator_central.py
@@ -48,9 +48,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/quaero/quaero.py b/bigbio/biodatasets/quaero/quaero.py
index 09a8e0598..29558a115 100644
--- a/bigbio/biodatasets/quaero/quaero.py
+++ b/bigbio/biodatasets/quaero/quaero.py
@@ -5,10 +5,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
+_TAGS = []
 _LANGUAGES = [Lang.FR]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/scai_chemical/scai_chemical.py b/bigbio/biodatasets/scai_chemical/scai_chemical.py
index 1abe0fb03..e3c4ef800 100644
--- a/bigbio/biodatasets/scai_chemical/scai_chemical.py
+++ b/bigbio/biodatasets/scai_chemical/scai_chemical.py
@@ -28,9 +28,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/scai_disease/scai_disease.py b/bigbio/biodatasets/scai_disease/scai_disease.py
index 711e54b19..4b7905d9b 100644
--- a/bigbio/biodatasets/scai_disease/scai_disease.py
+++ b/bigbio/biodatasets/scai_disease/scai_disease.py
@@ -30,9 +30,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/scicite/scicite.py b/bigbio/biodatasets/scicite/scicite.py
index 0fe741492..3a0f3284b 100644
--- a/bigbio/biodatasets/scicite/scicite.py
+++ b/bigbio/biodatasets/scicite/scicite.py
@@ -37,9 +37,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/scielo/scielo.py b/bigbio/biodatasets/scielo/scielo.py
index 73aea9985..44659df74 100644
--- a/bigbio/biodatasets/scielo/scielo.py
+++ b/bigbio/biodatasets/scielo/scielo.py
@@ -21,9 +21,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN, Lang.ES, Lang.PT]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/scifact/scifact.py b/bigbio/biodatasets/scifact/scifact.py
index 22065ec20..14a48e21a 100644
--- a/bigbio/biodatasets/scifact/scifact.py
+++ b/bigbio/biodatasets/scifact/scifact.py
@@ -22,9 +22,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/sciq/sciq.py b/bigbio/biodatasets/sciq/sciq.py
index 2f23906d7..54f44e622 100644
--- a/bigbio/biodatasets/sciq/sciq.py
+++ b/bigbio/biodatasets/sciq/sciq.py
@@ -20,11 +20,12 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "sciq"
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/scitail/scitail.py b/bigbio/biodatasets/scitail/scitail.py
index b945bdaa9..c5dcdca57 100644
--- a/bigbio/biodatasets/scitail/scitail.py
+++ b/bigbio/biodatasets/scitail/scitail.py
@@ -30,9 +30,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/seth_corpus/seth_corpus.py b/bigbio/biodatasets/seth_corpus/seth_corpus.py
index 70d4c6d48..82b84b534 100644
--- a/bigbio/biodatasets/seth_corpus/seth_corpus.py
+++ b/bigbio/biodatasets/seth_corpus/seth_corpus.py
@@ -28,9 +28,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/spl_adr_200db/spl_adr_200db.py b/bigbio/biodatasets/spl_adr_200db/spl_adr_200db.py
index 1cf5812ab..a6b16123f 100644
--- a/bigbio/biodatasets/spl_adr_200db/spl_adr_200db.py
+++ b/bigbio/biodatasets/spl_adr_200db/spl_adr_200db.py
@@ -64,9 +64,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/swedish_medical_ner/swedish_medical_ner.py b/bigbio/biodatasets/swedish_medical_ner/swedish_medical_ner.py
index 9a1ff0769..1ec26aca1 100644
--- a/bigbio/biodatasets/swedish_medical_ner/swedish_medical_ner.py
+++ b/bigbio/biodatasets/swedish_medical_ner/swedish_medical_ner.py
@@ -38,11 +38,12 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "swedish_medical_ner"
 
+_TAGS = []
 _LANGUAGES = [Lang.SV]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/thomas2011/thomas2011.py b/bigbio/biodatasets/thomas2011/thomas2011.py
index 6e7c24842..d2747c4a8 100644
--- a/bigbio/biodatasets/thomas2011/thomas2011.py
+++ b/bigbio/biodatasets/thomas2011/thomas2011.py
@@ -49,10 +49,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
 # TODO: Add BibTeX citation
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/tmvar_v1/tmvar_v1.py b/bigbio/biodatasets/tmvar_v1/tmvar_v1.py
index e2d59b74b..f6cb22e59 100644
--- a/bigbio/biodatasets/tmvar_v1/tmvar_v1.py
+++ b/bigbio/biodatasets/tmvar_v1/tmvar_v1.py
@@ -23,9 +23,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/tmvar_v2/tmvar_v2.py b/bigbio/biodatasets/tmvar_v2/tmvar_v2.py
index 8e766d028..b522524b6 100644
--- a/bigbio/biodatasets/tmvar_v2/tmvar_v2.py
+++ b/bigbio/biodatasets/tmvar_v2/tmvar_v2.py
@@ -23,9 +23,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/tmvar_v3/tmvar_v3.py b/bigbio/biodatasets/tmvar_v3/tmvar_v3.py
index c5b7d93dc..1e2bb9dd4 100644
--- a/bigbio/biodatasets/tmvar_v3/tmvar_v3.py
+++ b/bigbio/biodatasets/tmvar_v3/tmvar_v3.py
@@ -22,7 +22,7 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _CITATION = """\
@@ -44,6 +44,7 @@
   copyright = {Creative Commons Attribution 4.0 International}
 }
 """
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/twadrl/twadrl.py b/bigbio/biodatasets/twadrl/twadrl.py
index d73089124..abfb83a91 100644
--- a/bigbio/biodatasets/twadrl/twadrl.py
+++ b/bigbio/biodatasets/twadrl/twadrl.py
@@ -21,11 +21,12 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "twadrl"
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/umnsrs/umnsrs.py b/bigbio/biodatasets/umnsrs/umnsrs.py
index 6ec1416a2..8bb39e554 100644
--- a/bigbio/biodatasets/umnsrs/umnsrs.py
+++ b/bigbio/biodatasets/umnsrs/umnsrs.py
@@ -29,9 +29,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/verspoor_2013/verspoor_2013.py b/bigbio/biodatasets/verspoor_2013/verspoor_2013.py
index be5f625e6..58f613132 100644
--- a/bigbio/biodatasets/verspoor_2013/verspoor_2013.py
+++ b/bigbio/biodatasets/verspoor_2013/verspoor_2013.py
@@ -32,9 +32,10 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False

From 4f328b4ff898e06d273b9668e906e195f75f35c1 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Tue, 7 Jun 2022 18:43:25 +0200
Subject: [PATCH 02/20] add _TAGS

---
 examples/bc5cdr.py              | 3 ++-
 examples/bioasq_task_b.py       | 8 +++++++-
 examples/biosses.py             | 3 ++-
 examples/chemprot.py            | 3 ++-
 examples/hallmarks_of_cancer.py | 3 ++-
 examples/mlee.py                | 3 ++-
 examples/mqp.py                 | 3 ++-
 examples/muchmore.py            | 3 ++-
 examples/n2c2_2011.py           | 3 ++-
 examples/nlmchem.py             | 3 ++-
 examples/paramed.py             | 7 ++++---
 examples/scitail.py             | 3 ++-
 12 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/examples/bc5cdr.py b/examples/bc5cdr.py
index 111d7beea..ee325c162 100644
--- a/examples/bc5cdr.py
+++ b/examples/bc5cdr.py
@@ -31,10 +31,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
+_TAGS = [Tags.DISEASE, Tags.CHEMICAL]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/examples/bioasq_task_b.py b/examples/bioasq_task_b.py
index da38146a9..9026918f6 100644
--- a/examples/bioasq_task_b.py
+++ b/examples/bioasq_task_b.py
@@ -32,9 +32,15 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
+_TAGS = [
+    Tags.QA_YESNO,
+    Tags.QA_FACTOID,
+    Tags.QA_LIST,
+    Tags.QA_SUMMARY_ANSWER,
+]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/examples/biosses.py b/examples/biosses.py
index 059a03065..80aa75b36 100644
--- a/examples/biosses.py
+++ b/examples/biosses.py
@@ -28,11 +28,12 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "biosses"
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/examples/chemprot.py b/examples/chemprot.py
index 1db648c73..c29b362ae 100644
--- a/examples/chemprot.py
+++ b/examples/chemprot.py
@@ -25,9 +25,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/examples/hallmarks_of_cancer.py b/examples/hallmarks_of_cancer.py
index ae8673b40..50600def9 100644
--- a/examples/hallmarks_of_cancer.py
+++ b/examples/hallmarks_of_cancer.py
@@ -19,9 +19,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/examples/mlee.py b/examples/mlee.py
index 2f6b09ddd..e0330d53a 100644
--- a/examples/mlee.py
+++ b/examples/mlee.py
@@ -25,13 +25,14 @@
 
 from bigbio.utils import parsing, schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "mlee"
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/examples/mqp.py b/examples/mqp.py
index b42cbd539..c9e122bc9 100644
--- a/examples/mqp.py
+++ b/examples/mqp.py
@@ -26,9 +26,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/examples/muchmore.py b/examples/muchmore.py
index da6bc7430..9afb2982d 100644
--- a/examples/muchmore.py
+++ b/examples/muchmore.py
@@ -73,9 +73,10 @@
 # Buitelaar, Paul / Declerck, Thierry / Sacaleanu, Bogdan / Vintar, Spela / Raileanu, Diana / Crispi, Claudia: A Multi-Layered, XML-Based Approach to the Integration of Linguistic and Semantic Annotations. In: Proceedings of EACL 2003 Workshop on Language Technology and the Semantic Web (NLPXML’03), Budapest, Hungary, April 2003.
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/examples/n2c2_2011.py b/examples/n2c2_2011.py
index 44328533a..d1dd79f7f 100644
--- a/examples/n2c2_2011.py
+++ b/examples/n2c2_2011.py
@@ -72,12 +72,13 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
 _DATASETNAME = "n2c2_2011"
 
 # https://academic.oup.com/jamia/article/19/5/786/716138
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/examples/nlmchem.py b/examples/nlmchem.py
index 945461bf0..885234462 100644
--- a/examples/nlmchem.py
+++ b/examples/nlmchem.py
@@ -22,10 +22,11 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/examples/paramed.py b/examples/paramed.py
index 6791791e0..518d7e623 100644
--- a/examples/paramed.py
+++ b/examples/paramed.py
@@ -1,7 +1,7 @@
-# coding=utf-8
+# coding=utf-8
 # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -26,12 +26,13 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
 logger = datasets.logging.get_logger(__name__)
 
 
+_TAGS = []
 _LANGUAGES = [Lang.EN, Lang.ZH]
 _PUBMED = False
 _LOCAL = False
diff --git a/examples/scitail.py b/examples/scitail.py
index d7bf14dd9..1be23c7cc 100644
--- a/examples/scitail.py
+++ b/examples/scitail.py
@@ -30,9 +30,10 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tasks
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
+_TAGS = []
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False

From 515d9acc3278969adc1a5df4f06d1bf79cfed9be Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Tue, 7 Jun 2022 18:43:36 +0200
Subject: [PATCH 03/20] create Tags Enum

---
 bigbio/utils/constants.py        | 16 ++++++++++++---
 bigbio/utils/resources/tags.json | 34 ++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 3 deletions(-)
 create mode 100644 bigbio/utils/resources/tags.json

diff --git a/bigbio/utils/constants.py b/bigbio/utils/constants.py
index 78a574167..8405bae3b 100644
--- a/bigbio/utils/constants.py
+++ b/bigbio/utils/constants.py
@@ -6,9 +6,19 @@
 
 from bigbio.utils import resources
 from bigbio.utils.license import Licenses
-from bigbio.utils.schemas import (entailment_features, kb_features,
-                                  pairs_features, qa_features,
-                                  text2text_features, text_features)
+from bigbio.utils.schemas import (
+    entailment_features,
+    kb_features,
+    pairs_features,
+    qa_features,
+    text2text_features,
+    text_features,
+)
+
+
+_TAGS = json.loads(pkg_resources.read_text(resources, "tags.json"))
+Tags = Enum("Tags", _TAGS)
+
 
 BigBioValues = SimpleNamespace(NULL="<BB_NULL_STR>")
 
diff --git a/bigbio/utils/resources/tags.json b/bigbio/utils/resources/tags.json
new file mode 100644
index 000000000..f2daadbbb
--- /dev/null
+++ b/bigbio/utils/resources/tags.json
@@ -0,0 +1,34 @@
+{
+  "SOCIAL_MEDIA" : "Social media",
+  "ANATOMY" : "Anatomy",
+  "ORGANISM" : "Organism",
+  "VARIANT" : "Variant/Mutation",
+  "TISSUE" : "Tissue",
+  "CELL" : "Cells and/or cell lines",
+  "SPECIES" : "Species",
+  "GENE" : "Gene, proteins, gene products, ...",
+  "DISEASE" : "Disease",
+  "CHEMICAL" : "Chemical",
+  "UMLS" : "Unified Medical Language System",
+  "COVID" : "Coronavirus disease 2019 (COVID-19)",
+  "LEXICAL" : "Lexical data (e.g. word, verbs,...)",
+  "DECS" : "Descriptores en Ciencias de la Salud",
+  "QA_YESNO" : "QA with yes no answer",
+  "QA_FACTOID" : "QA with factoid answer",
+  "QA_LIST": "QA with list of factoid answer",
+  "QA_SUMMARY_ANSWER" : "QA with abstractive summary answer",
+  "QA_HOW" : "`How` question",
+  "QA_WHY" : "`Why` question",
+  "GRANT" : "Grants data",
+  "PPI" : "Protein-protein interaction",
+  "QA_CLOZE" : "Cloze test",
+  "MRC" : "Machine Reading Comprehension",
+  "QA_MULTIPLE_CHOICE" : "QA with multiple choice",
+  "NEGATION" : "Negation",
+  "SPECULATION" : "Speculation",
+  "EPIGENETICS" : "Epigenetics",
+  "PART_OF" : "Part-of relations",
+  "CANCER" : "Cancer",
+  "PATHWAY" : "Pathway",
+  "MESH" : "Medical Subject Headings (MeSH)"
+}

From beb1eb6f51cd914f235f84429d9d8f47a10bf515 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 12:20:44 +0200
Subject: [PATCH 04/20] update _TAGS

---
 .../ask_a_patient/ask_a_patient.py            |   2 +-
 bigbio/biodatasets/bc5cdr/bc5cdr.py           |   2 +-
 .../bioasq_2021_mesinesp.py                   |   2 +-
 .../bioasq_task_b/bioasq_task_b.py            |   8 +-
 .../biology_how_why_corpus.py                 |   2 +-
 bigbio/biodatasets/biomrc/biomrc.py           |   2 +-
 bigbio/biodatasets/cadec/cadec.py             |   2 +-
 bigbio/biodatasets/cantemist/cantemist.py     |   2 +-
 bigbio/biodatasets/cas/cas.py                 |   2 +-
 bigbio/biodatasets/cellfinder/cellfinder.py   |   2 +-
 .../biodatasets/chebi_nactem/chebi_nactem.py  |   2 +-
 bigbio/biodatasets/chemdner/chemdner.py       |   2 +-
 bigbio/biodatasets/chemprot/chemprot.py       |   2 +-
 bigbio/biodatasets/chia/chia.py               |   2 +-
 .../citation_gia_test_collection.py           | 137 ++++++++++--------
 bigbio/biodatasets/codiesp/codiesp.py         |   2 +-
 bigbio/biodatasets/cord_ner/cord_ner.py       |   2 +-
 bigbio/biodatasets/ctebmsp/ctebmsp.py         |   2 +-
 bigbio/biodatasets/ddi_corpus/ddi_corpus.py   |   2 +-
 .../diann_iber_eval/diann_iber_eval.py        |   2 +-
 bigbio/biodatasets/distemist/distemist.py     |   2 +-
 bigbio/biodatasets/ebm_pico/ebm_pico.py       |  30 +++-
 bigbio/biodatasets/ehr_rel/ehr_rel.py         |   2 +-
 bigbio/biodatasets/essai/essai.py             |   2 +-
 bigbio/biodatasets/euadr/euadr.py             |  10 +-
 .../evidence_inference/evidence_inference.py  |   2 +-
 bigbio/biodatasets/genetag/genetag.py         |   2 +-
 .../genia_relation_corpus.py                  |   2 +-
 .../genia_term_corpus/genia_term_corpus.py    |   2 +-
 bigbio/biodatasets/gnormplus/gnormplus.py     |   2 +-
 .../hallmarks_of_cancer.py                    |  41 +++---
 bigbio/biodatasets/hprd50/hprd50.py           |   2 +-
 bigbio/biodatasets/iepa/iepa.py               |   2 +-
 bigbio/biodatasets/jnlpba/jnlpba.py           |   2 +-
 bigbio/biodatasets/linnaeus/linnaeus.py       |   2 +-
 bigbio/biodatasets/lll/lll.py                 |   2 +-
 bigbio/biodatasets/mantra_gsc/mantra_gsc.py   |   2 +-
 bigbio/biodatasets/mayosrs/mayosrs.py         |   2 +-
 bigbio/biodatasets/med_qa/med_qa.py           |   2 +-
 bigbio/biodatasets/meddialog/meddialog.py     |   2 +-
 bigbio/biodatasets/meddocan/meddocan.py       |   2 +-
 bigbio/biodatasets/medhop/medhop.py           |   2 +-
 .../biodatasets/medical_data/medical_data.py  |   2 +-
 bigbio/biodatasets/mediqa_qa/mediqa_qa.py     |   2 +-
 bigbio/biodatasets/medmentions/medmentions.py |   2 +-
 bigbio/utils/resources/tags.json              |  34 +++--
 46 files changed, 203 insertions(+), 137 deletions(-)

diff --git a/bigbio/biodatasets/ask_a_patient/ask_a_patient.py b/bigbio/biodatasets/ask_a_patient/ask_a_patient.py
index 0b4eeffe4..bd89c5026 100644
--- a/bigbio/biodatasets/ask_a_patient/ask_a_patient.py
+++ b/bigbio/biodatasets/ask_a_patient/ask_a_patient.py
@@ -26,7 +26,7 @@
 
 _DATASETNAME = "ask_a_patient"
 
-_TAGS = [Tags.SOCIAL_MEDIA]
+_TAGS = [Tags.SOCIAL_MEDIA, Tags.ADR]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bc5cdr/bc5cdr.py b/bigbio/biodatasets/bc5cdr/bc5cdr.py
index 5e729b270..45ed49a77 100644
--- a/bigbio/biodatasets/bc5cdr/bc5cdr.py
+++ b/bigbio/biodatasets/bc5cdr/bc5cdr.py
@@ -35,7 +35,7 @@
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
-_TAGS = [Tags.DISEASE, Tags.CHEMICAL, Tags.CHEMICAL_DISEASE_RELATION, Tags.MESH]
+_TAGS = [Tags.DISEASE, Tags.CHEMICAL]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bioasq_2021_mesinesp/bioasq_2021_mesinesp.py b/bigbio/biodatasets/bioasq_2021_mesinesp/bioasq_2021_mesinesp.py
index 680de353c..7fd13d83f 100644
--- a/bigbio/biodatasets/bioasq_2021_mesinesp/bioasq_2021_mesinesp.py
+++ b/bigbio/biodatasets/bioasq_2021_mesinesp/bioasq_2021_mesinesp.py
@@ -54,7 +54,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = [Tags.DECS]
+_TAGS = [Tags.DOCUMENT_INDEXING]
 _LANGUAGES = [Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/bioasq_task_b/bioasq_task_b.py b/bigbio/biodatasets/bioasq_task_b/bioasq_task_b.py
index b17ed3828..685ac4e45 100644
--- a/bigbio/biodatasets/bioasq_task_b/bioasq_task_b.py
+++ b/bigbio/biodatasets/bioasq_task_b/bioasq_task_b.py
@@ -35,7 +35,13 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [
+    Tags.YESNO,
+    Tags.FACTOID,
+    Tags.FACTOID_LIST,
+    Tags.ABSTRACTIVE,
+    Tags.EXTRACTIVE,
+]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/bigbio/biodatasets/biology_how_why_corpus/biology_how_why_corpus.py b/bigbio/biodatasets/biology_how_why_corpus/biology_how_why_corpus.py
index 41e8cca74..282050c63 100644
--- a/bigbio/biodatasets/biology_how_why_corpus/biology_how_why_corpus.py
+++ b/bigbio/biodatasets/biology_how_why_corpus/biology_how_why_corpus.py
@@ -33,7 +33,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = [Tags.QA_HOW, Tags.QA_WHY]
+_TAGS = [Tags.HOW, Tags.WHY]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/biomrc/biomrc.py b/bigbio/biodatasets/biomrc/biomrc.py
index df849298a..43dd1f725 100644
--- a/bigbio/biodatasets/biomrc/biomrc.py
+++ b/bigbio/biodatasets/biomrc/biomrc.py
@@ -34,7 +34,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = [Tags.QA_MULTIPLE_CHOICE, Tags.MRC, Tags.QA_CLOZE]
+_TAGS = [Tags.MULTIPLE_CHOICE, Tags.MRC, Tags.CLOZE_TEST]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/cadec/cadec.py b/bigbio/biodatasets/cadec/cadec.py
index 13784fd6f..c604c0920 100644
--- a/bigbio/biodatasets/cadec/cadec.py
+++ b/bigbio/biodatasets/cadec/cadec.py
@@ -38,7 +38,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
-_TAGS = []
+_TAGS = [Tags.SOCIAL_MEDIA, Tags.DISEASE, Tags.ADR, Tags.DRUG]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/cantemist/cantemist.py b/bigbio/biodatasets/cantemist/cantemist.py
index 9d0c9d897..9b4af0460 100644
--- a/bigbio/biodatasets/cantemist/cantemist.py
+++ b/bigbio/biodatasets/cantemist/cantemist.py
@@ -34,7 +34,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.CANCER, Tags.DISEASE, Tags.DOCUMENT_INDEXING]
 _LANGUAGES = [Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/cas/cas.py b/bigbio/biodatasets/cas/cas.py
index 6c421ca86..6b45d7d2b 100644
--- a/bigbio/biodatasets/cas/cas.py
+++ b/bigbio/biodatasets/cas/cas.py
@@ -9,7 +9,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.NEGATION, Tags.SPECULATION, Tags.POS]
 _LANGUAGES = [Lang.FR]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/cellfinder/cellfinder.py b/bigbio/biodatasets/cellfinder/cellfinder.py
index 9987ee5f6..04b36b529 100644
--- a/bigbio/biodatasets/cellfinder/cellfinder.py
+++ b/bigbio/biodatasets/cellfinder/cellfinder.py
@@ -31,7 +31,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.GENE, Tags.CELL, Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/chebi_nactem/chebi_nactem.py b/bigbio/biodatasets/chebi_nactem/chebi_nactem.py
index b7edd94f5..aeb5f48bf 100644
--- a/bigbio/biodatasets/chebi_nactem/chebi_nactem.py
+++ b/bigbio/biodatasets/chebi_nactem/chebi_nactem.py
@@ -25,7 +25,7 @@
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import parse_brat_file
 
-_TAGS = []
+_TAGS = [Tags.GENE, Tags.CHEMICAL, Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/chemdner/chemdner.py b/bigbio/biodatasets/chemdner/chemdner.py
index c1ec7c880..4e237b6b9 100644
--- a/bigbio/biodatasets/chemdner/chemdner.py
+++ b/bigbio/biodatasets/chemdner/chemdner.py
@@ -26,7 +26,7 @@
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
-_TAGS = []
+_TAGS = [Tags.CHEMICAL, Tags.DOCUMENT_INDEXING]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/chemprot/chemprot.py b/bigbio/biodatasets/chemprot/chemprot.py
index c91d5aa81..fc2aa6793 100644
--- a/bigbio/biodatasets/chemprot/chemprot.py
+++ b/bigbio/biodatasets/chemprot/chemprot.py
@@ -28,7 +28,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.CHEMICAL, Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/chia/chia.py b/bigbio/biodatasets/chia/chia.py
index cc1b3d7ab..da93b98d4 100644
--- a/bigbio/biodatasets/chia/chia.py
+++ b/bigbio/biodatasets/chia/chia.py
@@ -28,7 +28,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.DRUG, Tags.PROCEDURE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/citation_gia_test_collection/citation_gia_test_collection.py b/bigbio/biodatasets/citation_gia_test_collection/citation_gia_test_collection.py
index 63efad002..28169f96f 100644
--- a/bigbio/biodatasets/citation_gia_test_collection/citation_gia_test_collection.py
+++ b/bigbio/biodatasets/citation_gia_test_collection/citation_gia_test_collection.py
@@ -27,7 +27,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
@@ -60,11 +60,11 @@
 
 _URLS = {
     _DATASETNAME: [
-        "https://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/tmTools/download/GNormPlus/GNormPlusCorpus.zip"]
+        "https://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/tmTools/download/GNormPlus/GNormPlusCorpus.zip"
+    ]
 }
 
-_SUPPORTED_TASKS = [Tasks.NAMED_ENTITY_RECOGNITION,
-                    Tasks.NAMED_ENTITY_DISAMBIGUATION]
+_SUPPORTED_TASKS = [Tasks.NAMED_ENTITY_RECOGNITION, Tasks.NAMED_ENTITY_DISAMBIGUATION]
 
 _SOURCE_VERSION = "1.0.0"
 
@@ -73,8 +73,8 @@
 
 class CitationGIATestCollection(datasets.GeneratorBasedBuilder):
     """
-    The Citation GIA Test Collection was recently created for gene indexing at the NLM and includes 
-    151 PubMed abstracts with both mention-level and document-level annotations. 
+    The Citation GIA Test Collection was recently created for gene indexing at the NLM and includes
+    151 PubMed abstracts with both mention-level and document-level annotations.
     They are selected because both have a focus on human genes.
     """
 
@@ -95,7 +95,7 @@ class CitationGIATestCollection(datasets.GeneratorBasedBuilder):
             description="citation_gia_test_collection BigBio schema",
             schema="bigbio_kb",
             subset_id="citation_gia_test_collection",
-        )
+        ),
     ]
 
     DEFAULT_CONFIG_NAME = "citation_gia_test_collection_source"
@@ -127,7 +127,7 @@ def _info(self) -> datasets.DatasetInfo:
                                 }
                             ],
                         }
-                    ]
+                    ],
                 }
             )
 
@@ -151,16 +151,18 @@ def _split_generators(self, dl_manager) -> List[datasets.SplitGenerator]:
             datasets.SplitGenerator(
                 name=datasets.Split.TEST,
                 gen_kwargs={
-                    "filepath": os.path.join(data_dir[0], "GNormPlusCorpus/NLMIAT.BioC.xml"),
+                    "filepath": os.path.join(
+                        data_dir[0], "GNormPlusCorpus/NLMIAT.BioC.xml"
+                    ),
                     "split": "NLMIAT",
                 },
             ),
         ]
 
     def _get_entities(self, annot_d: dict) -> dict:
-        ''''
+        """
         Converts annotation dict to entity dict.
-        '''
+        """
         ent = {
             "id": str(uuid.uuid4()),
             "type": annot_d["type"],
@@ -176,13 +178,15 @@ def _get_entities(self, annot_d: dict) -> dict:
 
         return ent
 
-    def _get_offsets_entities(child, parent_text: str, child_text: str, offset: int) -> List[int]:
-        '''
-        Extracts child text offsets from parent text for entities. 
+    def _get_offsets_entities(
+        child, parent_text: str, child_text: str, offset: int
+    ) -> List[int]:
+        """
+        Extracts child text offsets from parent text for entities.
         Some offsets that were present in the datset were wrong mainly because of string encodings.
-        Also a little fraction of parent strings doesn't contain its respective child strings. 
-        Hence few assertion errors in the entitity offsets checking test. 
-        '''
+        Also a small fraction of parent strings don't contain their respective child strings.
+        Hence a few assertion errors in the entity offsets checking test.
+        """
         if child_text in parent_text:
             index = parent_text.index(child_text)
             start = index + offset
@@ -194,10 +198,10 @@ def _get_offsets_entities(child, parent_text: str, child_text: str, offset: int)
         return [start, end]
 
     def _process_annot(self, annot: ET.Element, passages: dict) -> dict:
-        ''''
+        """
         Converts annotation XML Element to Python dict.
-        '''
-        parent_text = " ".join([p['text'] for p in passages.values()])
+        """
+        parent_text = " ".join([p["text"] for p in passages.values()])
         annot_d = dict()
         a_d = {a.tag: a.text for a in annot}
 
@@ -206,21 +210,21 @@ def _process_annot(self, annot: ET.Element, passages: dict) -> dict:
             if a.tag == "location":
                 offset = int(a.attrib["offset"])
                 annot_d["offsets"] = self._get_offsets_entities(
-                    html.escape(parent_text[offset:]),
-                    html.escape(a_d["text"]), offset)
+                    html.escape(parent_text[offset:]), html.escape(a_d["text"]), offset
+                )
 
             elif a.tag != "infon":
                 annot_d[a.tag] = html.escape(a.text)
 
             else:
                 annot_d[a.attrib["key"]] = html.escape(a.text)
-                
+
         return annot_d
 
     def _parse_elem(self, elem: ET.Element) -> dict:
-        ''''
+        """
         Converts document XML Element to Python dict.
-        '''
+        """
         elem_d = dict()
         passages = dict()
         annotations = elem.findall(".//annotation")
@@ -231,8 +235,21 @@ def _parse_elem(self, elem: ET.Element) -> dict:
 
         for child in elem:
             if child.tag == "passage":
-                elem_d[child.tag].append({c.tag: html.escape(" ".join(list(filter(
-                    lambda item: item, [t.strip('\n') for t in c.itertext()])))) for c in child})
+                elem_d[child.tag].append(
+                    {
+                        c.tag: html.escape(
+                            " ".join(
+                                list(
+                                    filter(
+                                        lambda item: item,
+                                        [t.strip("\n") for t in c.itertext()],
+                                    )
+                                )
+                            )
+                        )
+                        for c in child
+                    }
+                )
 
             elif child.tag == "id":
                 elem_d[child.tag] = html.escape(child.text)
@@ -243,11 +260,10 @@ def _parse_elem(self, elem: ET.Element) -> dict:
             passages[infon] = passage
 
         elem_d["passages"] = passages
-        elem_d.pop('passage', None)
+        elem_d.pop("passage", None)
 
         for a in annotations:
-            elem_d["entities"].append(
-                self._process_annot(a, elem_d["passages"]))
+            elem_d["entities"].append(self._process_annot(a, elem_d["passages"]))
 
         return elem_d
 
@@ -261,31 +277,35 @@ def _generate_examples(self, filepath, split):
                 row = self._parse_elem(elem)
                 uid += 1
                 passages = row["passages"]
-                yield uid,  {
+                yield uid, {
                     "id": str(uid),
                     "passages": [
                         {
                             "id": str(uuid.uuid4()),
                             "type": "title",
                             "text": [passages["title"]["text"]],
-                            "offsets": [[
-                                int(passages["title"]["offset"]),
-                                int(passages["title"]["offset"]) +
-                                len(passages["title"]["text"])
-                            ]],
+                            "offsets": [
+                                [
+                                    int(passages["title"]["offset"]),
+                                    int(passages["title"]["offset"])
+                                    + len(passages["title"]["text"]),
+                                ]
+                            ],
                         },
                         {
                             "id": str(uuid.uuid4()),
                             "type": "abstract",
                             "text": [passages["abstract"]["text"]],
-                            "offsets": [[
-                                int(passages["abstract"]["offset"]),
-                                int(passages["abstract"]["offset"]) +
-                                len(passages["abstract"]["text"])
-                            ]],
-                        }
+                            "offsets": [
+                                [
+                                    int(passages["abstract"]["offset"]),
+                                    int(passages["abstract"]["offset"])
+                                    + len(passages["abstract"]["text"]),
+                                ]
+                            ],
+                        },
                     ],
-                    "entities": [self._get_entities(a) for a in row["entities"]]
+                    "entities": [self._get_entities(a) for a in row["entities"]],
                 }
 
         elif self.config.schema == "bigbio_kb":
@@ -294,7 +314,7 @@ def _generate_examples(self, filepath, split):
                 row = self._parse_elem(elem)
                 uid += 1
                 passages = row["passages"]
-                yield uid,  {
+                yield uid, {
                     "id": str(uid),
                     "document_id": str(uuid.uuid4()),
                     "passages": [
@@ -302,26 +322,29 @@ def _generate_examples(self, filepath, split):
                             "id": str(uuid.uuid4()),
                             "type": "title",
                             "text": [passages["title"]["text"]],
-                            "offsets": [[
-                                int(passages["title"]["offset"]),
-                                int(passages["title"]["offset"]) +
-                                len(passages["title"]
-                                    ["text"])
-                            ]],
+                            "offsets": [
+                                [
+                                    int(passages["title"]["offset"]),
+                                    int(passages["title"]["offset"])
+                                    + len(passages["title"]["text"]),
+                                ]
+                            ],
                         },
                         {
                             "id": str(uuid.uuid4()),
                             "type": "abstract",
                             "text": [passages["abstract"]["text"]],
-                            "offsets": [[
-                                int(passages["abstract"]["offset"]),
-                                int(passages["abstract"]["offset"]) +
-                                len(passages["abstract"]["text"])
-                            ]],
-                        }
+                            "offsets": [
+                                [
+                                    int(passages["abstract"]["offset"]),
+                                    int(passages["abstract"]["offset"])
+                                    + len(passages["abstract"]["text"]),
+                                ]
+                            ],
+                        },
                     ],
                     "entities": [self._get_entities(a) for a in row["entities"]],
                     "relations": [],
                     "events": [],
-                    "coreferences": []
+                    "coreferences": [],
                 }
diff --git a/bigbio/biodatasets/codiesp/codiesp.py b/bigbio/biodatasets/codiesp/codiesp.py
index aea9c7860..65671fcd8 100644
--- a/bigbio/biodatasets/codiesp/codiesp.py
+++ b/bigbio/biodatasets/codiesp/codiesp.py
@@ -38,7 +38,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.DOCUMENT_INDEXING, Tags.PROCEDURE]
 _LANGUAGES = [Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/cord_ner/cord_ner.py b/bigbio/biodatasets/cord_ner/cord_ner.py
index 38f956daa..5457155d7 100644
--- a/bigbio/biodatasets/cord_ner/cord_ner.py
+++ b/bigbio/biodatasets/cord_ner/cord_ner.py
@@ -28,7 +28,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
-_TAGS = []
+_TAGS = [Tags.GENE, Tags.DISEASE, Tags.CHEMICAL, Tags.COVID, Tags.ORGANISM]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/ctebmsp/ctebmsp.py b/bigbio/biodatasets/ctebmsp/ctebmsp.py
index 42c23ef23..f5a3fc2b8 100644
--- a/bigbio/biodatasets/ctebmsp/ctebmsp.py
+++ b/bigbio/biodatasets/ctebmsp/ctebmsp.py
@@ -34,7 +34,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.ANATOMY, Tags.CHEMICAL, Tags.DISEASE, Tags.PROCEDURE]
 _LANGUAGES = [Lang.ES]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/ddi_corpus/ddi_corpus.py b/bigbio/biodatasets/ddi_corpus/ddi_corpus.py
index 970cdbb6b..7ff25476b 100644
--- a/bigbio/biodatasets/ddi_corpus/ddi_corpus.py
+++ b/bigbio/biodatasets/ddi_corpus/ddi_corpus.py
@@ -30,7 +30,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DDI, Tags.DRUG]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/diann_iber_eval/diann_iber_eval.py b/bigbio/biodatasets/diann_iber_eval/diann_iber_eval.py
index 9ae958463..8dcc4ac1c 100644
--- a/bigbio/biodatasets/diann_iber_eval/diann_iber_eval.py
+++ b/bigbio/biodatasets/diann_iber_eval/diann_iber_eval.py
@@ -30,7 +30,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DOCUMENT_INDEXING, Tags.DISEASE]
 _LANGUAGES = [Lang.EN, Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/distemist/distemist.py b/bigbio/biodatasets/distemist/distemist.py
index 798c568a0..1471653f4 100644
--- a/bigbio/biodatasets/distemist/distemist.py
+++ b/bigbio/biodatasets/distemist/distemist.py
@@ -24,7 +24,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/ebm_pico/ebm_pico.py b/bigbio/biodatasets/ebm_pico/ebm_pico.py
index f20a3379d..0abb19048 100644
--- a/bigbio/biodatasets/ebm_pico/ebm_pico.py
+++ b/bigbio/biodatasets/ebm_pico/ebm_pico.py
@@ -29,7 +29,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.PICO, Tags.POS]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
@@ -66,7 +66,9 @@
 
 _LICENSE = Licenses.UNKNOWN
 
-_URLS = {_DATASETNAME: "https://github.com/bepnye/EBM-NLP/raw/master/ebm_nlp_2_00.tar.gz"}
+_URLS = {
+    _DATASETNAME: "https://github.com/bepnye/EBM-NLP/raw/master/ebm_nlp_2_00.tar.gz"
+}
 
 _SUPPORTED_TASKS = [Tasks.NAMED_ENTITY_RECOGNITION]
 
@@ -139,7 +141,9 @@ def _partition(alist, indices):
 
             for _indices in multiple_indices:
                 high_level_type = LABEL_DECODERS["starting_spans"][annotation_type][1]
-                fine_grained_type = LABEL_DECODERS["hierarchical_labels"][annotation_type][annotations[_indices[0]]]
+                fine_grained_type = LABEL_DECODERS["hierarchical_labels"][
+                    annotation_type
+                ][annotations[_indices[0]]]
                 annotation_text = " ".join([tokenized[ind] for ind in _indices])
 
                 char_start = document_content.find(annotation_text)
@@ -222,7 +226,9 @@ def _split_generators(self, dl_manager) -> List[datasets.SplitGenerator]:
         data_dir = dl_manager.download_and_extract(urls)
 
         documents_folder = Path(data_dir) / "ebm_nlp_2_00" / "documents"
-        annotations_folder = Path(data_dir) / "ebm_nlp_2_00" / "annotations" / "aggregated"
+        annotations_folder = (
+            Path(data_dir) / "ebm_nlp_2_00" / "annotations" / "aggregated"
+        )
         return [
             datasets.SplitGenerator(
                 name=datasets.Split.TRAIN,
@@ -242,7 +248,9 @@ def _split_generators(self, dl_manager) -> List[datasets.SplitGenerator]:
             ),
         ]
 
-    def _generate_examples(self, documents_folder, annotations_folder, split_folder: str) -> Tuple[int, Dict]:
+    def _generate_examples(
+        self, documents_folder, annotations_folder, split_folder: str
+    ) -> Tuple[int, Dict]:
         """Yields examples as (key, example) tuples."""
         annotation_types = ["interventions", "outcomes", "participants"]
 
@@ -265,11 +273,15 @@ def _generate_examples(self, documents_folder, annotations_folder, split_folder:
                     with open(
                         f"{annotations_folder}/hierarchical_labels/{annotation_type}/{split_folder}/{document}"
                     ) as fp:
-                        annotation_dict[annotation_type] = [int(x) for x in fp.read().splitlines()]
+                        annotation_dict[annotation_type] = [
+                            int(x) for x in fp.read().splitlines()
+                        ]
                 except OSError:
                     annotation_dict[annotation_type] = []
 
-            ents = _get_entities_pico(annotation_dict, tokenized=tokenized, document_content=document_content)
+            ents = _get_entities_pico(
+                annotation_dict, tokenized=tokenized, document_content=document_content
+            )
 
             if self.config.schema == "source":
 
@@ -280,7 +292,9 @@ def _generate_examples(self, documents_folder, annotations_folder, split_folder:
                         {
                             "text": ent["annotation_text"],
                             "annotation_type": ent["high_level_annotation_type"],
-                            "fine_grained_annotation_type": ent["fine_grained_annotation_type"],
+                            "fine_grained_annotation_type": ent[
+                                "fine_grained_annotation_type"
+                            ],
                             "start": ent["char_start"],
                             "end": ent["char_end"],
                         }
diff --git a/bigbio/biodatasets/ehr_rel/ehr_rel.py b/bigbio/biodatasets/ehr_rel/ehr_rel.py
index 2ad2f965a..f9b0967e8 100644
--- a/bigbio/biodatasets/ehr_rel/ehr_rel.py
+++ b/bigbio/biodatasets/ehr_rel/ehr_rel.py
@@ -31,7 +31,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.CONCEPT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/essai/essai.py b/bigbio/biodatasets/essai/essai.py
index 289055a63..aab446381 100644
--- a/bigbio/biodatasets/essai/essai.py
+++ b/bigbio/biodatasets/essai/essai.py
@@ -9,7 +9,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.NEGATION, Tags.SPECULATION, Tags.DISEASE, Tags.PROCEDURE]
 _LANGUAGES = [Lang.FR]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/euadr/euadr.py b/bigbio/biodatasets/euadr/euadr.py
index e68a1feb1..6923b5ad5 100644
--- a/bigbio/biodatasets/euadr/euadr.py
+++ b/bigbio/biodatasets/euadr/euadr.py
@@ -7,7 +7,15 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [
+    Tags.ADR,
+    Tags.DRUG,
+    Tags.GENE,
+    Tags.DISEASE,
+    Tags.VARIANT,
+    Tags.NEGATION,
+    Tags.SPECULATION,
+]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/evidence_inference/evidence_inference.py b/bigbio/biodatasets/evidence_inference/evidence_inference.py
index e21ce4f47..d17594ca8 100644
--- a/bigbio/biodatasets/evidence_inference/evidence_inference.py
+++ b/bigbio/biodatasets/evidence_inference/evidence_inference.py
@@ -35,7 +35,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.PROCEDURE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/genetag/genetag.py b/bigbio/biodatasets/genetag/genetag.py
index bfe13bf53..2faf3558e 100644
--- a/bigbio/biodatasets/genetag/genetag.py
+++ b/bigbio/biodatasets/genetag/genetag.py
@@ -32,7 +32,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/genia_relation_corpus/genia_relation_corpus.py b/bigbio/biodatasets/genia_relation_corpus/genia_relation_corpus.py
index 81c833687..f010eb3a2 100644
--- a/bigbio/biodatasets/genia_relation_corpus/genia_relation_corpus.py
+++ b/bigbio/biodatasets/genia_relation_corpus/genia_relation_corpus.py
@@ -34,7 +34,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.GENE, Tags.PART_OF]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/genia_term_corpus/genia_term_corpus.py b/bigbio/biodatasets/genia_term_corpus/genia_term_corpus.py
index 7516e830d..66b55cf8e 100644
--- a/bigbio/biodatasets/genia_term_corpus/genia_term_corpus.py
+++ b/bigbio/biodatasets/genia_term_corpus/genia_term_corpus.py
@@ -31,7 +31,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.GENE, Tags.CELL, Tags.ANATOMY, Tags.TISSUE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/gnormplus/gnormplus.py b/bigbio/biodatasets/gnormplus/gnormplus.py
index 28d16d360..fc1a2367b 100644
--- a/bigbio/biodatasets/gnormplus/gnormplus.py
+++ b/bigbio/biodatasets/gnormplus/gnormplus.py
@@ -27,7 +27,7 @@
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
-_TAGS = []
+_TAGS = [Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/hallmarks_of_cancer/hallmarks_of_cancer.py b/bigbio/biodatasets/hallmarks_of_cancer/hallmarks_of_cancer.py
index 73439fe04..973bf970a 100644
--- a/bigbio/biodatasets/hallmarks_of_cancer/hallmarks_of_cancer.py
+++ b/bigbio/biodatasets/hallmarks_of_cancer/hallmarks_of_cancer.py
@@ -21,7 +21,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.CANCER]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
@@ -66,7 +66,7 @@
 
 _URLs = {
     "corpus": "https://github.com/sb895/Hallmarks-of-Cancer/archive/refs/heads/master.zip",
-    "split_indices": "https://microsoft.github.io/BLURB/sample_code/data_generation.tar.gz"    
+    "split_indices": "https://microsoft.github.io/BLURB/sample_code/data_generation.tar.gz",
 }
 
 _SUPPORTED_TASKS = [Tasks.TEXT_CLASSIFICATION]
@@ -74,17 +74,17 @@
 _BIGBIO_VERSION = "1.0.0"
 
 _CLASS_NAMES = [
-    'evading growth suppressors',
-    'tumor promoting inflammation',
-    'enabling replicative immortality',
-    'cellular energetics',
-    'resisting cell death',
-    'activating invasion and metastasis',
-    'genomic instability and mutation',
-    'none',
-    'inducing angiogenesis',
-    'sustaining proliferative signaling',
-    'avoiding immune destruction'
+    "evading growth suppressors",
+    "tumor promoting inflammation",
+    "enabling replicative immortality",
+    "cellular energetics",
+    "resisting cell death",
+    "activating invasion and metastasis",
+    "genomic instability and mutation",
+    "none",
+    "inducing angiogenesis",
+    "sustaining proliferative signaling",
+    "avoiding immune destruction",
 ]
 
 
@@ -144,21 +144,24 @@ def _split_generators(self, dl_manager):
                 name=datasets.Split.TRAIN,
                 gen_kwargs={
                     "corpuspath": Path(data_dir["corpus"]),
-                    "indicespath": Path(data_dir["split_indices"]) / "data_generation/indexing/HoC/train_pmid.tsv"                
+                    "indicespath": Path(data_dir["split_indices"])
+                    / "data_generation/indexing/HoC/train_pmid.tsv",
                 },
             ),
             datasets.SplitGenerator(
                 name=datasets.Split.TEST,
                 gen_kwargs={
                     "corpuspath": Path(data_dir["corpus"]),
-                    "indicespath": Path(data_dir["split_indices"]) / "data_generation/indexing/HoC/test_pmid.tsv"
+                    "indicespath": Path(data_dir["split_indices"])
+                    / "data_generation/indexing/HoC/test_pmid.tsv",
                 },
             ),
             datasets.SplitGenerator(
                 name=datasets.Split.VALIDATION,
                 gen_kwargs={
                     "corpuspath": Path(data_dir["corpus"]),
-                    "indicespath": Path(data_dir["split_indices"]) / "data_generation/indexing/HoC/dev_pmid.tsv"              
+                    "indicespath": Path(data_dir["split_indices"])
+                    / "data_generation/indexing/HoC/dev_pmid.tsv",
                 },
             ),
         ]
@@ -184,13 +187,15 @@ def _generate_examples(self, corpuspath: Path, indicespath: Path):
                 sentence, label = example_pair
 
                 label = label.strip()
-                
+
                 if label == "":
                     label = "none"
 
                 multi_labels = [m_label.strip() for m_label in label.split("AND")]
                 unique_multi_labels = {
-                    m_label.split("--")[0].lower().lstrip() for m_label in multi_labels if m_label != "NULL"
+                    m_label.split("--")[0].lower().lstrip()
+                    for m_label in multi_labels
+                    if m_label != "NULL"
                 }
 
                 arrow_file_unique_key = 100 * document_index + example_index
diff --git a/bigbio/biodatasets/hprd50/hprd50.py b/bigbio/biodatasets/hprd50/hprd50.py
index 63de60017..834bc1c5d 100644
--- a/bigbio/biodatasets/hprd50/hprd50.py
+++ b/bigbio/biodatasets/hprd50/hprd50.py
@@ -42,7 +42,7 @@
 from bigbio.utils.license import Licenses
 
 # TODO: Add BibTeX citation
-_TAGS = []
+_TAGS = [Tags.GENE, Tags.PPI]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/iepa/iepa.py b/bigbio/biodatasets/iepa/iepa.py
index 157893562..be945fb60 100644
--- a/bigbio/biodatasets/iepa/iepa.py
+++ b/bigbio/biodatasets/iepa/iepa.py
@@ -33,7 +33,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.CHEMICAL, Tags.DRUG, Tags.DDI]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/jnlpba/jnlpba.py b/bigbio/biodatasets/jnlpba/jnlpba.py
index a10a42981..9e03eaea1 100644
--- a/bigbio/biodatasets/jnlpba/jnlpba.py
+++ b/bigbio/biodatasets/jnlpba/jnlpba.py
@@ -29,7 +29,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.GENE, Tags.CELL]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/linnaeus/linnaeus.py b/bigbio/biodatasets/linnaeus/linnaeus.py
index 4a079d1d9..805188879 100644
--- a/bigbio/biodatasets/linnaeus/linnaeus.py
+++ b/bigbio/biodatasets/linnaeus/linnaeus.py
@@ -35,7 +35,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/lll/lll.py b/bigbio/biodatasets/lll/lll.py
index ccc4eca80..560185a5f 100644
--- a/bigbio/biodatasets/lll/lll.py
+++ b/bigbio/biodatasets/lll/lll.py
@@ -39,7 +39,7 @@
 from bigbio.utils.constants import BigBioValues, Lang, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/mantra_gsc/mantra_gsc.py b/bigbio/biodatasets/mantra_gsc/mantra_gsc.py
index 0db20bd50..cf572db03 100644
--- a/bigbio/biodatasets/mantra_gsc/mantra_gsc.py
+++ b/bigbio/biodatasets/mantra_gsc/mantra_gsc.py
@@ -25,7 +25,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.GENE, Tags.DISEASE, Tags.PROCEDURE]
 _LANGUAGES = [Lang.EN, Lang.FR, Lang.DE, Lang.NL, Lang.ES]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/mayosrs/mayosrs.py b/bigbio/biodatasets/mayosrs/mayosrs.py
index 160a66668..e0b63b87e 100644
--- a/bigbio/biodatasets/mayosrs/mayosrs.py
+++ b/bigbio/biodatasets/mayosrs/mayosrs.py
@@ -28,7 +28,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.CONCEPT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/med_qa/med_qa.py b/bigbio/biodatasets/med_qa/med_qa.py
index 4cdbc1d96..5e000263c 100644
--- a/bigbio/biodatasets/med_qa/med_qa.py
+++ b/bigbio/biodatasets/med_qa/med_qa.py
@@ -32,7 +32,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.MULTIPLE_CHOICE, Tags.ABSTRACTIVE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/meddialog/meddialog.py b/bigbio/biodatasets/meddialog/meddialog.py
index 4d0e95b64..ee647d081 100644
--- a/bigbio/biodatasets/meddialog/meddialog.py
+++ b/bigbio/biodatasets/meddialog/meddialog.py
@@ -25,7 +25,7 @@
 
 _DATASETNAME = "meddialog"
 
-_TAGS = []
+_TAGS = [Tags.DIALOGUE]
 _LANGUAGES = [Lang.EN, Lang.ZH]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/meddocan/meddocan.py b/bigbio/biodatasets/meddocan/meddocan.py
index e1fb393d1..d2dc14e9b 100644
--- a/bigbio/biodatasets/meddocan/meddocan.py
+++ b/bigbio/biodatasets/meddocan/meddocan.py
@@ -32,7 +32,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.ANONYMIZATION]
 _LANGUAGES = [Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/medhop/medhop.py b/bigbio/biodatasets/medhop/medhop.py
index 96c926399..196490081 100644
--- a/bigbio/biodatasets/medhop/medhop.py
+++ b/bigbio/biodatasets/medhop/medhop.py
@@ -23,7 +23,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.MULTIPLE_CHOICE, Tags.MRC]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/medical_data/medical_data.py b/bigbio/biodatasets/medical_data/medical_data.py
index 80ddfdef7..48929faa6 100644
--- a/bigbio/biodatasets/medical_data/medical_data.py
+++ b/bigbio/biodatasets/medical_data/medical_data.py
@@ -24,7 +24,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DRUG, Tags.SENTIMENT_ANALYSIS]
 _LANGUAGES = [Lang.EN]
 _LOCAL = True
 _CITATION = """\
diff --git a/bigbio/biodatasets/mediqa_qa/mediqa_qa.py b/bigbio/biodatasets/mediqa_qa/mediqa_qa.py
index 1c26254e7..5af9b45b2 100644
--- a/bigbio/biodatasets/mediqa_qa/mediqa_qa.py
+++ b/bigbio/biodatasets/mediqa_qa/mediqa_qa.py
@@ -28,7 +28,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.FACTOID, Tags.DISEASE, Tags.DRUG]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/medmentions/medmentions.py b/bigbio/biodatasets/medmentions/medmentions.py
index a1322f7e1..9c9746635 100644
--- a/bigbio/biodatasets/medmentions/medmentions.py
+++ b/bigbio/biodatasets/medmentions/medmentions.py
@@ -46,7 +46,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.CHEMICAL, Tags.ORGANISM]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/utils/resources/tags.json b/bigbio/utils/resources/tags.json
index f2daadbbb..c4e07a69c 100644
--- a/bigbio/utils/resources/tags.json
+++ b/bigbio/utils/resources/tags.json
@@ -8,27 +8,37 @@
   "SPECIES" : "Species",
   "GENE" : "Gene, proteins, gene products, ...",
   "DISEASE" : "Disease",
+  "DRUG" : "Drug",
   "CHEMICAL" : "Chemical",
-  "UMLS" : "Unified Medical Language System",
   "COVID" : "Coronavirus disease 2019 (COVID-19)",
   "LEXICAL" : "Lexical data (e.g. word, verbs,...)",
-  "DECS" : "Descriptores en Ciencias de la Salud",
-  "QA_YESNO" : "QA with yes no answer",
-  "QA_FACTOID" : "QA with factoid answer",
-  "QA_LIST": "QA with list of factoid answer",
-  "QA_SUMMARY_ANSWER" : "QA with abstractive summary answer",
-  "QA_HOW" : "`How` question",
-  "QA_WHY" : "`Why` question",
+  "YESNO" : "QA with yes no answer",
+  "HOW" : "`How` question",
+  "WHY" : "`Why` question",
+  "FACTOID" : "QA with factoid answer",
+  "FACTOIND_LIST": "QA with list of factoid answer",
+  "ABSTRACTIVE" : "Abstractive summary/answer",
+  "EXTRACTIVE" : "Extractive summary/answer",
+  "CLOZE_TEST" : "Cloze test",
   "GRANT" : "Grants data",
   "PPI" : "Protein-protein interaction",
-  "QA_CLOZE" : "Cloze test",
   "MRC" : "Machine Reading Comprehension",
-  "QA_MULTIPLE_CHOICE" : "QA with multiple choice",
+  "MULTIPLE_CHOICE" : "QA with multiple choice",
   "NEGATION" : "Negation",
   "SPECULATION" : "Speculation",
   "EPIGENETICS" : "Epigenetics",
   "PART_OF" : "Part-of relations",
   "CANCER" : "Cancer",
-  "PATHWAY" : "Pathway",
-  "MESH" : "Medical Subject Headings (MeSH)"
+  "PATHWAY_CURATION" : "Pathway curation",
+  "DOCUMENT_INDEXING" : "Document indexing",
+  "ADR" : "Adverse Drug Reaction",
+  "POS" : "Part of Speech Tagging",
+  "PICO" : "(P)articipants, (I)nterventions, and (O)utcomes",
+  "DDI" : "Drug-drug interaction",
+  "CONCEPT" : "Concept, Multi-word expression (MWE)",
+  "SENTENCE" : "Sentence",
+  "PROCEDURE" : "Procedure, treatment",
+  "DIALOGUE" : "Dialogue",
+  "ANONYMIZATION" : "Anonymizatio (De-identification)"
+  "SENTIMENT_ANALYSIS" : "Sentiment analysis"
 }

From 9ebbdf4d2cfd77f2a26c7a4b526751a0541fbb80 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 13:09:42 +0200
Subject: [PATCH 05/20] new tags

---
 bigbio/utils/resources/tags.json | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/bigbio/utils/resources/tags.json b/bigbio/utils/resources/tags.json
index c4e07a69c..2dcc382e1 100644
--- a/bigbio/utils/resources/tags.json
+++ b/bigbio/utils/resources/tags.json
@@ -2,6 +2,7 @@
   "SOCIAL_MEDIA" : "Social media",
   "ANATOMY" : "Anatomy",
   "ORGANISM" : "Organism",
+  "ORGAN" : "Organ", 
   "VARIANT" : "Variant/Mutation",
   "TISSUE" : "Tissue",
   "CELL" : "Cells and/or cell lines",
@@ -39,6 +40,8 @@
   "SENTENCE" : "Sentence",
   "PROCEDURE" : "Procedure, treatment",
   "DIALOGUE" : "Dialogue",
-  "ANONYMIZATION" : "Anonymizatio (De-identification)"
-  "SENTIMENT_ANALYSIS" : "Sentiment analysis"
+  "ANONYMIZATION" : "Anonymization (De-identification)",
+  "SENTIMENT_ANALYSIS" : "Sentiment analysis",
+  "MIRNA" : "miRNA",
+  "ABBREVIATION" : "Abbreviation"
 }

From 02deb9a37a62ecb926773c5cf0b2ecf0a2cac1c2 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 13:10:50 +0200
Subject: [PATCH 06/20] add tags

---
 bigbio/biodatasets/medal/medal.py                 | 15 +++++++++------
 bigbio/biodatasets/meqsum/meqsum.py               |  2 +-
 bigbio/biodatasets/minimayosrs/minimayosrs.py     |  2 +-
 bigbio/biodatasets/mlee/mlee.py                   |  2 +-
 bigbio/biodatasets/msh_wsd/msh_wsd.py             |  2 +-
 bigbio/biodatasets/muchmore/muchmore.py           |  2 +-
 .../biodatasets/multi_xscience/multi_xscience.py  |  2 +-
 .../mutation_finder/mutation_finder.py            |  2 +-
 bigbio/biodatasets/nagel/nagel.py                 |  2 +-
 bigbio/biodatasets/ncbi_disease/ncbi_disease.py   |  2 +-
 bigbio/biodatasets/nlm_gene/nlm_gene.py           |  2 +-
 bigbio/biodatasets/nlm_wsd/nlm_wsd.py             |  2 +-
 bigbio/biodatasets/nlmchem/nlmchem.py             |  2 +-
 .../ntcir_13_medweb/ntcir_13_medweb.py            |  2 +-
 bigbio/biodatasets/osiris/osiris.py               |  2 +-
 bigbio/biodatasets/pcr/pcr.py                     |  2 +-
 bigbio/biodatasets/pdr/pdr.py                     |  2 +-
 17 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/bigbio/biodatasets/medal/medal.py b/bigbio/biodatasets/medal/medal.py
index 03df40fc3..0ceeb8cb4 100644
--- a/bigbio/biodatasets/medal/medal.py
+++ b/bigbio/biodatasets/medal/medal.py
@@ -31,7 +31,7 @@
 
 logger = datasets.logging.get_logger(__name__)
 
-_TAGS = []
+_TAGS = [Tags.ABBREVIATION]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
@@ -74,10 +74,11 @@
 
 _BIGBIO_VERSION = "1.0.0"
 
+
 class MedalDataset(datasets.GeneratorBasedBuilder):
     """The Repository for Medical Dataset for Abbreviation Disambiguation for Natural Language Understanding (MeDAL) is
-a large medical text dataset curated for abbreviation disambiguation, designed for natural language understanding
-pre-training in the medical domain."""
+    a large medical text dataset curated for abbreviation disambiguation, designed for natural language understanding
+    pre-training in the medical domain."""
 
     SOURCE_VERSION = datasets.Version(_SOURCE_VERSION)
     BIGBIO_VERSION = datasets.Version(_BIGBIO_VERSION)
@@ -124,7 +125,9 @@ def _info(self) -> datasets.DatasetInfo:
             citation=_CITATION,
         )
 
-    def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]:
+    def _split_generators(
+        self, dl_manager: datasets.DownloadManager
+    ) -> List[datasets.SplitGenerator]:
         """Returns SplitGenerators."""
 
         urls = _URLS
@@ -169,7 +172,7 @@ def _generate_offsets(self, text, location):
 
         Returns
         -------
-        dict 
+        dict
             "word": str,
             "offsets": tuple (int, int)
         """
@@ -179,7 +182,7 @@ def _generate_offsets(self, text, location):
         offset_end = offset_start + len(word)
 
         # return word and offsets
-        return {"word":word, "offsets":(offset_start, offset_end)}
+        return {"word": word, "offsets": (offset_start, offset_end)}
 
     def _generate_examples(self, filepath, split: str) -> Tuple[int, Dict]:
         """Yields examples as (key, example) tuples."""
diff --git a/bigbio/biodatasets/meqsum/meqsum.py b/bigbio/biodatasets/meqsum/meqsum.py
index 21fe7f58c..a2a3d8bec 100644
--- a/bigbio/biodatasets/meqsum/meqsum.py
+++ b/bigbio/biodatasets/meqsum/meqsum.py
@@ -33,7 +33,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.ABSTRACTIVE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/minimayosrs/minimayosrs.py b/bigbio/biodatasets/minimayosrs/minimayosrs.py
index f8f095bbe..cd2eba509 100644
--- a/bigbio/biodatasets/minimayosrs/minimayosrs.py
+++ b/bigbio/biodatasets/minimayosrs/minimayosrs.py
@@ -28,7 +28,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.CONCEPT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/mlee/mlee.py b/bigbio/biodatasets/mlee/mlee.py
index 478079624..5582f2193 100644
--- a/bigbio/biodatasets/mlee/mlee.py
+++ b/bigbio/biodatasets/mlee/mlee.py
@@ -32,7 +32,7 @@
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
-_TAGS = []
+_TAGS = [Tags.GENE, Tags.DRUG, Tags.CELL, Tags.ORGAN, Tags.TISSUE, Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/msh_wsd/msh_wsd.py b/bigbio/biodatasets/msh_wsd/msh_wsd.py
index 2195106ac..b47656330 100644
--- a/bigbio/biodatasets/msh_wsd/msh_wsd.py
+++ b/bigbio/biodatasets/msh_wsd/msh_wsd.py
@@ -43,7 +43,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.ABBREVIATION]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/bigbio/biodatasets/muchmore/muchmore.py b/bigbio/biodatasets/muchmore/muchmore.py
index 3ae9d047d..bc5d1335e 100644
--- a/bigbio/biodatasets/muchmore/muchmore.py
+++ b/bigbio/biodatasets/muchmore/muchmore.py
@@ -76,7 +76,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.POS]
 _LANGUAGES = [Lang.EN, Lang.DE]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/multi_xscience/multi_xscience.py b/bigbio/biodatasets/multi_xscience/multi_xscience.py
index 6be1347be..a5f9fcd3e 100644
--- a/bigbio/biodatasets/multi_xscience/multi_xscience.py
+++ b/bigbio/biodatasets/multi_xscience/multi_xscience.py
@@ -24,7 +24,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.ABSTRACTIVE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/mutation_finder/mutation_finder.py b/bigbio/biodatasets/mutation_finder/mutation_finder.py
index 5dc113a7c..e14b715a9 100644
--- a/bigbio/biodatasets/mutation_finder/mutation_finder.py
+++ b/bigbio/biodatasets/mutation_finder/mutation_finder.py
@@ -23,7 +23,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
-_TAGS = []
+_TAGS = [Tags.VARIANT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/nagel/nagel.py b/bigbio/biodatasets/nagel/nagel.py
index fd8a05f68..0f5990ff7 100644
--- a/bigbio/biodatasets/nagel/nagel.py
+++ b/bigbio/biodatasets/nagel/nagel.py
@@ -26,7 +26,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
-_TAGS = []
+_TAGS = [Tags.VARIANT, Tags.GENE, Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/ncbi_disease/ncbi_disease.py b/bigbio/biodatasets/ncbi_disease/ncbi_disease.py
index 1efee20e5..c2b1d7487 100644
--- a/bigbio/biodatasets/ncbi_disease/ncbi_disease.py
+++ b/bigbio/biodatasets/ncbi_disease/ncbi_disease.py
@@ -29,7 +29,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/nlm_gene/nlm_gene.py b/bigbio/biodatasets/nlm_gene/nlm_gene.py
index 1a6c0e06f..d084ad477 100644
--- a/bigbio/biodatasets/nlm_gene/nlm_gene.py
+++ b/bigbio/biodatasets/nlm_gene/nlm_gene.py
@@ -26,7 +26,7 @@
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
-_TAGS = []
+_TAGS = [Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/nlm_wsd/nlm_wsd.py b/bigbio/biodatasets/nlm_wsd/nlm_wsd.py
index 7437d8df2..3882db161 100644
--- a/bigbio/biodatasets/nlm_wsd/nlm_wsd.py
+++ b/bigbio/biodatasets/nlm_wsd/nlm_wsd.py
@@ -56,7 +56,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.ABBREVIATION]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/bigbio/biodatasets/nlmchem/nlmchem.py b/bigbio/biodatasets/nlmchem/nlmchem.py
index e816e3788..10472c244 100644
--- a/bigbio/biodatasets/nlmchem/nlmchem.py
+++ b/bigbio/biodatasets/nlmchem/nlmchem.py
@@ -26,7 +26,7 @@
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
-_TAGS = []
+_TAGS = [Tags.CHEMICAL, Tags.DOCUMENT_INDEXING]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py b/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py
index ff8734739..26e972f20 100644
--- a/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py
+++ b/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py
@@ -66,7 +66,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.SOCIAL_MEDIA, Tags.SENTIMENT_ANALYSIS]
 _LANGUAGES = [Lang.EN, Lang.ZH, Lang.JA]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/osiris/osiris.py b/bigbio/biodatasets/osiris/osiris.py
index b83262563..19b0872c9 100644
--- a/bigbio/biodatasets/osiris/osiris.py
+++ b/bigbio/biodatasets/osiris/osiris.py
@@ -27,7 +27,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.VARIANT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/pcr/pcr.py b/bigbio/biodatasets/pcr/pcr.py
index 28e3987e9..8295b1777 100644
--- a/bigbio/biodatasets/pcr/pcr.py
+++ b/bigbio/biodatasets/pcr/pcr.py
@@ -28,7 +28,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.CHEMICAL, Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/pdr/pdr.py b/bigbio/biodatasets/pdr/pdr.py
index 1c7bb9f77..efa60062c 100644
--- a/bigbio/biodatasets/pdr/pdr.py
+++ b/bigbio/biodatasets/pdr/pdr.py
@@ -31,7 +31,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False

From 5d42aebe66ae44456f1f1ed04d0671164ce0fae5 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 13:14:42 +0200
Subject: [PATCH 07/20] add tags

---
 bigbio/biodatasets/mirna/mirna.py | 742 +++++++++++++++---------------
 1 file changed, 380 insertions(+), 362 deletions(-)

diff --git a/bigbio/biodatasets/mirna/mirna.py b/bigbio/biodatasets/mirna/mirna.py
index aa7e72793..44babefe0 100644
--- a/bigbio/biodatasets/mirna/mirna.py
+++ b/bigbio/biodatasets/mirna/mirna.py
@@ -1,366 +1,384 @@
-# coding=utf-8
-# Copyright 2022 The HuggingFace Datasets Authors and the current dataset script contributor.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import xml.etree.ElementTree as ET
-from typing import Dict, Iterator, List, Tuple
-
-import datasets
-
-from bigbio.utils import schemas
-from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import Lang, Tags, Tasks
+# coding=utf-8
+# Copyright 2022 The HuggingFace Datasets Authors and the current dataset script contributor.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import xml.etree.ElementTree as ET
+from typing import Dict, Iterator, List, Tuple
+
+import datasets
+
+from bigbio.utils import schemas
+from bigbio.utils.configs import BigBioConfig
+from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
-
-_TAGS = []
+
+_TAGS = [Tags.MIRNA, Tags.GENE, Tags.DISEASE, Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
-_CITATION = """\
-@Article{Bagewadi2014,
-author={Bagewadi, Shweta
-and Bobi{\'{c}}, Tamara
-and Hofmann-Apitius, Martin
-and Fluck, Juliane
-and Klinger, Roman},
-title={Detecting miRNA Mentions and Relations in Biomedical Literature},
-journal={F1000Research},
-year={2014},
-month={Aug},
-day={28},
-publisher={F1000Research},
-volume={3},
-pages={205-205},
-keywords={MicroRNAs; corpus; prediction algorithms},
-abstract={
-    INTRODUCTION: MicroRNAs (miRNAs) have demonstrated their potential as post-transcriptional
-    gene expression regulators, participating in a wide spectrum of regulatory events such as
-    apoptosis, differentiation, and stress response. Apart from the role of miRNAs in normal
-    physiology, their dysregulation is implicated in a vast array of diseases. Dissection of
-    miRNA-related associations are valuable for contemplating their mechanism in diseases,
-    leading to the discovery of novel miRNAs for disease prognosis, diagnosis, and therapy.
-    MOTIVATION: Apart from databases and prediction tools, miRNA-related information is largely
-    available as unstructured text. Manual retrieval of these associations can be labor-intensive
-    due to steadily growing number of publications. Additionally, most of the published miRNA
-    entity recognition methods are keyword based, further subjected to manual inspection for
-    retrieval of relations. Despite the fact that several databases host miRNA-associations
-    derived from text, lower sensitivity and lack of published details for miRNA entity
-    recognition and associated relations identification has motivated the need for developing
-    comprehensive methods that are freely available for the scientific community. Additionally,
-    the lack of a standard corpus for miRNA-relations has caused difficulty in evaluating the
-    available systems. We propose methods to automatically extract mentions of miRNAs, species,
-    genes/proteins, disease, and relations from scientific literature. Our generated corpora,
-    along with dictionaries, and miRNA regular expression are freely available for academic
-    purposes. To our knowledge, these resources are the most comprehensive developed so far.
-    RESULTS: The identification of specific miRNA mentions reaches a recall of 0.94 and
-    precision of 0.93. Extraction of miRNA-disease and miRNA-gene relations lead to an
-    F1 score of up to 0.76. A comparison of the information extracted by our approach to
-    the databases miR2Disease and miRSel for the extraction of Alzheimer's disease
-    related relations shows the capability of our proposed methods in identifying correct
-    relations with improved sensitivity. The published resources and described methods can
-    help the researchers for maximal retrieval of miRNA-relations and generation of
-    miRNA-regulatory networks. AVAILABILITY: The training and test corpora, annotation
-    guidelines, developed dictionaries, and supplementary files are available at
-    http://www.scai.fraunhofer.de/mirna-corpora.html.
-},
-note={26535109[pmid]},
-note={PMC4602280[pmcid]},
-issn={2046-1402},
-url={https://pubmed.ncbi.nlm.nih.gov/26535109},
-language={eng}
-}
-"""
-
-_DATASETNAME = "mirna"
-
-_DESCRIPTION = """\
-The corpus consists of 301 Medline citations. The documents were screened for
-mentions of miRNA in the abstract text. Gene, disease and miRNA entities were manually
-annotated. The corpus comprises of two separate files, a train and a test set, coming
-from 201 and 100 documents respectively. 
-"""
-
-_HOMEPAGE = "https://www.scai.fraunhofer.de/en/business-research-areas/bioinformatics/downloads/download-mirna-test-corpus.html"
-
-_LICENSE = Licenses.CC_BY_NC_3p0
-
-_BASE = "https://www.scai.fraunhofer.de/content/dam/scai/de/downloads/bioinformatik/miRNA/miRNA-"
-
-_URLs = {
-    "source": {
-        "train": _BASE + "Train-Corpus.xml",
-        "test": _BASE + "Test-Corpus.xml",
-    },
-    "bigbio_kb": {
-        "train": _BASE + "Train-Corpus.xml",
-        "test": _BASE + "Test-Corpus.xml",
-    },
-}
-
-_SUPPORTED_TASKS = [Tasks.NAMED_ENTITY_RECOGNITION, Tasks.NAMED_ENTITY_DISAMBIGUATION]
-_SOURCE_VERSION = "1.0.0"
-_BIGBIO_VERSION = "1.0.0"
-
-
-class miRNADataset(datasets.GeneratorBasedBuilder):
-    """mirna"""
-
-    SOURCE_VERSION = datasets.Version(_SOURCE_VERSION)
-    BIGBIO_VERSION = datasets.Version(_BIGBIO_VERSION)
-
-    BUILDER_CONFIGS = [
-        BigBioConfig(
-            name="mirna_source",
-            version=SOURCE_VERSION,
-            description="mirna source schema",
-            schema="source",
-            subset_id="mirna",
-        ),
-        BigBioConfig(
-            name="mirna_bigbio_kb",
-            version=BIGBIO_VERSION,
-            description="mirna BigBio schema",
-            schema="bigbio_kb",
-            subset_id="mirna",
-        ),
-    ]
-
-    DEFAULT_CONFIG_NAME = "mirna_source"
-
-    def _info(self):
-
-        if self.config.schema == "source":
-
-            features = datasets.Features(
-                {
-                    "passages": [
-                        {
-                            "document_id": datasets.Value("string"),
-                            "type": datasets.Value("string"),
-                            "text": datasets.Value("string"),
-                            "offset": datasets.Value("int32"),
-                            "entities": [
-                                {
-                                    "id": datasets.Value("string"),
-                                    "offsets": [[datasets.Value("int32")]],
-                                    "text": [datasets.Value("string")],
-                                    "type": datasets.Value("string"),
-                                    "normalized": [
-                                        {
-                                            "db_name": datasets.Value("string"),
-                                            "db_id": datasets.Value("string"),
-                                        }
-                                    ],
-                                }
-                            ],
-                        }
-                    ]
-                }
-            )
-
-        elif self.config.schema == "bigbio_kb":
-            features = schemas.kb_features
-
-        return datasets.DatasetInfo(
-            description=_DESCRIPTION,
-            features=features,
-            supervised_keys=None,
-            homepage=_HOMEPAGE,
-            license=str(_LICENSE),
-            citation=_CITATION,
-        )
-
-    def _split_generators(self, dl_manager):
-        """Returns SplitGenerators."""
-
-        my_urls = _URLs[self.config.schema]
-
-        path_xml_train = dl_manager.download(my_urls["train"])
-        path_xml_test = dl_manager.download(my_urls["test"])
-
-        return [
-            datasets.SplitGenerator(
-                name=datasets.Split.TRAIN,
-                # These kwargs will be passed to _generate_examples
-                gen_kwargs={
-                    "filepath": path_xml_train,
-                    "split": "train",
-                },
-            ),
-            datasets.SplitGenerator(
-                name=datasets.Split.TEST,
-                # These kwargs will be passed to _generate_examples
-                gen_kwargs={
-                    "filepath": path_xml_test,
-                    "split": "test",
-                },
-            ),
-        ]
-
-    def _get_passages_and_entities(self, d) -> Tuple[List[Dict], List[List[Dict]]]:
-
-        sentences: List[Dict] = []
-        entities: List[List[Dict]] = []
-        relations: List[List[Dict]] = []
-
-        text_total_length = 0
-
-        po_start = 0
-
-        # Get sentences of the document
-        for _, s in enumerate(d):
-
-            # annotation used only for document indexing
-            if s.attrib["text"] is None or len(s.attrib["text"]) <= 0:
-                continue
-
-            # annotation used only for document indexing
-            if len(s) <= 0:
-                continue
-
-            text_total_length += len(s.attrib["text"]) + 1
-
-            po_end = po_start + len(s.attrib["text"])
-
-            start = po_start
-
-            dp = {
-                "text": s.attrib["text"],
-                "type": "title" if ".s0" in s.attrib["id"] else "abstract",
-                "offsets": [(po_start, po_end)],
-                "offset": 0,  # original offset
-            }
-
-            po_start = po_end + 1
-
-            sentences.append(dp)
-
-            pe = []  # entities
-            re = []  # relations
-
-            # For each entity
-            for a in s:
-
-                # If correspond to a entity
-                if a.tag == "entity":
-
-                    length = len(a.attrib["text"])
-
-                    if a.attrib["text"] is None or length <= 0:
-                        continue
-
-                    # no in-text annotation: only for document indexing
-                    if a.attrib["type"] in ["MeSH_Indexing_Chemical", "OTHER"]:
-                        continue
-
-                    startOffset, endOffset = a.attrib["charOffset"].split("-")
-                    startOffset, endOffset = int(startOffset), int(endOffset)
-
-                    pe.append(
-                        {
-                            "id": a.attrib["id"],
-                            "type": a.attrib["type"],
-                            "text": (a.attrib["text"],),
-                            "offsets": [(start + startOffset, start + endOffset + 1)],
-                            "normalized": [{"db_name": "miRNA-corpus", "db_id": a.attrib["id"]}],
-                        }
-                    )
-
-                # If correspond to relation pair
-                elif a.tag == "pair":
-
-                    re.append(
-                        {
-                            "id": a.attrib["id"],
-                            "type": a.attrib["type"],
-                            "arg1_id": a.attrib["e1"],
-                            "arg2_id": a.attrib["e2"],
-                            "normalized": [],
-                        }
-                    )
-
-            entities.append(pe)
-            relations.append(re)
-
-        return sentences, entities, relations
-
-    def _generate_examples(
-        self,
-        filepath: str,
-        split: str,
-    ) -> Iterator[Tuple[int, Dict]]:
-        """Yields examples as (key, example) tuples."""
-
-        reader = ET.fromstring(open(str(filepath), "r").read())
-
-        if self.config.schema == "source":
-
-            for uid, doc in enumerate(reader):
-
-                sentences, sentences_entities, relations = self._get_passages_and_entities(doc)
-
-                if len(sentences) < 1 or len(sentences_entities) < 1 or len(sentences_entities) != len(sentences):
-                    continue
-
-                for p, pe, re in zip(sentences, sentences_entities, relations):
-
-                    p.pop("offsets")  # BioC has only start for passages offsets
-
-                    p["document_id"] = doc.attrib["id"]
-                    p["entities"] = pe  # BioC has per passage entities
-
-                yield uid, {"passages": sentences}
-
-        elif self.config.schema == "bigbio_kb":
-
-            uid = 0
-
-            for idx, doc in enumerate(reader):
-
-                sentences, sentences_entities, relations = self._get_passages_and_entities(doc)
-
-                if len(sentences) < 1 or len(sentences_entities) < 1 or len(sentences_entities) != len(sentences):
-                    continue
-
-                # global id
-                uid += 1
-
-                # unpack per-sentence entities
-                entities = [e for pe in sentences_entities for e in pe]
-
-                for p in sentences:
-                    p.pop("offset")  # drop original offset
-                    p["text"] = (p["text"],)  # text in sentence is Sequence
-                    p["id"] = uid
-                    uid += 1
-
-                for e in entities:
-                    e["id"] = uid
-                    uid += 1
-
-                # unpack per-sentence relations
-                relations = [r for re in relations for r in re]
-
-                for r in relations:
-                    r["id"] = uid
-                    uid += 1
-
-                yield idx, {
-                    "id": uid,
-                    "document_id": doc.attrib["id"],
-                    "passages": sentences,
-                    "entities": entities,
-                    "events": [],
-                    "coreferences": [],
-                    "relations": relations,
-                }
+_CITATION = """\
+@Article{Bagewadi2014,
+author={Bagewadi, Shweta
+and Bobi{\\'{c}}, Tamara
+and Hofmann-Apitius, Martin
+and Fluck, Juliane
+and Klinger, Roman},
+title={Detecting miRNA Mentions and Relations in Biomedical Literature},
+journal={F1000Research},
+year={2014},
+month={Aug},
+day={28},
+publisher={F1000Research},
+volume={3},
+pages={205-205},
+keywords={MicroRNAs; corpus; prediction algorithms},
+abstract={
+    INTRODUCTION: MicroRNAs (miRNAs) have demonstrated their potential as post-transcriptional
+    gene expression regulators, participating in a wide spectrum of regulatory events such as
+    apoptosis, differentiation, and stress response. Apart from the role of miRNAs in normal
+    physiology, their dysregulation is implicated in a vast array of diseases. Dissection of
+    miRNA-related associations are valuable for contemplating their mechanism in diseases,
+    leading to the discovery of novel miRNAs for disease prognosis, diagnosis, and therapy.
+    MOTIVATION: Apart from databases and prediction tools, miRNA-related information is largely
+    available as unstructured text. Manual retrieval of these associations can be labor-intensive
+    due to steadily growing number of publications. Additionally, most of the published miRNA
+    entity recognition methods are keyword based, further subjected to manual inspection for
+    retrieval of relations. Despite the fact that several databases host miRNA-associations
+    derived from text, lower sensitivity and lack of published details for miRNA entity
+    recognition and associated relations identification has motivated the need for developing
+    comprehensive methods that are freely available for the scientific community. Additionally,
+    the lack of a standard corpus for miRNA-relations has caused difficulty in evaluating the
+    available systems. We propose methods to automatically extract mentions of miRNAs, species,
+    genes/proteins, disease, and relations from scientific literature. Our generated corpora,
+    along with dictionaries, and miRNA regular expression are freely available for academic
+    purposes. To our knowledge, these resources are the most comprehensive developed so far.
+    RESULTS: The identification of specific miRNA mentions reaches a recall of 0.94 and
+    precision of 0.93. Extraction of miRNA-disease and miRNA-gene relations lead to an
+    F1 score of up to 0.76. A comparison of the information extracted by our approach to
+    the databases miR2Disease and miRSel for the extraction of Alzheimer's disease
+    related relations shows the capability of our proposed methods in identifying correct
+    relations with improved sensitivity. The published resources and described methods can
+    help the researchers for maximal retrieval of miRNA-relations and generation of
+    miRNA-regulatory networks. AVAILABILITY: The training and test corpora, annotation
+    guidelines, developed dictionaries, and supplementary files are available at
+    http://www.scai.fraunhofer.de/mirna-corpora.html.
+},
+note={26535109[pmid]},
+note={PMC4602280[pmcid]},
+issn={2046-1402},
+url={https://pubmed.ncbi.nlm.nih.gov/26535109},
+language={eng}
+}
+"""
+
+_DATASETNAME = "mirna"
+
+_DESCRIPTION = """\
+The corpus consists of 301 Medline citations. The documents were screened for
+mentions of miRNA in the abstract text. Gene, disease and miRNA entities were manually
+annotated. The corpus comprises of two separate files, a train and a test set, coming
+from 201 and 100 documents respectively. 
+"""
+
+_HOMEPAGE = "https://www.scai.fraunhofer.de/en/business-research-areas/bioinformatics/downloads/download-mirna-test-corpus.html"
+
+_LICENSE = Licenses.CC_BY_NC_3p0
+
+_BASE = "https://www.scai.fraunhofer.de/content/dam/scai/de/downloads/bioinformatik/miRNA/miRNA-"
+
+_URLs = {
+    "source": {
+        "train": _BASE + "Train-Corpus.xml",
+        "test": _BASE + "Test-Corpus.xml",
+    },
+    "bigbio_kb": {
+        "train": _BASE + "Train-Corpus.xml",
+        "test": _BASE + "Test-Corpus.xml",
+    },
+}
+
+_SUPPORTED_TASKS = [Tasks.NAMED_ENTITY_RECOGNITION, Tasks.NAMED_ENTITY_DISAMBIGUATION]
+_SOURCE_VERSION = "1.0.0"
+_BIGBIO_VERSION = "1.0.0"
+
+
+class miRNADataset(datasets.GeneratorBasedBuilder):
+    """mirna"""
+
+    SOURCE_VERSION = datasets.Version(_SOURCE_VERSION)
+    BIGBIO_VERSION = datasets.Version(_BIGBIO_VERSION)
+
+    BUILDER_CONFIGS = [
+        BigBioConfig(
+            name="mirna_source",
+            version=SOURCE_VERSION,
+            description="mirna source schema",
+            schema="source",
+            subset_id="mirna",
+        ),
+        BigBioConfig(
+            name="mirna_bigbio_kb",
+            version=BIGBIO_VERSION,
+            description="mirna BigBio schema",
+            schema="bigbio_kb",
+            subset_id="mirna",
+        ),
+    ]
+
+    DEFAULT_CONFIG_NAME = "mirna_source"
+
+    def _info(self):
+        """Build the DatasetInfo; the feature layout depends on the configured schema."""
+        if self.config.schema == "source":
+
+            features = datasets.Features(
+                {
+                    "passages": [
+                        {
+                            "document_id": datasets.Value("string"),
+                            "type": datasets.Value("string"),
+                            "text": datasets.Value("string"),
+                            "offset": datasets.Value("int32"),
+                            "entities": [
+                                {
+                                    "id": datasets.Value("string"),
+                                    "offsets": [[datasets.Value("int32")]],
+                                    "text": [datasets.Value("string")],
+                                    "type": datasets.Value("string"),
+                                    "normalized": [
+                                        {
+                                            "db_name": datasets.Value("string"),
+                                            "db_id": datasets.Value("string"),
+                                        }
+                                    ],
+                                }
+                            ],
+                        }
+                    ]
+                }
+            )
+
+        elif self.config.schema == "bigbio_kb":
+            features = schemas.kb_features
+
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=features,
+            supervised_keys=None,
+            homepage=_HOMEPAGE,
+            license=str(_LICENSE),
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager):
+        """Download the train and test XML files and return one SplitGenerator per split."""
+
+        my_urls = _URLs[self.config.schema]
+
+        path_xml_train = dl_manager.download(my_urls["train"])
+        path_xml_test = dl_manager.download(my_urls["test"])
+
+        return [
+            datasets.SplitGenerator(
+                name=datasets.Split.TRAIN,
+                # These kwargs will be passed to _generate_examples
+                gen_kwargs={
+                    "filepath": path_xml_train,
+                    "split": "train",
+                },
+            ),
+            datasets.SplitGenerator(
+                name=datasets.Split.TEST,
+                # These kwargs will be passed to _generate_examples
+                gen_kwargs={
+                    "filepath": path_xml_test,
+                    "split": "test",
+                },
+            ),
+        ]
+
+    def _get_passages_and_entities(
+        self, d
+    ) -> Tuple[List[Dict], List[List[Dict]], List[List[Dict]]]:
+        """Split one document element into passages, entities and relations.
+
+        Returns three parallel lists, one item per kept sentence: the passage
+        dict, that sentence's entity dicts, and that sentence's relation dicts.
+        """
+        sentences: List[Dict] = []
+        entities: List[List[Dict]] = []
+        relations: List[List[Dict]] = []
+
+        # document-level character offset at which the next kept sentence starts
+        po_start = 0
+
+        for s in d:
+
+            # skip sentences that carry no text
+            if not s.attrib["text"]:
+                continue
+
+            # skip sentences that have no child annotations
+            if len(s) <= 0:
+                continue
+
+            po_end = po_start + len(s.attrib["text"])
+
+            start = po_start
+
+            dp = {
+                "text": s.attrib["text"],
+                "type": "title" if ".s0" in s.attrib["id"] else "abstract",
+                "offsets": [(po_start, po_end)],
+                "offset": 0,  # original offset
+            }
+
+            # consecutive kept sentences are placed one character apart
+            po_start = po_end + 1
+
+            sentences.append(dp)
+
+            pe = []  # entities
+            re = []  # relations
+
+            for a in s:
+
+                if a.tag == "entity":
+
+                    # skip entities without surface text
+                    if not a.attrib["text"]:
+                        continue
+
+                    # no in-text annotation: only for document indexing
+                    if a.attrib["type"] in ["MeSH_Indexing_Chemical", "OTHER"]:
+                        continue
+
+                    # "<start>-<end>"; presumably sentence-relative, end inclusive - confirm
+                    startOffset, endOffset = a.attrib["charOffset"].split("-")
+                    startOffset, endOffset = int(startOffset), int(endOffset)
+
+                    pe.append(
+                        {
+                            "id": a.attrib["id"],
+                            "type": a.attrib["type"],
+                            "text": (a.attrib["text"],),
+                            "offsets": [(start + startOffset, start + endOffset + 1)],
+                            "normalized": [
+                                {"db_name": "miRNA-corpus", "db_id": a.attrib["id"]}
+                            ],
+                        }
+                    )
+
+                elif a.tag == "pair":
+
+                    re.append(
+                        {
+                            "id": a.attrib["id"],
+                            "type": a.attrib["type"],
+                            "arg1_id": a.attrib["e1"],
+                            "arg2_id": a.attrib["e2"],
+                            "normalized": [],
+                        }
+                    )
+
+            entities.append(pe)
+            relations.append(re)
+
+        return sentences, entities, relations
+
+    def _generate_examples(
+        self,
+        filepath: str,
+        split: str,
+    ) -> Iterator[Tuple[int, Dict]]:
+        """Parse the corpus XML at filepath and yield one (key, example) per document."""
+        # parse from the path so that no file handle is left open
+        reader = ET.parse(str(filepath)).getroot()
+
+        if self.config.schema == "source":
+
+            for uid, doc in enumerate(reader):
+
+                (
+                    sentences,
+                    sentences_entities,
+                    relations,
+                ) = self._get_passages_and_entities(doc)
+
+                if (
+                    len(sentences) < 1
+                    or len(sentences_entities) < 1
+                    or len(sentences_entities) != len(sentences)
+                ):
+                    continue
+
+                for p, pe, re in zip(sentences, sentences_entities, relations):
+                    # NOTE(review): re is never used here, so relations are dropped in this schema
+                    p.pop("offsets")  # BioC has only start for passages offsets
+
+                    p["document_id"] = doc.attrib["id"]
+                    p["entities"] = pe  # BioC has per passage entities
+
+                yield uid, {"passages": sentences}
+
+        elif self.config.schema == "bigbio_kb":
+
+            uid = 0
+
+            for idx, doc in enumerate(reader):
+
+                (
+                    sentences,
+                    sentences_entities,
+                    relations,
+                ) = self._get_passages_and_entities(doc)
+
+                if (
+                    len(sentences) < 1
+                    or len(sentences_entities) < 1
+                    or len(sentences_entities) != len(sentences)
+                ):
+                    continue
+
+                # global id
+                uid += 1
+
+                # unpack per-sentence entities
+                entities = [e for pe in sentences_entities for e in pe]
+
+                for p in sentences:
+                    p.pop("offset")  # drop original offset
+                    p["text"] = (p["text"],)  # text in sentence is Sequence
+                    p["id"] = uid
+                    uid += 1
+
+                for e in entities:
+                    e["id"] = uid
+                    uid += 1
+
+                # unpack per-sentence relations
+                relations = [r for re in relations for r in re]
+                # NOTE(review): arg1_id/arg2_id keep the XML entity ids although entity ids were just renumbered - confirm
+                for r in relations:
+                    r["id"] = uid
+                    uid += 1
+
+                yield idx, {
+                    "id": uid,
+                    "document_id": doc.attrib["id"],
+                    "passages": sentences,
+                    "entities": entities,
+                    "events": [],
+                    "coreferences": [],
+                    "relations": relations,
+                }

From 52c561d3ee3170d49a7e867880a9883d24797c2a Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 15:29:25 +0200
Subject: [PATCH 08/20] new tags

---
 bigbio/utils/resources/tags.json | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bigbio/utils/resources/tags.json b/bigbio/utils/resources/tags.json
index 2dcc382e1..51ed9f1b9 100644
--- a/bigbio/utils/resources/tags.json
+++ b/bigbio/utils/resources/tags.json
@@ -43,5 +43,7 @@
   "ANONYMIZATION" : "Anonymizatio (De-identification)",
   "SENTIMENT_ANALYSIS" : "Sentiment analysis",
   "MIRNA" : "miRNA",
-  "ABBREVIATION" : "Abbreviation"
+  "ABBREVIATION" : "Abbreviation",
+  "FACT_CHECKING" : "Fact-checking",
+  "INTENT" : "Intent"
 }

From 14fff7959cf7625abb58fe05f7517da17c5e6c3f Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 15:29:33 +0200
Subject: [PATCH 09/20] complete adding tags

---
 bigbio/biodatasets/ask_a_patient/ask_a_patient.py        | 2 +-
 bigbio/biodatasets/pharmaconer/pharmaconer.py            | 2 +-
 bigbio/biodatasets/pho_ner/pho_ner.py                    | 2 +-
 bigbio/biodatasets/pico_extraction/pico_extraction.py    | 2 +-
 bigbio/biodatasets/progene/progene.py                    | 2 +-
 bigbio/biodatasets/psytar/psytar.py                      | 2 +-
 bigbio/biodatasets/pubhealth/pubhealth.py                | 2 +-
 bigbio/biodatasets/pubmed_qa/pubmed_qa.py                | 4 ++--
 bigbio/biodatasets/pubtator_central/pubtator_central.py  | 2 +-
 bigbio/biodatasets/quaero/quaero.py                      | 9 ++++++++-
 bigbio/biodatasets/scai_chemical/scai_chemical.py        | 2 +-
 bigbio/biodatasets/scai_disease/scai_disease.py          | 2 +-
 bigbio/biodatasets/scicite/scicite.py                    | 2 +-
 bigbio/biodatasets/scifact/scifact.py                    | 2 +-
 bigbio/biodatasets/sciq/sciq.py                          | 2 +-
 bigbio/biodatasets/seth_corpus/seth_corpus.py            | 2 +-
 bigbio/biodatasets/spl_adr_200db/spl_adr_200db.py        | 2 +-
 .../swedish_medical_ner/swedish_medical_ner.py           | 2 +-
 bigbio/biodatasets/thomas2011/thomas2011.py              | 2 +-
 bigbio/biodatasets/tmvar_v1/tmvar_v1.py                  | 2 +-
 bigbio/biodatasets/tmvar_v2/tmvar_v2.py                  | 2 +-
 bigbio/biodatasets/tmvar_v3/tmvar_v3.py                  | 2 +-
 bigbio/biodatasets/twadrl/twadrl.py                      | 2 +-
 bigbio/biodatasets/umnsrs/umnsrs.py                      | 2 +-
 bigbio/biodatasets/verspoor_2013/verspoor_2013.py        | 2 +-
 25 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/bigbio/biodatasets/ask_a_patient/ask_a_patient.py b/bigbio/biodatasets/ask_a_patient/ask_a_patient.py
index bd89c5026..53bc81a9c 100644
--- a/bigbio/biodatasets/ask_a_patient/ask_a_patient.py
+++ b/bigbio/biodatasets/ask_a_patient/ask_a_patient.py
@@ -26,7 +26,7 @@
 
 _DATASETNAME = "ask_a_patient"
 
-_TAGS = [Tags.SOCIAL_MEDIA, Tags.ADR]
+_TAGS = [Tags.ADR]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/pharmaconer/pharmaconer.py b/bigbio/biodatasets/pharmaconer/pharmaconer.py
index f20fd87f1..ac5aade0b 100644
--- a/bigbio/biodatasets/pharmaconer/pharmaconer.py
+++ b/bigbio/biodatasets/pharmaconer/pharmaconer.py
@@ -34,7 +34,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.CHEMICAL, Tags.GENE, Tags.DOCUMENT_INDEXING]
 _LANGUAGES = [Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/pho_ner/pho_ner.py b/bigbio/biodatasets/pho_ner/pho_ner.py
index 4ae3852d1..32e0e4e02 100644
--- a/bigbio/biodatasets/pho_ner/pho_ner.py
+++ b/bigbio/biodatasets/pho_ner/pho_ner.py
@@ -23,7 +23,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.COVID]
 _LANGUAGES = [Lang.VI]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/pico_extraction/pico_extraction.py b/bigbio/biodatasets/pico_extraction/pico_extraction.py
index ab4c36f25..7fba82aba 100644
--- a/bigbio/biodatasets/pico_extraction/pico_extraction.py
+++ b/bigbio/biodatasets/pico_extraction/pico_extraction.py
@@ -30,7 +30,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.PICO]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/progene/progene.py b/bigbio/biodatasets/progene/progene.py
index f1ce6223a..3456fdd26 100644
--- a/bigbio/biodatasets/progene/progene.py
+++ b/bigbio/biodatasets/progene/progene.py
@@ -25,7 +25,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/psytar/psytar.py b/bigbio/biodatasets/psytar/psytar.py
index e0931739a..fef90eed6 100644
--- a/bigbio/biodatasets/psytar/psytar.py
+++ b/bigbio/biodatasets/psytar/psytar.py
@@ -54,7 +54,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DRUG, Tags.ADR]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/pubhealth/pubhealth.py b/bigbio/biodatasets/pubhealth/pubhealth.py
index 5320c16e6..6d64352e4 100644
--- a/bigbio/biodatasets/pubhealth/pubhealth.py
+++ b/bigbio/biodatasets/pubhealth/pubhealth.py
@@ -31,7 +31,7 @@
 
 logger = datasets.utils.logging.get_logger(__name__)
 
-_TAGS = []
+_TAGS = [Tags.FACT_CHECKING]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/pubmed_qa/pubmed_qa.py b/bigbio/biodatasets/pubmed_qa/pubmed_qa.py
index 4bdf15062..7203b608a 100644
--- a/bigbio/biodatasets/pubmed_qa/pubmed_qa.py
+++ b/bigbio/biodatasets/pubmed_qa/pubmed_qa.py
@@ -27,10 +27,10 @@
 import bigbio.utils.parsing as parsing
 import bigbio.utils.schemas as schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import BigBioValues, Lang, Tasks
+from bigbio.utils.constants import BigBioValues, Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.YESNO, Tags.ABSTRACTIVE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/pubtator_central/pubtator_central.py b/bigbio/biodatasets/pubtator_central/pubtator_central.py
index 50048a96f..8ba512d3f 100644
--- a/bigbio/biodatasets/pubtator_central/pubtator_central.py
+++ b/bigbio/biodatasets/pubtator_central/pubtator_central.py
@@ -51,7 +51,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.GENE, Tags.DISEASE, Tags.CELL, Tags.SPECIES, Tags.VARIANT, Tags.CHEMICAL]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/quaero/quaero.py b/bigbio/biodatasets/quaero/quaero.py
index 29558a115..2d949b028 100644
--- a/bigbio/biodatasets/quaero/quaero.py
+++ b/bigbio/biodatasets/quaero/quaero.py
@@ -9,7 +9,14 @@
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
-_TAGS = []
+_TAGS = [
+    Tags.CHEMICAL,
+    Tags.ANATOMY,
+    Tags.DRUG,
+    Tags.SPECIES,
+    Tags.PROCEDURE,
+    Tags.DISEASE,
+]
 _LANGUAGES = [Lang.FR]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/scai_chemical/scai_chemical.py b/bigbio/biodatasets/scai_chemical/scai_chemical.py
index e3c4ef800..2935b9a04 100644
--- a/bigbio/biodatasets/scai_chemical/scai_chemical.py
+++ b/bigbio/biodatasets/scai_chemical/scai_chemical.py
@@ -31,7 +31,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.CHEMICAL]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/scai_disease/scai_disease.py b/bigbio/biodatasets/scai_disease/scai_disease.py
index 4b7905d9b..d4bdb3f9c 100644
--- a/bigbio/biodatasets/scai_disease/scai_disease.py
+++ b/bigbio/biodatasets/scai_disease/scai_disease.py
@@ -33,7 +33,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.ADR]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/scicite/scicite.py b/bigbio/biodatasets/scicite/scicite.py
index 3a0f3284b..0626f7b70 100644
--- a/bigbio/biodatasets/scicite/scicite.py
+++ b/bigbio/biodatasets/scicite/scicite.py
@@ -40,7 +40,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.INTENT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/scifact/scifact.py b/bigbio/biodatasets/scifact/scifact.py
index 14a48e21a..c537fcfba 100644
--- a/bigbio/biodatasets/scifact/scifact.py
+++ b/bigbio/biodatasets/scifact/scifact.py
@@ -25,7 +25,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.FACT_CHECKING]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/sciq/sciq.py b/bigbio/biodatasets/sciq/sciq.py
index 54f44e622..eee43620f 100644
--- a/bigbio/biodatasets/sciq/sciq.py
+++ b/bigbio/biodatasets/sciq/sciq.py
@@ -25,7 +25,7 @@
 
 _DATASETNAME = "sciq"
 
-_TAGS = []
+_TAGS = [Tags.MULTIPLE_CHOICE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/seth_corpus/seth_corpus.py b/bigbio/biodatasets/seth_corpus/seth_corpus.py
index 82b84b534..fbf5c754c 100644
--- a/bigbio/biodatasets/seth_corpus/seth_corpus.py
+++ b/bigbio/biodatasets/seth_corpus/seth_corpus.py
@@ -31,7 +31,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.VARIANT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/spl_adr_200db/spl_adr_200db.py b/bigbio/biodatasets/spl_adr_200db/spl_adr_200db.py
index a6b16123f..3936b2305 100644
--- a/bigbio/biodatasets/spl_adr_200db/spl_adr_200db.py
+++ b/bigbio/biodatasets/spl_adr_200db/spl_adr_200db.py
@@ -67,7 +67,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.ADR, Tags.DRUG, Tags.NEGATION]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/swedish_medical_ner/swedish_medical_ner.py b/bigbio/biodatasets/swedish_medical_ner/swedish_medical_ner.py
index 1ec26aca1..4ece98c1a 100644
--- a/bigbio/biodatasets/swedish_medical_ner/swedish_medical_ner.py
+++ b/bigbio/biodatasets/swedish_medical_ner/swedish_medical_ner.py
@@ -43,7 +43,7 @@
 
 _DATASETNAME = "swedish_medical_ner"
 
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.DRUG, Tags.ANATOMY]
 _LANGUAGES = [Lang.SV]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/thomas2011/thomas2011.py b/bigbio/biodatasets/thomas2011/thomas2011.py
index d2747c4a8..d55c650a3 100644
--- a/bigbio/biodatasets/thomas2011/thomas2011.py
+++ b/bigbio/biodatasets/thomas2011/thomas2011.py
@@ -53,7 +53,7 @@
 from bigbio.utils.license import CustomLicense
 
 # TODO: Add BibTeX citation
-_TAGS = []
+_TAGS = [Tags.VARIANT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/tmvar_v1/tmvar_v1.py b/bigbio/biodatasets/tmvar_v1/tmvar_v1.py
index f6cb22e59..93c910e86 100644
--- a/bigbio/biodatasets/tmvar_v1/tmvar_v1.py
+++ b/bigbio/biodatasets/tmvar_v1/tmvar_v1.py
@@ -26,7 +26,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.VARIANT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/tmvar_v2/tmvar_v2.py b/bigbio/biodatasets/tmvar_v2/tmvar_v2.py
index b522524b6..a3518bbf1 100644
--- a/bigbio/biodatasets/tmvar_v2/tmvar_v2.py
+++ b/bigbio/biodatasets/tmvar_v2/tmvar_v2.py
@@ -26,7 +26,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.VARIANT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/tmvar_v3/tmvar_v3.py b/bigbio/biodatasets/tmvar_v3/tmvar_v3.py
index 1e2bb9dd4..197a33fc9 100644
--- a/bigbio/biodatasets/tmvar_v3/tmvar_v3.py
+++ b/bigbio/biodatasets/tmvar_v3/tmvar_v3.py
@@ -44,7 +44,7 @@
   copyright = {Creative Commons Attribution 4.0 International}
 }
 """
-_TAGS = []
+_TAGS = [Tags.VARIANT, Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/twadrl/twadrl.py b/bigbio/biodatasets/twadrl/twadrl.py
index abfb83a91..1735b2ec1 100644
--- a/bigbio/biodatasets/twadrl/twadrl.py
+++ b/bigbio/biodatasets/twadrl/twadrl.py
@@ -26,7 +26,7 @@
 
 _DATASETNAME = "twadrl"
 
-_TAGS = []
+_TAGS = [Tags.ADR]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/umnsrs/umnsrs.py b/bigbio/biodatasets/umnsrs/umnsrs.py
index 8bb39e554..07f603e87 100644
--- a/bigbio/biodatasets/umnsrs/umnsrs.py
+++ b/bigbio/biodatasets/umnsrs/umnsrs.py
@@ -32,7 +32,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.CONCEPT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/verspoor_2013/verspoor_2013.py b/bigbio/biodatasets/verspoor_2013/verspoor_2013.py
index 58f613132..2464a95f8 100644
--- a/bigbio/biodatasets/verspoor_2013/verspoor_2013.py
+++ b/bigbio/biodatasets/verspoor_2013/verspoor_2013.py
@@ -35,7 +35,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.VARIANT, Tags.CANCER]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False

From 71ceed5f1fddd71d5895eb9499983ade88320ab8 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 15:32:13 +0200
Subject: [PATCH 10/20] ORGANISM is SPECIES, SOCIAL_MEDIA belongs to `source`
 not `subtask`

---
 bigbio/biodatasets/bionlp_st_2011_id/bionlp_st_2011_id.py   | 2 +-
 bigbio/biodatasets/bionlp_st_2013_gro/bionlp_st_2013_gro.py | 2 +-
 bigbio/biodatasets/bionlp_st_2019_bb/bionlp_st_2019_bb.py   | 2 +-
 bigbio/biodatasets/cadec/cadec.py                           | 2 +-
 bigbio/biodatasets/cord_ner/cord_ner.py                     | 2 +-
 bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py       | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/bigbio/biodatasets/bionlp_st_2011_id/bionlp_st_2011_id.py b/bigbio/biodatasets/bionlp_st_2011_id/bionlp_st_2011_id.py
index c5e0734d5..775c56fc0 100644
--- a/bigbio/biodatasets/bionlp_st_2011_id/bionlp_st_2011_id.py
+++ b/bigbio/biodatasets/bionlp_st_2011_id/bionlp_st_2011_id.py
@@ -31,7 +31,7 @@
     Tags.DISEASE,
     Tags.GENE,
     Tags.CHEMICAL,
-    Tags.ORGANISM,
+    Tags.SPECIES,
     Tags.SPECULATION,
     Tags.NEGATION,
 ]
diff --git a/bigbio/biodatasets/bionlp_st_2013_gro/bionlp_st_2013_gro.py b/bigbio/biodatasets/bionlp_st_2013_gro/bionlp_st_2013_gro.py
index 1241b22c5..bc61c02eb 100644
--- a/bigbio/biodatasets/bionlp_st_2013_gro/bionlp_st_2013_gro.py
+++ b/bigbio/biodatasets/bionlp_st_2013_gro/bionlp_st_2013_gro.py
@@ -28,7 +28,7 @@
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
-_TAGS = [Tags.GENE, Tags.ORGANISM, Tags.CELL, Tags.TISSUE]
+_TAGS = [Tags.GENE, Tags.SPECIES, Tags.CELL, Tags.TISSUE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/bionlp_st_2019_bb/bionlp_st_2019_bb.py b/bigbio/biodatasets/bionlp_st_2019_bb/bionlp_st_2019_bb.py
index 8d464b851..f399df666 100644
--- a/bigbio/biodatasets/bionlp_st_2019_bb/bionlp_st_2019_bb.py
+++ b/bigbio/biodatasets/bionlp_st_2019_bb/bionlp_st_2019_bb.py
@@ -27,7 +27,7 @@
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
-_TAGS = [Tags.ORGANISM]
+_TAGS = [Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/cadec/cadec.py b/bigbio/biodatasets/cadec/cadec.py
index c604c0920..f9c604467 100644
--- a/bigbio/biodatasets/cadec/cadec.py
+++ b/bigbio/biodatasets/cadec/cadec.py
@@ -38,7 +38,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
-_TAGS = [Tags.SOCIAL_MEDIA, Tags.DISEASE, Tags.ADR, Tags.DRUG]
+_TAGS = [Tags.DISEASE, Tags.ADR, Tags.DRUG]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/cord_ner/cord_ner.py b/bigbio/biodatasets/cord_ner/cord_ner.py
index 5457155d7..f29c298ea 100644
--- a/bigbio/biodatasets/cord_ner/cord_ner.py
+++ b/bigbio/biodatasets/cord_ner/cord_ner.py
@@ -28,7 +28,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
-_TAGS = [Tags.GENE, Tags.DISEASE, Tags.CHEMICAL, Tags.COVID, Tags.ORGANISM]
+_TAGS = [Tags.GENE, Tags.DISEASE, Tags.CHEMICAL, Tags.COVID, Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py b/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py
index 26e972f20..35e93d536 100644
--- a/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py
+++ b/bigbio/biodatasets/ntcir_13_medweb/ntcir_13_medweb.py
@@ -66,7 +66,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = [Tags.DISEASE, Tags.SOCIAL_MEDIA, Tags.SENTIMENT_ANALYSIS]
+_TAGS = [Tags.DISEASE, Tags.SENTIMENT_ANALYSIS]
 _LANGUAGES = [Lang.EN, Lang.ZH, Lang.JA]
 _PUBMED = False
 _LOCAL = True

From 1b0b89eae1c383cdfb56ab2bfe06fd4f1d5d1f2b Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 15:32:37 +0200
Subject: [PATCH 11/20] rm ORGANISM

---
 bigbio/utils/resources/tags.json | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bigbio/utils/resources/tags.json b/bigbio/utils/resources/tags.json
index 51ed9f1b9..46e624683 100644
--- a/bigbio/utils/resources/tags.json
+++ b/bigbio/utils/resources/tags.json
@@ -1,7 +1,6 @@
 {
   "SOCIAL_MEDIA" : "Social media",
   "ANATOMY" : "Anatomy",
-  "ORGANISM" : "Organism",
   "ORGAN" : "Organ", 
   "VARIANT" : "Variant/Mutation",
   "TISSUE" : "Tissue",

From f5cc0525c39b0d76cfe766f2ac959548767891fa Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 15:53:17 +0200
Subject: [PATCH 12/20] add DIAGNOSIS tag

---
 bigbio/biodatasets/chia/chia.py                             | 2 +-
 bigbio/biodatasets/codiesp/codiesp.py                       | 2 +-
 bigbio/biodatasets/ctebmsp/ctebmsp.py                       | 2 +-
 bigbio/biodatasets/essai/essai.py                           | 2 +-
 bigbio/biodatasets/evidence_inference/evidence_inference.py | 2 +-
 bigbio/biodatasets/mantra_gsc/mantra_gsc.py                 | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/bigbio/biodatasets/chia/chia.py b/bigbio/biodatasets/chia/chia.py
index da93b98d4..b837bb916 100644
--- a/bigbio/biodatasets/chia/chia.py
+++ b/bigbio/biodatasets/chia/chia.py
@@ -28,7 +28,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = [Tags.DISEASE, Tags.DRUG, Tags.PROCEDURE]
+_TAGS = [Tags.DISEASE, Tags.DRUG, Tags.PROCEDURE, Tags.DIAGNOSIS]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/codiesp/codiesp.py b/bigbio/biodatasets/codiesp/codiesp.py
index 65671fcd8..b9c551e0d 100644
--- a/bigbio/biodatasets/codiesp/codiesp.py
+++ b/bigbio/biodatasets/codiesp/codiesp.py
@@ -38,7 +38,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = [Tags.DISEASE, Tags.DOCUMENT_INDEXING, Tags.PROCEDURE]
+_TAGS = [Tags.DISEASE, Tags.DOCUMENT_INDEXING, Tags.PROCEDURE, Tags.DIAGNOSIS]
 _LANGUAGES = [Lang.ES]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/biodatasets/ctebmsp/ctebmsp.py b/bigbio/biodatasets/ctebmsp/ctebmsp.py
index f5a3fc2b8..0831f48b7 100644
--- a/bigbio/biodatasets/ctebmsp/ctebmsp.py
+++ b/bigbio/biodatasets/ctebmsp/ctebmsp.py
@@ -34,7 +34,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = [Tags.ANATOMY, Tags.CHEMICAL, Tags.DISEASE, Tags.PROCEDURE]
+_TAGS = [Tags.ANATOMY, Tags.CHEMICAL, Tags.DISEASE, Tags.PROCEDURE, Tags.DIAGNOSIS]
 _LANGUAGES = [Lang.ES]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/essai/essai.py b/bigbio/biodatasets/essai/essai.py
index aab446381..4cff31a13 100644
--- a/bigbio/biodatasets/essai/essai.py
+++ b/bigbio/biodatasets/essai/essai.py
@@ -9,7 +9,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = [Tags.NEGATION, Tags.SPECULATION, Tags.DISEASE, Tags.PROCEDURE]
+_TAGS = [Tags.NEGATION, Tags.SPECULATION, Tags.DISEASE, Tags.PROCEDURE, Tags.DIAGNOSIS]
 _LANGUAGES = [Lang.FR]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/evidence_inference/evidence_inference.py b/bigbio/biodatasets/evidence_inference/evidence_inference.py
index d17594ca8..cade748f8 100644
--- a/bigbio/biodatasets/evidence_inference/evidence_inference.py
+++ b/bigbio/biodatasets/evidence_inference/evidence_inference.py
@@ -35,7 +35,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = [Tags.PROCEDURE]
+_TAGS = [Tags.PROCEDURE, Tags.DIAGNOSIS]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/mantra_gsc/mantra_gsc.py b/bigbio/biodatasets/mantra_gsc/mantra_gsc.py
index cf572db03..8eb1891c1 100644
--- a/bigbio/biodatasets/mantra_gsc/mantra_gsc.py
+++ b/bigbio/biodatasets/mantra_gsc/mantra_gsc.py
@@ -25,7 +25,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = [Tags.GENE, Tags.DISEASE, Tags.PROCEDURE]
+_TAGS = [Tags.GENE, Tags.DISEASE, Tags.PROCEDURE, Tags.DIAGNOSIS]
 _LANGUAGES = [Lang.EN, Lang.FR, Lang.DE, Lang.NL, Lang.ES]
 _PUBMED = True
 _LOCAL = False

From f4b528e02486f06647640aba776f657523a325df Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 15:53:34 +0200
Subject: [PATCH 13/20] add n2c2 datasets

---
 bigbio/biodatasets/n2c2_2006_deid/n2c2_2006_deid.py       | 2 +-
 bigbio/biodatasets/n2c2_2006_smokers/n2c2_2006_smokers.py | 2 +-
 bigbio/biodatasets/n2c2_2008/n2c2_2008.py                 | 2 +-
 bigbio/biodatasets/n2c2_2009/n2c2_2009.py                 | 2 +-
 bigbio/biodatasets/n2c2_2010/n2c2_2010.py                 | 2 +-
 bigbio/biodatasets/n2c2_2011/n2c2_2011.py                 | 2 +-
 bigbio/biodatasets/n2c2_2018_track1/n2c2_2018_track1.py   | 2 +-
 bigbio/biodatasets/n2c2_2018_track2/n2c2_2018_track2.py   | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/bigbio/biodatasets/n2c2_2006_deid/n2c2_2006_deid.py b/bigbio/biodatasets/n2c2_2006_deid/n2c2_2006_deid.py
index 9144f25f0..cde53908b 100644
--- a/bigbio/biodatasets/n2c2_2006_deid/n2c2_2006_deid.py
+++ b/bigbio/biodatasets/n2c2_2006_deid/n2c2_2006_deid.py
@@ -71,7 +71,7 @@
 _DATASETNAME = "n2c2_2006"
 
 # https://academic.oup.com/jamia/article/14/5/550/720189
-_TAGS = []
+_TAGS = [Tags.ANONYMIZATION]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2006_smokers/n2c2_2006_smokers.py b/bigbio/biodatasets/n2c2_2006_smokers/n2c2_2006_smokers.py
index 6e0fc9209..9d0b1a99b 100644
--- a/bigbio/biodatasets/n2c2_2006_smokers/n2c2_2006_smokers.py
+++ b/bigbio/biodatasets/n2c2_2006_smokers/n2c2_2006_smokers.py
@@ -69,7 +69,7 @@
 _DATASETNAME = "n2c2_2006"
 
 # https://academic.oup.com/jamia/article/15/1/14/779738
-_TAGS = []
+_TAGS = [Tags.DIAGNOSIS]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2008/n2c2_2008.py b/bigbio/biodatasets/n2c2_2008/n2c2_2008.py
index 4b3054ac1..bb2f37e3b 100644
--- a/bigbio/biodatasets/n2c2_2008/n2c2_2008.py
+++ b/bigbio/biodatasets/n2c2_2008/n2c2_2008.py
@@ -77,7 +77,7 @@
 _DATASETNAME = "n2c2_2008"
 
 # https://academic.oup.com/jamia/article/16/4/561/766997
-_TAGS = []
+_TAGS = [Tags.DIAGNOSIS, Tags.DISEASE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2009/n2c2_2009.py b/bigbio/biodatasets/n2c2_2009/n2c2_2009.py
index 88f1e60c5..742ce0955 100644
--- a/bigbio/biodatasets/n2c2_2009/n2c2_2009.py
+++ b/bigbio/biodatasets/n2c2_2009/n2c2_2009.py
@@ -60,7 +60,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.PROCEDURE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2010/n2c2_2010.py b/bigbio/biodatasets/n2c2_2010/n2c2_2010.py
index 549ac121a..3b095e3f7 100644
--- a/bigbio/biodatasets/n2c2_2010/n2c2_2010.py
+++ b/bigbio/biodatasets/n2c2_2010/n2c2_2010.py
@@ -55,7 +55,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.DIAGNOSIS, Tags.NEGATION]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2011/n2c2_2011.py b/bigbio/biodatasets/n2c2_2011/n2c2_2011.py
index 67fc5e684..478fba48d 100644
--- a/bigbio/biodatasets/n2c2_2011/n2c2_2011.py
+++ b/bigbio/biodatasets/n2c2_2011/n2c2_2011.py
@@ -78,7 +78,7 @@
 _DATASETNAME = "n2c2_2011"
 
 # https://academic.oup.com/jamia/article/19/5/786/716138
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.TREATMENT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2018_track1/n2c2_2018_track1.py b/bigbio/biodatasets/n2c2_2018_track1/n2c2_2018_track1.py
index 59411a293..0c18374c7 100644
--- a/bigbio/biodatasets/n2c2_2018_track1/n2c2_2018_track1.py
+++ b/bigbio/biodatasets/n2c2_2018_track1/n2c2_2018_track1.py
@@ -46,7 +46,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/n2c2_2018_track2/n2c2_2018_track2.py b/bigbio/biodatasets/n2c2_2018_track2/n2c2_2018_track2.py
index 13ddc19b1..9862ea227 100644
--- a/bigbio/biodatasets/n2c2_2018_track2/n2c2_2018_track2.py
+++ b/bigbio/biodatasets/n2c2_2018_track2/n2c2_2018_track2.py
@@ -49,7 +49,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DRUG, Tags.ADR]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True

From fbfdc7063ce71db88be2e321f1b8366692456ff8 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 15:53:44 +0200
Subject: [PATCH 14/20] add diagnosis tag

---
 bigbio/biodatasets/quaero/quaero.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bigbio/biodatasets/quaero/quaero.py b/bigbio/biodatasets/quaero/quaero.py
index 2d949b028..4edc5c45c 100644
--- a/bigbio/biodatasets/quaero/quaero.py
+++ b/bigbio/biodatasets/quaero/quaero.py
@@ -16,6 +16,7 @@
     Tags.SPECIES,
     Tags.PROCEDURE,
     Tags.DISEASE,
+    Tags.DIAGNOSIS,
 ]
 _LANGUAGES = [Lang.FR]
 _PUBMED = True

From 7f96f08e3164ea6129f496da89226cf6c8004e8d Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 15:53:50 +0200
Subject: [PATCH 15/20] update tags

---
 bigbio/utils/resources/tags.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bigbio/utils/resources/tags.json b/bigbio/utils/resources/tags.json
index 46e624683..f58f27794 100644
--- a/bigbio/utils/resources/tags.json
+++ b/bigbio/utils/resources/tags.json
@@ -44,5 +44,6 @@
   "MIRNA" : "miRNA",
   "ABBREVIATION" : "Abbreviation",
   "FACT_CHECKING" : "Fact-checking",
-  "INTENT" : "Intent"
+  "INTENT" : "Intent",
+  "DIAGNOSIS" : "Diagnosis"
 }

From 4109ebfcb05d3e96e119ff33b3e0e7f4c37fd1e6 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 15:58:29 +0200
Subject: [PATCH 16/20] format

---
 bigbio/biodatasets/biosses/biosses.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigbio/biodatasets/biosses/biosses.py b/bigbio/biodatasets/biosses/biosses.py
index a55a313c2..7f7e72eec 100644
--- a/bigbio/biodatasets/biosses/biosses.py
+++ b/bigbio/biodatasets/biosses/biosses.py
@@ -33,7 +33,7 @@
 
 _DATASETNAME = "biosses"
 
-_TAGS = []
+_TAGS = [Tags.SENTENCE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False

From a7728abf2df44dd8e18c50e9ee40fe3426665cc0 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 15:58:34 +0200
Subject: [PATCH 17/20] add tags to examples

---
 examples/bc5cdr.py              | 2 +-
 examples/bioasq_task_b.py       | 9 +++++----
 examples/biosses.py             | 2 +-
 examples/chemprot.py            | 2 +-
 examples/hallmarks_of_cancer.py | 2 +-
 examples/mlee.py                | 2 +-
 examples/muchmore.py            | 2 +-
 examples/n2c2_2011.py           | 2 +-
 examples/nlmchem.py             | 2 +-
 9 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/examples/bc5cdr.py b/examples/bc5cdr.py
index ee325c162..0c94c3add 100644
--- a/examples/bc5cdr.py
+++ b/examples/bc5cdr.py
@@ -35,7 +35,7 @@
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
-_TAGS = [Tags.DISEASE, Tags.CHEMICAL, Tags.CHEMICAL_DISEASE_RELATION, Tags.MESH]
+_TAGS = [Tags.DISEASE, Tags.CHEMICAL]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/examples/bioasq_task_b.py b/examples/bioasq_task_b.py
index 9026918f6..8be34588c 100644
--- a/examples/bioasq_task_b.py
+++ b/examples/bioasq_task_b.py
@@ -36,10 +36,11 @@
 from bigbio.utils.license import Licenses
 
 _TAGS = [
-    Tags.QA_YESNO
-    Tags.QA_FACTOID,
-    Tags.QA_LIST,
-    Tags.QA_SUMMARY,
+    Tags.YESNO,
+    Tags.FACTOID,
+    Tags.FACTOID_LIST,
+    Tags.ABSTRACTIVE,
+    Tags.EXTRACTIVE,
 ]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
diff --git a/examples/biosses.py b/examples/biosses.py
index 80aa75b36..913239499 100644
--- a/examples/biosses.py
+++ b/examples/biosses.py
@@ -33,7 +33,7 @@
 
 _DATASETNAME = "biosses"
 
-_TAGS = []
+_TAGS = [Tags.SENTENCE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = False
diff --git a/examples/chemprot.py b/examples/chemprot.py
index c29b362ae..3a43c3197 100644
--- a/examples/chemprot.py
+++ b/examples/chemprot.py
@@ -28,7 +28,7 @@
 from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.CHEMICAL, Tags.GENE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/examples/hallmarks_of_cancer.py b/examples/hallmarks_of_cancer.py
index 50600def9..09f377944 100644
--- a/examples/hallmarks_of_cancer.py
+++ b/examples/hallmarks_of_cancer.py
@@ -22,7 +22,7 @@
 from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.CANCER]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/examples/mlee.py b/examples/mlee.py
index e0330d53a..b98bf327c 100644
--- a/examples/mlee.py
+++ b/examples/mlee.py
@@ -32,7 +32,7 @@
 _SOURCE_VIEW_NAME = "source"
 _UNIFIED_VIEW_NAME = "bigbio"
 
-_TAGS = []
+_TAGS = [Tags.GENE, Tags.DRUG, Tags.CELL, Tags.ORGAN, Tags.TISSUE, Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/examples/muchmore.py b/examples/muchmore.py
index 9afb2982d..6ce74b9f8 100644
--- a/examples/muchmore.py
+++ b/examples/muchmore.py
@@ -76,7 +76,7 @@
 from bigbio.utils.constants import Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
-_TAGS = []
+_TAGS = [Tags.POS]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/examples/n2c2_2011.py b/examples/n2c2_2011.py
index d1dd79f7f..2495432fc 100644
--- a/examples/n2c2_2011.py
+++ b/examples/n2c2_2011.py
@@ -78,7 +78,7 @@
 _DATASETNAME = "n2c2_2011"
 
 # https://academic.oup.com/jamia/article/19/5/786/716138
-_TAGS = []
+_TAGS = [Tags.DISEASE, Tags.TREATMENT]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/examples/nlmchem.py b/examples/nlmchem.py
index 885234462..6b9438592 100644
--- a/examples/nlmchem.py
+++ b/examples/nlmchem.py
@@ -26,7 +26,7 @@
 from bigbio.utils.license import Licenses
 from bigbio.utils.parsing import get_texts_and_offsets_from_bioc_ann
 
-_TAGS = []
+_TAGS = [Tags.CHEMICAL, Tags.DOCUMENT_INDEXING]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False

From dda930f9b0cdd73e4bf70f93b129a604a52db316 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 18:05:37 +0200
Subject: [PATCH 18/20] fix missing/errors

---
 bigbio/biodatasets/bionlp_st_2013_cg/bionlp_st_2013_cg.py | 4 ++--
 bigbio/biodatasets/bionlp_st_2013_pc/bionlp_st_2013_pc.py | 8 +++++++-
 bigbio/biodatasets/lll/lll.py                             | 2 +-
 bigbio/biodatasets/medmentions/medmentions.py             | 2 +-
 bigbio/biodatasets/n2c2_2011/n2c2_2011.py                 | 2 +-
 bigbio/biodatasets/nagel/nagel.py                         | 2 +-
 bigbio/biodatasets/pho_ner/pho_ner.py                     | 2 +-
 bigbio/utils/resources/tags.json                          | 2 +-
 8 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/bigbio/biodatasets/bionlp_st_2013_cg/bionlp_st_2013_cg.py b/bigbio/biodatasets/bionlp_st_2013_cg/bionlp_st_2013_cg.py
index a72d0386d..6365cd7e2 100644
--- a/bigbio/biodatasets/bionlp_st_2013_cg/bionlp_st_2013_cg.py
+++ b/bigbio/biodatasets/bionlp_st_2013_cg/bionlp_st_2013_cg.py
@@ -30,11 +30,11 @@
     Tags.DISEASE,
     Tags.CANCER,
     Tags.TISSUE,
-    Tags.ORGANISM,
+    Tags.SPECIES,
     Tags.CELL,
     Tags.GENE,
     Tags.CHEMICAL,
-    Tags.PATHWAY,
+    Tags.PATHWAY_CURATION,
 ]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
diff --git a/bigbio/biodatasets/bionlp_st_2013_pc/bionlp_st_2013_pc.py b/bigbio/biodatasets/bionlp_st_2013_pc/bionlp_st_2013_pc.py
index f685ff3ea..cee27dfac 100644
--- a/bigbio/biodatasets/bionlp_st_2013_pc/bionlp_st_2013_pc.py
+++ b/bigbio/biodatasets/bionlp_st_2013_pc/bionlp_st_2013_pc.py
@@ -26,7 +26,13 @@
 _DATASETNAME = "bionlp_st_2013_pc"
 _UNIFIED_VIEW_NAME = "bigbio"
 
-_TAGS = [Tags.GENE, Tags.CHEMICAL, Tags.PATHWAY, Tags.NEGATION, Tags.SPECULATION]
+_TAGS = [
+    Tags.GENE,
+    Tags.CHEMICAL,
+    Tags.PATHWAY_CURATION,
+    Tags.NEGATION,
+    Tags.SPECULATION,
+]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/lll/lll.py b/bigbio/biodatasets/lll/lll.py
index 560185a5f..6dfe9914e 100644
--- a/bigbio/biodatasets/lll/lll.py
+++ b/bigbio/biodatasets/lll/lll.py
@@ -36,7 +36,7 @@
 
 from bigbio.utils import schemas
 from bigbio.utils.configs import BigBioConfig
-from bigbio.utils.constants import BigBioValues, Lang, Tasks
+from bigbio.utils.constants import BigBioValues, Lang, Tasks, Tags
 from bigbio.utils.license import Licenses
 
 _TAGS = [Tags.GENE]
diff --git a/bigbio/biodatasets/medmentions/medmentions.py b/bigbio/biodatasets/medmentions/medmentions.py
index 9c9746635..633b86dd8 100644
--- a/bigbio/biodatasets/medmentions/medmentions.py
+++ b/bigbio/biodatasets/medmentions/medmentions.py
@@ -46,7 +46,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import Licenses
 
-_TAGS = [Tags.DISEASE, Tags.CHEMICAL, Tags.ORGANISM]
+_TAGS = [Tags.DISEASE, Tags.CHEMICAL, Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/n2c2_2011/n2c2_2011.py b/bigbio/biodatasets/n2c2_2011/n2c2_2011.py
index 478fba48d..7ab93a594 100644
--- a/bigbio/biodatasets/n2c2_2011/n2c2_2011.py
+++ b/bigbio/biodatasets/n2c2_2011/n2c2_2011.py
@@ -78,7 +78,7 @@
 _DATASETNAME = "n2c2_2011"
 
 # https://academic.oup.com/jamia/article/19/5/786/716138
-_TAGS = [Tags.DISEASE, Tags.TREATMENT]
+_TAGS = [Tags.DISEASE, Tags.PROCEDURE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True
diff --git a/bigbio/biodatasets/nagel/nagel.py b/bigbio/biodatasets/nagel/nagel.py
index 0f5990ff7..c80f80dbf 100644
--- a/bigbio/biodatasets/nagel/nagel.py
+++ b/bigbio/biodatasets/nagel/nagel.py
@@ -26,7 +26,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
-_TAGS = [Tags.MUTATION, Tags.GENE, Tags.SPECIES]
+_TAGS = [Tags.VARIANT, Tags.GENE, Tags.SPECIES]
 _LANGUAGES = [Lang.EN]
 _PUBMED = True
 _LOCAL = False
diff --git a/bigbio/biodatasets/pho_ner/pho_ner.py b/bigbio/biodatasets/pho_ner/pho_ner.py
index 32e0e4e02..821cd0d01 100644
--- a/bigbio/biodatasets/pho_ner/pho_ner.py
+++ b/bigbio/biodatasets/pho_ner/pho_ner.py
@@ -23,7 +23,7 @@
 from bigbio.utils.constants import Lang, Tags, Tasks
 from bigbio.utils.license import CustomLicense
 
-_TAGS = [Tag.DISEASE, Tag.COVID]
+_TAGS = [Tags.DISEASE, Tags.COVID]
 _LANGUAGES = [Lang.VI]
 _PUBMED = False
 _LOCAL = False
diff --git a/bigbio/utils/resources/tags.json b/bigbio/utils/resources/tags.json
index f58f27794..dc3f93eb4 100644
--- a/bigbio/utils/resources/tags.json
+++ b/bigbio/utils/resources/tags.json
@@ -16,7 +16,7 @@
   "HOW" : "`How` question",
   "WHY" : "`Why` question",
   "FACTOID" : "QA with factoid answer",
-  "FACTOIND_LIST": "QA with list of factoid answer",
+  "FACTOID_LIST": "QA with list of factoid answer",
   "ABSTRACTIVE" : "Abstractive summary/answer",
   "EXTRACTIVE" : "Extractive summary/answer",
   "CLOZE_TEST" : "Cloze test",

From 3493d0f5a19cff5f4174a39a6590b09eb896ae58 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 18:05:49 +0200
Subject: [PATCH 19/20] treatment is procedure

---
 examples/n2c2_2011.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/n2c2_2011.py b/examples/n2c2_2011.py
index 2495432fc..ca6f47e10 100644
--- a/examples/n2c2_2011.py
+++ b/examples/n2c2_2011.py
@@ -78,7 +78,7 @@
 _DATASETNAME = "n2c2_2011"
 
 # https://academic.oup.com/jamia/article/19/5/786/716138
-_TAGS = [Tags.DISEASE, Tags.TREATMENT]
+_TAGS = [Tags.DISEASE, Tags.PROCEDURE]
 _LANGUAGES = [Lang.EN]
 _PUBMED = False
 _LOCAL = True

From 44bce0ced4836f5ce6b848aba02f04c9e48ae465 Mon Sep 17 00:00:00 2001
From: "sgarda.wbi" <gardasam@informatik.hu-berlin.de>
Date: Wed, 8 Jun 2022 18:06:06 +0200
Subject: [PATCH 20/20] add script to gather (fine-grained) tasks counts

---
 scripts/gather_dataset_tasks.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 scripts/gather_dataset_tasks.py

diff --git a/scripts/gather_dataset_tasks.py b/scripts/gather_dataset_tasks.py
new file mode 100644
index 000000000..7523e8f45
--- /dev/null
+++ b/scripts/gather_dataset_tasks.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Generate counts of tasks and fine-grained tasks
+"""
+
+from bigbio.dataloader import BigBioConfigHelpers
+
+
+def main():
+    """
+    Collect and print the set of unique (dataset name, task) pairs.
+    """
+
+    configs = BigBioConfigHelpers()
+
+    # `set.add` takes exactly one element, so each pair is added as a tuple.
+    dataset_task = set()
+    for conf in configs:
+        for task in conf.tasks:
+            dataset_task.add((conf.dataset_name, str(task)))
+
+    print(dataset_task)
+
+
+if __name__ == "__main__":
+    main()