Update geneformer/emb_extractor.py

#453
Files changed (1)
  1. geneformer/emb_extractor.py +6 -0
geneformer/emb_extractor.py CHANGED
@@ -596,6 +596,12 @@ class EmbExtractor:
596
  filtered_input_data = pu.load_and_filter(
597
  self.filter_data, self.nproc, input_data_file
598
  )
 
 
 
 
 
 
599
  if cell_state is not None:
600
  filtered_input_data = pu.filter_by_dict(
601
  filtered_input_data, cell_state, self.nproc
 
596
  filtered_input_data = pu.load_and_filter(
597
  self.filter_data, self.nproc, input_data_file
598
  )
599
+
600
+ # Check to make sure that all the labels exist in the tokenized data:
601
+ if self.emb_label is not None:
602
+ for label in self.emb_label:
603
+ assert label in filtered_input_data.features.keys(), f"Attribute `{label}` not present in dataset features"
604
+
605
  if cell_state is not None:
606
  filtered_input_data = pu.filter_by_dict(
607
  filtered_input_data, cell_state, self.nproc