Skip to content

Conversation

LucasDedieu
Copy link
Collaborator

Description

Add a trainable document classifier.

Checklist

  • If this PR is a bug fix, the bug is documented in the test suite.
  • Changes were documented in the changelog (pending section).
  • If necessary, changes were made to the documentation (e.g. new pipeline).

Copy link

Docs preview URL

https://edsnlp-classifhead.vercel.app/

Copy link

github-actions bot commented Jul 16, 2025

Coverage Report

Name | Stmts | Miss | ∆ Miss | Cover
edsnlp/metrics/doc_classif.py

New missing coverage at lines 35-133 !

     """
-     examples = make_examples(examples)
-     if filter_expr is not None:
-         filter_fn = eval(f"lambda doc: {filter_expr}")
-         examples = [eg for eg in examples if filter_fn(eg.reference)]
- 
-     pred_labels = []
-     gold_labels = []
  ...
-         "f": macro_f1,
-         "p": macro_precision,
-         "r": macro_recall,
-         "support": len(labels),
-         "classes": len(labels),
-     }
-     return results
New missing coverage at lines 143-146 !
     ):
-         self.label_attr = label_attr
-         self.micro_key = micro_key
-         self.filter_expr = filter_expr
New missing coverage at line 148 !
     def __call__(self, *examples):
-         return doc_classification_metric(
             examples,

Stmts: 52 | Miss: 42 | ∆ Miss: 102 | Cover: 19.23%
edsnlp/pipes/trainable/doc_classifier/doc_classifier.py

New missing coverage at lines 73-93 !

     def post_init(self, gold_data: Iterable[Doc], exclude: Set[str]):
-         if not self.label2id:
-             if self.labels is not None:
-                 labels = set(self.labels)
-             else:
-                 labels = set()
-                 for doc in gold_data:
-                     label = getattr(doc._, self.label_attr, None)
-                     if isinstance(label, str):
-                         labels.add(label)
-             if labels:
-                 self.label2id = {}
-                 self.id2label = {}
-                 for i, label in enumerate(labels):
-                     self.label2id[label] = i
-                     self.id2label[i] = label
-                 print("num classes:", len(self.label2id))
-                 self.classifier = torch.nn.Linear(
-                     self.embedding.output_size, len(self.label2id)
-                 )
-         super().post_init(gold_data, exclude=exclude)
New missing coverage at lines 98-109 !
     def preprocess_supervised(self, doc: Doc) -> Dict[str, Any]:
-         preps = self.preprocess(doc)
-         label = getattr(doc._, self.label_attr, None)
-         if label is None:
-             raise ValueError(
-                 f"Document does not have a gold label in 'doc._.{self.label_attr}'"
-             )
-         if isinstance(label, str) and self.label2id:
-             if label not in self.label2id:
-                 raise ValueError(f"Label '{label}' not in label2id mapping.")
-             label = self.label2id[label]
-         return {
             **preps,
New missing coverage at line 117 !
         if "targets" in batch:
-             batch_input["targets"] = torch.stack(batch["targets"])
         return batch_input
New missing coverage at lines 128-131 !
         if "targets" in batch:
-             loss = self.loss_fn(logits, batch["targets"])
-             output["loss"] = loss
-             output["labels"] = None
         else:
New missing coverage at line 142 !
             if self.id2label and isinstance(label, int):
-                 label = self.id2label.get(label, label)
             setattr(doc._, self.label_attr, label)
New missing coverage at lines 147-163 !
     def to_disk(self, path, *, exclude=set()):
-         repr_id = object.__repr__(self)
-         if repr_id in exclude:
-             return
-         exclude.add(repr_id)
-         os.makedirs(path, exist_ok=True)
-         data_path = path / "label_attr.pkl"
-         with open(data_path, "wb") as f:
-             pickle.dump(
-                 {
-                     "label_attr": self.label_attr,
-                     "label2id": self.label2id,
-                     "id2label": self.id2label,
-                 },
-                 f,
-             )
-         return super().to_disk(path, exclude=exclude)
New missing coverage at lines 166-174 !
     def from_disk(cls, path, **kwargs):
-         data_path = path / "label_attr.pkl"
-         with open(data_path, "rb") as f:
-             data = pickle.load(f)
-         obj = super().from_disk(path, **kwargs)
-         obj.label_attr = data.get("label_attr", "label")
-         obj.label2id = data.get("label2id", {})
-         obj.id2label = data.get("id2label", {})
-         return obj

Stmts: 101 | Miss: 44 | ∆ Miss: 60 | Cover: 56.44%
TOTAL | Stmts: 11946 | Miss: 323 | ∆ Miss: 162 | Cover: 97.30%
Files without new missing coverage
Name | Stmts | Miss | ∆ Miss | Cover
edsnlp/utils/torch.py

Was already missing at line 102

 def load_pruned_obj(obj, _):
-     return obj
Was already missing at line 118
     def save_align_devices_hook(pickler, obj):
-         pickler.save_reduce(load_align_devices_hook, (obj.__dict__,), obj=obj)
Was already missing at lines 121-128
     def load_align_devices_hook(state):
-         state["execution_device"] = MAP_LOCATION
  ...
-     AlignDevicesHook = None
Was already missing at line 143
             if torch.Tensor in copyreg.dispatch_table:
-                 old_dispatch[torch.Tensor] = copyreg.dispatch_table[torch.Tensor]
             copyreg.pickle(torch.Tensor, reduce_empty)

839089.16%
edsnlp/utils/span_getters.py

Was already missing at lines 78-80

     if span_getter is None:
-         yield doc[:], None
-         return
     if callable(span_getter):
Was already missing at lines 81-83
     if callable(span_getter):
-         yield from span_getter(doc)
-         return
     for key, span_filter in span_getter.items():
Was already missing at line 85
         if key == "*":
-             candidates = (
                 (span, group) for group in doc.spans.values() for span in group
Was already missing at lines 94-97
         else:
-             for span, group in candidates:
-                 if span.label_ in span_filter:
-                     yield span, group
Was already missing at line 101
     if callable(span_setter):
-         span_setter(doc, matches)
     else:
Was already missing at line 132
     if callable(value):
-         return value
     if isinstance(value, str):
Was already missing at line 181
             elif isinstance(v, str):
-                 new_value[k] = [v]
             elif isinstance(v, list) and all(isinstance(i, str) for i in v):

23111095.24%
edsnlp/utils/resources.py

Was already missing at line 33

     if not verbs:
-         return conjugated_verbs

241095.83%
edsnlp/utils/numbers.py

Was already missing at line 34

     else:
-         string = s
     string = string.lower().strip()
Was already missing at lines 38-41
         return int(string)
-     except ValueError:
-         parsed = DIGITS_MAPPINGS.get(string, None)
-         return parsed

164075.00%
edsnlp/utils/filter.py

Was already missing at line 206

     if isinstance(label, int):
-         return [span for span in spans if span.label == label]
     else:

741098.65%
edsnlp/tune.py

Was already missing at line 169

             )
-         except RuntimeError as e:
             if "zero total variance" in str(e):  # pragma: no cover
Was already missing at line 684
         else:
-             n_trials = compute_n_trials(
                 gpu_hours, compute_time_per_trial(study, ema=True)

2892099.31%
edsnlp/training/trainer.py

Was already missing at line 57

     if result is None:
-         result = {}
     if isinstance(x, dict):
Was already missing at lines 365-371
         if self.sub_batch_size and self.sub_batch_size[1] == "splits":
-             data = data.batchify(
  ...
-             data = data.map(lambda b: [nlp.collate(sb, device=device) for sb in b])
         elif self.sub_batch_size:
Was already missing at lines 873-880
                         raise
-                     except Exception:
  ...
-                         raise
Was already missing at lines 907-909
                     ) > grad_max_dev * math.sqrt(grad_var):
-                         spike = True
-                         spikes += 1
                     else:
Was already missing at line 916
                     if spike and grad_dev_policy == "clip_mean":
-                         torch.nn.utils.clip_grad_norm_(
                             grad_params, grad_mean, norm_type=2
Was already missing at line 920
                     elif spike and grad_dev_policy == "clip_threshold":
-                         torch.nn.utils.clip_grad_norm_(
                             grad_params,

32912096.35%
edsnlp/training/loggers.py

Was already missing at line 109

                 if col not in values and col != "step":
-                     row.append("")
                 else:
Was already missing at line 278
     def tracker(self):
-         return self.printer
Was already missing at lines 369-388
         """
-         env_logging_dir = os.environ.get("AIM_LOGGING_DIR", None)
  ...
-         accelerate.tracking.logger.debug(
             f"Initialized Aim run {self.writer.hash} in project {project_name}"
Was already missing at lines 392-394
     def log(self, values: dict, step: Optional[int], **kwargs):
-         values = flatten_dict(values)
-         return super().log(values, step, **kwargs)

14011092.14%
edsnlp/reducers.py

Was already missing at line 115

     if not hasattr(module, "__file__"):
-         return True
     if module.__file__ is None:
Was already missing at line 117
     if module.__file__ is None:
-         return False
     # Hack to avoid copying the full module dict

672097.01%
edsnlp/processing/spark.py

Was already missing at line 50

         getActiveSession = SparkSession.getActiveSession
-     except AttributeError:

471097.87%
edsnlp/processing/multiprocessing.py

Was already missing at lines 393-398

                 self.on_stop()
-         except BaseException as e:
  ...
-             self.main_control_queue.put(e)
         finally:
Was already missing at lines 402-404
                     pass
-             except StopSignal:
-                 pass
             for name, queue in self.consumer_queues(stage):
Was already missing at line 542
                     while schedule[task_idx] is None:
-                         task_idx = (task_idx + 1) % len(schedule)
Was already missing at lines 606-608
             if isinstance(docs, StreamSentinel):
-                 self.active_batches[stage].append([None, None, None, docs])
-                 continue
             batch_id = str(hash(tuple(id(x) for x in docs)))[-8:] + "-" + self.uid
Was already missing at lines 1121-1127
                 if out[0].kind == requires_sentinel:
-                     missing_sentinels -= 1
  ...
-                         missing_sentinels = len(self.cpu_worker_names)
                 continue

62614097.76%
edsnlp/processing/deprecated_pipe.py

Was already missing at lines 207-209

         def converter(doc):
-             res = results_extractor(doc)
-             return (
                 [{"note_id": doc._.note_id, **row} for row in res]

572096.49%
edsnlp/pipes/trainable/span_linker/span_linker.py

Was already missing at lines 402-404

             if self.reference_mode == "synonym":
-                 embeds = embeds.to(new_lin.weight)
-                 new_lin.weight.data = embeds
             else:

1732098.84%
edsnlp/pipes/trainable/span_classifier/span_classifier.py

Was already missing at line 379

         if not all(keep_bindings):
-             logger.warning(
                 "Some attributes have no labels or values and have been removed:"

1641099.39%
edsnlp/pipes/trainable/ner_crf/ner_crf.py

Was already missing at line 301

         if self.labels is not None and not self.infer_span_setter:
-             return
Was already missing at lines 309-311
             if callable(self.target_span_getter):
-                 for span in get_spans(doc, self.target_span_getter):
-                     inferred_labels.add(span.label_)
             else:

1723098.26%
edsnlp/pipes/trainable/layers/crf.py

Was already missing at line 21

     # out: 2 * N * O
-     return (log_A.unsqueeze(-1) + log_B.unsqueeze(-3)).logsumexp(-2)
Was already missing at line 29
     # out: 2 * N * O
-     return (log_A.unsqueeze(-1) + log_B.unsqueeze(-3)).max(-2)
Was already missing at line 98
         if learnable_transitions:
-             self.transitions = torch.nn.Parameter(
                 torch.zeros_like(forbidden_transitions, dtype=torch.float)
Was already missing at line 108
         if learnable_transitions and with_start_end_transitions:
-             self.start_transitions = torch.nn.Parameter(
                 torch.zeros(num_tags, dtype=torch.float)
Was already missing at line 117
         if learnable_transitions and with_start_end_transitions:
-             self.end_transitions = torch.nn.Parameter(
                 torch.zeros(num_tags, dtype=torch.float)

1375096.35%
edsnlp/pipes/trainable/embeddings/transformer/transformer.py

Was already missing at line 166

         if quantization is not None:
-             kwargs["quantization_config"] = quantization
Was already missing at line 189
         if self.cls_token_id is None:
-             [self.cls_token_id] = self.tokenizer.convert_tokens_to_ids(
                 [self.tokenizer.special_tokens_map["bos_token"]]
Was already missing at line 193
         if self.sep_token_id is None:
-             [self.sep_token_id] = self.tokenizer.convert_tokens_to_ids(
                 [self.tokenizer.special_tokens_map["eos_token"]]

1683098.21%
edsnlp/pipes/qualifiers/reported_speech/reported_speech.py

Was already missing at lines 24-28

         return "REPORTED"
-     elif token._.rspeech is False:
-         return "DIRECT"
-     else:
-         return None

1003097.00%
edsnlp/pipes/qualifiers/negation/negation.py

Was already missing at line 28

     else:
-         return None

1011099.01%
edsnlp/pipes/qualifiers/hypothesis/hypothesis.py

Was already missing at line 27

     else:
-         return None

981098.98%
edsnlp/pipes/qualifiers/history/history.py

Was already missing at lines 26-32

 def history_getter(token: Union[Token, Span]) -> Optional[str]:
-     if token._.history is True:
-         return "ATCD"
-     elif token._.history is False:
-         return "CURRENT"
-     else:
-         return None
Was already missing at lines 353-359
                 )
-             except ValueError:
  ...
-                 note_datetime = None
Was already missing at lines 368-374
                 )
-             except ValueError:
  ...
-                 birth_datetime = None
Was already missing at lines 440-443
                         )
-                     except ValueError as e:
-                         absolute_date = None
-                         logger.warning(
                             "In doc {}, the following date {} raises this error: {}. "

18014092.22%
edsnlp/pipes/qualifiers/family/family.py

Was already missing at line 27

     else:
-         return None

831098.80%
edsnlp/pipes/ner/tnm/model.py

Was already missing at line 147

     def __str__(self):
-         return self.norm()
Was already missing at line 171
             )
-             exclude_unset = skip_defaults

1122098.21%
edsnlp/pipes/ner/scores/sofa/sofa.py

Was already missing at line 32

             if not assigned:
-                 continue
             if assigned.get("method_max") is not None:
Was already missing at line 40
             else:
-                 method = "Non précisée"

252092.00%
edsnlp/pipes/ner/scores/elston_ellis/patterns.py

Was already missing at line 26

         if x <= 5:
-             return 1
Was already missing at lines 32-36
         else:
-             return 3
- 
-     except ValueError:
-         return None

214080.95%
edsnlp/pipes/ner/scores/charlson/patterns.py

Was already missing at lines 21-23

             return int(extracted_score)
-     except ValueError:
-         return None

132084.62%
edsnlp/pipes/ner/disorders/solid_tumor/solid_tumor.py

Was already missing at lines 131-137

         for span in spans:
-             span.label_ = "solid_tumor"
  ...
-             yield span

386084.21%
edsnlp/pipes/ner/disorders/peripheral_vascular_disease/peripheral_vascular_disease.py

Was already missing at line 108

                 if "peripheral" not in span._.assigned.keys():
-                     continue

161093.75%
edsnlp/pipes/ner/disorders/diabetes/diabetes.py

Was already missing at line 131

                 # Mostly FP
-                 continue
Was already missing at line 134
             elif self.has_far_complications(span):
-                 span._.status = 2
Was already missing at line 145
         if next(iter(self.complication_matcher(context)), None) is not None:
-             return True
         return False

303090.00%
edsnlp/pipes/ner/disorders/connective_tissue_disease/connective_tissue_disease.py

Was already missing at line 104

                 # Huge change of FP / Title section
-                 continue

151093.33%
edsnlp/pipes/ner/disorders/ckd/ckd.py

Was already missing at lines 121-124

             dfg_value = float(dfg_span.text.replace(",", ".").strip())
-         except ValueError:
-             logger.trace(f"DFG value couldn't be extracted from {dfg_span.text}")
-             return False

303090.00%
edsnlp/pipes/ner/disorders/cerebrovascular_accident/cerebrovascular_accident.py

Was already missing at lines 112-114

             if span._.source == "ischemia":
-                 if "brain" not in span._.assigned.keys():
-                     continue

182088.89%
edsnlp/pipes/ner/adicap/models.py

Was already missing at line 15

     def norm(self) -> str:
-         return self.code
Was already missing at line 18
     def __str__(self):
-         return self.norm()

162087.50%
edsnlp/pipes/misc/split/split.py

Was already missing at lines 186-188

         if max_length <= 0 and self.regex is None:
-             yield doc
-             return

732097.26%
edsnlp/pipes/misc/sections/sections.py

Was already missing at line 126

         if sections is None:
-             sections = patterns.sections
         sections = dict(sections)

451097.78%
edsnlp/pipes/misc/quantities/quantities.py

Was already missing at lines 147-149

     def __getitem__(self, item: int):
-         assert isinstance(item, int)
-         return [self][item]
Was already missing at lines 160-163
     def __eq__(self, other: Any):
-         if isinstance(other, SimpleQuantity):
-             return self.convert_to(other.unit) == other.value
-         return False
Was already missing at line 166
         if other.unit == self.unit:
-             return SimpleQuantity(self.value + other.value, self.unit, self.registry)
         return SimpleQuantity(
Was already missing at line 193
             return self.convert_to(other_unit)
-         except KeyError:
             raise AttributeError(f"Unit {other_unit} not found")
Was already missing at line 198
     def verify(cls, ent):
-         return True
Was already missing at line 264
     def __lt__(self, other: Union[SimpleQuantity, "RangeQuantity"]):
-         return max(self.convert_to(other.unit)) < min((part.value for part in other))
Was already missing at line 275
             return self.convert_to(other.unit) == other.value
-         return False
Was already missing at line 289
     def verify(cls, ent):
-         return True
Was already missing at line 888
         if snippet.end != last and doclike.doc[last: snippet.end].text.strip() == "":
-             pseudo.append("w")
         pseudo = "".join(pseudo)
Was already missing at line 1069
                             if start_line is None:
-                                 continue
Was already missing at lines 1100-1102
                         unit_norm = self.unit_followers[unit_before.label_]
-                 except (KeyError, AttributeError, IndexError):
-                     pass
Was already missing at line 1145
             ):
-                 ent = doc[unit_text.start: number.end]
             else:
Was already missing at lines 1152-1154
                 dims = self.unit_registry.parse_unit(unit_norm)[0]
-             except KeyError:
-                 continue
Was already missing at lines 1260-1262
                     last._.set(last.label_, new_value)
-                 except (AttributeError, TypeError):
-                     merged.append(ent)
             else:

44020095.45%
edsnlp/pipes/misc/dates/models.py

Was already missing at line 165

                     else:
-                         d["month"] = note_datetime.month
                 if self.day is None:
Was already missing at lines 169-175
             else:
-                 if self.year is None:
  ...
-                     d["day"] = default_day
Was already missing at lines 183-185
                 return dt
-             except ValueError:
-                 return None
Was already missing at line 201
         else:
-             return None
Was already missing at line 217
         if self.second:
-             norm += f"{self.second:02}s"

20611094.66%
edsnlp/pipes/misc/dates/dates.py

Was already missing at line 249

         if isinstance(absolute, str):
-             absolute = [absolute]
         if isinstance(relative, str):
Was already missing at line 251
         if isinstance(relative, str):
-             relative = [relative]
         if isinstance(duration, str):
Was already missing at line 253
         if isinstance(duration, str):
-             relative = [duration]
         if isinstance(false_positive, str):
Was already missing at lines 357-366
             if self.merge_mode == "align":
-                 alignments = align_spans(matches, spans, sort_by_overlap=True)
  ...
-                         matches.append(span)
Was already missing at lines 462-464
                 if v1.mode == Mode.DURATION:
-                     m1 = Bound.FROM if v2.bound == Bound.UNTIL else Bound.UNTIL
-                     m2 = v2.mode or Bound.FROM
                 elif v2.mode == Mode.DURATION:

15314090.85%
edsnlp/pipes/misc/consultation_dates/consultation_dates.py

Was already missing at line 131

         else:
-             self.date_matcher = None
Was already missing at line 134
         if not consultation_mention:
-             consultation_mention = []
         elif consultation_mention is True:

482095.83%
edsnlp/pipes/core/normalizer/__init__.py

Was already missing at line 7

 def excluded_or_space_getter(t):
-     return t.is_space or t.tag_ == "EXCLUDED"

51080.00%
edsnlp/pipes/core/endlines/endlines.py

Was already missing at lines 160-164

         if end_lines_model is None:
-             path = build_path(__file__, "base_model.pkl")
- 
-             with open(path, "rb") as inp:
-                 self.model = pickle.load(inp)
         elif isinstance(end_lines_model, str):
Was already missing at lines 167-169
                 self.model = pickle.load(inp)
-         elif isinstance(end_lines_model, EndLinesModel):
-             self.model = end_lines_model
         else:
Was already missing at line 200
         ):
-             return "ENUMERATION"
Was already missing at line 287
         if np.isnan(sigma):
-             sigma = 1

897092.13%
edsnlp/pipes/core/contextual_matcher/contextual_matcher.py

Was already missing at lines 241-243

             ):
-                 to_keep = False
-                 break

1302098.46%
edsnlp/patch_spacy.py

Was already missing at lines 67-69

             # if module is reloaded.
-             existing_func = registry.factories.get(internal_name)
-             if not util.is_same_func(factory_func, existing_func):
                 raise ValueError(

312093.55%
edsnlp/package.py

Was already missing at lines 474-476

             version = version or pyproject["project"]["version"]
-         except (KeyError, TypeError):
-             version = "0.1.0"
         name = name or pyproject["project"]["name"]
Was already missing at line 480
         else:
-             main_package = None
         model_package = snake_case(name.lower())

2143098.60%
edsnlp/metrics/span_attribute.py

Was already missing at lines 67-69

         )
-         assert attributes is None
-         attributes = kwargs.pop("qualifiers")
     if attributes is None:

732097.26%
edsnlp/matchers/simstring.py

Was already missing at line 280

     if custom:
-         attr = attr[1:].lower()
Was already missing at line 295
             if custom:
-                 token_text = getattr(token._, attr)
             else:

1462098.63%
edsnlp/language.py

Was already missing at line 103

             if last != begin:
-                 logger.warning(
                     "Missed some characters during"

511098.04%
edsnlp/data/standoff.py

Was already missing at line 38

     def __init__(self, ann_file, line):
-         super().__init__(f"File {ann_file}, unrecognized Brat line {line}")
Was already missing at line 192
                         )
-                 except Exception:
                     raise Exception(

1862098.92%
edsnlp/data/polars.py

Was already missing at line 36

         if hasattr(data, "collect"):
-             data = data.collect()
         assert isinstance(data, pl.DataFrame)

551098.18%
edsnlp/data/json.py

Was already missing at line 81

                 return records
-         except Exception as e:
             raise Exception(f"Cannot read {file}: {e}")

1121099.11%
edsnlp/data/converters.py

Was already missing at line 428

                 elif key == "XPOS":
-                     word.tag_ = value
                 elif key == "FEATS":
Was already missing at line 822
         for attr in bool_attributes:
-             self.default_attributes[attr] = False
         self.opener = opener or self.PRESETS[preset]["opener"]
Was already missing at line 830
         if self.keep_raw_attribute_values:
-             return value
         try:
Was already missing at lines 873-876
                     )
-                 except StopIteration:
-                     warnings.warn(f"Unmatched closing tag for '{sep.group()}'")
-                     continue
                 start, start_label, start_attrs = starts.pop(idx)
Was already missing at line 904
         ):
-             if not Span.has_extension(dst):
                 Span.set_extension(dst, default=None)
Was already missing at line 911
             if span is None:
-                 continue
             for k, v in attrs.items():
Was already missing at lines 926-929
         for attr, value in self.default_attributes.items():
-             for span in spans:
-                 if span._.get(attr) is None:
-                     span._.set(attr, value)
Was already missing at line 964
     if isinstance(converter, type):
-         return converter(**kwargs), {}
     return converter, validate_kwargs(converter, kwargs)

31711096.53%
edsnlp/data/conll.py

Was already missing at lines 81-83

             )
-         except StopIteration:
-             cols = DEFAULT_COLUMNS
             warnings.warn(
Was already missing at lines 92-96
         if not line:
-             if doc["words"]:
-                 yield doc
-                 doc = {"words": []}
-             continue
         if line.startswith("#"):

766092.11%
edsnlp/core/torch_component.py

Was already missing at line 405

             if hasattr(self, "compiled"):
-                 res = self.compiled(batch)
             else:
Was already missing at line 451
         """
-         return self.preprocess(doc)

1892098.94%
edsnlp/core/stream.py

Was already missing at lines 190-192

                 if isinstance(batch, StreamSentinel):
-                     yield batch
-                     continue
                 results = []
Was already missing at lines 1007-1009
                 elif op.batch_fn is None:
-                     batch_size = op.size
-                     batch_fn = batchify
                 else:

3564098.88%
edsnlp/core/pipeline.py

Was already missing at line 605

             if name in exclude:
-                 continue
             if name not in components:
Was already missing at lines 716-719
         """
-         res = Stream.ensure_stream(docs)
-         res = res.map(functools.partial(self.preprocess, supervision=supervision))
-         return res

4464099.10%
edsnlp/connectors/omop.py

Was already missing at line 69

         if not isinstance(row.ents, list):
-             continue
Was already missing at line 87
             else:
-                 doc.spans[span.label_].append(span)
Was already missing at line 127
     if df.note_id.isna().any():
-         df["note_id"] = range(len(df))
Was already missing at line 171
         if i > 0:
-             df.term_modifiers += ";"
         df.term_modifiers += ext + "=" + df[ext].astype(str)

844095.24%

283 files skipped due to complete coverage.

Coverage failure: total of 97.30% is less than 97.98% ❌

Copy link

Copy link

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant