Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ export(coco_detection_dataset)
export(draw_bounding_boxes)
export(draw_keypoints)
export(draw_segmentation_masks)
export(emnist_collection)
export(emnist_dataset)
export(eurosat100_dataset)
export(eurosat_all_bands_dataset)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
* Breaking Change : Refactoring of `coco_*` dataset family now provides each `item$x` being an image array (for consistency with other datasets).
You can use `transform = transform_to_tensor` to restore the previous x output to be a `torch_tensor()`.
* `transform_` are now documented into 3 different categories: unitary transformations, random transformations and combining transformations. (@cregouby, #250)
* Deprecation : `emnist_dataset` is deprecated in favor of `emnist_collection()` (@cregouby, #260).

# torchvision 0.7.0

Expand Down
47 changes: 28 additions & 19 deletions R/dataset-mnist.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' - **Fashion-MNIST**: Clothing item images for classification.
#' - **Kuzushiji-MNIST**: Japanese cursive character dataset.
#' - **QMNIST**: Extended MNIST with high-precision NIST data.
#' - **EMNIST**: Letters and digits with multiple label splits.
#' - **EMNIST**: A collection of letters and digits with multiple datasets and splits.
#'
#' @param root Root directory for dataset storage. The dataset will be stored under `root/<dataset-name>`. Defaults to `tempdir()`.
#' @param train Logical. If TRUE, use the training set; otherwise, use the test set. Not applicable to all datasets.
Expand All @@ -18,7 +18,7 @@
#'
#' @return A torch dataset object, where each item is a list of `x` (image) and `y` (label).
#'
#' @section Supported `kind`s for `emnist_dataset()`:
#' @section Supported `dataset`s for `emnist_collection()`:
#' - `"byclass"`: 62 classes (digits + uppercase + lowercase)
#' - `"bymerge"`: 47 classes (merged uppercase and lowercase)
#' - `"balanced"`: 47 classes, balanced digits and letters
Expand All @@ -43,7 +43,7 @@
#' item$x
#' item$y
#'
#' emnist <- emnist_dataset(kind = "balanced", split = "test", download = TRUE)
#' emnist <- emnist_collection(dataset = "balanced", split = "test", download = TRUE)
#' item <- emnist[1]
#' item$x
#' item$y
Expand Down Expand Up @@ -354,19 +354,19 @@ fashion_mnist_dataset <- dataset(
)
)

#' @describeIn mnist_dataset EMNIST dataset with digits and letters and multiple split modes.
#' @param kind change the classes into one of "byclass", "bymerge", "balanced" representing the kind of emnist dataset. You
#' can look at dataset attribute `$classes` to see the actual classes.
#' @describeIn mnist_dataset EMNIST collection with digits and letters arranged in multiple datasets.
#' @param dataset one of "byclass", "bymerge", or "balanced", selecting the subset of the EMNIST
#' collection and its set of classes. You can look at the dataset attribute `$classes` to see the actual classes.
#' @export
emnist_dataset <- dataset(
name = "emnist_dataset",
emnist_collection <- dataset(
name = "emnist_collection",
archive_size = "540 MB",

resources = list(
c("https://biometrics.nist.gov/cs_links/EMNIST/gzip.zip", "58c8d27c78d21e728a6bc7b3cc06412e")
),
rds_file = function(split, kind) paste0(split,"-",kind,".rds"),
classes_all_kind = list(
classes_all_dataset = list(
byclass = c(
"0","1","2","3","4","5","6","7","8","9",
LETTERS,
Expand All @@ -390,34 +390,34 @@ emnist_dataset <- dataset(
initialize = function(
root = tempdir(),
split = "test",
kind = "balanced",
dataset = "balanced",
transform = NULL,
target_transform = NULL,
download = FALSE
) {

self$split <- match.arg(split, choices = c("train", "test"))
self$kind <- match.arg(kind, choices = names(self$classes_all_kind))
self$dataset <- match.arg(dataset, choices = names(self$classes_all_dataset))
self$root_path <- root
self$raw_folder <- file.path(root, class(self)[1], "raw")
self$processed_folder <- file.path(root, class(self)[1], "processed")
self$transform <- transform
self$target_transform <- target_transform
self$class <- self$classes_all_kind[[self$kind]]
self$class <- self$classes_all_dataset[[self$dataset]]

if (download) {
cli_inform("Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
cli_inform("{.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
self$download()
}

if (!self$check_exists())
runtime_error("Dataset not found. You can use `download = TRUE` to download it.")

dataset_lst <- readRDS(file.path(self$processed_folder, self$rds_file(self$split, self$kind)))
dataset_lst <- readRDS(file.path(self$processed_folder, self$rds_file(self$split, self$dataset)))
self$data <- dataset_lst[[1]]
self$targets <- dataset_lst[[2]] + 1L

cli_inform("Split {.val {self$split}} of {.cls {class(self)[[1]]}} dataset of kind {.val {self$kind}} processed successfully!")
cli_inform("Split {.val {self$split}} of dataset {.val {self$dataset}} from {.cls {class(self)[[1]]}} processed successfully!")
},

download = function() {
Expand All @@ -440,15 +440,15 @@ emnist_dataset <- dataset(
unzipped_root <- fs::dir_ls(unzip_dir, type = "directory", recurse = FALSE)[1]

# only manage extraction of the 2 ubyte.gz under interest
img <- file.path(unzipped_root, glue::glue("emnist-{self$kind}-{self$split}-images-idx3-ubyte.gz"))
lbl <- file.path(unzipped_root, glue::glue("emnist-{self$kind}-{self$split}-labels-idx1-ubyte.gz"))
img <- file.path(unzipped_root, glue::glue("emnist-{self$dataset}-{self$split}-images-idx3-ubyte.gz"))
lbl <- file.path(unzipped_root, glue::glue("emnist-{self$dataset}-{self$split}-labels-idx1-ubyte.gz"))
dataset_set <- list(read_sn3_pascalvincent(img), read_sn3_pascalvincent(lbl))
saveRDS(dataset_set, file.path(self$processed_folder, self$rds_file(self$split, self$kind)))
saveRDS(dataset_set, file.path(self$processed_folder, self$rds_file(self$split, self$dataset)))

},
# only manage existence of the rds file under interest
check_exists = function() {
fs::file_exists(file.path(self$processed_folder, self$rds_file(self$split, self$kind)))
fs::file_exists(file.path(self$processed_folder, self$rds_file(self$split, self$dataset)))
},

.getitem = function(index) {
Expand Down Expand Up @@ -492,3 +492,12 @@ read_sn3_pascalvincent <- function(path) {
a <- aperm(a, perm = rev(seq_along(dim)))
a
}

#' @describeIn mnist_dataset Deprecated. Please use [emnist_collection()] instead.
#' @param kind the former name of the `dataset` argument of `emnist_collection()`.
#'   Defaults to `"balanced"` to preserve the historical `emnist_dataset()` default,
#'   so legacy calls that omitted `kind` keep working.
#' @param ... other arguments passed on to `emnist_collection()` (e.g. `root`,
#'   `split`, `transform`, `target_transform`, `download`).
#' @export
emnist_dataset <- function(kind = "balanced", ...) {
  # NOTE(review): `kind` is now the first parameter, whereas the old generator's
  # first positional argument was `root` — fully positional legacy calls such as
  # emnist_dataset(tempdir(), "train") will not match; confirm this is acceptable.
  .Deprecated("emnist_collection")
  emnist_collection(dataset = kind, ...)
}
1 change: 0 additions & 1 deletion cran-comments.md
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
Re-submission to fix function call removed from dependency.
Binary file modified inst/po/fr/LC_MESSAGES/R-torchvision.mo
Binary file not shown.
25 changes: 17 additions & 8 deletions man/mnist_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 27 additions & 18 deletions po/R-fr.po
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
msgid ""
msgstr ""
"Project-Id-Version: torchvision 0.7.0.9000\n"
"POT-Creation-Date: 2025-09-13 23:15+0200\n"
"PO-Revision-Date: 2025-09-13 23:20+0200\n"
"POT-Creation-Date: 2025-09-28 10:02+0200\n"
"PO-Revision-Date: 2025-09-28 10:08+0200\n"
"Last-Translator: Christophe Regouby <[email protected]>\n"
"Language-Team: \n"
"Language: fr\n"
Expand All @@ -12,7 +12,7 @@ msgstr ""
"X-Generator: Poedit 3.7\n"
"X-Poedit-SourceCharset: UTF-8\n"

#: collection-rf100-doc.R:142
#: collection-rf100-doc.R:139
msgid ""
"Dataset {.val {self$dataset}} split {.val {self$split}} of {.cls {class(self)"
"[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if "
Expand All @@ -22,7 +22,7 @@ msgstr ""
"(de taille ~{.emph {self$archive_size}}) sera téléchargée et traitée si elle "
"n'est pas déjà disponible."

#: collection-rf100-doc.R:147
#: collection-rf100-doc.R:144
msgid ""
"Dataset not found. Use download=TRUE or check that parquet files exist at "
"the expected paths."
Expand All @@ -31,19 +31,19 @@ msgstr ""
"télécharger, ou vérifier que le fichier `.parquet` existe à l'emplacement "
"attendu."

#: collection-rf100-doc.R:158
#: collection-rf100-doc.R:156
msgid ""
"{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images for "
"split {.val {self$split}}."
msgstr ""
"Le jeu de données {.cls {class(self)[[1]]}} est disponible avec "
"{self$.length()} images pour la partition {.val {self$split}}."

#: collection-rf100-doc.R:163
#: collection-rf100-doc.R:161
msgid "Downloading {.val {self$dataset}}..."
msgstr "Téléchargement de {.val {self$dataset}}..."

#: collection-rf100-doc.R:168
#: collection-rf100-doc.R:166
msgid "Corrupt file! Delete the cached files and try again."
msgstr "Fichier corrompu. Supprimez le fichier en cache et recommencez."

Expand Down Expand Up @@ -78,7 +78,7 @@ msgstr ""
#: dataset-caltech.R:61 dataset-cifar.R:52 dataset-coco.R:97 dataset-coco.R:289
#: dataset-eurosat.R:57 dataset-fer.R:63 dataset-fgvc.R:91 dataset-flickr.R:69
#: dataset-flickr.R:231 dataset-flowers.R:92 dataset-lfw.R:108
#: dataset-lfw.R:242 dataset-mnist.R:86 dataset-mnist.R:256 dataset-mnist.R:426
#: dataset-lfw.R:242 dataset-mnist.R:86 dataset-mnist.R:256
#: dataset-oxfordiiitpet.R:71 dataset-oxfordiiitpet.R:279
#: dataset-oxfordiiitpet.R:343 dataset-pascal.R:136 dataset-pascal.R:296
#: dataset-places365.R:98
Expand All @@ -93,7 +93,7 @@ msgstr ""
#: dataset-caltech.R:66 dataset-caltech.R:187 dataset-coco.R:102
#: dataset-coco.R:294 dataset-eurosat.R:64 dataset-fer.R:70
#: dataset-flowers.R:97 dataset-lfw.R:113 dataset-lfw.R:247 dataset-mnist.R:91
#: dataset-mnist.R:261 dataset-mnist.R:431 dataset-oxfordiiitpet.R:76
#: dataset-mnist.R:261 dataset-mnist.R:414 dataset-oxfordiiitpet.R:76
#: dataset-oxfordiiitpet.R:284 dataset-oxfordiiitpet.R:348 dataset-pascal.R:141
#: dataset-pascal.R:301 dataset-places365.R:103 dataset-plankton.R:89
#: dataset-rf100-peixos.R:70
Expand Down Expand Up @@ -122,7 +122,7 @@ msgstr "Téléchargement de {.cls {class(self)[[1]]}}..."
#: dataset-caltech.R:125 dataset-cifar.R:111 dataset-coco.R:198
#: dataset-eurosat.R:88 dataset-fer.R:129 dataset-flowers.R:136
#: dataset-lfw.R:155 dataset-lfw.R:168 dataset-mnist.R:121 dataset-mnist.R:286
#: dataset-mnist.R:450 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168
#: dataset-mnist.R:433 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168
#: dataset-places365.R:182 dataset-plankton.R:105 dataset-rf100-peixos.R:82
#: models-facenet.R:128 models-facenet.R:174 models-facenet.R:228
#: models-facenet.R:311 models-vit.R:49
Expand Down Expand Up @@ -281,25 +281,34 @@ msgstr ""
msgid "Processing {.cls {class(self)[[1]]}}..."
msgstr "Préparation de {.cls {class(self)[[1]]}} ..."

#: dataset-mnist.R:437
#: dataset-mnist.R:409
msgid ""
"Split {.val {self$split}} of {.cls {class(self)[[1]]}} dataset of kind {.val "
"{self$kind}} processed successfully!"
"{.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded "
"and processed if not already available."
msgstr ""
"La partition {.val {self$split}} du jeu de données {.cls {class(self)[[1]]}} "
"de type {.val {self$kind}} a été traitée avec succès !"
"Le jeu de données {.cls {class(self)[[1]]}} (de taille ~{.emph "
"{self$archive_size}}) sera téléchargé et traité s'il n'est pas déjà "
"disponible."

#: dataset-mnist.R:420
msgid ""
"Split {.val {self$split}} of dataset {.val {self$dataset}} from {.cls "
"{class(self)[[1]]}} processed successfully!"
msgstr ""
"La partition {.val {self$split}} du jeu de données {.val {self$dataset}} de "
"{.cls {class(self)[[1]]}} a été traitée avec succès !"

#: dataset-places365.R:128
msgid ""
"{.cls {class(self)[[1]]}} Split '{self$split}' loaded with {length(self)} "
"samples."
msgstr ""
"{.cls {class(self)[[1]]}} Le pratage '{self$split}' chargé et contient "
"{length(self)} échantillons."
"{.cls {class(self)[[1]]}} La partition '{self$split}' est chargée et "
"contient {length(self)} échantillons."

#: dataset-places365.R:155
msgid "Invalid split: {self$split}"
msgstr "Partage non valide : {self$split}"
msgstr "Partition non valide : {self$split}"

#: dataset-places365.R:163
msgid "Downloading {.cls {class(self)[[1]]}} split '{self$split}'..."
Expand Down
Loading
Loading