Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ export(coco_detection_dataset)
export(draw_bounding_boxes)
export(draw_keypoints)
export(draw_segmentation_masks)
export(emnist_collection)
export(emnist_dataset)
export(eurosat100_dataset)
export(eurosat_all_bands_dataset)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
* Breaking Change : Refactoring of `coco_*` dataset family now provides each `item$x` being an image array (for consistency with other datasets).
You can use `transform = transform_to_tensor` to restore the previous x output to be a `torch_tensor()`.
* `transform_` are now documented into 3 different categories: unitary transformations, random transformations and combining transformations. (@cregouby, #250)
* Deprecation : `emnist_dataset` is deprecated in favor of `emnist_collection()` (@cregouby, #260).

# torchvision 0.7.0

Expand Down
47 changes: 28 additions & 19 deletions R/dataset-mnist.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' - **Fashion-MNIST**: Clothing item images for classification.
#' - **Kuzushiji-MNIST**: Japanese cursive character dataset.
#' - **QMNIST**: Extended MNIST with high-precision NIST data.
#' - **EMNIST**: Letters and digits with multiple label splits.
#' - **EMNIST**: A collection of letters and digits with multiple datasets and splits.
#'
#' @param root Root directory for dataset storage. The dataset will be stored under `root/<dataset-name>`. Defaults to `tempdir()`.
#' @param train Logical. If TRUE, use the training set; otherwise, use the test set. Not applicable to all datasets.
Expand All @@ -18,7 +18,7 @@
#'
#' @return A torch dataset object, where each item is a list of `x` (image) and `y` (label).
#'
#' @section Supported `kind`s for `emnist_dataset()`:
#' @section Supported `dataset`s for `emnist_collection()`:
#' - `"byclass"`: 62 classes (digits + uppercase + lowercase)
#' - `"bymerge"`: 47 classes (merged uppercase and lowercase)
#' - `"balanced"`: 47 classes, balanced digits and letters
Expand All @@ -43,7 +43,7 @@
#' item$x
#' item$y
#'
#' emnist <- emnist_dataset(kind = "balanced", split = "test", download = TRUE)
#' emnist <- emnist_collection(dataset = "balanced", split = "test", download = TRUE)
#' item <- emnist[1]
#' item$x
#' item$y
Expand Down Expand Up @@ -354,19 +354,19 @@ fashion_mnist_dataset <- dataset(
)
)

#' @describeIn mnist_dataset EMNIST dataset with digits and letters and multiple split modes.
#' @param kind change the classes into one of "byclass", "bymerge", "balanced" representing the kind of emnist dataset. You
#' can look at dataset attribute `$classes` to see the actual classes.
#' @describeIn mnist_dataset EMNIST collection with digits and letters arranged in multiple datasets.
#' @param dataset one of "byclass", "bymerge", or "balanced", selecting the subset of the EMNIST
#' collection and its set of classes. You can look at the dataset attribute `$classes` to see the actual classes.
#' @export
emnist_dataset <- dataset(
name = "emnist_dataset",
emnist_collection <- dataset(
name = "emnist_collection",
archive_size = "540 MB",

resources = list(
c("https://biometrics.nist.gov/cs_links/EMNIST/gzip.zip", "58c8d27c78d21e728a6bc7b3cc06412e")
),
rds_file = function(split, kind) paste0(split,"-",kind,".rds"),
classes_all_kind = list(
classes_all_dataset = list(
byclass = c(
"0","1","2","3","4","5","6","7","8","9",
LETTERS,
Expand All @@ -390,34 +390,34 @@ emnist_dataset <- dataset(
initialize = function(
root = tempdir(),
split = "test",
kind = "balanced",
dataset = "balanced",
transform = NULL,
target_transform = NULL,
download = FALSE
) {

self$split <- match.arg(split, choices = c("train", "test"))
self$kind <- match.arg(kind, choices = names(self$classes_all_kind))
self$dataset <- match.arg(dataset, choices = names(self$classes_all_dataset))
self$root_path <- root
self$raw_folder <- file.path(root, class(self)[1], "raw")
self$processed_folder <- file.path(root, class(self)[1], "processed")
self$transform <- transform
self$target_transform <- target_transform
self$class <- self$classes_all_kind[[self$kind]]
self$class <- self$classes_all_dataset[[self$dataset]]

if (download) {
cli_inform("Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
cli_inform("{.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
self$download()
}

if (!self$check_exists())
runtime_error("Dataset not found. You can use `download = TRUE` to download it.")

dataset_lst <- readRDS(file.path(self$processed_folder, self$rds_file(self$split, self$kind)))
dataset_lst <- readRDS(file.path(self$processed_folder, self$rds_file(self$split, self$dataset)))
self$data <- dataset_lst[[1]]
self$targets <- dataset_lst[[2]] + 1L

cli_inform("Split {.val {self$split}} of {.cls {class(self)[[1]]}} dataset of kind {.val {self$kind}} processed successfully!")
cli_inform("Split {.val {self$split}} of dataset {.val {self$dataset}} from {.cls {class(self)[[1]]}} processed successfully!")
},

download = function() {
Expand All @@ -440,15 +440,15 @@ emnist_dataset <- dataset(
unzipped_root <- fs::dir_ls(unzip_dir, type = "directory", recurse = FALSE)[1]

# only manage extraction of the 2 ubyte.gz under interest
img <- file.path(unzipped_root, glue::glue("emnist-{self$kind}-{self$split}-images-idx3-ubyte.gz"))
lbl <- file.path(unzipped_root, glue::glue("emnist-{self$kind}-{self$split}-labels-idx1-ubyte.gz"))
img <- file.path(unzipped_root, glue::glue("emnist-{self$dataset}-{self$split}-images-idx3-ubyte.gz"))
lbl <- file.path(unzipped_root, glue::glue("emnist-{self$dataset}-{self$split}-labels-idx1-ubyte.gz"))
dataset_set <- list(read_sn3_pascalvincent(img), read_sn3_pascalvincent(lbl))
saveRDS(dataset_set, file.path(self$processed_folder, self$rds_file(self$split, self$kind)))
saveRDS(dataset_set, file.path(self$processed_folder, self$rds_file(self$split, self$dataset)))

},
# only manage existence of the rds file under interest
check_exists = function() {
fs::file_exists(file.path(self$processed_folder, self$rds_file(self$split, self$kind)))
fs::file_exists(file.path(self$processed_folder, self$rds_file(self$split, self$dataset)))
},

.getitem = function(index) {
Expand Down Expand Up @@ -492,3 +492,12 @@ read_sn3_pascalvincent <- function(path) {
a <- aperm(a, perm = rev(seq_along(dim)))
a
}

#' @describeIn mnist_dataset Deprecated. Please use [emnist_collection()] instead.
#' @param kind the former name of the `dataset` argument of `emnist_collection()`.
#'   Defaults to `"balanced"` to preserve the historical `emnist_dataset()` default,
#'   so legacy calls that omitted `kind` keep working.
#' @param ... other arguments passed on to `emnist_collection()` (e.g. `root`,
#'   `split`, `transform`, `target_transform`, `download`).
#' @export
emnist_dataset <- function(kind = "balanced", ...) {
  # NOTE(review): `kind` is now the first parameter, whereas the old generator's
  # first positional argument was `root` — fully positional legacy calls such as
  # emnist_dataset(tempdir(), "train") will not match; confirm this is acceptable.
  .Deprecated("emnist_collection")
  emnist_collection(dataset = kind, ...)
}
1 change: 0 additions & 1 deletion cran-comments.md
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
Re-submission to fix function call removed from dependency.
Binary file modified inst/po/fr/LC_MESSAGES/R-torchvision.mo
Binary file not shown.
25 changes: 17 additions & 8 deletions man/mnist_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 27 additions & 18 deletions po/R-fr.po
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
msgid ""
msgstr ""
"Project-Id-Version: torchvision 0.7.0.9000\n"
"POT-Creation-Date: 2025-09-13 23:15+0200\n"
"PO-Revision-Date: 2025-09-13 23:20+0200\n"
"POT-Creation-Date: 2025-09-28 10:02+0200\n"
"PO-Revision-Date: 2025-09-28 10:08+0200\n"
"Last-Translator: Christophe Regouby <[email protected]>\n"
"Language-Team: \n"
"Language: fr\n"
Expand All @@ -12,7 +12,7 @@ msgstr ""
"X-Generator: Poedit 3.7\n"
"X-Poedit-SourceCharset: UTF-8\n"

#: collection-rf100-doc.R:142
#: collection-rf100-doc.R:139
msgid ""
"Dataset {.val {self$dataset}} split {.val {self$split}} of {.cls {class(self)"
"[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if "
Expand All @@ -22,7 +22,7 @@ msgstr ""
"(de taille ~{.emph {self$archive_size}}) sera téléchargée et traitée si elle "
"n'est pas déjà disponible."

#: collection-rf100-doc.R:147
#: collection-rf100-doc.R:144
msgid ""
"Dataset not found. Use download=TRUE or check that parquet files exist at "
"the expected paths."
Expand All @@ -31,19 +31,19 @@ msgstr ""
"télécharger, ou vérifier que le fichier `.parquet` existe à l'emplacement "
"attendu."

#: collection-rf100-doc.R:158
#: collection-rf100-doc.R:156
msgid ""
"{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images for "
"split {.val {self$split}}."
msgstr ""
"Le jeu de données {.cls {class(self)[[1]]}} est disponible avec "
"{self$.length()} images pour la partition {.val {self$split}}."

#: collection-rf100-doc.R:163
#: collection-rf100-doc.R:161
msgid "Downloading {.val {self$dataset}}..."
msgstr "Téléchargement de {.val {self$dataset}}..."

#: collection-rf100-doc.R:168
#: collection-rf100-doc.R:166
msgid "Corrupt file! Delete the cached files and try again."
msgstr "Fichier corrompu. Supprimez le fichier en cache et recommencez."

Expand Down Expand Up @@ -78,7 +78,7 @@ msgstr ""
#: dataset-caltech.R:61 dataset-cifar.R:52 dataset-coco.R:97 dataset-coco.R:289
#: dataset-eurosat.R:57 dataset-fer.R:63 dataset-fgvc.R:91 dataset-flickr.R:69
#: dataset-flickr.R:231 dataset-flowers.R:92 dataset-lfw.R:108
#: dataset-lfw.R:242 dataset-mnist.R:86 dataset-mnist.R:256 dataset-mnist.R:426
#: dataset-lfw.R:242 dataset-mnist.R:86 dataset-mnist.R:256
#: dataset-oxfordiiitpet.R:71 dataset-oxfordiiitpet.R:279
#: dataset-oxfordiiitpet.R:343 dataset-pascal.R:136 dataset-pascal.R:296
#: dataset-places365.R:98
Expand All @@ -93,7 +93,7 @@ msgstr ""
#: dataset-caltech.R:66 dataset-caltech.R:187 dataset-coco.R:102
#: dataset-coco.R:294 dataset-eurosat.R:64 dataset-fer.R:70
#: dataset-flowers.R:97 dataset-lfw.R:113 dataset-lfw.R:247 dataset-mnist.R:91
#: dataset-mnist.R:261 dataset-mnist.R:431 dataset-oxfordiiitpet.R:76
#: dataset-mnist.R:261 dataset-mnist.R:414 dataset-oxfordiiitpet.R:76
#: dataset-oxfordiiitpet.R:284 dataset-oxfordiiitpet.R:348 dataset-pascal.R:141
#: dataset-pascal.R:301 dataset-places365.R:103 dataset-plankton.R:89
#: dataset-rf100-peixos.R:70
Expand Down Expand Up @@ -122,7 +122,7 @@ msgstr "Téléchargement de {.cls {class(self)[[1]]}}..."
#: dataset-caltech.R:125 dataset-cifar.R:111 dataset-coco.R:198
#: dataset-eurosat.R:88 dataset-fer.R:129 dataset-flowers.R:136
#: dataset-lfw.R:155 dataset-lfw.R:168 dataset-mnist.R:121 dataset-mnist.R:286
#: dataset-mnist.R:450 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168
#: dataset-mnist.R:433 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168
#: dataset-places365.R:182 dataset-plankton.R:105 dataset-rf100-peixos.R:82
#: models-facenet.R:128 models-facenet.R:174 models-facenet.R:228
#: models-facenet.R:311 models-vit.R:49
Expand Down Expand Up @@ -281,25 +281,34 @@ msgstr ""
msgid "Processing {.cls {class(self)[[1]]}}..."
msgstr "Préparation de {.cls {class(self)[[1]]}} ..."

#: dataset-mnist.R:437
#: dataset-mnist.R:409
msgid ""
"Split {.val {self$split}} of {.cls {class(self)[[1]]}} dataset of kind {.val "
"{self$kind}} processed successfully!"
"{.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded "
"and processed if not already available."
msgstr ""
"La partition {.val {self$split}} du jeu de données {.cls {class(self)[[1]]}} "
"de type {.val {self$kind}} a été traitée avec succès !"
"Le jeu de données {.cls {class(self)[[1]]}} (de taille ~{.emph "
"{self$archive_size}}) sera téléchargé et traité s'il n'est pas déjà "
"disponible."

#: dataset-mnist.R:420
msgid ""
"Split {.val {self$split}} of dataset {.val {self$dataset}} from {.cls "
"{class(self)[[1]]}} processed successfully!"
msgstr ""
"La partition {.val {self$split}} du jeu de données {.val {self$dataset}} de "
"{.cls {class(self)[[1]]}} a été traitée avec succès !"

#: dataset-places365.R:128
msgid ""
"{.cls {class(self)[[1]]}} Split '{self$split}' loaded with {length(self)} "
"samples."
msgstr ""
"{.cls {class(self)[[1]]}} Le pratage '{self$split}' chargé et contient "
"{length(self)} échantillons."
"{.cls {class(self)[[1]]}} La partition '{self$split}' est chargée et "
"contient {length(self)} échantillons."

#: dataset-places365.R:155
msgid "Invalid split: {self$split}"
msgstr "Partage non valide : {self$split}"
msgstr "Partition non valide : {self$split}"

#: dataset-places365.R:163
msgid "Downloading {.cls {class(self)[[1]]}} split '{self$split}'..."
Expand Down
Loading
Loading