7
7
# ' - **Fashion-MNIST**: Clothing item images for classification.
8
8
# ' - **Kuzushiji-MNIST**: Japanese cursive character dataset.
9
9
# ' - **QMNIST**: Extended MNIST with high-precision NIST data.
10
- # ' - **EMNIST**: Letters and digits with multiple label splits.
10
+ # ' - **EMNIST**: A collection of letters and digits with multiple datasets and splits.
11
11
# '
12
12
# ' @param root Root directory for dataset storage. The dataset will be stored under `root/<dataset-name>`. Defaults to `tempdir()`.
13
13
# ' @param train Logical. If TRUE, use the training set; otherwise, use the test set. Not applicable to all datasets.
18
18
# '
19
19
# ' @return A torch dataset object, where each item is a list of `x` (image) and `y` (label).
20
20
# '
21
- # ' @section Supported `kind `s for `emnist_dataset ()`:
21
+ # ' @section Supported `dataset`s for `emnist_collection()`:
22
22
# ' - `"byclass"`: 62 classes (digits + uppercase + lowercase)
23
23
# ' - `"bymerge"`: 47 classes (merged uppercase and lowercase)
24
24
# ' - `"balanced"`: 47 classes, balanced digits and letters
43
43
# ' item$x
44
44
# ' item$y
45
45
# '
46
- # ' emnist <- emnist_dataset(kind = "balanced", split = "test", download = TRUE)
46
+ # ' emnist <- emnist_collection(dataset = "balanced", split = "test", download = TRUE)
47
47
# ' item <- emnist[1]
48
48
# ' item$x
49
49
# ' item$y
@@ -354,19 +354,19 @@ fashion_mnist_dataset <- dataset(
354
354
)
355
355
)
356
356
357
- # ' @describeIn mnist_dataset EMNIST dataset with digits and letters and multiple split modes .
358
- # ' @param kind change the classes into one of "byclass", "bymerge", "balanced" representing the kind of emnist dataset. You
359
- # ' can look at dataset attribute `$classes` to see the actual classes.
357
+ # ' @describeIn mnist_dataset EMNIST collection with digits and letters arranged in multiple datasets .
358
+ # ' @param dataset one of "byclass", "bymerge", "balanced" representing the subset of emnist collection
359
+ # ' made of a set of classes. You can look at dataset attribute `$classes` to see the actual classes.
360
360
# ' @export
361
- emnist_dataset <- dataset(
362
- name = " emnist_dataset " ,
361
+ emnist_collection <- dataset(
362
+ name = " emnist_collection " ,
363
363
archive_size = " 540 MB" ,
364
364
365
365
resources = list (
366
366
c(" https://biometrics.nist.gov/cs_links/EMNIST/gzip.zip" , " 58c8d27c78d21e728a6bc7b3cc06412e" )
367
367
),
368
368
rds_file = function (split , kind ) paste0(split ," -" ,kind ," .rds" ),
369
- classes_all_kind = list (
369
+ classes_all_dataset = list (
370
370
byclass = c(
371
371
" 0" ," 1" ," 2" ," 3" ," 4" ," 5" ," 6" ," 7" ," 8" ," 9" ,
372
372
LETTERS ,
@@ -390,34 +390,34 @@ emnist_dataset <- dataset(
390
390
initialize = function (
391
391
root = tempdir(),
392
392
split = " test" ,
393
- kind = " balanced" ,
393
+ dataset = " balanced" ,
394
394
transform = NULL ,
395
395
target_transform = NULL ,
396
396
download = FALSE
397
397
) {
398
398
399
399
self $ split <- match.arg(split , choices = c(" train" , " test" ))
400
- self $ kind <- match.arg(kind , choices = names(self $ classes_all_kind ))
400
+ self $ dataset <- match.arg(dataset , choices = names(self $ classes_all_dataset ))
401
401
self $ root_path <- root
402
402
self $ raw_folder <- file.path(root , class(self )[1 ], " raw" )
403
403
self $ processed_folder <- file.path(root , class(self )[1 ], " processed" )
404
404
self $ transform <- transform
405
405
self $ target_transform <- target_transform
406
- self $ class <- self $ classes_all_kind [[self $ kind ]]
406
+ self $ class <- self $ classes_all_dataset [[self $ dataset ]]
407
407
408
408
if (download ) {
409
- cli_inform(" Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available." )
409
+ cli_inform(" {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available." )
410
410
self $ download()
411
411
}
412
412
413
413
if (! self $ check_exists())
414
414
runtime_error(" Dataset not found. You can use `download = TRUE` to download it." )
415
415
416
- dataset_lst <- readRDS(file.path(self $ processed_folder , self $ rds_file(self $ split , self $ kind )))
416
+ dataset_lst <- readRDS(file.path(self $ processed_folder , self $ rds_file(self $ split , self $ dataset )))
417
417
self $ data <- dataset_lst [[1 ]]
418
418
self $ targets <- dataset_lst [[2 ]] + 1L
419
419
420
- cli_inform(" Split {.val {self$split}} of {.cls {class( self)[[1]] }} dataset of kind {.val { self$kind }} processed successfully!" )
420
+ cli_inform(" Split {.val {self$split}} of dataset {.val { self$dataset }} from {.cls {class( self)[[1]] }} processed successfully!" )
421
421
},
422
422
423
423
download = function () {
@@ -440,15 +440,15 @@ emnist_dataset <- dataset(
440
440
unzipped_root <- fs :: dir_ls(unzip_dir , type = " directory" , recurse = FALSE )[1 ]
441
441
442
442
# only manage extraction of the 2 ubyte.gz under interest
443
- img <- file.path(unzipped_root , glue :: glue(" emnist-{self$kind }-{self$split}-images-idx3-ubyte.gz" ))
444
- lbl <- file.path(unzipped_root , glue :: glue(" emnist-{self$kind }-{self$split}-labels-idx1-ubyte.gz" ))
443
+ img <- file.path(unzipped_root , glue :: glue(" emnist-{self$dataset }-{self$split}-images-idx3-ubyte.gz" ))
444
+ lbl <- file.path(unzipped_root , glue :: glue(" emnist-{self$dataset }-{self$split}-labels-idx1-ubyte.gz" ))
445
445
dataset_set <- list (read_sn3_pascalvincent(img ), read_sn3_pascalvincent(lbl ))
446
- saveRDS(dataset_set , file.path(self $ processed_folder , self $ rds_file(self $ split , self $ kind )))
446
+ saveRDS(dataset_set , file.path(self $ processed_folder , self $ rds_file(self $ split , self $ dataset )))
447
447
448
448
},
449
449
# only manage existence of the rds file under interest
450
450
check_exists = function () {
451
- fs :: file_exists(file.path(self $ processed_folder , self $ rds_file(self $ split , self $ kind )))
451
+ fs :: file_exists(file.path(self $ processed_folder , self $ rds_file(self $ split , self $ dataset )))
452
452
},
453
453
454
454
.getitem = function (index ) {
@@ -492,3 +492,12 @@ read_sn3_pascalvincent <- function(path) {
492
492
a <- aperm(a , perm = rev(seq_along(dim )))
493
493
a
494
494
}
495
+
496
+ # ' @describeIn mnist_dataset Deprecated. Please use emnist_collection.
497
+ # ' @param kind the `dataset` in `emnist_collection`.
498
+ # ' @param ... the other `emnist_collection` parameters.
499
+ # ' @export
500
emnist_dataset <- function(kind = "balanced", ...) {
  # Deprecated alias kept so existing `emnist_dataset()` calls keep working;
  # `.Deprecated()` emits a warning pointing users to `emnist_collection()`.
  .Deprecated("emnist_collection")
  # `kind` defaults to "balanced" (the same default the `dataset` argument of
  # `emnist_collection()` uses) so that calls which omit it, e.g.
  # `emnist_dataset(split = "train")`, do not fail with a missing-argument
  # error when `dataset` is evaluated downstream.
  # The old argument name `kind` maps onto the new `dataset` argument; every
  # other argument is forwarded unchanged through `...`.
  emnist_collection(dataset = kind, ...)
}
0 commit comments