|
1 | 1 | """shell |
2 | 2 | pip install autokeras |
3 | | -pip install git+https://github.com/keras-team/keras-tuner@1.0.2rc2 |
| 3 | +pip install git+https://github.com/keras-team/keras-tuner@1.0.2rc3 |
4 | 4 | """ |
5 | 5 |
|
6 | 6 | """ |
|
10 | 10 | First, we download the data and extract the files. |
11 | 11 | """ |
12 | 12 |
|
| 13 | +import autokeras as ak |
13 | 14 | import tensorflow as tf |
14 | 15 | import os |
15 | 16 |
|
16 | | -# dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz" |
17 | | -# local_file_path = tf.keras.utils.get_file(origin=dataset_url, |
18 | | - # fname='image_data', |
19 | | - # extract=True) |
20 | | -# # The file is extracted in the same directory as the downloaded file. |
21 | | -# local_dir_path = os.path.dirname(local_file_path) |
22 | | -# # After check mannually, we know the extracted data is in 'flower_photos'. |
23 | | -# data_dir = os.path.join(local_dir_path, 'flower_photos') |
24 | | -# print(data_dir) |
| 17 | +dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz" |
| 18 | +local_file_path = tf.keras.utils.get_file(origin=dataset_url, |
| 19 | + fname='image_data', |
| 20 | + extract=True) |
| 21 | +# The file is extracted in the same directory as the downloaded file. |
| 22 | +local_dir_path = os.path.dirname(local_file_path) |
| 23 | +# After checking manually, we know the extracted data is in 'flower_photos'. |
| 24 | +data_dir = os.path.join(local_dir_path, 'flower_photos') |
| 25 | +print(data_dir) |
25 | 26 |
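"""
To double-check where the files ended up, you can list the extracted directory. A minimal sketch, assuming the `data_dir` computed above:
"""

print(os.listdir(data_dir))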
|
26 | 27 | """ |
27 | 28 | The directory should look like this. Each folder contains the images from the same class. |
|
42 | 43 | img_height = 180 |
43 | 44 | img_width = 180 |
44 | 45 |
|
45 | | -# train_data = tf.keras.preprocessing.image_dataset_from_directory( |
46 | | - # data_dir, |
47 | | - # # Use 20% data as testing data. |
48 | | - # validation_split=0.2, |
49 | | - # subset="training", |
50 | | - # # Set seed to ensure the same split when loading testing data. |
51 | | - # seed=123, |
52 | | - # image_size=(img_height, img_width), |
53 | | - # batch_size=batch_size) |
54 | | - |
55 | | -# test_data = tf.keras.preprocessing.image_dataset_from_directory( |
56 | | - # data_dir, |
57 | | - # validation_split=0.2, |
58 | | - # subset="validation", |
59 | | - # seed=123, |
60 | | - # image_size=(img_height, img_width), |
61 | | - # batch_size=batch_size) |
| 46 | +train_data = ak.image_dataset_from_directory( |
| 47 | + data_dir, |
| 48 | + # Use 20% of the data as testing data. |
| 49 | + validation_split=0.2, |
| 50 | + subset="training", |
| 51 | + # Set seed to ensure the same split when loading testing data. |
| 52 | + seed=123, |
| 53 | + image_size=(img_height, img_width), |
| 54 | + batch_size=batch_size) |
| 55 | + |
| 56 | +test_data = ak.image_dataset_from_directory( |
| 57 | + data_dir, |
| 58 | + validation_split=0.2, |
| 59 | + subset="validation", |
| 60 | + seed=123, |
| 61 | + image_size=(img_height, img_width), |
| 62 | + batch_size=batch_size) |
62 | 63 |
|
63 | 64 | """ |
64 | 65 | Then we just do one quick demo of AutoKeras to make sure the dataset works. |
65 | 66 | """ |
66 | 67 |
|
67 | | -import autokeras as ak |
68 | | - |
69 | | -# clf = ak.ImageClassifier(overwrite=True, max_trials=1) |
70 | | -# clf.fit(train_data, epochs=1) |
71 | | -# print(clf.evaluate(test_data)) |
| 68 | +clf = ak.ImageClassifier(overwrite=True, max_trials=1) |
| 69 | +clf.fit(train_data, epochs=1) |
| 70 | +print(clf.evaluate(test_data)) |
72 | 71 |
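"""
Once the quick demo finishes, the best model found can also be exported as a plain Keras model for inspection or reuse. A minimal sketch, assuming the standard AutoKeras `export_model` method:
"""

model = clf.export_model()
model.summary()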
|
73 | 72 | """ |
74 | 73 | You can also load text datasets in the same way. |
|
94 | 93 | For this dataset, the data is already split into train and test. |
95 | 94 | We just load them separately. |
96 | 95 | """ |
97 | | -print(data_dir) |
98 | | -train_data = tf.keras.preprocessing.text_dataset_from_directory( |
99 | | - os.path.join(data_dir, 'train'), |
100 | | - class_names=['pos', 'neg'], |
101 | | - validation_split=0.2, |
102 | | - subset="training", |
103 | | - # shuffle=False, |
104 | | - seed=123, |
105 | | - batch_size=batch_size) |
106 | 96 |
|
107 | | -val_data = tf.keras.preprocessing.text_dataset_from_directory( |
| 97 | +print(data_dir) |
| 98 | +train_data = ak.text_dataset_from_directory( |
108 | 99 | os.path.join(data_dir, 'train'), |
109 | | - class_names=['pos', 'neg'], |
110 | | - validation_split=0.2, |
111 | | - subset="validation", |
112 | | - # shuffle=False, |
113 | | - seed=123, |
114 | 100 | batch_size=batch_size) |
115 | 101 |
|
116 | | -test_data = tf.keras.preprocessing.text_dataset_from_directory( |
| 102 | +test_data = ak.text_dataset_from_directory( |
117 | 103 | os.path.join(data_dir, 'test'), |
118 | | - class_names=['pos', 'neg'], |
119 | 104 | shuffle=False, |
120 | 105 | batch_size=batch_size) |
121 | 106 |
|
122 | | -for x, y in train_data: |
123 | | - print(x.numpy()[0]) |
124 | | - print(y.numpy()[0]) |
125 | | - # record_x = x.numpy() |
126 | | - # record_y = y.numpy() |
127 | | - break |
128 | | - |
129 | | -for x, y in train_data: |
130 | | - print(x.numpy()[0]) |
131 | | - print(y.numpy()[0]) |
132 | | - break |
133 | | - |
134 | | -# train_data = tf.keras.preprocessing.text_dataset_from_directory( |
135 | | - # os.path.join(data_dir, 'train'), |
136 | | - # class_names=['pos', 'neg'], |
137 | | - # shuffle=True, |
138 | | - # seed=123, |
139 | | - # batch_size=batch_size) |
140 | | - |
141 | | -# for x, y in train_data: |
142 | | - # for i, a in enumerate(x.numpy()): |
143 | | - # for j, b in enumerate(record_x): |
144 | | - # if a == b: |
145 | | - # print('*') |
146 | | - # assert record_y[j] == y.numpy()[i] |
147 | | - |
148 | | -# import numpy as np |
149 | | -# x_train = [] |
150 | | -# y_train = [] |
151 | | -# for x, y in train_data: |
152 | | - # for a in x.numpy(): |
153 | | - # x_train.append(a) |
154 | | - # for a in y.numpy(): |
155 | | - # y_train.append(a) |
156 | | - |
157 | | -# x_train = np.array(x_train) |
158 | | -# y_train = np.array(y_train) |
159 | | - |
160 | | -# train_data = train_data.shuffle(1000, seed=123, reshuffle_each_iteration=False) |
161 | | - |
162 | | - |
163 | | -clf = ak.TextClassifier(overwrite=True, max_trials=2) |
164 | | -# clf.fit(train_data, validation_data=test_data) |
165 | | -# clf.fit(train_data, validation_data=train_data) |
166 | | -clf.fit(train_data, validation_data=val_data) |
167 | | -# clf.fit(x_train, y_train) |
168 | | -# clf.fit(train_data) |
| 107 | +clf = ak.TextClassifier(overwrite=True, max_trials=1) |
| 108 | +clf.fit(train_data, epochs=2) |
169 | 109 | print(clf.evaluate(test_data)) |
| 110 | + |
| 111 | + |
| 112 | +""" |
| 113 | +If you want to use generators, you can refer to the following code. |
| 114 | +""" |
| 115 | + |
| 116 | +import math |
| 117 | + |
| 118 | +import numpy as np |
| 119 | + |
| 120 | +N_BATCHES = 30 |
| 121 | +BATCH_SIZE = 100 |
| 122 | +N_FEATURES = 10 |
| 123 | + |
| 124 | + |
| 125 | +def get_data_generator(n_batches, batch_size, n_features): |
| 126 | + """Return a generator that yields n_batches * batch_size random samples, each with n_features features.""" |
| 127 | + |
| 128 | + def data_generator(): |
| 129 | + for _ in range(n_batches * batch_size): |
| 130 | + x = np.random.randn(n_features) |
| 131 | + y = x.sum(axis=0) / n_features > 0.5 |
| 132 | + yield x, y |
| 133 | + |
| 134 | + return data_generator |
| 135 | + |
| 136 | + |
| 137 | +dataset = tf.data.Dataset.from_generator( |
| 138 | + get_data_generator(N_BATCHES, BATCH_SIZE, N_FEATURES), |
| 139 | + output_types=(tf.float32, tf.float32), |
| 140 | + output_shapes=((N_FEATURES,), tuple()), |
| 141 | +).batch(BATCH_SIZE) |
| 142 | + |
| 143 | +clf = ak.StructuredDataClassifier(overwrite=True, max_trials=1, seed=5) |
| 144 | +clf.fit(x=dataset, validation_data=dataset, batch_size=BATCH_SIZE) |
| 145 | +print(clf.evaluate(dataset)) |
| 146 | + |
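"""
To verify that the generator-backed dataset produces batches of the expected shape, you can peek at a single batch. A minimal sketch using `tf.data.Dataset.take`:
"""

for x, y in dataset.take(1):
    print(x.shape)  # (BATCH_SIZE, N_FEATURES)
    print(y.shape)  # (BATCH_SIZE,)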
| 147 | +""" |
| 148 | +## Reference |
| 149 | +[image_dataset_from_directory](utils/#image_dataset_from_directory-function) |
| 150 | +[text_dataset_from_directory](utils/#text_dataset_from_directory-function) |
| 151 | +""" |