TensorFlow Cheatsheet

This is a cheatsheet for TensorFlow 2.x.

Install TensorFlow

# Upgrade pip.
pip install --upgrade pip

# Install the current stable release of TensorFlow.
pip install tensorflow

TensorFlow APIs

Tensor

# Define a constant tensor.
tf.constant(5)

# Define another constant tensor.
tf.constant([[1.0, 2.0], [3.0, 4.0]])

# Compute a tensor from two constant tensors.
t1 = tf.constant([[1.0, 2.0], [3.0, 4.0]])
t2 = tf.constant([[1.0, 1.0], [0.0, 1.0]])
t3 = tf.matmul(t1, t2)

# Get the device on which the tensor is placed.
t.device

# Get the DType of the tensor.
t.dtype

# Get the tensor shape.
t.shape

# The following attributes are only defined for graph tensors (e.g., inside
# a tf.function); accessing them on an eager tensor raises an AttributeError.

# Get the tensor name.
t.name

# Get the graph containing this tensor.
t.graph

# Get the Operation that produces this tensor as an output.
t.op

# Get the index of this tensor in the outputs of its Operation.
t.value_index
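The graph-only attributes can be inspected at trace time; a minimal sketch, assuming eager execution (the TF2 default):

import tensorflow as tf

@tf.function
def f(x):
  y = tf.matmul(x, x)
  # Inside a tf.function, y is a graph tensor, so these attributes exist.
  # This print runs at trace time; the name is typically "MatMul:0".
  print(y.name, y.op.type, y.value_index)
  return y

f(tf.constant([[1.0, 2.0], [3.0, 4.0]]))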

RaggedTensor

# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>.
tf.RaggedTensor.from_row_splits(
    values=[3, 1, 4, 1, 5, 9, 2, 6],
    row_splits=[0, 4, 4, 7, 8, 8])

# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>.
tf.RaggedTensor.from_row_lengths(
    values=[3, 1, 4, 1, 5, 9, 2, 6],
    row_lengths=[4, 0, 3, 1, 0])

# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>.
tf.RaggedTensor.from_value_rowids(
    values=[3, 1, 4, 1, 5, 9, 2, 6],
    value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
    nrows=5)

# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>.
tf.RaggedTensor.from_row_starts(
    values=[3, 1, 4, 1, 5, 9, 2, 6],
    row_starts=[0, 4, 4, 7, 8])

# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>.
tf.RaggedTensor.from_row_limits(
    values=[3, 1, 4, 1, 5, 9, 2, 6],
    row_limits=[4, 4, 7, 8, 8])

# The tensor value is <tf.RaggedTensor [[3, 1], [4, 1], [5, 9], [2, 6]]>.
tf.RaggedTensor.from_uniform_row_length(
    values=[3, 1, 4, 1, 5, 9, 2, 6],
    uniform_row_length=2)
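The first five constructors above all build the same ragged tensor; for literal values, tf.ragged.constant is the direct shorthand:

import tensorflow as tf

rt = tf.ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
rt.values      # [3, 1, 4, 1, 5, 9, 2, 6]
rt.row_splits  # [0, 4, 4, 7, 8, 8]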

SparseTensor

# Define a sparse tensor representing the following dense tensor:
# [[1, 0, 0, 0]
#  [0, 0, 2, 0]
#  [0, 0, 0, 0]]
tf.sparse.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2],
                       dense_shape=[3, 4])
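To materialize a sparse tensor, tf.sparse.to_dense fills the unspecified entries with zeros:

import tensorflow as tf

st = tf.sparse.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2],
                            dense_shape=[3, 4])
tf.sparse.to_dense(st)
# [[1, 0, 0, 0],
#  [0, 0, 2, 0],
#  [0, 0, 0, 0]]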

Variable

# Create a variable.
v = tf.Variable(1.)

# Assign 2.0 to the variable.
v.assign(2.)

# Add 0.5 to the variable.
v.assign_add(0.5)

# Subtract 0.5 from the variable.
v.assign_sub(0.5)

# Matmul a variable and a constant tensor.
w = tf.Variable([[1.], [2.]])
x = tf.constant([[3., 4.]])
tf.matmul(w, x)

# A variable can only be created once within a tf.function.
class M(tf.Module):
  @tf.function
  def __call__(self, x):
    if not hasattr(self, "v"):  # Or set self.v to None in __init__.
      self.v = tf.Variable(x)
    return self.v * x
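Variables are the usual targets of automatic differentiation; a minimal sketch with tf.GradientTape:

import tensorflow as tf

v = tf.Variable(3.0)
with tf.GradientTape() as tape:
  loss = v * v  # Trainable variables are watched automatically.
grad = tape.gradient(loss, v)  # d(v^2)/dv = 2v = 6.0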

tf.data

# Create datasets using range.
tf.data.Dataset.range(5)                                # [0, 1, 2, 3, 4]
tf.data.Dataset.range(2, 5)                             # [2, 3, 4]
tf.data.Dataset.range(1, 5, 2)                          # [1, 3]
tf.data.Dataset.range(1, 5, -2)                         # []
tf.data.Dataset.range(5, 1)                             # []
tf.data.Dataset.range(5, 1, -2)                         # [5, 3]
tf.data.Dataset.range(2, 5, output_type=tf.int32)       # [2, 3, 4]
tf.data.Dataset.range(1, 5, 2, output_type=tf.float32)  # [1.0, 3.0]
# Create a dataset from a Python list.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])

# Create a dataset from text files.
dataset = tf.data.TextLineDataset(["file1.txt", "file2.txt"])

# Create a dataset from TFRecord files.
dataset = tf.data.TFRecordDataset(["file1.tfrecords", "file2.tfrecords"])

# Create a dataset using all files matching a pattern.
dataset = tf.data.Dataset.list_files("/path/*.txt")

# Split a dataset into batches.
dataset = tf.data.Dataset.range(8)
dataset = dataset.batch(3)  # [[0, 1, 2], [3, 4, 5], [6, 7]]

# Transform a dataset.
dataset = dataset.map(lambda x: x * 2)

# Prefetch a dataset.
dataset = tf.data.Dataset.range(3)
dataset = dataset.prefetch(2)

# Repeat a dataset.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
dataset = dataset.repeat(3)  # [1, 2, 3, 1, 2, 3, 1, 2, 3]

# Shuffle a dataset. With reshuffle_each_iteration=False, every repetition
# uses the same (random) order, e.g. [1, 0, 2, 1, 0, 2].
dataset = tf.data.Dataset.range(3)
dataset = dataset.shuffle(3, reshuffle_each_iteration=False)
dataset = dataset.repeat(2)

# Concatenate two datasets.
a = tf.data.Dataset.range(1, 4)  # [1, 2, 3]
b = tf.data.Dataset.range(4, 8)  # [4, 5, 6, 7]
ds = a.concatenate(b)            # [1, 2, 3, 4, 5, 6, 7]

# Zip two datasets.
a = tf.data.Dataset.range(1, 4)   # [1, 2, 3]
b = tf.data.Dataset.range(4, 7)   # [4, 5, 6]
ds = tf.data.Dataset.zip((a, b))  # [(1, 4), (2, 5), (3, 6)]

# Iterate over a dataset.
for element in dataset:
  print(element)
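These transformations are usually chained into one pipeline; an illustrative sketch (tf.data.AUTOTUNE is tf.data.experimental.AUTOTUNE before TF 2.4):

import tensorflow as tf

dataset = (tf.data.Dataset.range(100)
           .shuffle(buffer_size=100)
           .map(lambda x: x * 2, num_parallel_calls=tf.data.AUTOTUNE)
           .batch(32)
           .prefetch(tf.data.AUTOTUNE))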

tf.math

# Get absolute values.
x = tf.constant([-2.25, 3.25])
tf.abs(x)  # [2.25, 3.25]

# Add a scalar to a list.
tf.add([1, 2, 3, 4, 5], 1)  # [2, 3, 4, 5, 6]

# Add two tensors.
x = tf.convert_to_tensor([1, 2, 3, 4, 5])
y = tf.convert_to_tensor(1)
z = x + y  # [2, 3, 4, 5, 6]

# Add a list and a tensor.
x = [1, 2, 3, 4, 5]
y = tf.constant([1, 2, 3, 4, 5])
tf.add(x, y)  # [2, 4, 6, 8, 10]

# Add n tensors.
a = tf.constant([[3, 5], [4, 8]])
b = tf.constant([[1, 6], [2, 9]])
tf.math.add_n([a, b, a])  # [[7, 16], [10, 25]]

# Get the cumulative sum.
x = tf.constant([2, 4, 6, 8])
tf.cumsum(x)  # [2, 6, 12, 20]

# Get the cumulative sum along a given axis.
y = tf.constant([[2, 4, 6, 8], [1, 3, 5, 7]])
tf.cumsum(y, axis=0)  # [[2, 4, 6, 8], [3, 7, 11, 15]]
tf.cumsum(y, axis=1)  # [[2, 6, 12, 20], [1, 4, 9, 16]]

# Get the exclusive cumulative sum.
x = tf.constant([2, 4, 6, 8])
tf.cumsum(x, exclusive=True)  # [0, 2, 6, 12]

# Get the reverse cumulative sum.
x = tf.constant([2, 4, 6, 8])
tf.cumsum(x, reverse=True)  # [20, 18, 14, 8]

# Divide tensors.
x = tf.constant([16, 12, 11])
y = tf.constant([4, 6, 2])
tf.divide(x, y)  # [4.0, 2.0, 5.5]

# Compare a tensor with a scalar element-wise.
x = tf.constant([2, 4])
y = tf.constant(2)
tf.math.equal(x, y)  # [True, False]

# Compare two tensors element-wise.
x = tf.constant([2, 4])
y = tf.constant([2, 4])
tf.math.equal(x, y)  # [True, True]

# Multiply tensors.
x = tf.constant([1, 2, 3, 4])
tf.math.multiply(x, x)  # [1, 4, 9, 16]

# Multiply tensors of different shapes with broadcasting.
x = tf.ones([1, 2])
y = tf.ones([2, 1])
x * y  # [[1.0, 1.0], [1.0, 1.0]]

# Compute the power of one value to another.
x = tf.constant([[2, 2], [3, 3]])
y = tf.constant([[8, 16], [2, 3]])
tf.pow(x, y)  # [[256, 65536], [9, 27]]

# Compute the sigmoid of a tensor.
x = tf.constant([0.0, 1.0, 50.0, 100.0])
tf.math.sigmoid(x)  # [0.5, 0.7310586, 1.0, 1.0]
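Reductions collapse one or more axes of a tensor; a few common ones:

import tensorflow as tf

x = tf.constant([[1., 2.], [3., 4.]])
tf.reduce_sum(x)           # 10.0
tf.reduce_sum(x, axis=0)   # [4.0, 6.0]
tf.reduce_mean(x, axis=1)  # [1.5, 3.5]
tf.reduce_max(x)           # 4.0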

tf.linalg

# Transpose a matrix.
x = tf.constant([[1, 2, 3], [4, 5, 6]])
tf.linalg.matrix_transpose(x)  # [[1, 4], [2, 5], [3, 6]]

# Matmul two tensors.
a = tf.constant([[1, 2, 3], [4, 5, 6]])
b = tf.constant([[7, 8], [9, 10], [11, 12]])
c = tf.matmul(a, b)  # [[58, 64], [139, 154]]
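tf.linalg also provides decompositions and solvers; for example, solving Ax = b (float dtypes are required):

import tensorflow as tf

A = tf.constant([[3., 1.], [1., 2.]])
b = tf.constant([[9.], [8.]])
tf.linalg.solve(A, b)  # [[2.0], [3.0]]
tf.linalg.det(A)       # 5.0
tf.linalg.inv(A)       # The inverse of A.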

tf.distribute

# Define a mirrored strategy, and create a variable in it.
# The variable will be mirrored on both GPU:0 and GPU:1.
strategy = tf.distribute.MirroredStrategy(["GPU:0", "GPU:1"])
with strategy.scope():
  x = tf.Variable(1.)
# Variables (e.g., x in this example) created inside a tf.function are still
# mirrored when the function is called under the strategy scope.
x = []

@tf.function  # Wrap the function with tf.function.
def create_variable():
  if not x:
    x.append(tf.Variable(1.))
  return x[0]

strategy = tf.distribute.MirroredStrategy(["GPU:0", "GPU:1"])
with strategy.scope():
  _ = create_variable()
# A dataset can also be distributed to multiple devices within the
# MirroredStrategy.
my_strategy = tf.distribute.MirroredStrategy()
with my_strategy.scope():
  @tf.function
  def distribute_train_epoch(dataset):
    def replica_fn(input):
      # Process input and return result.
      return result

    total_result = 0
    for x in dataset:
      per_replica_result = my_strategy.run(replica_fn, args=(x,))
      total_result += my_strategy.reduce(tf.distribute.ReduceOp.SUM,
                                         per_replica_result, axis=None)
    return total_result

  dist_dataset = my_strategy.experimental_distribute_dataset(dataset)
  for _ in range(EPOCHS):
    train_result = distribute_train_epoch(dist_dataset)
# MultiWorkerMirroredStrategy is used for distributed training across
# multiple workers.
strategy = tf.distribute.MultiWorkerMirroredStrategy()

@tf.function
def train_step(iterator):
  def step_fn(inputs):
    features, labels = inputs
    with tf.GradientTape() as tape:
      logits = model(features, training=True)
      loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

  strategy.run(step_fn, args=(next(iterator),))

for _ in range(NUM_STEP):
  train_step(iterator)
# Use TPUStrategy to train a model on TPUs.
import numpy as np  # Used by dataset_fn below.

resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.TPUStrategy(resolver)

with strategy.scope():
  model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(5,))])
  optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

def dataset_fn(ctx):
  x = np.random.random((2, 5)).astype(np.float32)
  y = np.random.randint(2, size=(2, 1))
  dataset = tf.data.Dataset.from_tensor_slices((x, y))
  return dataset.repeat().batch(1, drop_remainder=True)

dist_dataset = strategy.distribute_datasets_from_function(dataset_fn)
iterator = iter(dist_dataset)

@tf.function
def train_step(iterator):
  def step_fn(inputs):
    features, labels = inputs
    with tf.GradientTape() as tape:
      logits = model(features, training=True)
      loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

  strategy.run(step_fn, args=(next(iterator),))

train_step(iterator)
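For most models the custom loop above is unnecessary: create and compile a Keras model inside strategy.scope() and model.fit distributes the batches. A minimal sketch with illustrative random data:

import numpy as np
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
  # Variables (layer weights, optimizer slots) are created under the scope.
  model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(5,))])
  model.compile(optimizer='sgd', loss='mse')

x = np.random.random((64, 5)).astype(np.float32)
y = np.random.random((64, 1)).astype(np.float32)
model.fit(x, y, batch_size=8, epochs=1)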

tf.saved_model

# Define a tf module.
class Adder(tf.Module):
  @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.float32)])
  def add(self, x):
    return x + x

# Save the tf module as our model.
model = Adder()
tf.saved_model.save(model, '/tmp/adder')

# Load the model.
loaded_model = tf.saved_model.load('/tmp/adder')
loaded_model.add(1.)  # Returns a tensor with a value of 2.0.
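Continuing the Adder example, signatures can be passed explicitly at save time and inspected after loading; these are the entry points that serving systems such as TensorFlow Serving call:

tf.saved_model.save(model, '/tmp/adder',
                    signatures={'serving_default': model.add})
loaded_model = tf.saved_model.load('/tmp/adder')
infer = loaded_model.signatures['serving_default']
infer(tf.constant(1.))  # A dict of output tensors, e.g. {'output_0': 2.0}.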

tf.keras

Model

# Define a keras model by subclassing tf.keras.Model.
import tensorflow as tf

class MyModel(tf.keras.Model):

  def __init__(self):
    super().__init__()
    self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu)
    self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax)

  def call(self, inputs):
    x = self.dense1(inputs)
    return self.dense2(x)

model = MyModel()
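A subclassed model creates its weights on the first call; continuing the example above:

output = model(tf.random.normal((2, 8)))  # Shape (2, 5): softmax over 5 units.
len(model.weights)  # 4: a kernel and a bias for each Dense layer.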

Sequential

# Optionally, the first layer can receive an `input_shape` argument:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# Afterwards, we do automatic shape inference:
model.add(tf.keras.layers.Dense(4))

# This is identical to the following:
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(16,)))
model.add(tf.keras.layers.Dense(8))

# Note that you can also omit the `input_shape` argument.
# In that case the model doesn't have any weights until the first call
# to a training/evaluation method (since it isn't yet built):
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8))
model.add(tf.keras.layers.Dense(4))
# model.weights not created yet

# Whereas if you specify the input shape, the model gets built
# continuously as you are adding layers:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
model.add(tf.keras.layers.Dense(4))
len(model.weights)  # Returns "4"

# When using the delayed-build pattern (no input shape specified), you can
# choose to manually build your model by calling `build(batch_input_shape)`:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8))
model.add(tf.keras.layers.Dense(4))
model.build((None, 16))
len(model.weights)  # Returns "4"

# Note that when using the delayed-build pattern (no input shape specified),
# the model gets built the first time you call `fit`, `eval`, or `predict`,
# or the first time you call the model on some input data.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8))
model.add(tf.keras.layers.Dense(1))
model.compile(optimizer='sgd', loss='mse')
# This builds the model for the first time (x and y are training data):
model.fit(x, y, batch_size=32, epochs=10)
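Once built, model.summary() prints the layer stack and parameter counts:

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, input_shape=(16,)),
    tf.keras.layers.Dense(4),
])
model.summary()  # Lists both Dense layers with output shapes and param counts.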

Layers

# Define a relu activation layer.
layer = tf.keras.layers.Activation('relu')
output = layer([-3.0, -1.0, 0.0, 2.0])  # [0.0, 0.0, 0.0, 2.0]

# Define an Add layer.
input_shape = (2, 3, 4)
x1 = tf.random.normal(input_shape)
x2 = tf.random.normal(input_shape)
y = tf.keras.layers.Add()([x1, x2])  # The shape of y is (2, 3, 4).

# Define an Average layer (requires `import numpy as np`).
x1 = np.ones((2, 2))
x2 = np.zeros((2, 2))
y = tf.keras.layers.Average()([x1, x2])  # [[0.5, 0.5], [0.5, 0.5]]

# Define an AveragePooling1D layer.
x = tf.constant([1., 2., 3., 4., 5.])
x = tf.reshape(x, [1, 5, 1])
avg_pool_1d = tf.keras.layers.AveragePooling1D(pool_size=2,
                                               strides=1,
                                               padding='valid')
avg_pool_1d(x)  # [[[1.5], [2.5], [3.5], [4.5]]]

# Define a Concatenate layer.
x = np.arange(20).reshape(2, 2, 5)
y = np.arange(20, 30).reshape(2, 1, 5)
z = tf.keras.layers.Concatenate(axis=1)([x, y])  # The shape of z is (2, 3, 5).

# Define a Conv1D layer.
# The inputs are 128-length vectors with 10 timesteps, and the batch size is 4.
input_shape = (4, 10, 128)
x = tf.random.normal(input_shape)
y = tf.keras.layers.Conv1D(32, 3, activation='relu',
                           input_shape=input_shape[1:])(x)
# The shape of y is (4, 8, 32).

# Define a Conv2D layer.
input_shape = (4, 28, 28, 3)
x = tf.random.normal(input_shape)
y = tf.keras.layers.Conv2D(2, 3, activation='relu',
                           input_shape=input_shape[1:])(x)
# The shape of y is (4, 26, 26, 2).

# Define a Conv3D layer.
input_shape = (4, 28, 28, 28, 1)
x = tf.random.normal(input_shape)
y = tf.keras.layers.Conv3D(2, 3, activation='relu',
                           input_shape=input_shape[1:])(x)
# The shape of y is (4, 26, 26, 26, 2).

# Define a Dense layer with relu as its activation function.
tf.keras.layers.Dense(32, activation='relu')

# Define a Dropout layer with a 20% drop rate.
tf.keras.layers.Dropout(.2, input_shape=(2,))

# Define an Embedding layer with an input dimension of 1000 and an output
# dimension of 64.
tf.keras.layers.Embedding(1000, 64, input_length=10)

# Define a Flatten layer.
tf.keras.layers.Flatten()

# Define a Hashing layer with 32 bins.
tf.keras.layers.Hashing(num_bins=32)

# Define a four-unit LSTM layer.
tf.keras.layers.LSTM(4)

# Define a string lookup layer. Index 0 is reserved for out-of-vocabulary
# tokens ("z" here).
vocab = ["a", "b", "c", "d"]
data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
layer = tf.keras.layers.StringLookup(vocabulary=vocab)
layer(data)  # [[1, 3, 4], [4, 0, 2]]
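Layers compose into a model with the Keras functional API; a small sketch:

import tensorflow as tf

inputs = tf.keras.Input(shape=(16,))
x = tf.keras.layers.Dense(8, activation='relu')(inputs)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(4, activation='softmax')(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)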

Save & Load

# Define a model.
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(5, input_shape=(3,)),
     tf.keras.layers.Softmax()])

# Save the model to /tmp/model.
model.save('/tmp/model')
# Or equivalently:
tf.keras.models.save_model(model, '/tmp/model')

# Load the model back from /tmp/model.
loaded_model = tf.keras.models.load_model('/tmp/model')
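To persist only the weights (e.g., for checkpointing during training) rather than the full model, continuing the example above:

model.save_weights('/tmp/weights')  # Writes a TensorFlow checkpoint.

# Restoring requires a model with the same architecture.
new_model = tf.keras.Sequential(
    [tf.keras.layers.Dense(5, input_shape=(3,)),
     tf.keras.layers.Softmax()])
new_model.load_weights('/tmp/weights')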

Example Model Training Pipelines

A Sequential Model Training Pipeline

import tensorflow as tf

# Load training data.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Construct the model.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10)
])

# Define the loss function.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Compile the model.
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# Train the model.
model.fit(x_train, y_train, epochs=5)

# Evaluate the model.
model.evaluate(x_test, y_test, verbose=2)
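The model above outputs raw logits; to get class probabilities at inference time, wrap it with a Softmax layer (as in the official beginner tutorial):

probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])
probability_model(x_test[:5])  # Each row sums to 1 across the 10 classes.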

A Custom Model Training Pipeline

import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model

# Get the dataset.
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Add a channels dimension.
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")

# Process the dataset with tf.data.
train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

# Define the model.
class MyModel(Model):
  def __init__(self):
    super(MyModel, self).__init__()
    self.conv1 = Conv2D(32, 3, activation='relu')
    self.flatten = Flatten()
    self.d1 = Dense(128, activation='relu')
    self.d2 = Dense(10)

  def call(self, x):
    x = self.conv1(x)
    x = self.flatten(x)
    x = self.d1(x)
    return self.d2(x)

# Create an instance of the model.
model = MyModel()

# Define the loss function.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Define the optimizer.
optimizer = tf.keras.optimizers.Adam()

# Define train loss and accuracy metrics.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

# Define test loss and accuracy metrics.
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='test_accuracy')

# Define the train step with tf.function.
@tf.function
def train_step(images, labels):
  with tf.GradientTape() as tape:
    # training=True is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = model(images, training=True)
    loss = loss_object(labels, predictions)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  train_loss(loss)
  train_accuracy(labels, predictions)

# Define the test step with tf.function.
@tf.function
def test_step(images, labels):
  # training=False is only needed if there are layers with different
  # behavior during training versus inference (e.g. Dropout).
  predictions = model(images, training=False)
  t_loss = loss_object(labels, predictions)

  test_loss(t_loss)
  test_accuracy(labels, predictions)

# Train and evaluate the model.
for epoch in range(4):
  # Reset the metrics at the start of the next epoch.
  train_loss.reset_states()
  train_accuracy.reset_states()
  test_loss.reset_states()
  test_accuracy.reset_states()

  for images, labels in train_ds:
    train_step(images, labels)

  for test_images, test_labels in test_ds:
    test_step(test_images, test_labels)

  print(
      f'Epoch {epoch + 1}, '
      f'Loss: {train_loss.result()}, '
      f'Accuracy: {train_accuracy.result() * 100}, '
      f'Test Loss: {test_loss.result()}, '
      f'Test Accuracy: {test_accuracy.result() * 100}'
  )
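For longer runs it is common to checkpoint the model and optimizer between epochs; a minimal sketch with tf.train.Checkpoint, continuing the example above (the path is illustrative):

ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
manager = tf.train.CheckpointManager(ckpt, '/tmp/tf_ckpts', max_to_keep=3)
manager.save()                           # Call once per epoch inside the loop.
ckpt.restore(manager.latest_checkpoint)  # Resume from the latest checkpoint.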