TensorFlow Serving C++ API Documentation
tensorflow_serving/batching/batching_options.h
/* Copyright 2020 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_SERVING_BATCHING_BATCHING_OPTIONS_H_
#define TENSORFLOW_SERVING_BATCHING_BATCHING_OPTIONS_H_

#include <vector>

namespace tensorflow {
namespace serving {

// Batching options.
struct BatchingOptions {
  // If set, restricts the allowed tensor batch sizes.
  //
  // When the batch scheduler forms a batch of size N, the batch size is
  // rounded up to the smallest value M in 'allowed_batch_sizes' s.t. M >= N.
  // The tensors submitted to the "Run()" call are padded with M-N repeats of
  // one of the first N entries (i.e. a guaranteed valid entry). The last M-N
  // entries of the output tensors are ignored.
  //
  // This option is useful when the underlying platform has some per-batch-size
  // overhead, to limit the number of distinct batch sizes that can occur. It
  // may be sensible to use an exponential sequence e.g. [8, 16, 32, ...,
  // max_batch_size], a linear one e.g. [100, 200, 300, ..., max_batch_size],
  // or perhaps a hybrid e.g. [8, 16, 32, 64, 100, 200, 300, ...,
  // max_batch_size].
  //
  // IMPORTANT: The entries must be in increasing order.
  //
  // IMPORTANT: The final entry in 'allowed_batch_sizes' must equal the maximum
  // batch size parameter supplied to the batch scheduler.
  //
  // If left empty, no rounding/padding is performed.
  std::vector<int> allowed_batch_sizes;

  // If set to true, padding is performed for tensors of the same name but
  // with unequal dimensions (modulo the zeroth dimension), so that all
  // tensors of the same name have equal dim sizes. For each tensor, its first
  // element is used as the padding value.
  //
  // For example:
  // given input tensors of shapes [1, 500, 101], [2, 300, 101], [1, 400, 101],
  // they will be padded to shapes [1, 500, 101], [2, 500, 101], [1, 500, 101].
  // Padding is not performed in the zeroth dimension.
  //
  // Supported tensor datatypes:
  // DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16,
  // DT_UINT16, DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128,
  // DT_STRING, DT_BOOL, DT_QINT8, DT_QUINT8, DT_QINT16,
  // DT_QUINT16, DT_QINT32, DT_HALF, DT_RESOURCE.
  //
  // Supported ranks: from 1 to 6.
  //
  // This option is useful when serving recurrent models (such as LSTMs),
  // which typically accept variable-length inputs. When training such models,
  // the typical strategy is to save the sequence lengths for decoding and pad
  // the variable-length dims to the maximum within the batch. This option
  // achieves similar behavior when serving with batching; it is assumed that
  // the sequence lengths have already been saved.
  //
  // If tensors with the same name have different shapes (modulo the zeroth
  // dimension) and this option is set to false, an error Status will be
  // returned.
  bool pad_variable_length_inputs = false;
};

}  // namespace serving
}  // namespace tensorflow

#endif  // TENSORFLOW_SERVING_BATCHING_BATCHING_OPTIONS_H_
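
Usage sketch for 'allowed_batch_sizes': the self-contained snippet below mirrors the rounding rule documented above (round a batch of size N up to the smallest allowed M >= N). The local BatchingOptions stand-in and the RoundUpBatchSize helper are hypothetical, written here only for illustration; the real scheduler performs the rounding and padding internally.

#include <algorithm>
#include <cassert>
#include <iostream>
#include <vector>

// Minimal stand-in so this sketch compiles on its own; the real struct is
// tensorflow::serving::BatchingOptions, defined in the header above.
struct BatchingOptions {
  std::vector<int> allowed_batch_sizes;
  bool pad_variable_length_inputs = false;
};

// Illustrative helper (not part of TensorFlow Serving): rounds a batch of
// size n up to the smallest allowed size M >= n.
int RoundUpBatchSize(const std::vector<int>& allowed, int n) {
  // 'allowed' must be in increasing order (see the IMPORTANT notes above).
  auto it = std::lower_bound(allowed.begin(), allowed.end(), n);
  assert(it != allowed.end() && "n must not exceed the final (max) entry");
  return *it;
}

int main() {
  BatchingOptions options;
  // An exponential sequence; the final entry must equal the maximum batch
  // size supplied to the batch scheduler.
  options.allowed_batch_sizes = {8, 16, 32, 64, 128};

  // A batch of 20 requests is rounded up to M = 32; the 12 padding rows are
  // repeats of valid entries, and their outputs are ignored.
  std::cout << RoundUpBatchSize(options.allowed_batch_sizes, 20) << "\n";
  return 0;
}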
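
Shape sketch for 'pad_variable_length_inputs': the snippet below reproduces the shape arithmetic of the documented example ([1, 500, 101], [2, 300, 101], [1, 400, 101] padded to a common [*, 500, 101]). PadShapes is a hypothetical helper, not the library's implementation; it only computes the resulting shapes, whereas the real option also fills the padded region using each tensor's first element.

#include <algorithm>
#include <iostream>
#include <vector>

// Illustrative helper (not part of TensorFlow Serving): given the shapes of
// same-named tensors across a batch, computes the common padded shape.
// Dimension 0 (the batch dimension) is never padded; every other dimension
// is raised to the maximum size observed across the tensors.
std::vector<std::vector<int>> PadShapes(std::vector<std::vector<int>> shapes) {
  const size_t rank = shapes.front().size();  // supported ranks: 1 to 6
  for (size_t d = 1; d < rank; ++d) {         // skip the zeroth dimension
    int max_dim = 0;
    for (const auto& s : shapes) max_dim = std::max(max_dim, s[d]);
    for (auto& s : shapes) s[d] = max_dim;
  }
  return shapes;
}

int main() {
  // The shapes from the example in the header comments.
  auto padded = PadShapes({{1, 500, 101}, {2, 300, 101}, {1, 400, 101}});
  // Prints: [1,500,101] [2,500,101] [1,500,101]
  for (const auto& s : padded) {
    std::cout << "[" << s[0] << "," << s[1] << "," << s[2] << "] ";
  }
  std::cout << "\n";
  return 0;
}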