TensorFlow Serving C++ API Documentation
main.cc
/* Copyright 2016 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
 
// gRPC server implementation of
// tensorflow_serving/apis/prediction_service.proto.
//
// It brings up a standard server to serve a single TensorFlow model using
// command line flags, or multiple models via a config file (see the
// illustrative config sketches below).
//
// ModelServer prioritizes easy invocation over flexibility, and thus serves a
// statically configured set of models. New versions of these models are
// loaded and managed over time by the AspiredVersionsManager at:
// tensorflow_serving/core/aspired_versions_manager.h
// using the AvailabilityPreservingPolicy at:
// tensorflow_serving/core/availability_preserving_policy.h
//
// ModelServer has inter-request batching support built in, using the
// BatchingSession at:
// tensorflow_serving/batching/batching_session.h
//
// To serve a single model, run with:
// $path_to_binary/tensorflow_model_server \
//   --model_base_path=[/tmp/my_model | gs://gcs_address]
// IMPORTANT: Be sure the base path excludes the version directory. For
// example, for a model at /tmp/my_model/123, where 123 is the version, the
// base path is /tmp/my_model.
//
// To specify the model name (default "default"): --model_name=my_name
// To specify the port (default 8500): --port=my_port
// To enable batching (default disabled): --enable_batching
// To override the default batching parameters: --batching_parameters_file
 
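// The sketches below are illustrative only; file contents, model names, and
// paths are placeholders, not part of this file. A --model_config_file is an
// ASCII ModelServerConfig protobuf listing the models to serve, e.g.:
//
//   model_config_list {
//     config {
//       name: "my_model"
//       base_path: "/models/my_model"
//       model_platform: "tensorflow"
//     }
//   }
//
// A --batching_parameters_file is an ASCII BatchingParameters protobuf; the
// values below are examples, not recommendations:
//
//   max_batch_size { value: 128 }
//   batch_timeout_micros { value: 0 }
//   max_enqueued_batches { value: 1000000 }
//   num_batch_threads { value: 8 }
//
// An example invocation combining these flags (all paths hypothetical):
//
//   tensorflow_model_server --port=8500 --rest_api_port=8501 \
//       --model_config_file=/models/models.config \
//       --enable_batching --batching_parameters_file=/models/batching.config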
#include <iostream>
#include <vector>

#include "tensorflow/c/c_api.h"
#include "tensorflow/compiler/jit/flags.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/util/command_line_flags.h"
#include "tensorflow_serving/model_servers/server.h"
#include "tensorflow_serving/model_servers/version.h"
 
#if defined(LIBTPU_ON_GCE) || defined(PLATFORM_CLOUD_TPU)
#include "tensorflow/core/protobuf/tpu/topology.pb.h"
#include "tensorflow/core/tpu/tpu_global_init.h"
 
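// Applies TPU serving defaults to the server options (no session-run timeout,
// "tpu,serve" SavedModel tags) and, unless --skip_initialize_tpu is set,
// initializes the TPU system once and sizes warmup replay to the number of
// TPU devices per task.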
void InitializeTPU(tensorflow::serving::main::Server::Options& server_options) {
  server_options.enforce_session_run_timeout = false;
  if (server_options.saved_model_tags.empty()) {
    server_options.saved_model_tags = "tpu,serve";
  }

  if (server_options.skip_initialize_tpu) {
    std::cout << "Skipping TPU system initialization at the model server level.";
    return;
  }
  std::cout << "Initializing TPU system.";
  tensorflow::tpu::TopologyProto tpu_topology;
  TF_QCHECK_OK(tensorflow::InitializeTPUSystemGlobally(
      tensorflow::Env::Default(), &tpu_topology))
      << "Failed to initialize TPU system.";
  std::cout << "Initialized TPU topology: " << tpu_topology.DebugString();
  server_options.num_request_iterations_for_warmup =
      tpu_topology.num_tpu_devices_per_task();
}
#endif
 
int main(int argc, char** argv) {
  tensorflow::serving::main::Server::Options options;
  bool display_version = false;
  bool xla_cpu_compilation_enabled = false;
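  // Each flag below binds a command-line option to a field of Server::Options
  // (or to a local variable such as display_version or
  // xla_cpu_compilation_enabled).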
  std::vector<tensorflow::Flag> flag_list = {
      tensorflow::Flag("port", &options.grpc_port,
                       "TCP port to listen on for gRPC/HTTP API. Disabled if "
                       "port set to zero."),
      tensorflow::Flag("grpc_socket_path", &options.grpc_socket_path,
                       "If non-empty, listen to a UNIX socket for gRPC API "
                       "on the given path. Can be either relative or absolute "
                       "path."),
      tensorflow::Flag("rest_api_port", &options.http_port,
                       "Port to listen on for HTTP/REST API. If set to zero "
                       "HTTP/REST API will not be exported. This port must be "
                       "different than the one specified in --port."),
      tensorflow::Flag("rest_api_num_threads", &options.http_num_threads,
                       "Number of threads for HTTP/REST API processing. If not "
                       "set, will be auto set based on number of CPUs."),
      tensorflow::Flag("rest_api_timeout_in_ms", &options.http_timeout_in_ms,
                       "Timeout for HTTP/REST API calls."),
      tensorflow::Flag("rest_api_enable_cors_support",
                       &options.enable_cors_support,
                       "Enable CORS headers in responses."),
      tensorflow::Flag("enable_batching", &options.enable_batching,
                       "Enable batching."),
      tensorflow::Flag(
          "allow_version_labels_for_unavailable_models",
          &options.allow_version_labels_for_unavailable_models,
          "If true, allows assigning unused version labels to models that are "
          "not available yet."),
      tensorflow::Flag("batching_parameters_file",
                       &options.batching_parameters_file,
                       "If non-empty, read an ascii BatchingParameters "
                       "protobuf from the supplied file name and use the "
                       "contained values instead of the defaults."),
      tensorflow::Flag(
          "enable_per_model_batching_parameters",
          &options.enable_per_model_batching_params,
          "Enables model-specific batching params like batch "
          "sizes, timeouts, batching feature flags to be read from "
          "`batching_params.pbtxt` file present in the SavedModel dir "
          "of the model. Associated params in the global config "
          "from --batching_parameters_file are *ignored*. Only "
          "threadpool (name and size) related params are used from "
          "the global config, as this threadpool is shared across "
          "all the models that want to batch requests. This option "
          "is only applicable when the --enable_batching flag is set."),
      tensorflow::Flag("model_config_file", &options.model_config_file,
                       "If non-empty, read an ascii ModelServerConfig "
                       "protobuf from the supplied file name, and serve the "
                       "models in that file. This config file can be used to "
                       "specify multiple models to serve and other advanced "
                       "parameters including non-default version policy. (If "
                       "used, --model_name, --model_base_path are ignored.)"),
      tensorflow::Flag("model_config_file_poll_wait_seconds",
                       &options.fs_model_config_poll_wait_seconds,
                       "Interval in seconds between each poll of the "
                       "filesystem for model_config_file. If unset or set to "
                       "zero, poll will be done exactly once and not "
                       "periodically. Setting this to negative is reserved "
                       "for testing purposes only."),
      tensorflow::Flag("model_name", &options.model_name,
                       "Name of the model (ignored "
                       "if the --model_config_file flag is set)."),
      tensorflow::Flag("model_base_path", &options.model_base_path,
                       "Path to export (ignored if --model_config_file flag "
                       "is set, otherwise required)."),
      tensorflow::Flag("num_load_threads", &options.num_load_threads,
                       "The number of threads in the thread-pool used to load "
                       "servables. If set to 0, we don't use a thread-pool, "
                       "and servable loads are performed serially in the "
                       "manager's main work loop, which may cause serving "
                       "requests to be delayed. Default: 0"),
      tensorflow::Flag("num_unload_threads", &options.num_unload_threads,
                       "The number of threads in the thread-pool used to "
                       "unload servables. If set to 0, we don't use a "
                       "thread-pool, and servable unloads are performed "
                       "serially in the manager's main work loop, which may "
                       "cause serving requests to be delayed. Default: 0"),
      tensorflow::Flag("max_num_load_retries", &options.max_num_load_retries,
                       "Maximum number of times it retries loading a model "
                       "after the first failure, before giving up. "
                       "If set to 0, a load is attempted only once. "
                       "Default: 5"),
      tensorflow::Flag("load_retry_interval_micros",
                       &options.load_retry_interval_micros,
                       "The interval, in microseconds, between each servable "
                       "load retry. If set negative, it doesn't wait. "
                       "Default: 1 minute"),
      tensorflow::Flag("file_system_poll_wait_seconds",
                       &options.file_system_poll_wait_seconds,
                       "Interval in seconds between each poll of the "
                       "filesystem for new model version. If set to zero "
                       "poll will be done exactly once and not periodically. "
                       "Setting this to negative value will disable polling "
                       "entirely causing ModelServer to indefinitely wait for "
                       "a new model at startup. Negative values are reserved "
                       "for testing purposes only."),
      tensorflow::Flag("flush_filesystem_caches",
                       &options.flush_filesystem_caches,
                       "If true (the default), filesystem caches will be "
                       "flushed after the initial load of all servables, and "
                       "after each subsequent individual servable reload (if "
                       "the number of load threads is 1). This reduces memory "
                       "consumption of the model server, at the potential cost "
                       "of cache misses if model files are accessed after "
                       "servables are loaded."),
      tensorflow::Flag("tensorflow_session_parallelism",
                       &options.tensorflow_session_parallelism,
                       "Number of threads to use for running a "
                       "Tensorflow session. Auto-configured by default. "
                       "Note that this option is ignored if "
                       "--platform_config_file is non-empty."),
      tensorflow::Flag(
          "tensorflow_session_config_file",
          &options.tensorflow_session_config_file,
          "If non-empty, read an ascii TensorFlow Session "
          "ConfigProto protobuf from the supplied file name. Note, "
          "parts of the session config (threads, parallelism etc.) "
          "can be overridden if needed, via corresponding command "
          "line flags."),
      tensorflow::Flag("tensorflow_intra_op_parallelism",
                       &options.tensorflow_intra_op_parallelism,
                       "Number of threads to use to parallelize the execution "
                       "of an individual op. Auto-configured by default. "
                       "Note that this option is ignored if "
                       "--platform_config_file is non-empty."),
      tensorflow::Flag("tensorflow_inter_op_parallelism",
                       &options.tensorflow_inter_op_parallelism,
                       "Controls the number of operators that can be executed "
                       "simultaneously. Auto-configured by default. "
                       "Note that this option is ignored if "
                       "--platform_config_file is non-empty."),
      tensorflow::Flag("use_alts_credentials", &options.use_alts_credentials,
                       "Use Google ALTS credentials."),
      tensorflow::Flag(
          "ssl_config_file", &options.ssl_config_file,
          "If non-empty, read an ascii SSLConfig protobuf from "
          "the supplied file name and set up a secure gRPC channel."),
      tensorflow::Flag("platform_config_file", &options.platform_config_file,
                       "If non-empty, read an ascii PlatformConfigMap protobuf "
                       "from the supplied file name, and use that platform "
                       "config instead of the Tensorflow platform. (If used, "
                       "--enable_batching is ignored.)"),
      tensorflow::Flag(
          "per_process_gpu_memory_fraction",
          &options.per_process_gpu_memory_fraction,
          "Fraction of GPU memory that each process occupies; the value is "
          "between 0.0 and 1.0 (with 0.0 as the default). If 1.0, the server "
          "will allocate all of the memory when it starts; if 0.0, Tensorflow "
          "will automatically select a value."),
      tensorflow::Flag("saved_model_tags", &options.saved_model_tags,
                       "Comma-separated set of tags corresponding to the meta "
                       "graph def to load from SavedModel."),
      tensorflow::Flag("grpc_channel_arguments",
                       &options.grpc_channel_arguments,
                       "A comma separated list of arguments to be passed to "
                       "the grpc server. (e.g. "
                       "grpc.max_connection_age_ms=2000)"),
      tensorflow::Flag("grpc_max_threads", &options.grpc_max_threads,
                       "Max grpc server threads to handle grpc messages."),
      tensorflow::Flag("enable_model_warmup", &options.enable_model_warmup,
                       "Enables model warmup, which triggers lazy "
                       "initializations (such as TF optimizations) at load "
                       "time, to reduce first request latency."),
      tensorflow::Flag("num_request_iterations_for_warmup",
                       &options.num_request_iterations_for_warmup,
                       "Number of times a request is iterated during warmup "
                       "replay. This value is used only if > 0."),
      tensorflow::Flag("version", &display_version, "Display version."),
      tensorflow::Flag(
          "monitoring_config_file", &options.monitoring_config_file,
          "If non-empty, read an ascii MonitoringConfig protobuf from "
          "the supplied file name."),
      tensorflow::Flag(
          "remove_unused_fields_from_bundle_metagraph",
          &options.remove_unused_fields_from_bundle_metagraph,
          "Removes unused fields from the MetaGraphDef proto message to save "
          "memory."),
      tensorflow::Flag("prefer_tflite_model", &options.prefer_tflite_model,
                       "EXPERIMENTAL; CAN BE REMOVED ANYTIME! "
                       "Prefer the TensorFlow Lite model from the "
                       "`model.tflite` file in the SavedModel directory, "
                       "instead of the TensorFlow model from the "
                       "`saved_model.pb` file. If no TensorFlow Lite model is "
                       "found, fall back to the TensorFlow model."),
      tensorflow::Flag(
          "num_tflite_pools", &options.num_tflite_pools,
          "EXPERIMENTAL; CAN BE REMOVED ANYTIME! Number of TFLite interpreters "
          "in an interpreter pool of TfLiteSession. Typically there is one "
          "TfLiteSession for each TF Lite model that is loaded. If not "
          "set, will be auto set based on number of CPUs."),
      tensorflow::Flag(
          "num_tflite_interpreters_per_pool",
          &options.num_tflite_interpreters_per_pool,
          "EXPERIMENTAL; CAN BE REMOVED ANYTIME! Number of TFLite interpreters "
          "in an interpreter pool of TfLiteSession. Typically there is one "
          "TfLiteSession for each TF Lite model that is loaded. If not "
          "set, will be 1."),
      tensorflow::Flag(
          "enable_signature_method_name_check",
          &options.enable_signature_method_name_check,
          "Enable method_name check for SignatureDef. Disable this if serving "
          "native TF2 regression/classification models."),
      tensorflow::Flag(
          "xla_cpu_compilation_enabled", &xla_cpu_compilation_enabled,
          "EXPERIMENTAL; CAN BE REMOVED ANYTIME! "
          "Enable XLA:CPU JIT (default is disabled). With XLA:CPU JIT "
          "disabled, models utilizing this feature will return bad Status "
          "on first compilation request."),
      tensorflow::Flag("enable_profiler", &options.enable_profiler,
                       "Enable profiler service."),
      tensorflow::Flag("thread_pool_factory_config_file",
                       &options.thread_pool_factory_config_file,
                       "If non-empty, read an ascii ThreadPoolConfig protobuf "
                       "from the supplied file name."),
      tensorflow::Flag("mixed_precision", &options.mixed_precision,
                       "Specify the mixed_precision mode."),
      tensorflow::Flag("skip_initialize_tpu", &options.skip_initialize_tpu,
                       "Whether to skip auto-initializing the TPU."),
      tensorflow::Flag("enable_grpc_healthcheck_service",
                       &options.enable_grpc_healthcheck_service,
                       "Enable the standard gRPC healthcheck service.")};
 
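  // Build the usage message from the flag list, then parse the command line;
  // on a parse failure, print usage and exit with an error.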
  const auto& usage = tensorflow::Flags::Usage(argv[0], flag_list);
  if (!tensorflow::Flags::Parse(&argc, argv, flag_list)) {
    std::cout << usage;
    return -1;
  }

  tensorflow::port::InitMain(argv[0], &argc, &argv);
#if defined(LIBTPU_ON_GCE) || defined(PLATFORM_CLOUD_TPU)
  InitializeTPU(options);
#endif

  if (display_version) {
    std::cout << "TensorFlow ModelServer: " << TF_Serving_Version() << "\n"
              << "TensorFlow Library: " << TF_Version() << "\n";
    return 0;
  }

  if (argc != 1) {
    std::cout << "unknown argument: " << argv[1] << "\n" << usage;
  }

  if (!xla_cpu_compilation_enabled) {
    tensorflow::DisableXlaCompilation();
  }

  tensorflow::serving::main::Server server;
  const auto& status = server.BuildAndStart(options);
  if (!status.ok()) {
    std::cout << "Failed to start server. Error: " << status << "\n";
    return -1;
  }
  server.WaitForTermination();
  return 0;
}