#include <iostream>
#include <vector>

#include "tensorflow/c/c_api.h"
#include "tensorflow/compiler/jit/flags.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/util/command_line_flags.h"
#include "tensorflow_serving/model_servers/server.h"
#include "tensorflow_serving/model_servers/version.h"

#if defined(LIBTPU_ON_GCE) || defined(PLATFORM_CLOUD_TPU)
#include "tensorflow/core/protobuf/tpu/topology.pb.h"
#include "tensorflow/core/tpu/tpu_global_init.h"
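// Applies TPU-serving defaults (no session-run timeout, "tpu,serve" tags)
// and, unless --skip_initialize_tpu is set, initializes the TPU system once
// for the whole process. Only compiled into Cloud TPU builds.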
void InitializeTPU(tensorflow::serving::main::Server::Options& server_options) {
  server_options.enforce_session_run_timeout = false;
  if (server_options.saved_model_tags.empty()) {
    server_options.saved_model_tags = "tpu,serve";
  }
  if (server_options.skip_initialize_tpu) {
    std::cout << "Skipping model-server-level TPU system initialization.";
  } else {
    std::cout << "Initializing TPU system.";
    tensorflow::tpu::TopologyProto tpu_topology;
    TF_QCHECK_OK(tensorflow::InitializeTPUSystemGlobally(
        tensorflow::Env::Default(), &tpu_topology))
        << "Failed to initialize TPU system.";
    std::cout << "Initialized TPU topology: " << tpu_topology.DebugString();
    server_options.num_request_iterations_for_warmup =
        tpu_topology.num_tpu_devices_per_task();
  }
}
#endif
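// Entry point: server behavior is driven entirely by the command-line flags
// below, which are parsed directly into Server::Options.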
int main(int argc, char** argv) {
  tensorflow::serving::main::Server::Options options;
  bool display_version = false;
  bool xla_cpu_compilation_enabled = false;
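  // Each flag writes straight into the corresponding Server::Options field;
  // the description strings double as the --help/usage text.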
  std::vector<tensorflow::Flag> flag_list = {
      tensorflow::Flag("port", &options.grpc_port,
                       "TCP port to listen on for gRPC/HTTP API. Disabled if "
                       "port set to zero."),
      tensorflow::Flag("grpc_socket_path", &options.grpc_socket_path,
                       "If non-empty, listen to a UNIX socket for gRPC API "
                       "on the given path. Can be either relative or "
                       "absolute path."),
      tensorflow::Flag("rest_api_port", &options.http_port,
                       "Port to listen on for HTTP/REST API. If set to zero "
                       "HTTP/REST API will not be exported. This port must be "
                       "different from the one specified in --port."),
      tensorflow::Flag("rest_api_num_threads", &options.http_num_threads,
                       "Number of threads for HTTP/REST API processing. If "
                       "not set, will be auto set based on number of CPUs."),
      tensorflow::Flag("rest_api_timeout_in_ms", &options.http_timeout_in_ms,
                       "Timeout for HTTP/REST API calls."),
      tensorflow::Flag("rest_api_enable_cors_support",
                       &options.enable_cors_support,
                       "Enable CORS headers in response"),
      tensorflow::Flag("enable_batching", &options.enable_batching,
                       "enable batching"),
      tensorflow::Flag("allow_version_labels_for_unavailable_models",
                       &options.allow_version_labels_for_unavailable_models,
                       "If true, allows assigning unused version labels to "
                       "models that are not available yet."),
      tensorflow::Flag("batching_parameters_file",
                       &options.batching_parameters_file,
                       "If non-empty, read an ascii BatchingParameters "
                       "protobuf from the supplied file name and use the "
                       "contained values instead of the defaults."),
      tensorflow::Flag("enable_per_model_batching_parameters",
                       &options.enable_per_model_batching_params,
                       "Enables model-specific batching params like batch "
                       "sizes, timeouts, batching feature flags to be read "
                       "from `batching_params.pbtxt` file present in "
                       "SavedModel dir of the model. Associated params in "
                       "the global config from --batching_parameters_file "
                       "are *ignored*. Only threadpool (name and size) "
                       "related params are used from the global config, as "
                       "this threadpool is shared across all the models that "
                       "want to batch requests. This option is only "
                       "applicable when --enable_batching flag is set."),
      tensorflow::Flag("model_config_file", &options.model_config_file,
                       "If non-empty, read an ascii ModelServerConfig "
                       "protobuf from the supplied file name, and serve the "
                       "models in that file. This config file can be used to "
                       "specify multiple models to serve and other advanced "
                       "parameters including non-default version policy. (If "
                       "used, --model_name, --model_base_path are ignored.)"),
      tensorflow::Flag("model_config_file_poll_wait_seconds",
                       &options.fs_model_config_poll_wait_seconds,
                       "Interval in seconds between each poll of the "
                       "filesystem for model_config_file. If unset or set to "
                       "zero, poll will be done exactly once and not "
                       "periodically. Setting this to negative is reserved "
                       "for testing purposes only."),
      tensorflow::Flag("model_name", &options.model_name,
                       "name of model (ignored "
                       "if --model_config_file flag is set)"),
      tensorflow::Flag("model_base_path", &options.model_base_path,
                       "path to export (ignored if --model_config_file flag "
                       "is set, otherwise required)"),
      tensorflow::Flag("num_load_threads", &options.num_load_threads,
                       "The number of threads in the thread-pool used to "
                       "load servables. If set as 0, we don't use a "
                       "thread-pool, and servable loads are performed "
                       "serially in the manager's main work loop, which may "
                       "cause the serving request to be delayed. Default: 0"),
      tensorflow::Flag("num_unload_threads", &options.num_unload_threads,
                       "The number of threads in the thread-pool used to "
                       "unload servables. If set as 0, we don't use a "
                       "thread-pool, and servable unloads are performed "
                       "serially in the manager's main work loop, which may "
                       "cause the serving request to be delayed. Default: 0"),
      tensorflow::Flag("max_num_load_retries", &options.max_num_load_retries,
                       "maximum number of times it retries loading a model "
                       "after the first failure, before giving up. "
                       "If set to 0, a load is attempted only once."),
      tensorflow::Flag("load_retry_interval_micros",
                       &options.load_retry_interval_micros,
                       "The interval, in microseconds, between each servable "
                       "load retry. If set negative, it doesn't wait. "
                       "Default: 1 minute"),
      tensorflow::Flag("file_system_poll_wait_seconds",
                       &options.file_system_poll_wait_seconds,
                       "Interval in seconds between each poll of the "
                       "filesystem for new model version. If set to zero "
                       "poll will be done exactly once and not periodically. "
                       "Setting this to a negative value will disable "
                       "polling entirely, causing ModelServer to wait "
                       "indefinitely for a new model at startup. Negative "
                       "values are reserved for testing purposes only."),
      tensorflow::Flag("flush_filesystem_caches",
                       &options.flush_filesystem_caches,
                       "If true (the default), filesystem caches will be "
                       "flushed after the initial load of all servables, and "
                       "after each subsequent individual servable reload (if "
                       "the number of load threads is 1). This reduces "
                       "memory consumption of the model server, at the "
                       "potential cost of cache misses if model files are "
                       "accessed after servables are loaded."),
      tensorflow::Flag("tensorflow_session_parallelism",
                       &options.tensorflow_session_parallelism,
                       "Number of threads to use for running a "
                       "Tensorflow session. Auto-configured by default. "
                       "Note that this option is ignored if "
                       "--platform_config_file is non-empty."),
      tensorflow::Flag("tensorflow_session_config_file",
                       &options.tensorflow_session_config_file,
                       "If non-empty, read an ascii TensorFlow Session "
                       "ConfigProto protobuf from the supplied file name. "
                       "Note, parts of the session config (threads, "
                       "parallelism etc.) can be overridden if needed, via "
                       "corresponding command line flags."),
      tensorflow::Flag("tensorflow_intra_op_parallelism",
                       &options.tensorflow_intra_op_parallelism,
                       "Number of threads to use to parallelize the "
                       "execution of an individual op. Auto-configured by "
                       "default. Note that this option is ignored if "
                       "--platform_config_file is non-empty."),
      tensorflow::Flag("tensorflow_inter_op_parallelism",
                       &options.tensorflow_inter_op_parallelism,
                       "Controls the number of operators that can be "
                       "executed simultaneously. Auto-configured by default. "
                       "Note that this option is ignored if "
                       "--platform_config_file is non-empty."),
      tensorflow::Flag("use_alts_credentials", &options.use_alts_credentials,
                       "Use Google ALTS credentials"),
      tensorflow::Flag("ssl_config_file", &options.ssl_config_file,
                       "If non-empty, read an ascii SSLConfig protobuf from "
                       "the supplied file name and set up a secure gRPC "
                       "channel"),
      tensorflow::Flag("platform_config_file", &options.platform_config_file,
                       "If non-empty, read an ascii PlatformConfigMap "
                       "protobuf from the supplied file name, and use that "
                       "platform config instead of the Tensorflow platform. "
                       "(If used, --enable_batching is ignored.)"),
      tensorflow::Flag("per_process_gpu_memory_fraction",
                       &options.per_process_gpu_memory_fraction,
                       "Fraction that each process occupies of the GPU "
                       "memory space; the value is between 0.0 and 1.0 "
                       "(with 0.0 as the default). If 1.0, the server will "
                       "allocate all the memory when the server starts; if "
                       "0.0, Tensorflow will automatically select a value."),
      tensorflow::Flag("saved_model_tags", &options.saved_model_tags,
                       "Comma-separated set of tags corresponding to the "
                       "meta graph def to load from SavedModel."),
      tensorflow::Flag("grpc_channel_arguments",
                       &options.grpc_channel_arguments,
                       "A comma separated list of arguments to be passed to "
                       "the grpc server. (e.g. "
                       "grpc.max_connection_age_ms=2000)"),
      tensorflow::Flag("grpc_max_threads", &options.grpc_max_threads,
                       "Max grpc server threads to handle grpc messages."),
      tensorflow::Flag("enable_model_warmup", &options.enable_model_warmup,
                       "Enables model warmup, which triggers lazy "
                       "initializations (such as TF optimizations) at load "
                       "time, to reduce first request latency."),
      tensorflow::Flag("num_request_iterations_for_warmup",
                       &options.num_request_iterations_for_warmup,
                       "Number of times a request is iterated during warmup "
                       "replay. This value is used only if > 0."),
      tensorflow::Flag("version", &display_version, "Display version"),
      tensorflow::Flag("monitoring_config_file",
                       &options.monitoring_config_file,
                       "If non-empty, read an ascii MonitoringConfig "
                       "protobuf from the supplied file name"),
      tensorflow::Flag("remove_unused_fields_from_bundle_metagraph",
                       &options.remove_unused_fields_from_bundle_metagraph,
                       "Removes unused fields from MetaGraphDef proto "
                       "message to save memory."),
      tensorflow::Flag("prefer_tflite_model", &options.prefer_tflite_model,
                       "EXPERIMENTAL; CAN BE REMOVED ANYTIME! "
                       "Prefer TensorFlow Lite model from `model.tflite` "
                       "file in SavedModel directory, instead of the "
                       "TensorFlow model from `saved_model.pb` file. "
                       "If no TensorFlow Lite model is found, falls back to "
                       "the TensorFlow model."),
      tensorflow::Flag("num_tflite_pools", &options.num_tflite_pools,
                       "EXPERIMENTAL; CAN BE REMOVED ANYTIME! Number of "
                       "TFLite interpreters in an interpreter pool of "
                       "TfLiteSession. Typically there is one TfLiteSession "
                       "for each TF Lite model that is loaded. If not set, "
                       "will be auto set based on number of CPUs."),
      tensorflow::Flag("num_tflite_interpreters_per_pool",
                       &options.num_tflite_interpreters_per_pool,
                       "EXPERIMENTAL; CAN BE REMOVED ANYTIME! Number of "
                       "TFLite interpreters in an interpreter pool of "
                       "TfLiteSession. Typically there is one TfLiteSession "
                       "for each TF Lite model that is loaded. If not "
                       "set, will be 1."),
      tensorflow::Flag("enable_signature_method_name_check",
                       &options.enable_signature_method_name_check,
                       "Enable method_name check for SignatureDef. Disable "
                       "this if serving native TF2 regression/classification "
                       "models."),
      tensorflow::Flag("xla_cpu_compilation_enabled",
                       &xla_cpu_compilation_enabled,
                       "EXPERIMENTAL; CAN BE REMOVED ANYTIME! "
                       "Enable XLA:CPU JIT (default is disabled). With "
                       "XLA:CPU JIT disabled, models utilizing this feature "
                       "will return bad Status on first compilation request."),
      tensorflow::Flag("enable_profiler", &options.enable_profiler,
                       "Enable profiler service."),
      tensorflow::Flag("thread_pool_factory_config_file",
                       &options.thread_pool_factory_config_file,
                       "If non-empty, read an ascii ThreadPoolConfig "
                       "protobuf from the supplied file name."),
      tensorflow::Flag("mixed_precision", &options.mixed_precision,
                       "specify mixed_precision mode"),
      tensorflow::Flag("skip_initialize_tpu", &options.skip_initialize_tpu,
                       "Whether to skip auto initializing TPU."),
      tensorflow::Flag("enable_grpc_healthcheck_service",
                       &options.enable_grpc_healthcheck_service,
                       "Enable the standard gRPC healthcheck service.")};
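  // Example invocation, assuming the standard binary name; the model name
  // and base path below are hypothetical:
  //   tensorflow_model_server --port=8500 --rest_api_port=8501 \
  //       --model_name=my_model --model_base_path=/models/my_model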
  const auto& usage = tensorflow::Flags::Usage(argv[0], flag_list);
  if (!tensorflow::Flags::Parse(&argc, argv, flag_list)) {
    std::cout << usage;
    return -1;
  }
  tensorflow::port::InitMain(argv[0], &argc, &argv);
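  // On Cloud TPU builds, apply TPU defaults and bring the TPU system up
  // before the server is built.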
#if defined(LIBTPU_ON_GCE) || defined(PLATFORM_CLOUD_TPU)
  InitializeTPU(options);
#endif
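  // --version: print the ModelServer and TensorFlow library versions, then
  // exit without starting the server.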
  if (display_version) {
    std::cout << "TensorFlow ModelServer: " << TF_Serving_Version() << "\n"
              << "TensorFlow Library: " << TF_Version() << "\n";
    return 0;
  }
  // Anything left in argv after flag parsing is an unrecognized argument.
  if (argc != 1) {
    std::cout << "unknown argument: " << argv[1] << "\n" << usage;
  }
  // XLA:CPU JIT stays disabled unless explicitly requested via the flag.
  if (!xla_cpu_compilation_enabled) {
    tensorflow::DisableXlaCompilation();
  }
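  // Build the server from the parsed options and block until it terminates.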
  tensorflow::serving::main::Server server;
  const auto& status = server.BuildAndStart(options);
  if (!status.ok()) {
    std::cout << "Failed to start server. Error: " << status << "\n";
    return -1;
  }

  server.WaitForTermination();
  return 0;
}