TensorFlow Serving C++ API Documentation
server.h
/* Copyright 2018 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_SERVING_MODEL_SERVERS_SERVER_H_
#define TENSORFLOW_SERVING_MODEL_SERVERS_SERVER_H_

#include <memory>

#include "grpcpp/server.h"
#include "tensorflow/core/kernels/batching_util/periodic_function.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/cpu_info.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/rpc/profiler_service_impl.h"
#include "tensorflow_serving/model_servers/http_server.h"
#include "tensorflow_serving/model_servers/model_service_impl.h"
#include "tensorflow_serving/model_servers/prediction_service_impl.h"
#include "tensorflow_serving/model_servers/server_core.h"
#include "tensorflow_serving/servables/tensorflow/thread_pool_factory.h"

namespace tensorflow {
namespace serving {
namespace main {

class Server {
 public:
  struct Options {
    //
    // gRPC Server options.
    //
    tensorflow::int32 grpc_port = 8500;
    tensorflow::string grpc_channel_arguments;
    tensorflow::string grpc_socket_path;
    tensorflow::int32 grpc_max_threads = 4.0 * port::NumSchedulableCPUs();

    //
    // HTTP Server options.
    //
    tensorflow::int32 http_port = 0;
    tensorflow::int32 http_num_threads = 4.0 * port::NumSchedulableCPUs();
    tensorflow::int32 http_timeout_in_ms = 30000;  // 30 seconds.
    bool enable_cors_support = false;

    //
    // Model Server options.
    //
    bool enable_batching = false;
    bool enable_per_model_batching_params = false;
    bool allow_version_labels_for_unavailable_models = false;
    bool force_allow_any_version_labels_for_unavailable_models = false;
    float per_process_gpu_memory_fraction = 0;
    tensorflow::string batching_parameters_file;
    tensorflow::string model_name;
    tensorflow::int32 num_load_threads = 0;
    tensorflow::int32 num_unload_threads = 0;
    tensorflow::int32 max_num_load_retries = 5;
    int64_t load_retry_interval_micros = 1LL * 60 * 1000 * 1000;
    tensorflow::int32 file_system_poll_wait_seconds = 1;
    bool flush_filesystem_caches = true;
    tensorflow::string model_base_path;
    tensorflow::string saved_model_tags;
    // TensorFlow session parallelism of zero means that both the inter-op and
    // intra-op thread pools will be auto-configured.
    int64_t tensorflow_session_parallelism = 0;

    // Zero means that the thread pools will be auto-configured.
    int64_t tensorflow_intra_op_parallelism = 0;
    int64_t tensorflow_inter_op_parallelism = 0;
    tensorflow::string platform_config_file;
    // Only one of ALTS or SSL can be specified, i.e. either
    // use_alts_credentials must be false or ssl_config_file must be empty.
    bool use_alts_credentials = false;
    tensorflow::string ssl_config_file;
    string model_config_file;
    // Text proto file for TensorFlow Session ConfigProto.
    string tensorflow_session_config_file;
    // Zero means the server will not poll the filesystem for the model config
    // file after start-up.
    tensorflow::int32 fs_model_config_poll_wait_seconds = 0;
    bool enable_model_warmup = true;
    // This value is used only if > 0.
    tensorflow::int32 num_request_iterations_for_warmup = 0;
    tensorflow::string monitoring_config_file;
    // TensorFlow session run options.
    bool enforce_session_run_timeout = true;
    bool remove_unused_fields_from_bundle_metagraph = true;
    bool prefer_tflite_model = false;
    tensorflow::int32 num_tflite_pools = port::NumSchedulableCPUs();
    tensorflow::int32 num_tflite_interpreters_per_pool = 1;
    tensorflow::string thread_pool_factory_config_file;
    bool enable_signature_method_name_check = false;
    bool enable_profiler = true;
    tensorflow::string mixed_precision;
    bool skip_initialize_tpu = false;
    // Misc gRPC features.
    bool enable_grpc_healthcheck_service = false;
    Options();
  };

  // Blocks the current thread, waiting for any servers started as part of the
  // BuildAndStart() call.
  ~Server();

  // Build and start gRPC (and optionally HTTP) server, to be ready to
  // accept and process new requests over gRPC (and optionally HTTP/REST).
  Status BuildAndStart(const Options& server_options);

  // Wait for servers started in BuildAndStart() above to terminate.
  // This will block the current thread until termination is successful.
  void WaitForTermination();

 private:
  // Polls the filesystem, parses config at specified path, and calls
  // ServerCore::ReloadConfig with the captured model config.
  void PollFilesystemAndReloadConfig(const string& config_file_path);

  std::unique_ptr<ServerCore> server_core_;
  std::unique_ptr<ModelServiceImpl> model_service_;
  std::unique_ptr<PredictionService::Service> prediction_service_;
  std::unique_ptr<tensorflow::grpc::ProfilerService::Service> profiler_service_;
  std::unique_ptr<::grpc::Server> grpc_server_;
  std::unique_ptr<net_http::HTTPServerInterface> http_server_;
  // A thread that calls PollFilesystemAndReloadConfig() periodically if
  // fs_model_config_poll_wait_seconds > 0.
  std::unique_ptr<PeriodicFunction> fs_config_polling_thread_;
  std::unique_ptr<ThreadPoolFactory> thread_pool_factory_;
};

}  // namespace main
}  // namespace serving
}  // namespace tensorflow

#endif  // TENSORFLOW_SERVING_MODEL_SERVERS_SERVER_H_
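
Example usage

A minimal sketch of how this class is intended to be driven (this is not the library's shipped main.cc): populate Server::Options, call BuildAndStart(), and then block in WaitForTermination(). The model name, base path, and port values below are hypothetical placeholders chosen for illustration.

#include <iostream>

#include "tensorflow/core/lib/core/status.h"
#include "tensorflow_serving/model_servers/server.h"

int main(int argc, char** argv) {
  tensorflow::serving::main::Server::Options options;
  options.grpc_port = 8500;                      // gRPC endpoint.
  options.http_port = 8501;                      // HTTP/REST endpoint; the default of 0 leaves HTTP disabled.
  options.model_name = "my_model";               // Hypothetical model name.
  options.model_base_path = "/models/my_model";  // Hypothetical SavedModel export directory.

  tensorflow::serving::main::Server server;
  const tensorflow::Status status = server.BuildAndStart(options);
  if (!status.ok()) {
    std::cerr << "Failed to start server: " << status.ToString() << std::endl;
    return -1;
  }

  // Blocks the calling thread until the started servers terminate.
  server.WaitForTermination();
  return 0;
}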