api/cpp/server__core_8h_source.html

 /* Copyright 2016 Google Inc. All Rights Reserved.


 Licensed under the Apache License, Version 2.0 (the "License");

 you may not use this file except in compliance with the License.

 You may obtain a copy of the License at


     http://www.apache.org/licenses/LICENSE-2.0


 Unless required by applicable law or agreed to in writing, software

 distributed under the License is distributed on an "AS IS" BASIS,

 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 See the License for the specific language governing permissions and

 limitations under the License.

 ==============================================================================*/


 #ifndef TENSORFLOW_SERVING_MODEL_SERVERS_SERVER_CORE_H_

 #define TENSORFLOW_SERVING_MODEL_SERVERS_SERVER_CORE_H_


 #include <functional>

 #include <limits>

 #include <map>

 #include <memory>

 #include <string>

 #include <utility>


 #include "google/protobuf/any.pb.h"

 #include "absl/base/macros.h"

 #include "absl/status/status.h"

 #include "absl/time/time.h"

 #include "absl/types/optional.h"

 #include "tensorflow/core/lib/core/status.h"

 #include "tensorflow/core/platform/cpu_info.h"

 #include "tensorflow/core/platform/macros.h"

 #include "tensorflow/core/platform/mutex.h"

 #include "tensorflow/core/platform/types.h"

 #include "tensorflow_serving/apis/model.pb.h"

 #include "tensorflow_serving/config/logging_config.pb.h"

 #include "tensorflow_serving/config/model_server_config.pb.h"

 #include "tensorflow_serving/config/platform_config.pb.h"

 #include "tensorflow_serving/core/aspired_versions_manager.h"

 #include "tensorflow_serving/core/dynamic_source_router.h"

 #include "tensorflow_serving/core/prefix_storage_path_source_adapter.h"

 #include "tensorflow_serving/core/servable_state_monitor.h"

 #include "tensorflow_serving/core/server_request_logger.h"

 #include "tensorflow_serving/core/source.h"

 #include "tensorflow_serving/core/source_adapter.h"

 #include "tensorflow_serving/core/storage_path.h"

 #include "tensorflow_serving/core/stream_logger.h"

 #include "tensorflow_serving/servables/tensorflow/predict_util.h"

 #include "tensorflow_serving/servables/tensorflow/servable.h"

 #include "tensorflow_serving/sources/storage_path/file_system_storage_path_source.h"

 #include "tensorflow_serving/util/event_bus.h"

 #include "tensorflow_serving/util/unique_ptr_with_deps.h"


 namespace tensorflow {

 namespace serving {


 // Forward declaration of the test-only accessor class that ServerCore
 // befriends below; its definition is not part of this header.
 namespace test_util {
 class ServerCoreTestAccess;
 }  // namespace test_util


 class ServerCore : public Manager {

  public:

   // Hook type re-exported from AspiredVersionsManager. An instance is supplied
   // through Options::pre_load_hook.
   using PreLoadHook = AspiredVersionsManager::PreLoadHook;

   // Signature of a function for creating a ServableStateMonitor. It takes the
   // servable-state event bus and a pointer to the unique_ptr that receives
   // the monitor. Supplied through Options::servable_state_monitor_creator.
   using ServableStateMonitorCreator =
       std::function<Status(EventBus<ServableState>* event_bus,
                            std::unique_ptr<ServableStateMonitor>* monitor)>;

   // Signature of a function for instantiating and connecting custom sources
   // and source adapters to the manager. Supplied through
   // Options::custom_model_config_loader.
   using CustomModelConfigLoader = std::function<Status(
       const ::google::protobuf::Any& any, EventBus<ServableState>* event_bus,
       UniquePtrWithDeps<AspiredVersionsManager>* manager)>;

   // Function signature used to update the server_request_logger.
   using ServerRequestLoggerUpdater =
       std::function<Status(const ModelServerConfig&, ServerRequestLogger*)>;


   // Options for configuring a ServerCore object.
   struct Options {
     // ModelServer configuration.
     ModelServerConfig model_server_config;
     // Relative (non-absolute) base-paths in model_server_config will
     // be prepended with model_config_list_root_dir.
     absl::optional<string> model_config_list_root_dir;

     // The AspiredVersionPolicy to use for the manager. Must be non-null.
     std::unique_ptr<AspiredVersionPolicy> aspired_version_policy;

     // See AspiredVersionsManager::Options::custom_sort_actions
     AspiredVersionsManager::CustomSortActionsFn custom_sort_actions;

     // The number of threads used to load models. If set to 0, then no thread
     // pool is used and loads are performed serially in the manager thread.
     int32 num_load_threads = 0;

     // The number of load threads used to load the initial set of models at
     // server startup. This is set high to load up the initial set of models
     // fast; after this the server uses num_load_threads.
     //
     // The multiplier is an integer literal on purpose: a floating-point
     // literal would route the value through double and back via an implicit
     // floating-to-integral conversion.
     int32 num_initial_load_threads = 4 * port::NumSchedulableCPUs();

     // The number of threads used to unload models. If set to 0, then no thread
     // pool is used and unloads are performed serially in the manager thread.
     int32 num_unload_threads = 0;

     // Total model size limit, in terms of main memory, in bytes.
     uint64_t total_model_memory_limit_bytes =
         std::numeric_limits<uint64_t>::max();

     // Maximum number of times we retry loading a model, after the first
     // failure, before we give up.
     //
     // If set to 0, a load is attempted only once.
     int32 max_num_load_retries = 5;

     // The interval, in microseconds, between each servable load retry. If set
     // negative, we don't wait.
     // Default: 1 minute.
     int64_t load_retry_interval_micros = 1LL * 60 * 1000 * 1000;

     // Time interval between file-system polls, in seconds.
     int32 file_system_poll_wait_seconds = 30;

     // If true, filesystem caches are flushed in the following cases:
     //
     // 1) After the initial models are loaded.
     // 2) After a new config is supplied and a changed set of models are loaded.
     // 3) After each new model version is loaded, if num_load_threads == 1.
     //
     // In the common scenario where the number of load threads is set to 1 after
     // the initial load, this will take care of flushing the cache once after
     // the initial load, and after every subsequent load of every model version.
     bool flush_filesystem_caches = false;

     // Configuration for the supported platforms.
     PlatformConfigMap platform_config_map;

     // A function for creating ServableStateMonitor. If not specified, a default
     // creator that creates ServableStateMonitor will be used.
     ServableStateMonitorCreator servable_state_monitor_creator;

     // A function for instantiating and connecting custom sources and source
     // adapters to the manager.
     CustomModelConfigLoader custom_model_config_loader;

     // Whether to permit incoming ModelSpec requests to use the 'version_label'
     // field.
     bool allow_version_labels = true;

     // If set to true, the server will fail to start up (or fail a config
     // reload) if, for any configured model, no versions of the model are found
     // in the filesystem under the model's base path.
     ABSL_DEPRECATED("Use servable_versions_always_present.")
     bool fail_if_no_model_versions_found = false;

     // For servables which end with LoaderHarness::State::kError, enable
     // future attempts at reload to progress.
     bool enable_reload_servables_with_error = false;

     // If set to true, the server will fail to start up (or fail a config
     // reload) if, for any configured model, no versions of the model are found
     // in the filesystem under the model's base path. In addition, if the
     // filesystem polling finds no servables under the base path for a
     // configured model, it will do nothing, rather than unloading all versions.
     bool servable_versions_always_present = false;

     // Logger used for logging requests hitting the server.
     std::unique_ptr<ServerRequestLogger> server_request_logger;

     // If set, we use this function to update the server_request_logger.
     ServerRequestLoggerUpdater server_request_logger_updater;

     // Callback to be called just before a servable is to be loaded. This will
     // be called on the same manager load thread which starts the load.
     PreLoadHook pre_load_hook;

     // Whether to allow assigning unused version labels to models that are not
     // available yet.
     bool allow_version_labels_for_unavailable_models = false;

     // Whether to force-allow assigning any version labels to models that are
     // not available yet.
     bool force_allow_any_version_labels_for_unavailable_models = false;

     // In a predict handler, this option specifies how to serialize tensors
     // (e.g: as proto fields or as proto content).
     // Serialize as proto fields by default, for backward compatibility.
     internal::PredictResponseTensorSerializationOption
         predict_response_tensor_serialization_option =
             internal::PredictResponseTensorSerializationOption::kAsProtoField;

     // The prefix applied to the file system storage paths.
     std::string storage_path_prefix;

     // Value reported by ServerCore::enable_cors_support().
     // NOTE(review): presumably toggles CORS handling in the HTTP front end;
     // confirm against the consumer of that accessor.
     bool enable_cors_support = false;

     // If true, propagate current context to children threads (periodic
     // functions) in AspiredVersionsManager.
     bool with_current_context = false;

     // How long to wait for servables to reach a given state.
     absl::Duration servable_state_waiter_timeout = absl::InfiniteDuration();

     // Defines how we want to retry when model loading fails.
     std::function<bool(absl::Status)> should_retry_model_load;
   };


   // Virtual so that subclasses are destroyed correctly through a base pointer.
   virtual ~ServerCore() = default;

   // Factory for ServerCore. Takes 'options' by value and hands the result back
   // through 'core'; the returned Status reports failure. The definition lives
   // in server_core.cc and is not visible from this header.
   static Status Create(Options options, std::unique_ptr<ServerCore>* core);


   // Reports the ids of the servables that the wrapped manager currently lists
   // as available. Pure delegation to 'manager_'.
   std::vector<ServableId> ListAvailableServableIds() const override {
     auto available_ids = manager_->ListAvailableServableIds();
     return available_ids;
   }


   // Applies a new ModelServerConfig to this server core. Must be called
   // without config_mu_ held (see the TF_LOCKS_EXCLUDED annotation). Defined in
   // server_core.cc; the returned Status reports whether the reload succeeded.
   virtual Status ReloadConfig(const ModelServerConfig& config)
       TF_LOCKS_EXCLUDED(config_mu_);


   // Returns ServableStateMonitor that can be used to query servable states.
   // The returned pointer is non-owning: it is obtained from the
   // 'servable_state_monitor_' shared_ptr held by this object.
   virtual ServableStateMonitor* servable_state_monitor() const {
     ServableStateMonitor* const monitor = servable_state_monitor_.get();
     return monitor;
   }


   // Looks up a typed servable handle for 'model_spec'.
   //
   // The spec is first translated into a ServableRequest; that request is then
   // passed to the manager, which fills in 'handle'. The first non-OK status
   // from either step is logged at VLOG(1) and returned unchanged. On success a
   // default-constructed (OK) Status is returned.
   template <typename T>
   Status GetServableHandle(const ModelSpec& model_spec,
                            ServableHandle<T>* const handle) {
     ServableRequest request;
     tensorflow::Status result =
         ServableRequestFromModelSpec(model_spec, &request);
     // Only consult the manager if the spec could be turned into a request.
     if (result.ok()) {
       result = manager_->GetServableHandle(request, handle);
     }
     if (result.ok()) {
       return Status();
     }
     VLOG(1) << "Unable to get servable handle due to: " << result;
     return result;
   }


   // Non-template overload for ServableHandle<Servable>. Because it is
   // virtual, sub-classes can override handle lookup for Servables, which is
   // useful for testing. The default simply forwards to the template version.
   virtual Status GetServableHandle(const ModelSpec& model_spec,
                                    ServableHandle<Servable>* const handle) {
     Status status = this->GetServableHandle<Servable>(model_spec, handle);
     return status;
   }


   // Returns the map from ServableId to typed handle that the wrapped manager
   // reports for servable type T. Pure delegation to 'manager_'.
   template <typename T>
   std::map<ServableId, ServableHandle<T>> GetAvailableServableHandles() const {
     auto handles_by_id = manager_->GetAvailableServableHandles<T>();
     return handles_by_id;
   }


   // Forwards one request/response pair, together with its metadata, to the
   // ServerRequestLogger stored in 'options_' and returns that logger's status.
   // The logger pointer is dereferenced without a null check.
   virtual Status Log(const google::protobuf::Message& request,
                      const google::protobuf::Message& response,
                      const LogMetadata& log_metadata) {
     ServerRequestLogger& logger = *options_.server_request_logger;
     return logger.Log(request, response, log_metadata);
   }


   // Begins logging a stream by handing 'log_metadata' and
   // 'create_stream_logger_fn' to the ServerRequestLogger stored in 'options_',
   // and returns the StreamLogger it produces. Returns NULL if the stream
   // should not be logged.
   template <typename Request, typename Response>
   std::unique_ptr<StreamLogger<Request, Response>> StartLoggingStream(
       const LogMetadata& log_metadata,
       ServerRequestLogger::CreateStreamLoggerFn<Request, Response>
           create_stream_logger_fn) {
     ServerRequestLogger& logger = *options_.server_request_logger;
     return logger.StartLoggingStream(log_metadata,
                                      std::move(create_stream_logger_fn));
   }


   // Accessor for Options::predict_response_tensor_serialization_option as
   // stored in 'options_'.
   internal::PredictResponseTensorSerializationOption
   predict_response_tensor_serialization_option() const {
     const Options& opts = options_;
     return opts.predict_response_tensor_serialization_option;
   }


   bool enable_cors_support() const { return options_.enable_cors_support; }


  protected:

   // Protected constructor: reachable from subclasses and from this class's
   // own members (e.g. the static Create() factory), not from outside code.
   // Takes 'options' by value. Defined out of line.
   ServerCore(Options options);


  private:

   // Grants the test-only accessor class access to private members.
   friend class test_util::ServerCoreTestAccess;


   // ************************************************************************
   // Server Setup and Initialization.
   // ************************************************************************

   // Initializes server core.
   // Must be run once and only once per ServerCore instance.
   // Takes ownership of 'aspired_version_policy' (passed as a unique_ptr).
   Status Initialize(
       std::unique_ptr<AspiredVersionPolicy> aspired_version_policy,
       AspiredVersionsManager::CustomSortActionsFn custom_sort_actions);

   // Creates an AspiredVersionsManager with the specified policy. The new
   // manager is handed back through 'manager'.
   Status CreateAspiredVersionsManager(
       std::unique_ptr<AspiredVersionPolicy> policy,
       AspiredVersionsManager::CustomSortActionsFn custom_sort_actions,
       std::unique_ptr<AspiredVersionsManager>* manager);

   // Creates a ResourceTracker, handed back through 'resource_tracker'.
   Status CreateResourceTracker(
       std::unique_ptr<ResourceTracker>* resource_tracker);

   // Creates a platform-specific source adapter for 'model_platform', handed
   // back through 'adapter'.
   Status CreateAdapter(
       const string& model_platform,
       std::unique_ptr<StoragePathSourceAdapter>* adapter) const;

   // Creates a FileSystemStoragePathSourceConfig from the ModelConfigList of
   // 'config'.
   FileSystemStoragePathSourceConfig CreateStoragePathSourceConfig(
       const ModelServerConfig& config) const;

   // Creates routes for a DynamicSourceRouter from the ModelConfigList of
   // 'config'. The routes are written to 'routes'.
   Status CreateStoragePathRoutes(
       const ModelServerConfig& config,
       DynamicSourceRouter<StoragePath>::Routes* routes) const;

   // Waits until all entries in 'models' have been loaded, according to
   // 'monitor'. Returns an error if any model fails to load.
   // NOTE(review): std::set is used here but <set> is not among this header's
   // direct includes; it is presumably pulled in transitively.
   Status WaitUntilModelsAvailable(const std::set<string>& models,
                                   ServableStateMonitor* monitor);

   // Creates a FileSystemStoragePathSource and an optional
   // PrefixStoragePathSourceAdapter, and connects them to the supplied target.
   // The caller must hold config_mu_ exclusively.
   Status CreateStoragePathSource(
       const FileSystemStoragePathSourceConfig& config,
       Target<StoragePath>* target,
       std::unique_ptr<FileSystemStoragePathSource>* source,
       std::unique_ptr<PrefixStoragePathSourceAdapter>* prefix_source_adapter)
       TF_EXCLUSIVE_LOCKS_REQUIRED(config_mu_);

   // The source adapters to deploy, to handle the configured platforms as well
   // as models whose platform is unknown (errors).
   //
   // Importantly, we deploy one source adapter per platform, not one per model,
   // to handle cross-model optimizations that some platforms/adapters may employ
   // e.g. cross-model batch scheduling.
   struct SourceAdapters {
     // One adapter for each platform, keyed by a string.
     // NOTE(review): the key is presumably the platform name; confirm against
     // CreateAdapters().
     std::map<string, std::unique_ptr<StoragePathSourceAdapter>>
         platform_adapters;

     // An extra adapter to report errors for models with no configured platform.
     std::unique_ptr<StoragePathSourceAdapter> error_adapter;
   };


   // Creates a source router and connects it to the supplied adapter targets.
   // The router is handed back through 'router'.
   Status CreateRouter(
       const DynamicSourceRouter<StoragePath>::Routes& routes,
       SourceAdapters* targets,
       std::unique_ptr<DynamicSourceRouter<StoragePath>>* router) const;

   // Creates a set of source adapters based on options_.platform_config_map.
   Status CreateAdapters(SourceAdapters* adapters) const;

   // Connects the source adapters to the manager and waits for it to load all
   // configured models. The caller must hold config_mu_ exclusively.
   Status ConnectAdaptersToManagerAndAwaitModelLoads(SourceAdapters* adapters)
       TF_EXCLUSIVE_LOCKS_REQUIRED(config_mu_);

   // Updates the config of 'storage_path_source_and_router_->source'.
   // The caller must hold config_mu_ exclusively.
   Status ReloadStoragePathSourceConfig(
       const FileSystemStoragePathSourceConfig& source_config)
       TF_EXCLUSIVE_LOCKS_REQUIRED(config_mu_);

   // Updates the configured routes of 'storage_path_source_and_router_->router'.
   // The caller must hold config_mu_ exclusively.
   Status ReloadRoutes(const DynamicSourceRouter<StoragePath>::Routes& routes)
       TF_EXCLUSIVE_LOCKS_REQUIRED(config_mu_);

   // Adds/reloads models through ModelConfigList of 'config_'.
   // The caller must hold config_mu_ exclusively.
   Status AddModelsViaModelConfigList() TF_EXCLUSIVE_LOCKS_REQUIRED(config_mu_);

   // Adds/reloads models through custom model config of 'config_'.
   // The caller must hold config_mu_ exclusively.
   Status AddModelsViaCustomModelConfig()
       TF_EXCLUSIVE_LOCKS_REQUIRED(config_mu_);

   // Updates the ServerRequestLogger based on the ModelConfigList.
   // The caller must hold config_mu_ exclusively.
   Status MaybeUpdateServerRequestLogger(
       ModelServerConfig::ConfigCase config_case)
       TF_EXCLUSIVE_LOCKS_REQUIRED(config_mu_);

   // Updates 'model_labels_to_versions_' based on 'config_'. Returns an error
   // if requesting to assign an existing label to a version not in state
   // kAvailable. For a new version label, it can be assigned to a version that
   // is not in state kAvailable yet if
   // allow_version_labels_for_unavailable_models is true.
   // The caller must hold config_mu_ exclusively and must not hold
   // model_labels_to_versions_mu_.
   Status UpdateModelVersionLabelMap() TF_EXCLUSIVE_LOCKS_REQUIRED(config_mu_)
       TF_LOCKS_EXCLUDED(model_labels_to_versions_mu_);


   // ************************************************************************
   // Request Processing.
   // ************************************************************************

   // Extracts a ServableRequest from the given ModelSpec. The request is
   // written to 'servable_request'; the returned Status reports failure.
   Status ServableRequestFromModelSpec(const ModelSpec& model_spec,
                                       ServableRequest* servable_request) const;

   // Gets the version associated with 'label', for the given model name. The
   // version is written to 'version'. The caller must not hold
   // model_labels_to_versions_mu_.
   Status GetModelVersionForLabel(const string& model_name, const string& label,
                                  int64_t* version) const
       TF_LOCKS_EXCLUDED(model_labels_to_versions_mu_);


   // Manager interface override: delegates the untyped handle lookup for
   // 'request' to the wrapped manager and returns its status unchanged.
   Status GetUntypedServableHandle(
       const ServableRequest& request,
       std::unique_ptr<UntypedServableHandle>* untyped_handle) override {
     Status status = manager_->GetUntypedServableHandle(request, untyped_handle);
     return status;
   }


   // Manager interface override: returns the wrapped manager's map from
   // ServableId to untyped handle.
   std::map<ServableId, std::unique_ptr<UntypedServableHandle>>
   GetAvailableUntypedServableHandles() const override {
     auto untyped_handles = manager_->GetAvailableUntypedServableHandles();
     return untyped_handles;
   }


   // The options passed to the ctor, minus the AspiredVersionPolicy.
   Options options_;

   // All of the supported platforms (i.e. the ones given in
   // 'options_.platform_config_map'), and a router output port number for each.
   // Used to deterministically associate a platform with a source adapter.
   std::map<string, int> platform_to_router_port_;

   // Event bus carrying ServableState events, the monitor exposed through
   // servable_state_monitor(), and the wrapped manager that the public methods
   // of this class delegate to.
   std::shared_ptr<EventBus<ServableState>> servable_event_bus_;
   std::shared_ptr<ServableStateMonitor> servable_state_monitor_;
   UniquePtrWithDeps<AspiredVersionsManager> manager_;

   // The most recent config supplied to ReloadConfig().
   ModelServerConfig config_ TF_GUARDED_BY(config_mu_);

   // A model_name->label->version# map. Held through a unique_ptr and guarded
   // by model_labels_to_versions_mu_.
   std::unique_ptr<std::map<string, std::map<string, int64_t>>>
       model_labels_to_versions_ TF_GUARDED_BY(model_labels_to_versions_mu_);


   // Pair of non-owning pointers to the file-system source and its router.
   // Neither member has a default initializer, so both must be set explicitly
   // when the struct is created.
   struct StoragePathSourceAndRouter {
     FileSystemStoragePathSource* source;
     DynamicSourceRouter<StoragePath>* router;
   };

   // If the configuration uses a file-system source, this is populated with
   // pointers to the source and router (to enable reconfiguration later). Both
   // are owned by 'manager_'.
   absl::optional<StoragePathSourceAndRouter> storage_path_source_and_router_
       TF_GUARDED_BY(config_mu_);


   // A mutex for reconfiguration, used by ReloadConfig().

   mutable mutex config_mu_;


   // A mutex for swapping the model version label map. Should only be held for

   // a short time (i.e. pointer swap) to avoid holding up inference requests.

   mutable mutex model_labels_to_versions_mu_;

 };


 }  // namespace serving

 }  // namespace tensorflow


 #endif  // TENSORFLOW_SERVING_MODEL_SERVERS_SERVER_CORE_H_

tensorflow::serving::EventBus
Definition: event_bus.h:63

tensorflow::serving::Manager
Definition: manager.h:77

tensorflow::serving::ServableHandle
Definition: servable_handle.h:75

tensorflow::serving::ServableStateMonitor
Definition: servable_state_monitor.h:46

tensorflow::serving::ServerCore
Definition: server_core.h:74

tensorflow::serving::ServerCore::ServerRequestLoggerUpdater
std::function< Status(const ModelServerConfig &, ServerRequestLogger *)> ServerRequestLoggerUpdater
Function signature used to update the server_request_logger.
Definition: server_core.h:93

tensorflow::serving::ServerCore::Create
static Status Create(Options options, std::unique_ptr< ServerCore > *core)
Definition: server_core.cc:231

tensorflow::serving::ServerCore::Log
virtual Status Log(const google::protobuf::Message &request, const google::protobuf::Message &response, const LogMetadata &log_metadata)
Definition: server_core.h:299

tensorflow::serving::ServerCore::ReloadConfig
virtual Status ReloadConfig(const ModelServerConfig &config) TF_LOCKS_EXCLUDED(config_mu_)
Definition: server_core.cc:447

tensorflow::serving::ServerCore::servable_state_monitor
virtual ServableStateMonitor * servable_state_monitor() const
Returns ServableStateMonitor that can be used to query servable states.
Definition: server_core.h:251

tensorflow::serving::ServerCore::ListAvailableServableIds
std::vector< ServableId > ListAvailableServableIds() const override
Definition: server_core.h:234

tensorflow::serving::ServerCore::GetServableHandle
Status GetServableHandle(const ModelSpec &model_spec, ServableHandle< T > *const handle)
Definition: server_core.h:267

tensorflow::serving::ServerCore::CustomModelConfigLoader
std::function< Status(const ::google::protobuf::Any &any, EventBus< ServableState > *event_bus, UniquePtrWithDeps< AspiredVersionsManager > *manager)> CustomModelConfigLoader
Definition: server_core.h:89

tensorflow::serving::ServerRequestLogger
Definition: server_request_logger.h:40

tensorflow::serving::UniquePtrWithDeps
Definition: unique_ptr_with_deps.h:40

tensorflow::serving::ServableRequest
Definition: manager.h:39

tensorflow::serving::ServerCore::Options
Options for configuring a ServerCore object.
Definition: server_core.h:96