api/cpp/basic__manager_8h_source.html

 /* Copyright 2016 Google Inc. All Rights Reserved.


 Licensed under the Apache License, Version 2.0 (the "License");

 you may not use this file except in compliance with the License.

 You may obtain a copy of the License at


     http://www.apache.org/licenses/LICENSE-2.0


 Unless required by applicable law or agreed to in writing, software

 distributed under the License is distributed on an "AS IS" BASIS,

 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 See the License for the specific language governing permissions and

 limitations under the License.

 ==============================================================================*/


 #ifndef TENSORFLOW_SERVING_CORE_BASIC_MANAGER_H_

 #define TENSORFLOW_SERVING_CORE_BASIC_MANAGER_H_


 #include <atomic>

 #include <memory>

 #include <string>

 #include <unordered_map>

 #include <vector>


 #include "absl/types/optional.h"

 #include "tensorflow/core/lib/core/status.h"

 #include "tensorflow/core/lib/core/stringpiece.h"

 #include "tensorflow/core/lib/hash/hash.h"

 #include "tensorflow/core/platform/env.h"

 #include "tensorflow/core/platform/mutex.h"

 #include "tensorflow/core/platform/thread_annotations.h"

 #include "tensorflow/core/platform/types.h"

 #include "tensorflow_serving/core/loader.h"

 #include "tensorflow_serving/core/loader_harness.h"

 #include "tensorflow_serving/core/manager.h"

 #include "tensorflow_serving/core/servable_data.h"

 #include "tensorflow_serving/core/servable_handle.h"

 #include "tensorflow_serving/core/servable_id.h"

 #include "tensorflow_serving/core/servable_state.h"

 #include "tensorflow_serving/resources/resource_tracker.h"

 #include "tensorflow_serving/util/event_bus.h"

 #include "tensorflow_serving/util/executor.h"

 #include "tensorflow_serving/util/fast_read_dynamic_ptr.h"


 namespace tensorflow {

 namespace serving {


 namespace test_util {

 class BasicManagerTestAccess;

 }  // namespace test_util


 // A Manager implementation in which servables are added, loaded, unloaded and

 // removed through explicit calls: ManageServable(), LoadServable(),

 // UnloadServable() and StopManagingServable().

 //

 // Internally it keeps every managed servable in 'managed_map_' and keeps the

 // subset that is ready to be served in 'serving_map_'.

 class BasicManager : public Manager {

  public:

   // Type of the callback to be called just before a servable is to be loaded.

   // The callback takes a ServableId.

   using PreLoadHook = std::function<void(const ServableId&)>;


   // Configuration for a BasicManager; passed by value to Create().

   struct Options {

     // The resource tracker to use while managing servable resources. Optional.

     // If left as nullptr, we do not validate servable resource usage.

     std::unique_ptr<ResourceTracker> resource_tracker;


     // The number of threads in the thread-pool used to load servables.

     //

     // If set as 0, we don't use a thread-pool, and LoadServable() blocks.

     uint32 num_load_threads = 0;


     // The number of threads in the thread-pool used to unload servables.

     //

     // If set as 0, we don't use a thread-pool, and UnloadServable() blocks.

     uint32 num_unload_threads = 0;


     // Defines how we want to retry when model loading fails.

     // NOTE(review): presumably invoked with the status of the failed load and

     // returns true to retry -- confirm against the .cc file.

     std::function<bool(absl::Status)> should_retry_model_load;


     // EventBus to publish servable state changes. This is optional; if unset,

     // we don't publish.

     EventBus<ServableState>* servable_event_bus = nullptr;


     // Maximum number of times we retry loading a servable, after the first

     // failure, before we give up.

     //

     // If set to 0, a load is attempted only once.

     uint32 max_num_load_retries = 5;


     // The interval, in microseconds, between each servable load retry. If set

     // negative, we don't wait.

     // Default: 1 minute.

     int64_t load_retry_interval_micros = 1LL * 60 * 1000 * 1000;


     // If true, and there are not multiple load threads, filesystem caches will

     // be flushed after each servable is loaded. (Cache flush is skipped when

     // multiple load threads are active, in order to avoid setting back a

     // concurrent load on another thread.)

     bool flush_filesystem_caches = false;


     // The environment to use for starting threads in the thread-pool.

     Env* env = Env::Default();


     // Callback to be called just before a servable is to be loaded. This will

     // be called on the same manager load thread which starts the load.

     PreLoadHook pre_load_hook;

   };

   // Factory for BasicManager (the constructor is private). Takes 'options' by

   // value and hands the new manager back through the 'manager' out-parameter;

   // the returned Status reports the outcome. Defined out of line, so the

   // possible error conditions are not visible from this header.

   static Status Create(Options options, std::unique_ptr<BasicManager>* manager);


   ~BasicManager() override;


   // Manager override; defined out of line.

   std::vector<ServableId> ListAvailableServableIds() const override;


   // Manager override; defined out of line. The handle is returned through

   // 'untyped_handle'.

   Status GetUntypedServableHandle(

       const ServableRequest& request,

       std::unique_ptr<UntypedServableHandle>* untyped_handle) override;


   // Manager override; defined out of line.

   std::map<ServableId, std::unique_ptr<UntypedServableHandle>>

   GetAvailableUntypedServableHandles() const override;


   // Hands 'servable' to the manager.

   // NOTE(review): presumably forwards to ManageServableInternal() in the same

   // way as ManageServableWithAdditionalState() -- confirm in the .cc file.

   Status ManageServable(ServableData<std::unique_ptr<Loader>> servable);


   // Like ManageServable(), but also attaches 'additional_state' to the

   // LoaderHarness created for the servable (see the definition at the bottom

   // of this header). The state can later be fetched with

   // GetAdditionalServableState<T>().

   template <typename T>

   Status ManageServableWithAdditionalState(

       ServableData<std::unique_ptr<Loader>> servable,

       std::unique_ptr<T> additional_state);


   // Counterpart of ManageServable() for the servable identified by 'id'.

   // Defined out of line; preconditions are not visible from this header.

   Status StopManagingServable(const ServableId& id);


   // Defined out of line.

   // NOTE(review): presumably the distinct keys of 'managed_map_', which are

   // servable names -- confirm in the .cc file.

   std::vector<string> GetManagedServableNames() const;


   // Returns one state snapshot per harness stored in 'managed_map_' under

   // 'servable_name'; the result is empty if there is none. Acquires 'mu_'.

   // 'T' is forwarded to LoaderHarness::loader_state_snapshot<T>().

   template <typename T = std::nullptr_t>

   std::vector<ServableStateSnapshot<T>> GetManagedServableStateSnapshots(

       const string& servable_name) const;


   // Returns the state snapshot of the servable identified by 'id', or

   // absl::nullopt if no harness with that id is managed. Acquires 'mu_'.

   template <typename T = std::nullptr_t>

   absl::optional<ServableStateSnapshot<T>> GetManagedServableStateSnapshot(

       const ServableId& id);


   // Returns LoaderHarness::additional_state<T>() for the servable identified

   // by 'id'. If the servable is not managed, this DCHECK-fails and returns

   // nullptr. Acquires 'mu_'.

   template <typename T>

   T* GetAdditionalServableState(const ServableId& id);


   // Callback type accepted by LoadServable() and UnloadServable(); it is

   // handed a Status.

   using DoneCallback = std::function<void(const Status& status)>;


   // Requests a load of the servable identified by 'id'. Per Options, this

   // blocks when 'num_load_threads' is 0. Defined out of line.

   void LoadServable(const ServableId& id, DoneCallback done_callback);


   // Defined out of line.

   // NOTE(review): presumably stops further load retries (see

   // Options::max_num_load_retries) for 'id' -- confirm in the .cc file.

   void CancelLoadServableRetry(const ServableId& id);


   // Requests an unload of the servable identified by 'id'. Per Options, this

   // blocks when 'num_unload_threads' is 0. Defined out of line.

   void UnloadServable(const ServableId& id, DoneCallback done_callback);


  private:

   friend class AspiredVersionsManager;

   friend class test_util::BasicManagerTestAccess;


   // Private constructor; use Create(). The parameters carry the same names as

   // the fields of Options.

   BasicManager(Env* env, uint32 num_load_threads, uint32 num_unload_threads,

                uint32 max_num_load_retries,

                std::function<bool(absl::Status)> should_retry_model_load,

                int64_t load_retry_interval_micros, bool flush_filesystem_caches,

                std::unique_ptr<ResourceTracker> resource_tracker,

                EventBus<ServableState>* servable_event_bus,

                PreLoadHook pre_load_hook);


   // Starts managing the servable.

   //

   // If called multiple times with the same servable id, all of them are

   // accepted, but only the first one is used. We accept the servable even if

   // called with erroneous ServableData.

   //

   // Also accepts a closure to create the harness as a shared_ptr. The harness

   // has different constructors for creating it with or without

   // additional_state.

   Status ManageServableInternal(ServableData<std::unique_ptr<Loader>> servable,

                                 std::function<std::shared_ptr<LoaderHarness>(

                                     const ServableId&, std::unique_ptr<Loader>)>

                                     harness_creator);


   // Obtains the harness associated with the given servable id. Returns an ok

   // status if a corresponding harness was found, else an error status.

   Status GetHealthyHarness(const ServableId& servable_id,

                            LoaderHarness** harness)

       TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);


   // Obtains a pointer to every managed loader that is currently holding

   // resources, i.e. whose state is one of kApprovedForLoading, kLoading,

   // kReady, kUnloadRequested, kQuiescing, kQuiesced or kUnloading.

   std::vector<const Loader*> GetLoadersCurrentlyUsingResources() const

       TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);


   // A load or unload request for a particular servable. Facilitates code

   // sharing across the two cases.

   struct LoadOrUnloadRequest {

     enum class Kind { kLoad, kUnload };

     Kind kind;

     ServableId servable_id;

   };


   // A unification of LoadServable() and UnloadServable().

   void LoadOrUnloadServable(const LoadOrUnloadRequest& request,

                             DoneCallback done_callback) TF_LOCKS_EXCLUDED(mu_);


   // The synchronous logic for handling a load/unload request, including both

   // the decision and execution phases. This is the method run in the executor.

   void HandleLoadOrUnloadRequest(const LoadOrUnloadRequest& request,

                                  DoneCallback done_callback)

       TF_LOCKS_EXCLUDED(mu_);


   // The decision phase of whether to approve a load/unload request. Delegates

   // to one of ApproveLoad() or ApproveUnload() -- see those methods' comments

   // for details.

   //

   // Upon approving the request, signals entrance to the execution phase by

   // incrementing 'num_ongoing_load_unload_executions_'.

   //

   // If returning "ok", populates 'harness' with the harness for the request's

   // servable. (Note that 'harness' is guaranteed to remain live for the

   // subsequent execution phase of the request because approval of this request

   // precludes concurrent execution of another request that could delete the

   // harness.)

   Status ApproveLoadOrUnload(const LoadOrUnloadRequest& request,

                              LoaderHarness** harness) TF_LOCKS_EXCLUDED(mu_);


   // The decision phase of whether to approve a load request.

   //

   // If it succeeds, places the servable into state kApprovedForLoad. Among

   // other things, that prevents a subsequent load request from proceeding

   // concurrently.

   // NOTE(review): the comment on GetLoadersCurrentlyUsingResources() calls

   // this state kApprovedForLoading; check the actual enumerator name in

   // LoaderHarness and make both comments agree.

   //

   // Argument 'mu_lock' is a lock held on 'mu_'. It is released temporarily via

   // 'num_ongoing_load_unload_executions_cv_'.

   Status ApproveLoad(LoaderHarness* harness, mutex_lock* mu_lock)

       TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);


   // The decision phase of whether to approve an unload request. If it succeeds,

   // places the servable into state kQuiescing. Among other things, that

   // prevents a subsequent unload request from proceeding concurrently.

   Status ApproveUnload(LoaderHarness* harness) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);


   // Attempts to reserve the resources required to load the servable in

   // 'harness'. Does not make any state transitions on 'harness' -- merely

   // reserves the resources in 'resource_tracker_' (upon success) or returns an

   // error.

   //

   // Argument 'mu_lock' is a lock held on 'mu_'. It is released temporarily via

   // 'num_ongoing_load_unload_executions_cv_'.

   Status ReserveResources(LoaderHarness* harness, mutex_lock* mu_lock)

       TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);


   // The execution phase of loading/unloading a servable. Delegates to either

   // ExecuteLoad() or ExecuteUnload().

   //

   // Upon completion (and regardless of the outcome), signals exit of the

   // execution phase by decrementing 'num_ongoing_load_unload_executions_'.

   Status ExecuteLoadOrUnload(const LoadOrUnloadRequest& request,

                              LoaderHarness* harness);


   // The execution phase of loading a servable.

   Status ExecuteLoad(LoaderHarness* harness) TF_LOCKS_EXCLUDED(mu_);


   // The execution phase of unloading a servable.

   Status ExecuteUnload(LoaderHarness* harness) TF_LOCKS_EXCLUDED(mu_);


   // Unloads all the managed servables.

   Status UnloadAllServables() TF_LOCKS_EXCLUDED(mu_);


   // Updates the serving map by copying servables from the managed map, which

   // are ready to be served.

   void UpdateServingMap() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);


   // Sets the number of load threads.

   //

   // When either existing or target num_load_threads means single thread, we

   // block all new load requests while the old thread pool is destructed, a new

   // one is created and then swapped with the old one. Note that destructing

   // the old thread pool blocks until all threads are done, so it could block

   // for a long time.

   //

   // When both existing and target num_load_threads are multi-threaded, this

   // call still blocks until the old thread pool is destructed, but other loads

   // can happen concurrently, potentially increasing the number of running load

   // threads, up to the sum of existing and target num_load_threads.

   void SetNumLoadThreads(uint32 num_load_threads)

       TF_LOCKS_EXCLUDED(load_executor_mu_);

   // Accessor paired with SetNumLoadThreads(); defined out of line.

   uint32 num_load_threads() const;


   // Keys are the servable names.

   // Values are the harnesses for each servable version. The values when

   // fetched, are unordered.

   using ManagedMap =

       std::unordered_multimap<string, std::shared_ptr<LoaderHarness>>;


   // Fetches the harness with this id from the managed_map_. Returns

   // managed_map_.end(), if the harness is not found.

   ManagedMap::iterator FindHarnessInMap(const ServableId& id)

       TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);


   // Publishes the state on the event bus, if an event bus was part of the

   // options, if not we ignore it.

   void PublishOnEventBus(const ServableState& state);


   // Options passed to each LoaderHarness that

   // ManageServableWithAdditionalState() creates.

   LoaderHarness::Options harness_options_;


   // The event bus to which to publish servable state change events, or nullptr

   // if no bus has been configured.

   EventBus<ServableState>* servable_event_bus_;


   // Defines how we want to retry when model loading fails.

   std::function<bool(absl::Status)> should_retry_model_load_;


   // Used to protect access to 'managed_map_', 'resource_tracker_' and other

   // core state elements.

   mutable mutex mu_;


   // ManagedMap contains all the servables managed by this manager, in different

   // states.

   ManagedMap managed_map_ TF_GUARDED_BY(mu_);


   // ServingMap contains all the servables which are ready to be served, which

   // is a subset of those in the managed map.

   // This map is updated occasionally from the main manager loop thread while

   // being accessed from multiple threads to get ServableHandles.

   //

   // This class is thread-safe.

   class ServingMap {

    public:

     ServingMap();


     // Gets a list of all servable ids.

     std::vector<ServableId> ListAvailableServableIds() const;


     // Returns an UntypedServableHandle given a ServableRequest.

     // Returns error if no such Servable is available -- e.g. not yet loaded,

     // has been quiesced/unloaded, etc.

     Status GetUntypedServableHandle(

         const ServableRequest& request,

         std::unique_ptr<UntypedServableHandle>* untyped_handle);


     // Returns a map of all the currently available servable_ids to their

     // corresponding UntypedServableHandles.

     std::map<ServableId, std::unique_ptr<UntypedServableHandle>>

     GetAvailableUntypedServableHandles() const;


     // Updates the serving map by copying servables from the managed map, which

     // are ready to be served.

     void Update(const ManagedMap& managed_map);


    private:

     // Equality functor for ServableRequest; used as the key-equality argument

     // of HandlesMap below. Forward-declared here like HashRequest.

     struct EqRequest;

     // Hash and equality functors for ServableRequest.

     // Forward-declared here and defined in the cc file.

     struct HashRequest;


     // Map from ServableRequest to corresponding harness. For the latest version

     // of a servable stream, we add an extra entry for it, where key is the

     // ServableRequest without the version set, so that requests for the latest,

     // can be directly queried on this map.

     using HandlesMap =

         std::unordered_multimap<ServableRequest,

                                 std::shared_ptr<const LoaderHarness>,

                                 HashRequest, EqRequest>;

     FastReadDynamicPtr<HandlesMap> handles_map_;

   };

   ServingMap serving_map_;


   // State associated with loading/unloading servables, and tracking their

   // resources.

   //

   // Load/unload requests have two phases: a decision phase and an execution

   // phase. The decision phase either accepts or rejects the request; if

   // accepted the execution phase executes the request (i.e. invokes Load() or

   // Unload() on the servable's loader).

   //

   // Given a stream of load/unload requests, we execute the decision phases

   // serially, which guarantees that request i's decision phase can complete

   // before considering request i+1's so there's no starvation.


   Env* const env_;


   // The number of load threads. Can be changed after instantiation of the

   // manager via SetNumLoadThreads().

   std::atomic<uint32> num_load_threads_;

   // Whether to flush filesystem caches after a servable is loaded. Per

   // Options::flush_filesystem_caches, the flush only happens when there are

   // not multiple load threads.

   const bool flush_filesystem_caches_ = false;

   // The executor (and associated mutex) used for executing loads of servables.

   mutable mutex load_executor_mu_;

   std::unique_ptr<Executor> load_executor_ TF_GUARDED_BY(load_executor_mu_);


   // The executor used for executing unloads of servables. (Unlike for loads,

   // the unload executor is fixed for the lifetime of the manager.)

   std::unique_ptr<Executor> unload_executor_;


   // Used to serialize the decision phases of the load/unload requests.

   mutable mutex load_unload_decision_phase_mu_;


   // A module that keeps track of available, used and reserved servable

   // resources (e.g. RAM).

   std::unique_ptr<ResourceTracker> resource_tracker_ TF_GUARDED_BY(mu_);


   // The number of load/unload requests currently in their execution phase.

   int num_ongoing_load_unload_executions_ TF_GUARDED_BY(mu_) = 0;


   // Used to wake up threads that are waiting for

   // 'num_ongoing_load_unload_executions_' to decrease.

   condition_variable num_ongoing_load_unload_executions_cv_;


   // Callback to be called just before a servable is to be loaded; see

   // PreLoadHook and Options::pre_load_hook.

   PreLoadHook pre_load_hook_;


   TF_DISALLOW_COPY_AND_ASSIGN(BasicManager);

 };


 // Implementation details. API readers may skip.


 // Starts managing 'servable' and attaches 'additional_state' to the
 // LoaderHarness that gets created for it.
 //
 // The work is delegated to ManageServableInternal(); this function only
 // supplies the harness factory. The factory refers to 'additional_state' by
 // reference because it is wrapped in a std::function, which needs a copyable
 // callable and so cannot own the unique_ptr itself. As a consequence the
 // factory is only valid while this call is in progress, and the state is moved
 // out of 'additional_state' the first time the factory runs.
 template <typename T>
 Status BasicManager::ManageServableWithAdditionalState(
     ServableData<std::unique_ptr<Loader>> servable,
     std::unique_ptr<T> additional_state) {
   const auto make_harness = [this, &additional_state](
                                 const ServableId& servable_id,
                                 std::unique_ptr<Loader> servable_loader) {
     return std::make_shared<LoaderHarness>(
         servable_id, std::move(servable_loader), std::move(additional_state),
         harness_options_);
   };
   return ManageServableInternal(std::move(servable), make_harness);
 }


 // Collects a state snapshot from every harness that 'managed_map_' holds under
 // 'servable_name'. The result is empty when the name is not managed. Because
 // the map is unordered, the snapshots come back in no particular order.
 //
 // Holds 'mu_' for the duration of the call.
 template <typename T>
 std::vector<ServableStateSnapshot<T>>
 BasicManager::GetManagedServableStateSnapshots(
     const string& servable_name) const {
   mutex_lock lock(mu_);

   // [first, second) spans the entries stored under this servable name.
   const auto versions = managed_map_.equal_range(servable_name);
   auto harness_it = versions.first;
   const auto harness_end = versions.second;

   std::vector<ServableStateSnapshot<T>> snapshots;
   snapshots.reserve(std::distance(harness_it, harness_end));
   while (harness_it != harness_end) {
     snapshots.push_back(harness_it->second->loader_state_snapshot<T>());
     ++harness_it;
   }
   return snapshots;
 }


 // Looks up the harness for 'id' and returns its state snapshot, or
 // absl::nullopt when no harness with that id is being managed.
 //
 // Holds 'mu_' for the duration of the call.
 template <typename T>
 absl::optional<ServableStateSnapshot<T>>
 BasicManager::GetManagedServableStateSnapshot(const ServableId& id) {
   mutex_lock lock(mu_);

   const auto harness_it = FindHarnessInMap(id);
   const bool is_managed = harness_it != managed_map_.end();
   if (is_managed) {
     return harness_it->second->loader_state_snapshot<T>();
   }
   return absl::nullopt;
 }


 // Returns the additional state (as a 'T*') that the harness for 'id' exposes
 // through LoaderHarness::additional_state<T>().
 //
 // Asking about a servable that is not managed is treated as a programming
 // error: it trips a DCHECK, and nullptr is returned on the path that continues
 // past the DCHECK.
 //
 // Holds 'mu_' for the duration of the call.
 template <typename T>
 T* BasicManager::GetAdditionalServableState(const ServableId& id) {
   mutex_lock lock(mu_);

   const auto harness_it = FindHarnessInMap(id);
   if (harness_it != managed_map_.end()) {
     return harness_it->second->additional_state<T>();
   }

   // NOTE(review): the message misspells "manager"; it is runtime text, so it
   // is left untouched here.
   DCHECK(false) << "This servable is not being managed by the mananger: "
                 << id.DebugString();
   return nullptr;
 }


 }  // namespace serving

 }  // namespace tensorflow


 #endif  // TENSORFLOW_SERVING_CORE_BASIC_MANAGER_H_

tensorflow::serving::AspiredVersionsManager
Definition: aspired_versions_manager.h:86

tensorflow::serving::BasicManager
Definition: basic_manager.h:106

tensorflow::serving::BasicManager::LoadServable
void LoadServable(const ServableId &id, DoneCallback done_callback)
Definition: basic_manager.cc:523

tensorflow::serving::BasicManager::GetManagedServableStateSnapshots
std::vector< ServableStateSnapshot< T > > GetManagedServableStateSnapshots(const string &servable_name) const
Definition: basic_manager.h:551

tensorflow::serving::BasicManager::StopManagingServable
Status StopManagingServable(const ServableId &id)
Definition: basic_manager.cc:390

tensorflow::serving::BasicManager::ListAvailableServableIds
std::vector< ServableId > ListAvailableServableIds() const override
Definition: basic_manager.cc:311

tensorflow::serving::BasicManager::~BasicManager
~BasicManager() override
Definition: basic_manager.cc:270

tensorflow::serving::BasicManager::ManageServableWithAdditionalState
Status ManageServableWithAdditionalState(ServableData< std::unique_ptr< Loader >> servable, std::unique_ptr< T > additional_state)
Definition: basic_manager.h:536

tensorflow::serving::BasicManager::CancelLoadServableRetry
void CancelLoadServableRetry(const ServableId &id)
Definition: basic_manager.cc:532

tensorflow::serving::BasicManager::DoneCallback
std::function< void(const Status &status)> DoneCallback
Definition: basic_manager.h:232

tensorflow::serving::BasicManager::UnloadServable
void UnloadServable(const ServableId &id, DoneCallback done_callback)
Definition: basic_manager.cc:563

tensorflow::serving::BasicManager::GetAdditionalServableState
T * GetAdditionalServableState(const ServableId &id)
Definition: basic_manager.h:578

tensorflow::serving::BasicManager::ManageServable
Status ManageServable(ServableData< std::unique_ptr< Loader >> servable)
Definition: basic_manager.cc:380

tensorflow::serving::BasicManager::GetManagedServableStateSnapshot
absl::optional< ServableStateSnapshot< T > > GetManagedServableStateSnapshot(const ServableId &id)
Definition: basic_manager.h:567

tensorflow::serving::BasicManager::GetManagedServableNames
std::vector< string > GetManagedServableNames() const
Definition: basic_manager.cc:477

tensorflow::serving::EventBus
Definition: event_bus.h:63

tensorflow::serving::LoaderHarness
Definition: loader_harness.h:49

tensorflow::serving::Manager
Definition: manager.h:77

tensorflow::serving::ServableData
Definition: servable_data.h:32

tensorflow::serving::test_util::BasicManagerTestAccess
Definition: manager_test_util.h:55

tensorflow::serving::BasicManager::Options
Definition: basic_manager.h:113

tensorflow::serving::ServableId
Definition: servable_id.h:33

tensorflow::serving::ServableRequest
Definition: manager.h:39