TensorFlow Serving C++ API Documentation
basic_manager.h
1 /* Copyright 2016 Google Inc. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7  http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_SERVING_CORE_BASIC_MANAGER_H_
17 #define TENSORFLOW_SERVING_CORE_BASIC_MANAGER_H_
18 
19 #include <atomic>
20 #include <memory>
21 #include <string>
22 #include <unordered_map>
23 #include <vector>
24 
25 #include "absl/types/optional.h"
26 #include "tensorflow/core/lib/core/status.h"
27 #include "tensorflow/core/lib/core/stringpiece.h"
28 #include "tensorflow/core/lib/hash/hash.h"
29 #include "tensorflow/core/platform/env.h"
30 #include "tensorflow/core/platform/mutex.h"
31 #include "tensorflow/core/platform/thread_annotations.h"
32 #include "tensorflow/core/platform/types.h"
33 #include "tensorflow_serving/core/loader.h"
34 #include "tensorflow_serving/core/loader_harness.h"
35 #include "tensorflow_serving/core/manager.h"
36 #include "tensorflow_serving/core/servable_data.h"
37 #include "tensorflow_serving/core/servable_handle.h"
38 #include "tensorflow_serving/core/servable_id.h"
39 #include "tensorflow_serving/core/servable_state.h"
40 #include "tensorflow_serving/resources/resource_tracker.h"
41 #include "tensorflow_serving/util/event_bus.h"
42 #include "tensorflow_serving/util/executor.h"
43 #include "tensorflow_serving/util/fast_read_dynamic_ptr.h"
44 
45 namespace tensorflow {
46 namespace serving {
47 
48 namespace test_util {
49 class BasicManagerTestAccess;
50 } // namespace test_util
51 
106 class BasicManager : public Manager {
107  public:
108  // Type of the callback to be called just before a servable is to be loaded.
109  using PreLoadHook = std::function<void(const ServableId&)>;
110 
113  struct Options {
114  // The resource tracker to use while managing servable resources. Optional.
115  // If left as nullptr, we do not validate servable resource usage.
116  std::unique_ptr<ResourceTracker> resource_tracker;
117 
118  // The number of threads in the thread-pool used to load servables.
119  //
120  // If set as 0, we don't use a thread-pool, and LoadServable() blocks.
121  uint32 num_load_threads = 0;
122 
123  // The number of threads in the thread-pool used to unload servables.
124  //
125  // If set as 0, we don't use a thread-pool, and UnloadServable() blocks.
126  uint32 num_unload_threads = 0;
127 
128  // Defines how we want to retry when model loading fails.
129  std::function<bool(absl::Status)> should_retry_model_load;
130 
131  // EventBus to publish servable state changes. This is optional, if unset,
132  // we don't publish.
133  EventBus<ServableState>* servable_event_bus = nullptr;
134 
135  // Maximum number of times we retry loading a servable, after the first
136  // failure, before we give up.
137  //
138  // If set to 0, a load is attempted only once.
139  uint32 max_num_load_retries = 5;
140 
141  // The interval, in microseconds, between each servable load retry. If set
142  // negative, we don't wait.
143  // Default: 1 minute.
144  int64_t load_retry_interval_micros = 1LL * 60 * 1000 * 1000;
145 
146  // If true, and there are not multiple load threads, filesystem caches will
147  // be flushed after each servable is loaded. (Cache flush is skipped when
148  // multiple load threads are active, in order to avoid setting back a
149  // concurrent load on another thread.)
150  bool flush_filesystem_caches = false;
151 
152  // The environment to use for starting threads in the thread-pool.
153  Env* env = Env::Default();
154 
155  // Callback to be called just before a servable is to be loaded. This will
156  // called on the same manager load thread which starts the load.
157  PreLoadHook pre_load_hook;
158  };
159  static Status Create(Options options, std::unique_ptr<BasicManager>* manager);
160 
163  ~BasicManager() override;
164 
165  std::vector<ServableId> ListAvailableServableIds() const override;
166 
167  Status GetUntypedServableHandle(
168  const ServableRequest& request,
169  std::unique_ptr<UntypedServableHandle>* untyped_handle) override;
170 
171  std::map<ServableId, std::unique_ptr<UntypedServableHandle>>
172  GetAvailableUntypedServableHandles() const override;
173 
184  Status ManageServable(ServableData<std::unique_ptr<Loader>> servable);
185 
190  template <typename T>
192  ServableData<std::unique_ptr<Loader>> servable,
193  std::unique_ptr<T> additional_state);
194 
198  Status StopManagingServable(const ServableId& id);
199 
202  std::vector<string> GetManagedServableNames() const;
203 
208  template <typename T = std::nullptr_t>
209  std::vector<ServableStateSnapshot<T>> GetManagedServableStateSnapshots(
210  const string& servable_name) const;
211 
217  template <typename T = std::nullptr_t>
218  absl::optional<ServableStateSnapshot<T>> GetManagedServableStateSnapshot(
219  const ServableId& id);
220 
227  template <typename T>
229 
232  using DoneCallback = std::function<void(const Status& status)>;
233 
246  void LoadServable(const ServableId& id, DoneCallback done_callback);
247 
255  void CancelLoadServableRetry(const ServableId& id);
256 
269  void UnloadServable(const ServableId& id, DoneCallback done_callback);
270 
271  private:
272  friend class AspiredVersionsManager;
274 
275  BasicManager(Env* env, uint32 num_load_threads, uint32 num_unload_threads,
276  uint32 max_num_load_retries,
277  std::function<bool(absl::Status)> should_retry_model_load,
278  int64_t load_retry_interval_micros, bool flush_filesystem_caches,
279  std::unique_ptr<ResourceTracker> resource_tracker,
280  EventBus<ServableState>* servable_event_bus,
281  PreLoadHook pre_load_hook);
282 
283  // Starts managing the servable.
284  //
285  // If called multiple times with the same servable id, all of them are
286  // accepted, but only the first one is used. We accept the servable even if
287  // called with erroneous ServableData.
288  //
289  // Also accepts a closure to create the harness as a shared_ptr. The harness
290  // has a different constructors for creating it with or without
291  // additional_state.
292  Status ManageServableInternal(ServableData<std::unique_ptr<Loader>> servable,
293  std::function<std::shared_ptr<LoaderHarness>(
294  const ServableId&, std::unique_ptr<Loader>)>
295  harness_creator);
296 
297  // Obtains the harness associated with the given servable id. Returns an ok
298  // status if a corresponding harness was found, else an error status.
299  Status GetHealthyHarness(const ServableId& servable_id,
300  LoaderHarness** harness)
301  TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
302 
303  // Obtains a pointer to every managed loader that is currently holding
304  // resources, i.e. whose state is one of kApprovedForLoading, kLoading,
305  // kReady, kUnloadRequested, kQuiescing, kQuiesced or kUnloading.
306  std::vector<const Loader*> GetLoadersCurrentlyUsingResources() const
307  TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
308 
309  // A load or unload request for a particular servable. Facilitates code
310  // sharing across the two cases.
311  struct LoadOrUnloadRequest {
312  enum class Kind { kLoad, kUnload };
313  Kind kind;
314  ServableId servable_id;
315  };
316 
317  // A unification of LoadServable() and UnloadServable().
318  void LoadOrUnloadServable(const LoadOrUnloadRequest& request,
319  DoneCallback done_callback) TF_LOCKS_EXCLUDED(mu_);
320 
321  // The synchronous logic for handling a load/unload request, including both
322  // the decision and execution phases. This is the method run in the executor.
323  void HandleLoadOrUnloadRequest(const LoadOrUnloadRequest& request,
324  DoneCallback done_callback)
325  TF_LOCKS_EXCLUDED(mu_);
326 
327  // The decision phase of whether to approve a load/unload request. Delegates
328  // to one of ApproveLoad() or ApproveUnload() -- see those methods' comments
329  // for details.
330  //
331  // Upon approving the request, signals entrance to the execution phase by
332  // incrementing 'num_ongoing_load_unload_executions_'.
333  //
334  // If returning "ok", populates 'harness' with the harness for the request's
335  // servable. (Note that 'harness' is guaranteed to remain live for the
336  // subsequent execution phase of the request because approval of this request
337  // precludes concurrent execution of another request that could delete the
338  // harness.)
339  Status ApproveLoadOrUnload(const LoadOrUnloadRequest& request,
340  LoaderHarness** harness) TF_LOCKS_EXCLUDED(mu_);
341 
342  // The decision phase of whether to approve a load request.
343  //
344  // If it succeeds, places the servable into state kApprovedForLoad. Among
345  // other things, that prevents a subsequent load request from proceeding
346  // concurrently.
347  //
348  // Argument 'mu_lock' is a lock held on 'mu_'. It is released temporarily via
349  // 'num_ongoing_load_unload_executions_cv_'.
350  Status ApproveLoad(LoaderHarness* harness, mutex_lock* mu_lock)
351  TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
352 
353  // The decision phase of whether to approve an unload request. If it succeeds,
354  // places the servable into state kQuiescing. Among other things, that
355  // prevents a subsequent unload request from proceeding concurrently.
356  Status ApproveUnload(LoaderHarness* harness) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
357 
358  // Attempts to reserve the resources required to load the servable in
359  // 'harness'. Does not make any state transitions on 'harness' -- merely
360  // reserves the resources in 'resource_tracker_' (upon success) or returns an
361  // error.
362  //
363  // Argument 'mu_lock' is a lock held on 'mu_'. It is released temporarily via
364  // 'num_ongoing_load_unload_executions_cv_'.
365  Status ReserveResources(LoaderHarness* harness, mutex_lock* mu_lock)
366  TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
367 
368  // The execution phase of loading/unloading a servable. Delegates to either
369  // ExecuteLoad() or ExecuteUnload().
370  //
371  // Upon completion (and regardless of the outcome), signals exit of the
372  // execution phase by decrementing 'num_ongoing_load_unload_executions_'.
373  Status ExecuteLoadOrUnload(const LoadOrUnloadRequest& request,
374  LoaderHarness* harness);
375 
376  // The execution phase of loading a servable.
377  Status ExecuteLoad(LoaderHarness* harness) TF_LOCKS_EXCLUDED(mu_);
378 
379  // The execution phase of loading a unservable.
380  Status ExecuteUnload(LoaderHarness* harness) TF_LOCKS_EXCLUDED(mu_);
381 
382  // Unloads all the managed servables.
383  Status UnloadAllServables() TF_LOCKS_EXCLUDED(mu_);
384 
385  // Updates the serving map by copying servables from the managed map, which
386  // are ready to be served.
387  void UpdateServingMap() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
388 
389  // Sets the number of load threads.
390  //
391  // When either existing or target num_load_threads means single thread, we
392  // block all new load requests while the old thread pool is destructed, a new
393  // one is created and then swapped with the old one. Note that destructing
394  // the old thread pool blocks until all threads are done, so it could block
395  // for a long time.
396  //
397  // When both existing and target num_load_threads are multi-threaded, this
398  // call still blocks until the old thread pool is destructed, but other loads
399  // can happen concurrently, potentially increasing the number of running load
400  // threads, up to the sum of existing and target num_load_threads.
401  void SetNumLoadThreads(uint32 num_load_threads)
402  TF_LOCKS_EXCLUDED(load_executor_mu_);
403  uint32 num_load_threads() const;
404 
405  // Keys are the servable names.
406  // Values are the harnesses for each servable version. The values when
407  // fetched, are unordered.
408  using ManagedMap =
409  std::unordered_multimap<string, std::shared_ptr<LoaderHarness>>;
410 
411  // Fetches the harness with this id from the harness_map_. Returns
412  // harness_map_.end(), if the harness is not found.
413  ManagedMap::iterator FindHarnessInMap(const ServableId& id)
414  TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
415 
416  // Publishes the state on the event bus, if an event bus was part of the
417  // options, if not we ignore it.
418  void PublishOnEventBus(const ServableState& state);
419 
420  LoaderHarness::Options harness_options_;
421 
422  // The event bus to which to publish servable state change events, or nullptr
423  // if no bus has been configured.
424  EventBus<ServableState>* servable_event_bus_;
425 
426  // Defines how we want to retry when model loading fails.
427  std::function<bool(absl::Status)> should_retry_model_load_;
428 
429  // Used to protect access to 'managed_map_', 'resource_tracker_' and other
430  // core state elements.
431  mutable mutex mu_;
432 
433  // ManagedMap contains all the servables managed by this manager, in different
434  // states.
435  ManagedMap managed_map_ TF_GUARDED_BY(mu_);
436 
437  // ServingMap contains all the servables which are ready to be served, which
438  // is a subset of those in the managed map.
439  // This map is updated occasionally from the main manager loop thread while
440  // being accessed from multiple threads to get ServableHandles.
441  //
442  // This class is thread-safe.
443  class ServingMap {
444  public:
445  ServingMap();
446 
447  // Gets a list of all servable ids.
448  std::vector<ServableId> ListAvailableServableIds() const;
449 
450  // Returns an UntypedServableHandle given a ServableRequest.
451  // Returns error if no such Servable is available -- e.g. not yet loaded,
452  // has been quiesced/unloaded, etc.
453  Status GetUntypedServableHandle(
454  const ServableRequest& request,
455  std::unique_ptr<UntypedServableHandle>* untyped_handle);
456 
457  // Returns a map of all the currently available servable_ids to their
458  // corresponding UntypedServableHandles.
459  std::map<ServableId, std::unique_ptr<UntypedServableHandle>>
460  GetAvailableUntypedServableHandles() const;
461 
462  // Updates the serving map by copying servables from the managed map, which
463  // are ready to be served.
464  void Update(const ManagedMap& managed_map);
465 
466  private:
467  struct EqRequest;
468  // Hash and equality functors for ServableRequest.
469  // Forward-declared here and defined in the cc file.
470  struct HashRequest;
471 
472  // Map from ServableRequest to corresponding harness. For the latest version
473  // of a servable stream, we add an extra entry for it, where key is the
474  // ServableRequest without the version set, so that requests for the latest,
475  // can be directly queried on this map.
476  using HandlesMap =
477  std::unordered_multimap<ServableRequest,
478  std::shared_ptr<const LoaderHarness>,
479  HashRequest, EqRequest>;
480  FastReadDynamicPtr<HandlesMap> handles_map_;
481  };
482  ServingMap serving_map_;
483 
485  // State associated with loading/unloading servables, and tracking their
486  // resources.
487  //
488  // Load/unload requests have two phases: a decision phase and an execution
489  // phase. The decision phase either accepts or rejects the request; if
490  // accepted the execution phase executes the request (i.e. invokes Load() or
491  // Unload() on the servable's loader).
492  //
493  // Given a stream of load/unload requests, we execute the decision phases
494  // serially, which guarantees that request i’s decision phase can complete
495  // before considering request i+1's so there’s no starvation.
496 
497  Env* const env_;
498 
499  // The number of load threads. Can be changed after instantiation of the
500  // manager via SetNumLoadThreads().
501  std::atomic<uint32> num_load_threads_;
502  // Whether to flush filesystem caches (if num_load_threads_ == 1)
503  const bool flush_filesystem_caches_ = false;
504  // The executor (and associated mutex) used for executing loads of servables.
505  mutable mutex load_executor_mu_;
506  std::unique_ptr<Executor> load_executor_ TF_GUARDED_BY(load_executor_mu_);
507 
508  // The executor used for executing unloads of servables. (Unlike for loads,
509  // the unload executor is fixed for the lifetime of the manager.)
510  std::unique_ptr<Executor> unload_executor_;
511 
512  // Used to serialize the decision phases of the load/unload requests.
513  mutable mutex load_unload_decision_phase_mu_;
514 
515  // A module that keeps track of available, used and reserved servable
516  // resources (e.g. RAM).
517  std::unique_ptr<ResourceTracker> resource_tracker_ TF_GUARDED_BY(mu_);
518 
519  // The number of load/unload requests currently in their execution phase.
520  int num_ongoing_load_unload_executions_ TF_GUARDED_BY(mu_) = 0;
521 
522  // Used to wake up threads that are waiting for 'num_ongoing_executions' to
523  // decrease.
524  condition_variable num_ongoing_load_unload_executions_cv_;
525 
526  PreLoadHook pre_load_hook_;
527 
528  TF_DISALLOW_COPY_AND_ASSIGN(BasicManager);
529 };
530 
532 // Implementation details. API readers may skip.
534 
// Starts managing 'servable', building its LoaderHarness with
// 'additional_state' attached. Delegates to ManageServableInternal() and
// returns its status.
535 template <typename T>
536 Status BasicManager::ManageServableWithAdditionalState(
537  ServableData<std::unique_ptr<Loader>> servable,
538  std::unique_ptr<T> additional_state) {
539  return ManageServableInternal(
540  std::move(servable),
     // 'additional_state' is captured by reference and moved from inside the
     // closure: capturing the unique_ptr by move would make the closure
     // non-copyable, which std::function does not accept. This assumes
     // ManageServableInternal() runs the closure before returning, and at most
     // once -- TODO confirm in the .cc.
541  [this, &additional_state](const ServableId& id,
542  std::unique_ptr<Loader> loader) {
543  return std::make_shared<LoaderHarness>(id, std::move(loader),
544  std::move(additional_state),
545  harness_options_);
546  });
547 }
548 
// Returns a state snapshot for each harness stored under 'servable_name' in
// 'managed_map_' (one entry per managed version), in unspecified order. The
// result is empty if the name is not being managed. Holds 'mu_' while reading
// the map.
549 template <typename T>
550 std::vector<ServableStateSnapshot<T>>
551 BasicManager::GetManagedServableStateSnapshots(
552  const string& servable_name) const {
553  mutex_lock l(mu_);
554 
555  const auto range = managed_map_.equal_range(servable_name);
556  std::vector<ServableStateSnapshot<T>> state_snapshots;
     // Size the result up front; the range length is known before the copy.
557  state_snapshots.reserve(std::distance(range.first, range.second));
558  for (auto it = range.first; it != range.second; ++it) {
559  state_snapshots.push_back(it->second->loader_state_snapshot<T>());
560  }
561 
562  return state_snapshots;
563 }
564 
// Returns the state snapshot of the harness managing servable 'id', or
// absl::nullopt if no such harness is found in 'managed_map_'. Holds 'mu_' for
// the lookup, as FindHarnessInMap() requires.
565 template <typename T>
566 absl::optional<ServableStateSnapshot<T>>
567 BasicManager::GetManagedServableStateSnapshot(const ServableId& id) {
568  mutex_lock l(mu_);
569 
570  auto iter = FindHarnessInMap(id);
571  if (iter == managed_map_.end()) {
572  return absl::nullopt;
573  }
574  return iter->second->loader_state_snapshot<T>();
575 }
576 
// Returns the additional state (of type T) held by the harness managing
// servable 'id'. If 'id' is not found in 'managed_map_', trips a DCHECK and
// returns nullptr. Holds 'mu_' for the lookup, as FindHarnessInMap() requires.
577 template <typename T>
578 T* BasicManager::GetAdditionalServableState(const ServableId& id) {
579  mutex_lock l(mu_);
580 
581  auto iter = FindHarnessInMap(id);
582  if (iter == managed_map_.end()) {
     // Asking for the state of an unmanaged servable is treated as a caller
     // bug; where the DCHECK is compiled out, the call falls through to the
     // nullptr return below.
583  DCHECK(false) << "This servable is not being managed by the mananger: "
584  << id.DebugString();
585  return nullptr;
586  }
587  return iter->second->additional_state<T>();
588 }
589 
590 } // namespace serving
591 } // namespace tensorflow
592 
593 #endif // TENSORFLOW_SERVING_CORE_BASIC_MANAGER_H_
void LoadServable(const ServableId &id, DoneCallback done_callback)
std::vector< ServableStateSnapshot< T > > GetManagedServableStateSnapshots(const string &servable_name) const
Status StopManagingServable(const ServableId &id)
std::vector< ServableId > ListAvailableServableIds() const override
Status ManageServableWithAdditionalState(ServableData< std::unique_ptr< Loader >> servable, std::unique_ptr< T > additional_state)
void CancelLoadServableRetry(const ServableId &id)
std::function< void(const Status &status)> DoneCallback
void UnloadServable(const ServableId &id, DoneCallback done_callback)
T * GetAdditionalServableState(const ServableId &id)
Status ManageServable(ServableData< std::unique_ptr< Loader >> servable)
absl::optional< ServableStateSnapshot< T > > GetManagedServableStateSnapshot(const ServableId &id)
std::vector< string > GetManagedServableNames() const