TensorFlow Serving C++ API Documentation
caching_manager.h
1 /* Copyright 2016 Google Inc. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7  http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_SERVING_CORE_CACHING_MANAGER_H_
17 #define TENSORFLOW_SERVING_CORE_CACHING_MANAGER_H_
18 
19 #include <map>
20 #include <memory>
21 #include <string>
22 #include <vector>
23 
24 #include "tensorflow_serving/core/basic_manager.h"
25 #include "tensorflow_serving/core/manager.h"
26 #include "tensorflow_serving/core/source_adapter.h"
27 
28 namespace tensorflow {
29 namespace serving {
30 
31 namespace test_util {
// Forward declaration only; the class is not defined anywhere in this listing.
32 class CachingManagerTestAccess;
33 } // namespace test_util
34 
42 //
43 // TODO(b/25449742): Add support for evictions of loaded servables from the
44 // caching-manager.
45 class CachingManager : public Manager {
46  public:
49  struct Options {
50  // The resource tracker to use while managing servable resources. Optional.
51  // If left as nullptr, we do not validate servable resource usage.
52  std::unique_ptr<ResourceTracker> resource_tracker;
53 
54  // The number of threads in the thread-pool used to load servables.
55  //
56  // If set as 0, we don't use a thread-pool, and LoadServable() blocks.
57  uint32 num_load_threads = 0;
58 
59  // The number of threads in the thread-pool used to unload servables.
60  //
61  // If set as 0, we don't use a thread-pool.
62  uint32 num_unload_threads = 0;
63 
64  // EventBus to publish servable state changes. This is optional, if unset,
65  // we don't publish.
66  EventBus<ServableState>* servable_event_bus = nullptr;
67 
68  // Maximum number of times we retry loading a servable, after the first
69  // failure, before we give up. If set to 0, a load is attempted only once.
70  uint32 max_num_load_retries = 5;
71 
72  // The interval, in microseconds, between each servable load retry. If set
73  // negative, we don't wait.
74  // Default: 1 minute.
75  int64_t load_retry_interval_micros = 1LL * 60 * 1000 * 1000;
76 
77  // The environment to use for starting threads in the thread-pool.
78  Env* env = Env::Default();
79  };
80 
83  class LoaderFactory {
84  public:
85  virtual ~LoaderFactory() = default;
86 
91  const ServableId& servable_id) = 0;
92 
95  virtual int64_t GetServableVersion(
96  const string& servable_name,
97  ServableRequest::AutoVersionPolicy policy) const = 0;
98  };
99 
100  static Status Create(Options options,
101  std::unique_ptr<LoaderFactory> loader_factory,
102  std::unique_ptr<CachingManager>* caching_manager);
103 
104  ~CachingManager() override;
105 
106  std::map<ServableId, std::unique_ptr<UntypedServableHandle>>
107  GetAvailableUntypedServableHandles() const override;
108 
109  std::vector<ServableId> ListAvailableServableIds() const override;
110 
111  private:
113 
114  CachingManager(std::unique_ptr<LoaderFactory> loader_factory,
115  std::unique_ptr<BasicManager> basic_manager);
116 
117  // Returns the untyped handle for the servable request.
118  //
119  // Semantics related to a ServableRequest for "latest":
120  // The manager forwards the "latest" request to the loader-factory, which
121  // emits its notion of the "latest" version. This is then managed and loaded
122  // by the manager, if not already available, and a handle to it is returned.
123  Status GetUntypedServableHandle(
124  const ServableRequest& request,
125  std::unique_ptr<UntypedServableHandle>* handle) override;
126 
127  // Returns the untyped handle for a servable-id.
128  Status GetUntypedServableHandleForId(
129  const ServableId& servable_id,
130  std::unique_ptr<UntypedServableHandle>* handle);
131 
132  // Transfer the given servable to 'basic_manager_', and ask it to load it. For
133  // multiple concurrent requests for the same servable-id, enforces that
134  // exactly one thread performs the load operation using the wrapped
135  // basic-manager. All other requests block until the load completes and then
136  // trivially succeed.
137  Status LoadServable(ServableData<std::unique_ptr<Loader>> loader_data)
138  TF_LOCKS_EXCLUDED(load_mutex_map_mu_);
139 
140  // Returns the size of the load_mutex_map_.
141  int64_t GetLoadMutexMapSize() const TF_LOCKS_EXCLUDED(load_mutex_map_mu_);
142 
143  // Erases the entry from the map corresponding to the servable-id if there is
144  // only one remaining reference to the mutex.
145  void MaybeEraseLoadMutexMapEntry(const ServableId& servable_id);
146 
147  std::unique_ptr<LoaderFactory> loader_factory_;
148 
149  std::unique_ptr<BasicManager> basic_manager_;
150 
151  // Used to protect access to the load_mutex_map_.
152  mutable mutex load_mutex_map_mu_;
153 
154  // Map of servable-id to a mutex, which is required to synchronize calls to
155  // load the servable using the wrapped basic-manager. The value in the map is
156  // a shared_ptr to allow for reference counting and consequent garbage
157  // collection.
158  std::map<ServableId, std::shared_ptr<mutex>> load_mutex_map_
159  TF_GUARDED_BY(load_mutex_map_mu_);
160 
161  TF_DISALLOW_COPY_AND_ASSIGN(CachingManager);
162 };
163 
167 class PathPrefixLoaderFactory : public CachingManager::LoaderFactory {
168  public:
169  PathPrefixLoaderFactory(const string& path_prefix,
170  std::unique_ptr<StoragePathSourceAdapter> adapter);
171  ~PathPrefixLoaderFactory() override = default;
172 
174  const ServableId& id) override;
175 
176  int64_t GetServableVersion(
177  const string& servable_name,
178  ServableRequest::AutoVersionPolicy policy) const override;
179 
180  private:
181  // The prefix of the path to the servables.
182  const string path_prefix_;
183 
184  // An adapter for creating a loader from a given path.
185  const std::unique_ptr<StoragePathSourceAdapter> adapter_;
186 
187  TF_DISALLOW_COPY_AND_ASSIGN(PathPrefixLoaderFactory);
188 };
189 
190 } // namespace serving
191 } // namespace tensorflow
192 
193 #endif // TENSORFLOW_SERVING_CORE_CACHING_MANAGER_H_
virtual ServableData<std::unique_ptr<Loader>> CreateLoader(const ServableId& servable_id) = 0
virtual int64_t GetServableVersion(const string& servable_name, ServableRequest::AutoVersionPolicy policy) const = 0
std::vector<ServableId> ListAvailableServableIds() const override