TensorFlow Serving C++ API Documentation
gzip_zlib.h
1 /* Copyright 2018 Google Inc. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7  http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_SERVING_UTIL_NET_HTTP_COMPRESSION_GZIP_ZLIB_H_
17 #define TENSORFLOW_SERVING_UTIL_NET_HTTP_COMPRESSION_GZIP_ZLIB_H_
18 
19 #include <zlib.h>
20 
21 #include <cstdint>
22 
23 namespace tensorflow {
24 namespace serving {
25 namespace net_http {
26 
// Incremental parser for the header of a gzip stream (the "RFC" mentioned
// below is the gzip file format specification, RFC 1952). Header bytes are
// handed to ReadMore(); the object keeps track of how far into the header it
// has progressed so that parsing can resume on the next call.
class GZipHeader {
 public:
  // A freshly constructed parser is in the same state as one on which Reset()
  // has just been called (see the default member initializers below).
  GZipHeader() = default;
  ~GZipHeader() = default;

  // Wipe the slate clean and start from scratch: the next byte seen is
  // expected to be the first byte of a header.
  void Reset() {
    state_ = IN_HEADER_ID1;
    flags_ = 0;
    extra_length_ = 0;
  }

  // Outcome of a ReadMore() call.
  enum Status {
    INCOMPLETE_HEADER,  // more bytes are needed to finish the header
    COMPLETE_HEADER,    // the whole header has been seen
    INVALID_HEADER,     // the bytes seen cannot be a gzip header
  };

  // Consumes up to inbuf_len bytes starting at inbuf.
  //
  // If the bytes we've seen so far do not yet constitute a complete gzip
  // header, returns INCOMPLETE_HEADER. If these bytes do not constitute a
  // valid gzip header, returns INVALID_HEADER. When we've seen a complete
  // gzip header, returns COMPLETE_HEADER and sets the pointer pointed to by
  // header_end to the first byte beyond the gzip header.
  Status ReadMore(const char *inbuf, int inbuf_len, const char **header_end);

 private:
  enum {                   // flags (see RFC)
    FLAG_FTEXT = 0x01,     // bit 0 set: file probably ascii text
    FLAG_FHCRC = 0x02,     // bit 1 set: header CRC present
    FLAG_FEXTRA = 0x04,    // bit 2 set: extra field present
    FLAG_FNAME = 0x08,     // bit 3 set: original file name present
    FLAG_FCOMMENT = 0x10,  // bit 4 set: file comment present
    FLAG_RESERVED = 0xE0,  // bits 5..7: reserved
  };

  // States of the parsing FSM, listed in the order the corresponding header
  // fields appear.
  enum State {
    // The first 10 bytes are the fixed-size header:
    IN_HEADER_ID1,
    IN_HEADER_ID2,
    IN_HEADER_CM,
    IN_HEADER_FLG,
    IN_HEADER_MTIME_BYTE_0,
    IN_HEADER_MTIME_BYTE_1,
    IN_HEADER_MTIME_BYTE_2,
    IN_HEADER_MTIME_BYTE_3,
    IN_HEADER_XFL,
    IN_HEADER_OS,

    // Optional "extra field": a two-byte length followed by the payload.
    IN_XLEN_BYTE_0,
    IN_XLEN_BYTE_1,
    IN_FEXTRA,

    // Optional original file name.
    IN_FNAME,

    // Optional file comment.
    IN_FCOMMENT,

    // Optional two-byte header CRC.
    IN_FHCRC_BYTE_0,
    IN_FHCRC_BYTE_1,

    IN_DONE,
  };

  // Our current State in the parsing FSM: an int so we can ++.
  int state_ = IN_HEADER_ID1;
  // The flags byte of the header ("FLG" in the RFC).
  uint8_t flags_ = 0;
  // How much of the "extra field" we have yet to read.
  uint16_t extra_length_ = 0;
};
93 
94 class ZLib {
95  public:
96  ZLib();
97  ~ZLib();
98 
99  // The max length of the buffer to store uncompressed data
100  static constexpr int64_t kMaxUncompressedBytes = 100 * 1024 * 1024; // 100MB
101 
102  // Wipe a ZLib object to a virgin state. This differs from Reset()
103  // in that it also breaks any dictionary, gzip, etc, state.
104  void Reinit();
105 
106  // Call this to make a zlib buffer as good as new. Here's the only
107  // case where they differ:
108  // CompressChunk(a); CompressChunk(b); CompressChunkDone(); vs
109  // CompressChunk(a); Reset(); CompressChunk(b); CompressChunkDone();
110  // You'll want to use Reset(), then, when you interrupt a compress
111  // (or uncompress) in the middle of a chunk and want to start over.
112  void Reset();
113 
114  // By default UncompressAtMostOrAll will return Z_OK upon hitting the end of
115  // the input stream. This function modifies that behavior by returning
116  // Z_STREAM_END instead. This is useful when getting multiple compressed
117  // documents in a single stream. Returning Z_STREAM_END will indicate the end
118  // of a document.
119  void SetDontHideStreamEnd();
120 
121  // Sets the compression level to be used
122  void SetCompressionLevel(int level) { settings_.compression_level_ = level; }
123 
124  // Sets the size of the window (history buffer) used by the compressor.
125  // The size is expressed in bits (log base 2 of the desired size).
126  void SetCompressionWindowSizeInBits(int bits) {
127  settings_.window_bits_ = bits;
128  }
129 
130  // Controls the amount of memory used by the compresser.
131  // Legal value are 1 through 9. See zlib.h for more info.
132  void SetCompressionMemLevel(int level) { settings_.mem_level_ = level; }
133 
134  // According to the zlib manual, when you Compress, the destination
135  // buffer must have size at least src + .1%*src + 12. This function
136  // helps you calculate that. Augment this to account for a potential
137  // gzip header and footer, plus a few bytes of slack.
138  static uLong MinCompressbufSize(uLong uncompress_size) {
139  return uncompress_size + uncompress_size / 1000 + 40;
140  }
141 
142  // The minimum size of footers written by CompressChunkDone().
143  int MinFooterSize() const;
144 
145  // Compresses the source buffer into the destination buffer.
146  // sourceLen is the byte length of the source buffer.
147  // Upon entry, destLen is the total size of the destination buffer,
148  // which must be of size at least MinCompressbufSize(sourceLen).
149  // Upon exit, destLen is the actual size of the compressed buffer.
150  //
151  // This function can be used to compress a whole file at once if the
152  // input file is mmap'ed.
153  //
154  // Returns Z_OK if success, Z_MEM_ERROR if there was not
155  // enough memory, Z_BUF_ERROR if there was not enough room in the
156  // output buffer. Note that if the output buffer is exactly the same
157  // size as the compressed result, we still return Z_BUF_ERROR.
158  // (check CL#1936076)
159  //
160  // If the values of *destLen or sourceLen do not fit in an unsigned int,
161  // Z_BUF_ERROR is returned.
162  int Compress(Bytef *dest, uLongf *destLen, const Bytef *source,
163  uLong sourceLen);
164 
165  // Uncompresses the source buffer into the destination buffer.
166  // The destination buffer must be long enough to hold the entire
167  // decompressed contents.
168  //
169  // Returns Z_OK on success, otherwise, it returns a zlib error code.
170  //
171  // If the values of *destLen or sourceLen do not fit in an unsigned int,
172  // Z_BUF_ERROR is returned.
173  int Uncompress(Bytef *dest, uLongf *destLen, const Bytef *source,
174  uLong sourceLen);
175 
176  // Get the uncompressed size from the gzip footer. Returns 0 if source is too
177  // short (len < 5).
178  uLongf GzipUncompressedLength(const Bytef *source, uLong len);
179 
180  // Special helper function to help uncompress gzipped documents:
181  // We'll allocate (via std::allocator) a destination buffer exactly big
182  // enough to hold the gzipped content. We set dest and destLen.
183  // If we don't return Z_OK, *dest will be NULL, otherwise you
184  // should free() it when you're done with it.
185  // Returns Z_OK on success, otherwise, it returns a zlib error code.
186  // Its the responsibility of the user to set *destLen to the
187  // expected maximum size of the uncompressed data. The size of the
188  // uncompressed data is read from the compressed buffer gzip footer.
189  // This value cannot be trusted, so we compare it to the expected
190  // maximum size supplied by the user, returning Z_MEM_ERROR if its
191  // greater than the expected maximum size.
192  int UncompressGzipAndAllocate(Bytef **dest, uLongf *destLen,
193  const Bytef *source, uLong sourceLen);
194 
195  // Streaming compression and decompression methods.
196  // {Unc,C}ompressAtMost() decrements sourceLen by the amount of data that was
197  // consumed: if it returns Z_BUF_ERROR, set the source of the next
198  // {Unc,C}ompressAtMost() to the unconsumed data.
199 
200  // Compresses data one chunk at a time -- ie you can call this more
201  // than once. This is useful for a webserver, for instance, which
202  // might want to use chunked encoding with compression. To get this
203  // to work you need to call start and finish routines.
204  //
205  // Returns Z_OK if success, Z_MEM_ERROR if there was not
206  // enough memory, Z_BUF_ERROR if there was not enough room in the
207  // output buffer.
208 
209  int CompressAtMost(Bytef *dest, uLongf *destLen, const Bytef *source,
210  uLong *sourceLen);
211 
212  // Emits gzip footer information, as needed.
213  // destLen should be at least MinFooterSize() long.
214  // Returns Z_OK, Z_MEM_ERROR, and Z_BUF_ERROR as in CompressChunk().
215  int CompressChunkDone(Bytef *dest, uLongf *destLen);
216 
217  // Uncompress data one chunk at a time -- ie you can call this
218  // more than once. To get this to work you need to call per-chunk
219  // and "done" routines.
220  //
221  // Returns Z_OK if success, Z_MEM_ERROR if there was not
222  // enough memory, Z_BUF_ERROR if there was not enough room in the
223  // output buffer.
224 
225  int UncompressAtMost(Bytef *dest, uLongf *destLen, const Bytef *source,
226  uLong *sourceLen);
227 
228  // Checks gzip footer information, as needed. Mostly this just
229  // makes sure the checksums match. Whenever you call this, it
230  // will assume the last 8 bytes from the previous UncompressChunk
231  // call are the footer. Returns true iff everything looks ok.
232  bool UncompressChunkDone();
233 
234  // Only meaningful for chunked compressing/uncompressing. It's true
235  // after initialization or reset and before the first chunk of
236  // user data is received.
237  bool first_chunk() const { return first_chunk_; }
238 
239  // Convenience method to check if a bytestream has a header. This
240  // is intended as a quick test: "Is this likely a GZip file?"
241  static bool HasGzipHeader(const char *source, int sourceLen);
242 
243  // Have we parsed the complete gzip footer? When this result is true, it is
244  // time to call IsGzipFooterValid() / UncompressChunkDone().
245  bool IsGzipFooterComplete() const;
246 
247  // Have we parsed the complete gzip footer, and does it match the
248  // length and CRC checksum of the content that we have uncompressed
249  // so far?
250  bool IsGzipFooterValid() const;
251 
252  // Accessor for the uncompressed size
253  uLong uncompressed_size() const { return uncompressed_size_; }
254 
255  private:
256  int InflateInit(); // sets up the zlib inflate structure
257  int DeflateInit(); // sets up the zlib deflate structure
258 
259  // These init the zlib data structures for compressing/uncompressing
260  int CompressInit(Bytef *dest, uLongf *destLen, const Bytef *source,
261  uLong *sourceLen);
262  int UncompressInit(Bytef *dest, uLongf *destLen, const Bytef *source,
263  uLong *sourceLen);
264  // Initialization method to be called if we hit an error while
265  // uncompressing. On hitting an error, call this method before
266  // returning the error.
267  void UncompressErrorInit();
268  // Helper functions to write gzip-specific data
269  int WriteGzipHeader();
270  int WriteGzipFooter(Bytef *dest, uLongf destLen);
271 
272  // Helper function for both Compress and CompressChunk
273  int CompressChunkOrAll(Bytef *dest, uLongf *destLen, const Bytef *source,
274  uLong sourceLen, int flush_mode);
275  int CompressAtMostOrAll(Bytef *dest, uLongf *destLen, const Bytef *source,
276  uLong *sourceLen, int flush_mode);
277 
278  // Likewise for UncompressAndUncompressChunk
279  int UncompressChunkOrAll(Bytef *dest, uLongf *destLen, const Bytef *source,
280  uLong sourceLen, int flush_mode);
281 
282  int UncompressAtMostOrAll(Bytef *dest, uLongf *destLen, const Bytef *source,
283  uLong *sourceLen, int flush_mode);
284 
285  // Initialization method to be called if we hit an error while
286  // compressing. On hitting an error, call this method before
287  // returning the error.
288  void CompressErrorInit();
289 
290  struct Settings {
291  // compression level
292  int compression_level_;
293 
294  // log base 2 of the window size used in compression
295  int window_bits_;
296 
297  // specifies the amount of memory to be used by compressor (1-9)
298  int mem_level_;
299 
300  // Controls behavior of UncompressAtMostOrAll with regards to returning
301  // Z_STREAM_END. See comments for SetDontHideStreamEnd.
302  bool dont_hide_zstream_end_;
303  };
304 
305  // "Current" settings. These will be used whenever we next configure zlib.
306  // For example changing compression level or header mode will be recorded
307  // in these, but don't usually get applied immediately but on next compress.
308  Settings settings_;
309 
310  // Settings last used to initialise and configure zlib. These are needed
311  // to know if the current desired configuration in settings_ is sufficiently
312  // compatible with the previous configuration and we can just reconfigure the
313  // underlying zlib objects, or have to recreate them from scratch.
314  Settings init_settings_;
315 
316  z_stream comp_stream_; // Zlib stream data structure
317  bool comp_init_; // True if we have initialized comp_stream_
318  z_stream uncomp_stream_; // Zlib stream data structure
319  bool uncomp_init_; // True if we have initialized uncomp_stream_
320 
321  // These are used only in gzip compression mode
322  uLong crc_; // stored in gzip footer, fitting 4 bytes
323  uLong uncompressed_size_;
324 
325  GZipHeader *gzip_header_; // our gzip header state
326 
327  Byte gzip_footer_[8]; // stored footer, used to uncompress
328  int gzip_footer_bytes_; // num of footer bytes read so far, or -1
329 
330  // These are used only with chunked compression.
331  bool first_chunk_; // true if we need to emit headers with this chunk
332 };
333 
334 } // namespace net_http
335 } // namespace serving
336 } // namespace tensorflow
337 
338 #endif // TENSORFLOW_SERVING_UTIL_NET_HTTP_COMPRESSION_GZIP_ZLIB_H_