TensorFlow Serving C++ API Documentation
gzip_zlib.cc
/* Copyright 2018 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// zlib based C++ wrapper to support gzip compression/uncompression

#include "tensorflow_serving/util/net_http/compression/gzip_zlib.h"

#include <algorithm>
#include <cassert>
#include <cstring>
#include <memory>

#include "absl/base/casts.h"
#include "absl/base/macros.h"
#include "tensorflow_serving/util/net_http/internal/net_logging.h"

namespace tensorflow {
namespace serving {
namespace net_http {

// TODO(wenboz): disable setting change, free-list (no setting change)

// The GZIP header (see RFC 1952):
// +---+---+---+---+---+---+---+---+---+---+
// |ID1|ID2|CM |FLG|     MTIME     |XFL|OS |
// +---+---+---+---+---+---+---+---+---+---+
//   ID1     \037
//   ID2     \213
//   CM      \010 (compression method == DEFLATE)
//   FLG     \000 (special flags that we do not support)
//   MTIME   Unix format modification time (0 means not available)
//   XFL     2-4? DEFLATE flags
//   OS      ???? Operating system indicator (255 means unknown)

constexpr char GZIP_HEADER[] = "\037\213\010\000\000\000\000\000\002\377";
constexpr uint8_t kMagicHeader[2] = {0x1f, 0x8b};  // gzip magic header

GZipHeader::Status GZipHeader::ReadMore(const char *inbuf, int inbuf_len,
                                        const char **header_end) {
  auto pos = reinterpret_cast<const uint8_t *>(inbuf);
  const uint8_t *const end = pos + inbuf_len;

  while (pos < end) {
    switch (state_) {
      case IN_HEADER_ID1:
        if (*pos != kMagicHeader[0]) return INVALID_HEADER;
        pos++;
        state_++;
        break;
      case IN_HEADER_ID2:
        if (*pos != kMagicHeader[1]) return INVALID_HEADER;
        pos++;
        state_++;
        break;
      case IN_HEADER_CM:
        if (*pos != Z_DEFLATED) return INVALID_HEADER;
        pos++;
        state_++;
        break;
      case IN_HEADER_FLG:
        flags_ =
            (*pos) & (FLAG_FHCRC | FLAG_FEXTRA | FLAG_FNAME | FLAG_FCOMMENT);
        pos++;
        state_++;
        break;

      case IN_HEADER_MTIME_BYTE_0:
        pos++;
        state_++;
        break;
      case IN_HEADER_MTIME_BYTE_1:
        pos++;
        state_++;
        break;
      case IN_HEADER_MTIME_BYTE_2:
        pos++;
        state_++;
        break;
      case IN_HEADER_MTIME_BYTE_3:
        pos++;
        state_++;
        break;

      case IN_HEADER_XFL:
        pos++;
        state_++;
        break;

      case IN_HEADER_OS:
        pos++;
        state_++;
        break;

      case IN_XLEN_BYTE_0:
        if (!(flags_ & FLAG_FEXTRA)) {
          state_ = IN_FNAME;
          break;
        }
        // We have a two-byte little-endian length, followed by a
        // field of that length.
        extra_length_ = *pos;
        pos++;
        state_++;
        break;
      case IN_XLEN_BYTE_1:
        extra_length_ += (*pos) << 8;
        pos++;
        state_++;
        ABSL_FALLTHROUGH_INTENDED;
        // if we have a zero-length FEXTRA, we want to check
        // to notice that we're done reading the FEXTRA before we exit the loop.

      case IN_FEXTRA: {
        // Grab the rest of the bytes in the extra field, or as many
        // of them as are actually present so far.
        const int num_extra_bytes =
            std::min<int>(extra_length_, absl::implicit_cast<int>(end - pos));
        pos += num_extra_bytes;
        extra_length_ -= num_extra_bytes;
        if (extra_length_ == 0) {
          state_ = IN_FNAME;       // advance when we've seen extra_length_ bytes
          flags_ &= ~FLAG_FEXTRA;  // we're done with the FEXTRA stuff
        }
        break;
      }

      case IN_FNAME:
        if (!(flags_ & FLAG_FNAME)) {
          state_ = IN_FCOMMENT;
          break;
        }
        // See if we can find the end of the \0-terminated FNAME field.
        pos = reinterpret_cast<const uint8_t *>(memchr(pos, '\0', (end - pos)));
        if (pos != nullptr) {
          pos++;                  // advance past the '\0'
          flags_ &= ~FLAG_FNAME;  // we're done with the FNAME stuff
          state_ = IN_FCOMMENT;
        } else {
          pos = end;  // everything we have so far is part of the FNAME
        }
        break;

      case IN_FCOMMENT:
        if (!(flags_ & FLAG_FCOMMENT)) {
          state_ = IN_FHCRC_BYTE_0;
          break;
        }
        // See if we can find the end of the \0-terminated FCOMMENT field.
        pos = reinterpret_cast<const uint8_t *>(memchr(pos, '\0', (end - pos)));
        if (pos != nullptr) {
          pos++;                     // advance past the '\0'
          flags_ &= ~FLAG_FCOMMENT;  // we're done with the FCOMMENT stuff
          state_ = IN_FHCRC_BYTE_0;
        } else {
          pos = end;  // everything we have so far is part of the FCOMMENT
        }
        break;

      case IN_FHCRC_BYTE_0:
        if (!(flags_ & FLAG_FHCRC)) {
          state_ = IN_DONE;
          break;
        }
        pos++;
        state_++;
        break;

      case IN_FHCRC_BYTE_1:
        pos++;
        flags_ &= ~FLAG_FHCRC;  // we're done with the FHCRC stuff
        state_++;
        break;

      case IN_DONE:
        *header_end = reinterpret_cast<const char *>(pos);
        return COMPLETE_HEADER;

      default:
        break;
    }
  }

  if ((state_ > IN_HEADER_OS) && (flags_ == 0)) {
    *header_end = reinterpret_cast<const char *>(pos);
    return COMPLETE_HEADER;
  } else {
    return INCOMPLETE_HEADER;
  }
}
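
// Usage sketch (editor's illustration, not part of the original source):
// ReadMore() can be fed a gzip stream incrementally, e.g. as bytes arrive from
// a socket. It consumes header bytes until the header is complete and reports
// where the DEFLATE body begins. The buffers chunk1/chunk1_len and
// chunk2/chunk2_len below are hypothetical.
//
//   GZipHeader header;
//   const char *body_start = nullptr;
//
//   // First read ends mid-header: all input is consumed, no body yet.
//   GZipHeader::Status status =
//       header.ReadMore(chunk1, chunk1_len, &body_start);
//   assert(status == GZipHeader::INCOMPLETE_HEADER);
//
//   // Second read completes the header; body_start points just past it.
//   status = header.ReadMore(chunk2, chunk2_len, &body_start);
//   if (status == GZipHeader::COMPLETE_HEADER) {
//     // Compressed DEFLATE data begins at body_start within chunk2.
//   } else if (status == GZipHeader::INVALID_HEADER) {
//     // The stream is not gzip-framed.
//   }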

ZLib::ZLib()
    : comp_init_(false), uncomp_init_(false), gzip_header_(new GZipHeader) {
  Reinit();
  init_settings_ = settings_;
}

ZLib::~ZLib() {
  if (comp_init_) {
    deflateEnd(&comp_stream_);
  }
  if (uncomp_init_) {
    inflateEnd(&uncomp_stream_);
  }
  delete gzip_header_;
}

void ZLib::Reinit() {
  settings_.compression_level_ = Z_DEFAULT_COMPRESSION;
  settings_.window_bits_ = MAX_WBITS;
  settings_.mem_level_ = 8;  // DEF_MEM_LEVEL
  settings_.dont_hide_zstream_end_ = false;

  if (comp_init_) {
    int err = deflateReset(&comp_stream_);
    if (err != Z_OK) {
      deflateEnd(&comp_stream_);
      comp_init_ = false;
    }
  }
  if (uncomp_init_) {
    // Use negative window bits size to indicate bare stream with no header.
    int wbits = -MAX_WBITS;
    int err = inflateReset2(&uncomp_stream_, wbits);
    if (err != Z_OK) {
      inflateEnd(&uncomp_stream_);
      uncomp_init_ = false;
    }
  }
  crc_ = 0;
  uncompressed_size_ = 0;
  gzip_header_->Reset();
  gzip_footer_bytes_ = -1;
  first_chunk_ = true;
}

void ZLib::Reset() {
  first_chunk_ = true;
  gzip_header_->Reset();
}

void ZLib::SetDontHideStreamEnd() { settings_.dont_hide_zstream_end_ = true; }

int ZLib::MinFooterSize() const {
  int min_footer_size = 2;  // Room for empty chunk.
  min_footer_size += 8;     // Room for actual footer for gzip
  return min_footer_size;
}

// --------- COMPRESS MODE

// Initialization method to be called if we hit an error while
// compressing. On hitting an error, call this method before returning
// the error.
void ZLib::CompressErrorInit() {
  if (comp_init_) {
    deflateEnd(&comp_stream_);
    comp_init_ = false;
  }
  Reset();
}

// These probably return Z_OK, but may return Z_BUF_ERROR if outbuf is full
int ZLib::WriteGzipHeader() {
  if (comp_stream_.avail_out < sizeof(GZIP_HEADER)) return Z_BUF_ERROR;
  memcpy(comp_stream_.next_out, GZIP_HEADER, sizeof(GZIP_HEADER) - 1);
  comp_stream_.next_out += sizeof(GZIP_HEADER) - 1;
  comp_stream_.avail_out -= sizeof(GZIP_HEADER) - 1;
  return Z_OK;
}

int ZLib::WriteGzipFooter(Bytef *dest, uLongf destLen) {
  if (destLen < 8)  // not enough space for footer
    return Z_BUF_ERROR;
  *dest++ = (crc_ >> 0) & 255;
  *dest++ = (crc_ >> 8) & 255;
  *dest++ = (crc_ >> 16) & 255;
  *dest++ = (crc_ >> 24) & 255;
  *dest++ = (uncompressed_size_ >> 0) & 255;
  *dest++ = (uncompressed_size_ >> 8) & 255;
  *dest++ = (uncompressed_size_ >> 16) & 255;
  *dest++ = (uncompressed_size_ >> 24) & 255;
  return Z_OK;
}
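
// Worked example (editor's illustration, not part of the original source):
// the 8-byte gzip trailer is CRC32 then ISIZE, both little-endian (RFC 1952).
// If crc_ == 0x11223344 and uncompressed_size_ == 5, WriteGzipFooter() emits
//
//   0x44 0x33 0x22 0x11   (CRC32, least-significant byte first)
//   0x05 0x00 0x00 0x00   (ISIZE = uncompressed length mod 2^32)
//
// IsGzipFooterValid() below reverses exactly this byte order when checking a
// decompressed stream.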

int ZLib::DeflateInit() {
  int err = deflateInit2(&comp_stream_, settings_.compression_level_,
                         Z_DEFLATED, -settings_.window_bits_,
                         settings_.mem_level_, Z_DEFAULT_STRATEGY);
  if (err == Z_OK) {
    // Save parameters for later reusability checks
    init_settings_.compression_level_ = settings_.compression_level_;
    init_settings_.window_bits_ = settings_.window_bits_;
    init_settings_.mem_level_ = settings_.mem_level_;
  }
  return err;
}

int ZLib::CompressInit(Bytef *dest, uLongf *destLen, const Bytef *source,
                       uLong *sourceLen) {
  int err;

  comp_stream_.next_in = (Bytef *)source;
  comp_stream_.avail_in = (uInt)*sourceLen;
  // Check for sourceLen (unsigned long) to fit into avail_in (unsigned int).
  if ((uLong)comp_stream_.avail_in != *sourceLen) return Z_BUF_ERROR;
  comp_stream_.next_out = dest;
  comp_stream_.avail_out = (uInt)*destLen;
  // Check for destLen (unsigned long) to fit into avail_out (unsigned int).
  if ((uLong)comp_stream_.avail_out != *destLen) return Z_BUF_ERROR;

  if (!first_chunk_)  // only need to set up stream the first time through
    return Z_OK;

  // Force full reinit if properties have changed in a way we can't adjust.
  if (comp_init_ && (init_settings_.window_bits_ != settings_.window_bits_ ||
                     init_settings_.mem_level_ != settings_.mem_level_)) {
    deflateEnd(&comp_stream_);
    comp_init_ = false;
  }

  // Reuse if we've already initted the object.
  if (comp_init_) {  // we've already initted it
    err = deflateReset(&comp_stream_);
    if (err != Z_OK) {
      deflateEnd(&comp_stream_);
      comp_init_ = false;
    }
  }

  // If compression level has changed, try to reconfigure instead of reinit
  if (comp_init_ &&
      init_settings_.compression_level_ != settings_.compression_level_) {
    err = deflateParams(&comp_stream_, settings_.compression_level_,
                        Z_DEFAULT_STRATEGY);
    if (err == Z_OK) {
      init_settings_.compression_level_ = settings_.compression_level_;
    } else {
      deflateEnd(&comp_stream_);
      comp_init_ = false;
    }
  }

  // First use or previous state was not reusable with current settings.
  if (!comp_init_) {
    comp_stream_.zalloc = (alloc_func)0;
    comp_stream_.zfree = (free_func)0;
    comp_stream_.opaque = (voidpf)0;
    err = DeflateInit();
    if (err != Z_OK) return err;
    comp_init_ = true;
  }
  return Z_OK;
}

// Supports chunked compression, using the chunked compression features of
// zlib.
int ZLib::CompressAtMostOrAll(Bytef *dest, uLongf *destLen, const Bytef *source,
                              uLong *sourceLen,
                              int flush_mode) {  // Z_FULL_FLUSH or Z_FINISH
  int err;

  if ((err = CompressInit(dest, destLen, source, sourceLen)) != Z_OK)
    return err;

  // This is used to figure out how many bytes we wrote *this chunk*
  uint64_t compressed_size = comp_stream_.total_out;

  // Some setup happens only for the first chunk we compress in a run
  if (first_chunk_) {
    if ((err = WriteGzipHeader()) != Z_OK) return err;
    compressed_size -= sizeof(GZIP_HEADER) - 1;  // -= is right: adds to size
    crc_ = crc32(0, nullptr, 0);                 // initialize

    uncompressed_size_ = 0;
    first_chunk_ = false;  // so we don't do this again
  }

  // flush_mode is Z_FINISH for all mode, Z_SYNC_FLUSH for incremental
  // compression.
  err = deflate(&comp_stream_, flush_mode);

  const uLong source_bytes_consumed = *sourceLen - comp_stream_.avail_in;
  *sourceLen = comp_stream_.avail_in;

  if ((err == Z_STREAM_END || err == Z_OK) && comp_stream_.avail_in == 0 &&
      comp_stream_.avail_out != 0) {
    // we processed everything ok and the output buffer was large enough.
  } else if (err == Z_STREAM_END && comp_stream_.avail_in > 0) {
    return Z_BUF_ERROR;  // should never happen
  } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) {
    // an error happened
    CompressErrorInit();
    return err;
  } else if (comp_stream_.avail_out == 0) {  // not enough space
    err = Z_BUF_ERROR;
  }

  assert(err == Z_OK || err == Z_STREAM_END || err == Z_BUF_ERROR);
  if (err == Z_STREAM_END) err = Z_OK;

  // update the crc and other metadata
  uncompressed_size_ += source_bytes_consumed;
  compressed_size = comp_stream_.total_out - compressed_size;  // delta
  *destLen = compressed_size;

  crc_ = crc32(crc_, source, source_bytes_consumed);

  return err;
}

int ZLib::CompressChunkOrAll(Bytef *dest, uLongf *destLen, const Bytef *source,
                             uLong sourceLen,
                             int flush_mode) {  // Z_FULL_FLUSH or Z_FINISH
  const int ret =
      CompressAtMostOrAll(dest, destLen, source, &sourceLen, flush_mode);
  if (ret == Z_BUF_ERROR) CompressErrorInit();
  return ret;
}

int ZLib::CompressAtMost(Bytef *dest, uLongf *destLen, const Bytef *source,
                         uLong *sourceLen) {
  return CompressAtMostOrAll(dest, destLen, source, sourceLen, Z_SYNC_FLUSH);
}

// This writes the gzip footer info, if necessary.
// No matter what, we call Reset() so we can compress Chunks again.
int ZLib::CompressChunkDone(Bytef *dest, uLongf *destLen) {
  // Make sure our buffer is of reasonable size.
  if (*destLen < static_cast<uLongf>(MinFooterSize())) {
    *destLen = 0;
    return Z_BUF_ERROR;
  }

  // The underlying zlib library requires a non-NULL source pointer, even if
  // the source length is zero, otherwise it will generate an (incorrect)
  // zero-valued CRC checksum.
  char dummy = '\0';
  int err;

  assert(!first_chunk_ && comp_init_);

  const uLongf orig_destLen = *destLen;
  if ((err = CompressChunkOrAll(dest, destLen, (const Bytef *)&dummy, 0,
                                Z_FINISH)) != Z_OK) {
    Reset();  // we assume they won't retry on error
    return err;
  }

  // Make sure that when we exit, we can start a new round of chunks later
  // (This must be set after the call to CompressChunkOrAll() above.)
  Reset();

  // Write gzip footer. They're explicitly in little-endian order
  if ((err = WriteGzipFooter(dest + *destLen, orig_destLen - *destLen)) != Z_OK)
    return err;
  *destLen += 8;  // zlib footer took up another 8 bytes

  return Z_OK;  // stream_end is ok
}
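
// Usage sketch (editor's illustration, not part of the original source):
// streaming gzip compression with CompressAtMost() plus CompressChunkDone().
// HasMoreInput/NextInputChunk/Emit and the buffer sizes are hypothetical
// stand-ins for the caller's own input source and output sink.
//
//   ZLib zlib;
//   Bytef out[8192];
//   while (HasMoreInput()) {
//     const std::string data = NextInputChunk();   // assume <= 4 KB per chunk
//     uLong in_left = data.size();
//     uLongf out_len = sizeof(out);
//     int err = zlib.CompressAtMost(
//         out, &out_len, reinterpret_cast<const Bytef *>(data.data()),
//         &in_left);
//     if (err != Z_OK) { /* handle error; in_left bytes were not consumed */ }
//     Emit(out, out_len);                           // compressed bytes so far
//   }
//   // Flush the final DEFLATE block and append the 8-byte gzip footer.
//   // (Assumes at least one chunk was compressed above; the buffer must be
//   // at least MinFooterSize() bytes.)
//   uLongf tail_len = sizeof(out);
//   if (zlib.CompressChunkDone(out, &tail_len) == Z_OK) {
//     Emit(out, tail_len);
//   }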

// This routine only initializes the compression stream once. Thereafter, it
// just does a deflateReset on the stream, which should be faster.
int ZLib::Compress(Bytef *dest, uLongf *destLen, const Bytef *source,
                   uLong sourceLen) {
  int err;
  const uLongf orig_destLen = *destLen;
  if ((err = CompressChunkOrAll(dest, destLen, source, sourceLen, Z_FINISH)) !=
      Z_OK)
    return err;
  Reset();  // reset for next call to Compress

  if ((err = WriteGzipFooter(dest + *destLen, orig_destLen - *destLen)) != Z_OK)
    return err;
  *destLen += 8;  // zlib footer took up another 8 bytes

  return Z_OK;
}
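
// Usage sketch (editor's illustration, not part of the original source):
// one-shot gzip compression of an in-memory buffer. The output-size bound used
// below (deflate worst case plus gzip header and footer) is an editorial
// assumption, not an API of this class; `body` is a hypothetical payload.
//
//   ZLib zlib;
//   const std::string body = ...;
//   // Rough worst-case bound: incompressible data grows slightly, plus
//   // 10 header bytes and 8 footer bytes.
//   uLongf gz_len = body.size() + body.size() / 1000 + 64;
//   std::unique_ptr<Bytef[]> gz(new Bytef[gz_len]);
//   int err = zlib.Compress(gz.get(), &gz_len,
//                           reinterpret_cast<const Bytef *>(body.data()),
//                           body.size());
//   // On Z_OK, gz[0..gz_len) holds a complete gzip stream (header, DEFLATE
//   // data, footer); on failure, err is a zlib error code such as Z_BUF_ERROR.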

// --------- UNCOMPRESS MODE

int ZLib::InflateInit() {
  // Use negative window bits size to indicate bare stream with no header.
  int wbits = (-MAX_WBITS);
  int err = inflateInit2(&uncomp_stream_, wbits);
  return err;
}

// Initialization method to be called if we hit an error while
// uncompressing. On hitting an error, call this method before
// returning the error.
void ZLib::UncompressErrorInit() {
  if (uncomp_init_) {
    inflateEnd(&uncomp_stream_);
    uncomp_init_ = false;
  }
  Reset();
}

int ZLib::UncompressInit(Bytef *dest, uLongf *destLen, const Bytef *source,
                         uLong *sourceLen) {
  int err;

  uncomp_stream_.next_in = (Bytef *)source;
  uncomp_stream_.avail_in = (uInt)*sourceLen;
  // Check for sourceLen (unsigned long) to fit into avail_in (unsigned int).
  if ((uLong)uncomp_stream_.avail_in != *sourceLen) return Z_BUF_ERROR;

  uncomp_stream_.next_out = dest;
  uncomp_stream_.avail_out = (uInt)*destLen;
  // Check for destLen (unsigned long) to fit into avail_out (unsigned int).
  if ((uLong)uncomp_stream_.avail_out != *destLen) return Z_BUF_ERROR;

  if (!first_chunk_)  // only need to set up stream the first time through
    return Z_OK;

  // Reuse if we've already initted the object.
  if (uncomp_init_) {
    // Use negative window bits size to indicate bare stream with no header.
    int wbits = -MAX_WBITS;
    err = inflateReset2(&uncomp_stream_, wbits);
    if (err != Z_OK) {
      UncompressErrorInit();
    }
  }

  // First use or previous state was not reusable with current settings.
  if (!uncomp_init_) {
    uncomp_stream_.zalloc = (alloc_func)0;
    uncomp_stream_.zfree = (free_func)0;
    uncomp_stream_.opaque = (voidpf)0;
    err = InflateInit();
    if (err != Z_OK) return err;
    uncomp_init_ = true;
  }
  return Z_OK;
}

// If you compressed your data a chunk at a time, with CompressChunk,
// you can uncompress it a chunk at a time with UncompressChunk.
// Only difference between chunked and unchunked uncompression
// is the flush mode we use: Z_SYNC_FLUSH (chunked) or Z_FINISH (unchunked).
int ZLib::UncompressAtMostOrAll(Bytef *dest, uLongf *destLen,
                                const Bytef *source, uLong *sourceLen,
                                int flush_mode) {  // Z_SYNC_FLUSH or Z_FINISH
  int err = Z_OK;

  if (first_chunk_) {
    gzip_footer_bytes_ = -1;

    // If we haven't read our first chunk of actual compressed data,
    // and we're expecting gzip headers, then parse some more bytes
    // from the gzip headers.
    const Bytef *bodyBegin = nullptr;
    GZipHeader::Status status = gzip_header_->ReadMore(
        reinterpret_cast<const char *>(source), *sourceLen,
        reinterpret_cast<const char **>(&bodyBegin));
    switch (status) {
      case GZipHeader::INCOMPLETE_HEADER:  // don't have the complete header
        *destLen = 0;
        *sourceLen = 0;  // GZipHeader used all the input
        return Z_OK;
      case GZipHeader::INVALID_HEADER:  // bogus header
        Reset();
        return Z_DATA_ERROR;
      case GZipHeader::COMPLETE_HEADER:      // we have the full header
        *sourceLen -= (bodyBegin - source);  // skip past header bytes
        source = bodyBegin;
        crc_ = crc32(0, nullptr, 0);  // initialize CRC
        break;
      default:
        NET_LOG(FATAL, "Unexpected gzip header parsing result: %d", status);
    }
  } else if (gzip_footer_bytes_ >= 0) {
    // We're now just reading the gzip footer. We already read all the data.
    if (gzip_footer_bytes_ + *sourceLen > sizeof(gzip_footer_)) {
      Reset();
      return Z_DATA_ERROR;
    }
    uLong len = sizeof(gzip_footer_) - gzip_footer_bytes_;
    if (len > *sourceLen) len = *sourceLen;
    if (len > 0) {
      memcpy(gzip_footer_ + gzip_footer_bytes_, source, len);
      gzip_footer_bytes_ += len;
    }
    *sourceLen -= len;
    *destLen = 0;
    return Z_OK;
  }

  if ((err = UncompressInit(dest, destLen, source, sourceLen)) != Z_OK) {
    NET_LOG(WARNING,
            "UncompressInit: Error: %d "
            " SourceLen: %zu",
            err, *sourceLen);
    return err;
  }

  // This is used to figure out how many output bytes we wrote *this chunk*:
  const uLong old_total_out = uncomp_stream_.total_out;

  // This is used to figure out how many input bytes we read *this chunk*:
  const uLong old_total_in = uncomp_stream_.total_in;

  if (first_chunk_) {
    first_chunk_ = false;  // so we don't do this again

    // For the first chunk *only* (to avoid infinite troubles), we let
    // there be no actual data to uncompress. This sometimes triggers
    // when the input is only the gzip header.
    if (*sourceLen == 0) {
      *destLen = 0;
      return Z_OK;
    }
  }

  // We'll uncompress as much as we can. If we end OK great, otherwise
  // if we get an error that seems to be the gzip footer, we store the
  // gzip footer and return OK, otherwise we return the error.

  // flush_mode is Z_SYNC_FLUSH for chunked mode, Z_FINISH for all mode.
  err = inflate(&uncomp_stream_, flush_mode);

  // Figure out how many bytes of the input zlib slurped up:
  const uLong bytes_read = uncomp_stream_.total_in - old_total_in;
  assert((source + bytes_read) <= (source + *sourceLen));
  *sourceLen = uncomp_stream_.avail_in;

  // Next we look at the footer, if any. Note that we might currently
  // have just part of the footer (eg, if this data is arriving over a
  // socket). After looking for a footer, log a warning if there is data.
  if ((err == Z_STREAM_END) &&
      ((gzip_footer_bytes_ == -1) ||
       (static_cast<size_t>(gzip_footer_bytes_) < sizeof(gzip_footer_))) &&
      (uncomp_stream_.avail_in <= sizeof(gzip_footer_))) {
    // Store gzip footer bytes so we can check for footer consistency
    // in UncompressChunkDone(). (If we have the whole footer, we
    // could do the checking here, but we don't to keep consistency
    // with CompressChunkDone().)
    gzip_footer_bytes_ =
        std::min(absl::implicit_cast<size_t>(uncomp_stream_.avail_in),
                 sizeof(gzip_footer_));
    memcpy(gzip_footer_, source + bytes_read, gzip_footer_bytes_);
    *sourceLen -= gzip_footer_bytes_;
  } else if ((err == Z_STREAM_END || err == Z_OK)  // everything went ok
             && uncomp_stream_.avail_in == 0) {    // and we read it all
  } else if (err == Z_STREAM_END && uncomp_stream_.avail_in > 0) {
    UncompressErrorInit();
    return Z_DATA_ERROR;
  } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) {
    UncompressErrorInit();
    return err;
  } else if (uncomp_stream_.avail_out == 0) {
    err = Z_BUF_ERROR;
  }

  assert(err == Z_OK || err == Z_BUF_ERROR || err == Z_STREAM_END);
  if (err == Z_STREAM_END && !settings_.dont_hide_zstream_end_) err = Z_OK;

  // update the crc and other metadata
  uncompressed_size_ = uncomp_stream_.total_out;
  *destLen = uncomp_stream_.total_out - old_total_out;  // size for this call

  crc_ = crc32(crc_, dest, *destLen);

  return err;
}

int ZLib::UncompressChunkOrAll(Bytef *dest, uLongf *destLen,
                               const Bytef *source, uLong sourceLen,
                               int flush_mode) {  // Z_SYNC_FLUSH or Z_FINISH
  const int ret =
      UncompressAtMostOrAll(dest, destLen, source, &sourceLen, flush_mode);
  if (ret == Z_BUF_ERROR) UncompressErrorInit();
  return ret;
}

int ZLib::UncompressAtMost(Bytef *dest, uLongf *destLen, const Bytef *source,
                           uLong *sourceLen) {
  return UncompressAtMostOrAll(dest, destLen, source, sourceLen, Z_SYNC_FLUSH);
}

// We make sure we've uncompressed everything, that is, the current
// uncompress stream is at a compressed-buffer-EOF boundary. In gzip
// mode, we also check the gzip footer to make sure we pass the gzip
// consistency checks. We RETURN true iff both types of checks pass.
bool ZLib::UncompressChunkDone() {
  if (first_chunk_ || !uncomp_init_) {
    return false;
  }
  // Make sure we're at the end-of-compressed-data point. This means
  // if we call inflate with Z_FINISH we won't consume any input or
  // write any output
  Bytef dummyin, dummyout;
  uLongf dummylen = 0;
  if (UncompressChunkOrAll(&dummyout, &dummylen, &dummyin, 0, Z_FINISH) !=
      Z_OK) {
    return false;
  }

  // Make sure that when we exit, we can start a new round of chunks later
  Reset();

  // Whether we were hoping for a gzip footer or not, we allow a gzip
  // footer. (See the note above about bugs in old zlibwrappers.) But
  // by the time we've seen all the input, it has to be either a
  // complete gzip footer, or no footer at all.
  if ((gzip_footer_bytes_ != -1) && (gzip_footer_bytes_ != 0) &&
      (static_cast<size_t>(gzip_footer_bytes_) != sizeof(gzip_footer_)))
    return false;

  return IsGzipFooterValid();
}
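
// Usage sketch (editor's illustration, not part of the original source):
// streaming gzip decompression with UncompressAtMost() plus
// UncompressChunkDone(), written as the body of a hypothetical
// bool DecompressAll() helper. HasMoreCompressedInput/NextCompressedChunk/
// Consume are hypothetical stand-ins for the caller's input source and sink.
//
//   ZLib zlib;
//   Bytef out[8192];
//   while (HasMoreCompressedInput()) {
//     const std::string chunk = NextCompressedChunk();
//     const Bytef *in = reinterpret_cast<const Bytef *>(chunk.data());
//     uLong in_left = chunk.size();
//     while (in_left > 0) {               // a chunk may need several passes
//       uLongf out_len = sizeof(out);
//       const uLong before = in_left;
//       int err = zlib.UncompressAtMost(out, &out_len, in, &in_left);
//       // Z_BUF_ERROR only means `out` filled up; anything else is fatal.
//       if (err != Z_OK && err != Z_BUF_ERROR) return false;
//       Consume(out, out_len);            // uncompressed bytes for this pass
//       in += before - in_left;           // skip the bytes zlib consumed
//     }
//   }
//   // Confirms the stream hit end-of-data and the footer CRC/length match.
//   return zlib.UncompressChunkDone();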

bool ZLib::IsGzipFooterComplete() const {
  return gzip_footer_bytes_ != -1 &&
         static_cast<size_t>(gzip_footer_bytes_) >= sizeof(gzip_footer_);
}

bool ZLib::IsGzipFooterValid() const {
  if (!IsGzipFooterComplete()) return false;

  // The footer holds the lower four bytes of the length.
  uLong uncompressed_size = 0;
  uncompressed_size += static_cast<uLong>(gzip_footer_[7]) << 24;
  uncompressed_size += gzip_footer_[6] << 16;
  uncompressed_size += gzip_footer_[5] << 8;
  uncompressed_size += gzip_footer_[4] << 0;
  if (uncompressed_size != (uncompressed_size_ & 0xffffffff)) {
    return false;
  }

  uLong checksum = 0;
  checksum += static_cast<uLong>(gzip_footer_[3]) << 24;
  checksum += gzip_footer_[2] << 16;
  checksum += gzip_footer_[1] << 8;
  checksum += gzip_footer_[0] << 0;
  if (crc_ != checksum) return false;

  return true;
}

// Uncompresses the source buffer into the destination buffer.
// The destination buffer must be long enough to hold the entire
// decompressed contents.
//
// We only initialize the uncomp_stream once. Thereafter, we use
// inflateReset2, which should be faster.
//
// Returns Z_OK on success, otherwise, it returns a zlib error code.
int ZLib::Uncompress(Bytef *dest, uLongf *destLen, const Bytef *source,
                     uLong sourceLen) {
  int err;
  if ((err = UncompressChunkOrAll(dest, destLen, source, sourceLen,
                                  Z_FINISH)) != Z_OK) {
    Reset();  // let us try to uncompress again
    return err;
  }
  if (!UncompressChunkDone())  // calls Reset()
    return Z_DATA_ERROR;
  return Z_OK;  // stream_end is ok
}

// Reads the uncompressed length (ISIZE, modulo 2^32) from the gzip footer.
uLongf ZLib::GzipUncompressedLength(const Bytef *source, uLong len) {
  if (len <= 4) return 0;  // malformed data.
  return (static_cast<uLongf>(source[len - 1]) << 24) +
         (static_cast<uLongf>(source[len - 2]) << 16) +
         (static_cast<uLongf>(source[len - 3]) << 8) +
         (static_cast<uLongf>(source[len - 4]) << 0);
}

int ZLib::UncompressGzipAndAllocate(Bytef **dest, uLongf *destLen,
                                    const Bytef *source, uLong sourceLen) {
  *dest = nullptr;  // until we successfully allocate

  uLongf uncompress_length = GzipUncompressedLength(source, sourceLen);

  // Do not trust the uncompress size reported by the compressed buffer.
  if (uncompress_length > *destLen) {
    if (!HasGzipHeader(reinterpret_cast<const char *>(source), sourceLen)) {
      return Z_DATA_ERROR;
    }
    return Z_MEM_ERROR;  // probably a corrupted gzip buffer
  }

  *destLen = uncompress_length;

  *dest = std::allocator<Bytef>().allocate(*destLen);
  if (*dest == nullptr) {
    return Z_MEM_ERROR;
  }

  const int retval = Uncompress(*dest, destLen, source, sourceLen);
  if (retval != Z_OK) {  // just to make life easier for them
    std::allocator<Bytef>().deallocate(*dest, *destLen);
    *dest = nullptr;
  }
  return retval;
}
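
// Usage sketch (editor's illustration, not part of the original source):
// decompressing a whole gzip buffer when only an upper bound on the
// uncompressed size is known up front. gz/gz_len and kMaxBodyBytes are
// hypothetical.
//
//   ZLib zlib;
//   Bytef *plain = nullptr;
//   uLongf plain_len = kMaxBodyBytes;  // reject anything claiming to be larger
//   int err = zlib.UncompressGzipAndAllocate(&plain, &plain_len, gz, gz_len);
//   if (err == Z_OK) {
//     // plain[0..plain_len) holds the uncompressed bytes; the caller owns the
//     // buffer and must release it the same way it was allocated:
//     std::allocator<Bytef>().deallocate(plain, plain_len);
//   }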

// Convenience method to check if a bytestream has a gzip header.
bool ZLib::HasGzipHeader(const char *source, int sourceLen) {
  GZipHeader gzh;
  const char *ptr = nullptr;
  return gzh.ReadMore(source, sourceLen, &ptr) == GZipHeader::COMPLETE_HEADER;
}

}  // namespace net_http
}  // namespace serving
}  // namespace tensorflow