0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
BlockCompressionCodecBmz.cc
Go to the documentation of this file.
1 /* -*- c++ -*-
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
26 
27 #include <Common/Compat.h>
28 
30 
31 #include <Common/Checksum.h>
32 #include <Common/Thread.h>
33 #include <Common/Logger.h>
34 
35 #include <Hypertable/Lib/bmz/bmz.h>
36 
37 using namespace Hypertable;
38 using namespace std;
39 
41  : m_workmem(0), m_offset(0), m_fp_len(19) {
43  set_args(args);
44 }
45 
47 }
48 
// _NEXT_ARG(_code_): helper for option parsing loops that use the local
// iterators `it` and `arg_end`. It advances `it` to the following argument,
// fails via HT_EXPECT with Error::BLOCK_COMPRESSOR_INVALID_ARG if that moves
// `it` to `arg_end` (i.e. the option has no value after it), and then runs
// `_code_`, which is expected to consume `*it`.
// The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement and can be used as the unbraced branch of an if / else-if chain.
49 #define _NEXT_ARG(_code_) do { \
50  ++it; \
51  HT_EXPECT(it != arg_end, Error::BLOCK_COMPRESSOR_INVALID_ARG); \
52  _code_; \
53 } while (0)
54 
55 void
57  Args::const_iterator it = args.begin(), arg_end = args.end();
58 
59  for (; it != arg_end; ++it) {
60  if (*it == "--fp-len")
61  _NEXT_ARG(m_fp_len = atoi((*it).c_str()));
62  else if (*it == "--offset")
63  _NEXT_ARG(m_offset = atoi((*it).c_str()));
64  else HT_ERRORF("unknown bmz compressor argument: %s", (*it).c_str());
65  }
66 }
67 
68 void
70  DynamicBuffer &output,
71  BlockHeader &header,
72  size_t reserve) {
73  size_t inlen = input.fill();
74  size_t headerlen = header.encoded_length();
75  size_t outlen = bmz_pack_buflen(inlen);
76 
77  output.reserve(outlen + headerlen + reserve);
79 
80  HT_EXPECT(bmz_pack(input.base, inlen, output.base + headerlen, &outlen,
83 
84  // in case of an incompressible block
85  if (outlen >= inlen) {
86  header.set_compression_type(NONE);
87  memcpy(output.base + headerlen, input.base, inlen);
88  header.set_data_length(inlen);
89  header.set_data_zlength(inlen);
90  }
91  else {
92  header.set_compression_type(BMZ);
93  header.set_data_length(inlen);
94  header.set_data_zlength(outlen);
95  }
96  header.set_data_checksum(fletcher32(output.base + headerlen,
97  header.get_data_zlength()));
98  output.ptr = output.base;
99  header.encode(&output.ptr);
100  output.ptr += header.get_data_zlength();
101 }
102 
103 void
105  DynamicBuffer &output,
106  BlockHeader &header) {
107  const uint8_t *ip = input.base;
108  size_t remain = input.fill();
109 
110  header.decode(&ip, &remain);
111  HT_EXPECT(header.get_data_zlength() <= remain,
113  HT_EXPECT(header.get_data_checksum() == fletcher32(ip, header.get_data_zlength()),
115 
116  size_t outlen = header.get_data_length();
117 
118  output.reserve(outlen);
119 
120  if (header.get_compression_type() == NONE)
121  memcpy(output.base, ip, outlen);
122  else {
123  m_workmem.reserve(bmz_unpack_worklen(outlen), true);
124 
125  HT_EXPECT(bmz_unpack(ip, header.get_data_zlength(), output.base, &outlen,
126  m_workmem.base) == BMZ_E_OK,
128  HT_EXPECT(outlen == header.get_data_length(),
130  }
131  output.ptr = output.base + outlen;
132 }
int bmz_pack(const void *in, size_t in_len, void *out, size_t *out_len_p, size_t offset, size_t fp_len, unsigned flags, void *work_mem)
Perform bmz compression.
Definition: bmz.c:1254
virtual void set_args(const Args &args)
Sets arguments to control compression behavior.
void set_data_length(uint32_t length)
Sets the uncompressed data length field.
Definition: BlockHeader.h:91
void set_data_checksum(uint32_t checksum)
Sets the checksum field.
Definition: BlockHeader.h:113
size_t m_offset
Starting offset of fingerprints.
uint16_t get_compression_type()
Gets the compression type field.
Definition: BlockHeader.h:128
void set_data_zlength(uint32_t zlength)
Sets the compressed data length field.
Definition: BlockHeader.h:101
STL namespace.
uint8_t * ptr
Pointer to the end of the used part of the buffer.
A dynamic, resizable and reference counted memory buffer.
Definition: DynamicBuffer.h:42
uint32_t get_data_checksum()
Gets the checksum field.
Definition: BlockHeader.h:118
std::vector< String > Args
Compression codec argument vector.
#define HT_EXPECT(_e_, _code_)
Definition: Logger.h:388
uint32_t fletcher32(const void *data8, size_t len8)
Compute fletcher32 checksum for arbitrary data.
Definition: Checksum.cc:42
Bentley-McIlroy large common substring compression.
BlockCompressionCodecBmz(const Args &args)
Constructor.
uint32_t get_data_length()
Gets the uncompressed data length field.
Definition: BlockHeader.h:96
virtual void deflate(const DynamicBuffer &input, DynamicBuffer &output, BlockHeader &header, size_t reserve=0)
Compresses a buffer using the BMZ algorithm.
Logging routines and macros.
Compatibility Macros for C/C++.
int bmz_init()
Perform bmz initialization; only needs to be called once, mostly for sanity checks.
Definition: bmz.c:1445
int bmz_unpack(const void *in, size_t in_len, void *out, size_t *out_len_p, void *work_mem)
Perform bmz decompression.
Definition: bmz.c:1280
Importing boost::thread and boost::thread_group into the Hypertable namespace.
virtual void decode(const uint8_t **bufp, size_t *remainp)
Decodes serialized block header.
Definition: BlockHeader.cc:104
#define BMZ_E_OK
Definition: bmz.h:32
size_t bmz_pack_buflen(size_t in_len)
Compute bmz compression output buffer length.
Definition: bmz.c:1164
Hypertable definitions
size_t bmz_unpack_worklen(size_t out_len)
Return size of work memory for bmz decompression.
Definition: bmz.c:1192
Implementation of checksum routines.
void set_compression_type(uint16_t type)
Sets the compression type field.
Definition: BlockHeader.h:123
virtual void inflate(const DynamicBuffer &input, DynamicBuffer &output, BlockHeader &header)
Decompresses a buffer compressed with the BMZ algorithm.
DynamicBuffer m_workmem
Working memory buffer used by deflate() and inflate()
uint8_t * base
Pointer to the allocated memory buffer.
size_t fill() const
Returns the size of the used portion.
Definition: DynamicBuffer.h:70
virtual size_t encoded_length()
Returns length of serialized block header.
Definition: BlockHeader.cc:76
#define HT_ERRORF(msg,...)
Definition: Logger.h:300
uint32_t get_data_zlength()
Gets the compressed data length field.
Definition: BlockHeader.h:106
#define _NEXT_ARG(_code_)
Base class for block headers.
Definition: BlockHeader.h:48
Declarations for BlockCompressionCodecBmz.
size_t bmz_pack_worklen(size_t in_len, size_t fp_len)
Return size of work memory for bmz compression.
Definition: bmz.c:1186
void reserve(size_t len, bool nocopy=false)
Reserve space for additional data. Will grow the space to exactly what's needed.
Definition: DynamicBuffer.h:95
virtual void encode(uint8_t **bufp)
Encodes serialized representation of block header.
Definition: BlockHeader.cc:82