ScummVM API documentation
huffman.h
1 /* ScummVM - Graphic Adventure Engine
2  *
3  * ScummVM is the legal property of its developers, whose names
4  * are too numerous to list here. Please refer to the COPYRIGHT
5  * file distributed with this source distribution.
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program. If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 
22 // Based on xoreos' Huffman code
23 
24 #ifndef COMMON_HUFFMAN_H
25 #define COMMON_HUFFMAN_H
26 
27 #include "common/array.h"
28 #include "common/list.h"
29 #include "common/queue.h"
30 #include "common/types.h"
31 
32 namespace Common {
33 
/**
 * Mirror the bit order of a 32-bit value: bit 0 trades places with bit 31,
 * bit 1 with bit 30, and so on.
 *
 * Works by swapping neighbouring groups of 1, 2, 4 and 8 bits and finally
 * the two 16-bit halves.
 *
 * @param x Value whose bits are to be reversed.
 * @return x with its 32 bits in reverse order.
 */
inline uint32 REVERSEBITS(uint32 x) {
	// Each step moves the upper group of every pair down and the lower group up.
	x = ((x >> 1) & 0x55555555u) | ((x & 0x55555555u) << 1);
	x = ((x >> 2) & 0x33333333u) | ((x & 0x33333333u) << 2);
	x = ((x >> 4) & 0x0F0F0F0Fu) | ((x & 0x0F0F0F0Fu) << 4);
	x = ((x >> 8) & 0x00FF00FFu) | ((x & 0x00FF00FFu) << 8);

	// Finally exchange the two halves.
	return (x << 16) | (x >> 16);
}
54 
59 template<class BITSTREAM>
60 class Huffman {
61 public:
70  Huffman(uint8 maxLength, uint32 codeCount, const uint32 *codes, const uint8 *lengths, const uint32 *symbols = nullptr);
71 
78  static Huffman fromFrequencies(uint32 freqCount, const uint32 *freq, const uint32 *symbols);
79 
85  static Huffman fromFrequencies(std::initializer_list<uint32> freqs);
86 
88  uint32 getSymbol(BITSTREAM &bits) const;
89 
90 private:
91  struct Symbol {
92  uint32 code;
93  uint32 symbol;
94 
95  Symbol(uint32 c, uint32 s) : code(c), symbol(s) {}
96  };
97 
98  typedef List<Symbol> CodeList;
99  typedef Array<CodeList> CodeLists;
100 
102  CodeLists _codes;
103 
105  struct PrefixEntry {
106  uint32 symbol;
107  uint8 length;
108 
109  PrefixEntry() : length(0xFF) {}
110  };
111 
112  static const uint8 _prefixTableBits = 8;
113  PrefixEntry _prefixTable[1 << _prefixTableBits];
114 };
115 
116 template<class BITSTREAM>
117 Huffman<BITSTREAM> Huffman<BITSTREAM>::fromFrequencies(std::initializer_list<uint32> init) {
118  Common::Array<uint32> freqs;
119  Common::Array<uint32> symbols;
120  uint32 sym = 0;
121  for (auto freq : init) {
122  if (freq != 0) {
123  freqs.push_back(freq);
124  symbols.push_back(sym);
125  }
126  ++sym;
127  }
128  return fromFrequencies(freqs.size(), freqs.data(), symbols.data());
129 }
130 
131 template<class BITSTREAM>
132 Huffman<BITSTREAM> Huffman<BITSTREAM>::fromFrequencies(uint32 freqCount, const uint32 *freq, const uint32 *symbols) {
133  assert(freqCount > 0);
134  assert(freq);
135  assert(symbols);
136 
137  Common::Array<uint32> codes(freqCount, 0);
138  Common::Array<uint8> lengths(freqCount, 0);
139 
140  static constexpr uint32 End = ~uint32(0);
141  struct Symbol {
142  uint32 zero, one;
143  uint32 freq;
144  };
145 
147  for (uint32_t i = 0; i != freqCount; ++i)
148  syms.push_back(Symbol{End, End, freq[i]});
149 
150  auto appendBit = [&](uint32 top, bool bit) {
151  Common::Queue<uint32> queue;
152  queue.push(top);
153  while (!queue.empty()) {
154  auto idx = queue.front();
155  queue.pop();
156  if (idx < freqCount) {
157  auto &len = lengths[idx];
158  if (bit)
159  codes[idx] |= (1 << len);
160  ++len;
161  } else {
162  assert(syms[idx].zero != End);
163  queue.push(syms[idx].zero);
164  assert(syms[idx].one != End);
165  queue.push(syms[idx].one);
166  }
167  }
168  };
169 
170  while (true) {
171  uint32 smallest1 = End, smallest2 = End;
172  for (uint32 idx = 0; idx != syms.size(); ++idx) {
173  auto &sym = syms[idx];
174  if (sym.freq != 0) {
175  if (smallest1 != End && sym.freq >= syms[smallest1].freq) {
176  if (smallest2 == End || sym.freq < syms[smallest2].freq) {
177  smallest2 = idx;
178  }
179  } else {
180  smallest2 = smallest1;
181  smallest1 = idx;
182  }
183  }
184  }
185  if (smallest2 == End)
186  break;
187 
188  auto &zero = syms[smallest1];
189  auto &one = syms[smallest2];
190  auto sum = zero.freq + one.freq;
191  zero.freq = 0;
192  one.freq = 0;
193  syms.push_back(Symbol{smallest1, smallest2, sum});
194  appendBit(smallest1, false);
195  appendBit(smallest2, true);
196  }
197 
198  return Huffman<BITSTREAM>{0, freqCount, codes.data(), lengths.data(), symbols};
199 }
200 
201 template<class BITSTREAM>
202 Huffman<BITSTREAM>::Huffman(uint8 maxLength, uint32 codeCount, const uint32 *codes, const uint8 *lengths, const uint32 *symbols) {
203  assert(codeCount > 0);
204 
205  assert(codes);
206  assert(lengths);
207 
208  if (maxLength == 0)
209  for (uint32 i = 0; i < codeCount; i++)
210  maxLength = MAX(maxLength, lengths[i]);
211 
212  assert(maxLength <= 32);
213 
214  // Codes that do not fit in the prefix table are stored in the _codes array.
215  _codes.resize(MAX(maxLength - _prefixTableBits, 0));
216 
217  for (uint i = 0; i < codeCount; i++) {
218  uint8 length = lengths[i];
219  assert(length != 0);
220 
221  // The symbol. If none was specified, assume it is identical to the code index.
222  uint32 symbol = symbols ? symbols[i] : i;
223 
224  if (length <= _prefixTableBits) {
225  // Short codes go in the prefix lookup table. Set all the entries in the table
226  // with an index starting with the code to the symbol value.
227  uint32 startIndex;
228  if (BITSTREAM::isMSB2LSB()) {
229  startIndex = codes[i] << (_prefixTableBits - length);
230  } else {
231  startIndex = REVERSEBITS(codes[i]) >> (32 - _prefixTableBits);
232  }
233 
234  uint32 endIndex = startIndex | ((1 << (_prefixTableBits - length)) - 1);
235 
236  for (uint32 j = startIndex; j <= endIndex; j++) {
237  uint32 index = BITSTREAM::isMSB2LSB() ? j : REVERSEBITS(j) >> (32 - _prefixTableBits);
238  _prefixTable[index].symbol = symbol;
239  _prefixTable[index].length = length;
240  }
241  } else {
242  // Put the code and symbol into the correct list for the length.
243  _codes[lengths[i] - 1 - _prefixTableBits].push_back(Symbol(codes[i], symbol));
244  }
245  }
246 }
247 
248 template<class BITSTREAM>
249 uint32 Huffman<BITSTREAM>::getSymbol(BITSTREAM &bits) const {
250  uint32 code = bits.peekBits(_prefixTableBits);
251 
252  uint8 length = _prefixTable[code].length;
253 
254  if (length != 0xFF) {
255  bits.skip(length);
256  return _prefixTable[code].symbol;
257  } else {
258  bits.skip(_prefixTableBits);
259 
260  for (uint32 i = 0; i < _codes.size(); i++) {
261  bits.addBit(code, i + _prefixTableBits);
262 
263  for (typename CodeList::const_iterator cCode = _codes[i].begin(); cCode != _codes[i].end(); ++cCode)
264  if (code == cCode->code)
265  return cCode->symbol;
266  }
267  }
268 
269  error("Unknown Huffman code");
270  return 0;
271 }
272 
275 } // End of namespace Common
276 
277 #endif // COMMON_HUFFMAN_H
const T * data() const
Definition: array.h:208
Definition: list.h:44
uint32 getSymbol(BITSTREAM &bits) const
Definition: huffman.h:249
Definition: queue.h:42
Huffman(uint8 maxLength, uint32 codeCount, const uint32 *codes, const uint8 *lengths, const uint32 *symbols=nullptr)
Definition: huffman.h:202
void push_back(const T &element)
Definition: array.h:181
static Huffman fromFrequencies(uint32 freqCount, const uint32 *freq, const uint32 *symbols)
Definition: huffman.h:132
Definition: huffman.h:60
Definition: algorithm.h:29
size_type size() const
Definition: array.h:316
void NORETURN_PRE error(MSVC_PRINTF const char *s,...) GCC_PRINTF(1
Definition: list_intern.h:51
T MAX(T a, T b)
Definition: util.h:64