BitMagic-C++
encoding.h
Go to the documentation of this file.
1 #ifndef BMENCODING_H__INCLUDED__
2 #define BMENCODING_H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file encoding.h
22  \brief Encoding utilities for serialization (internal)
23 */
24 
25 
26 #include <memory.h>
27 #include "bmutil.h"
28 
29 
30 #ifdef _MSVC_VER
31 #pragma warning (push)
32 #pragma warning (disable : 4702)
33 #endif
34 
35 namespace bm
36 {
37 
38 
39 // ----------------------------------------------------------------
40 
41 /*!
42  \brief Memory encoding.
43 
44  Class for encoding data into memory.
45  Class handles aligment issues with the integer data types.
46 
47  \ingroup gammacode
48 */
49 class encoder
50 {
51 public:
52  typedef unsigned char* position_type;
53 public:
54  encoder(unsigned char* buf, size_t size) BMNOEXCEPT;
55  void put_8(unsigned char c) BMNOEXCEPT;
57  void put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT;
60  void put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT;
63  void put_prefixed_array_32(unsigned char c,
64  const bm::word_t* w, unsigned count) BMNOEXCEPT;
65  void put_prefixed_array_16(unsigned char c,
66  const bm::short_t* s, unsigned count,
67  bool encode_count) BMNOEXCEPT;
68  void memcpy(const unsigned char* src, size_t count) BMNOEXCEPT;
69  size_t size() const BMNOEXCEPT;
70  unsigned char* get_pos() const BMNOEXCEPT;
71  void set_pos(unsigned char* buf_pos) BMNOEXCEPT;
72 private:
73  unsigned char* buf_;
74  unsigned char* start_;
75  size_t size_;
76 };
77 
78 // ----------------------------------------------------------------
79 /**
80  Base class for all decoding functionality
81  \ingroup gammacode
82 */
84 {
85 public:
86  decoder_base(const unsigned char* buf) BMNOEXCEPT { buf_ = start_ = buf; }
87 
88  /// Reads character from the decoding buffer.
89  unsigned char get_8() BMNOEXCEPT { return *buf_++; }
90 
91  /// Returns size of the current decoding stream.
92  size_t size() const BMNOEXCEPT { return size_t(buf_ - start_); }
93 
94  /// change current position
95  void seek(int delta) BMNOEXCEPT { buf_ += delta; }
96 
97  /// read bytes from the decode buffer
98  void memcpy(unsigned char* dst, size_t count) BMNOEXCEPT;
99 
100  /// Return current buffer pointer
101  const unsigned char* get_pos() const BMNOEXCEPT { return buf_; }
102 
103  /// Set current buffer pointer
104  void set_pos(const unsigned char* pos) BMNOEXCEPT { buf_ = pos; }
105 protected:
106  const unsigned char* buf_;
107  const unsigned char* start_;
108 };
109 
110 
111 // ----------------------------------------------------------------
112 /**
113  Class for decoding data from memory buffer.
114  Properly handles aligment issues with integer data types.
115  \ingroup gammacode
116 */
117 class decoder : public decoder_base
118 {
119 public:
120  decoder(const unsigned char* buf) BMNOEXCEPT;
121  bm::short_t get_16() BMNOEXCEPT;
122  bm::word_t get_24() BMNOEXCEPT;
123  bm::word_t get_32() BMNOEXCEPT;
124  bm::id64_t get_48() BMNOEXCEPT;
125  bm::id64_t get_64() BMNOEXCEPT;
126  void get_32(bm::word_t* w, unsigned count) BMNOEXCEPT;
127  bool get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT;
128  void get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT;
129  void get_16(bm::short_t* s, unsigned count) BMNOEXCEPT;
130 };
131 
132 // ----------------------------------------------------------------
133 /**
134  Class for decoding data from memory buffer.
135  Properly handles aligment issues with integer data types.
136  Converts data to big endian architecture
137  (presumed it was encoded as little endian)
138  \ingroup gammacode
139 */
141 
142 
143 // ----------------------------------------------------------------
144 /**
145  Class for decoding data from memory buffer.
146  Properly handles aligment issues with integer data types.
147  Converts data to little endian architecture
148  (presumed it was encoded as big endian)
149  \ingroup gammacode
150 */
152 {
153 public:
154  decoder_little_endian(const unsigned char* buf);
155  bm::short_t get_16();
156  bm::word_t get_24();
157  bm::word_t get_32();
158  bm::id64_t get_48();
159  bm::id64_t get_64();
160  void get_32(bm::word_t* w, unsigned count);
161  bool get_32_OR(bm::word_t* w, unsigned count);
162  void get_32_AND(bm::word_t* w, unsigned count);
163  void get_16(bm::short_t* s, unsigned count);
164 };
165 
166 
167 /**
168  Byte based writer for un-aligned bit streaming
169 
170  @ingroup gammacode
171  @sa encoder
172 */
173 template<class TEncoder>
174 class bit_out
175 {
176 public:
177  bit_out(TEncoder& dest)
178  : dest_(dest), used_bits_(0), accum_(0)
179  {}
180 
181  ~bit_out() { flush(); }
182 
183  /// issue single bit into encode bit-stream
184  void put_bit(unsigned value) BMNOEXCEPT;
185 
186  /// issue count bits out of value
187  void put_bits(unsigned value, unsigned count) BMNOEXCEPT;
188 
189  /// issue 0 into output stream
190  void put_zero_bit() BMNOEXCEPT;
191 
192  /// issue specified number of 0s
193  void put_zero_bits(unsigned count) BMNOEXCEPT;
194 
195  /// Elias Gamma encode the specified value
196  void gamma(unsigned value) BMNOEXCEPT;
197 
198  /// Binary Interpolative array decode
199  void bic_encode_u16(const bm::gap_word_t* arr, unsigned sz,
201  {
202  bic_encode_u16_cm(arr, sz, lo, hi);
203  }
204 
205  /// Binary Interpolative encoding (array of 16-bit ints)
206  void bic_encode_u16_rg(const bm::gap_word_t* arr, unsigned sz,
207  bm::gap_word_t lo,
209 
210  /// Binary Interpolative encoding (array of 16-bit ints)
211  /// cm - "center-minimal"
212  void bic_encode_u16_cm(const bm::gap_word_t* arr, unsigned sz,
213  bm::gap_word_t lo,
215 
216  /// Binary Interpolative encoding (array of 32-bit ints)
217  /// cm - "center-minimal"
218  void bic_encode_u32_cm(const bm::word_t* arr, unsigned sz,
220 
221  /// Flush the incomplete 32-bit accumulator word
222  void flush() BMNOEXCEPT { if (used_bits_) flush_accum(); }
223 
224 private:
225  void flush_accum() BMNOEXCEPT
226  {
227  dest_.put_32(accum_);
228  used_bits_ = accum_ = 0;
229  }
230 private:
231  bit_out(const bit_out&);
232  bit_out& operator=(const bit_out&);
233 
234 private:
235  TEncoder& dest_; ///< Bit stream target
236  unsigned used_bits_; ///< Bits used in the accumulator
237  unsigned accum_; ///< write bit accumulator
238 };
239 
240 
241 /**
242  Byte based reader for un-aligned bit streaming
243 
244  @ingroup gammacode
245  @sa encoder
246 */
247 template<class TDecoder>
248 class bit_in
249 {
250 public:
252  : src_(decoder),
253  used_bits_(unsigned(sizeof(accum_) * 8)),
254  accum_(0)
255  {}
256 
257  /// decode unsigned value using Elias Gamma coding
258  unsigned gamma() BMNOEXCEPT;
259 
260  /// read number of bits out of the stream
261  unsigned get_bits(unsigned count) BMNOEXCEPT;
262 
263  /// Binary Interpolative array decode
264  void bic_decode_u16(bm::gap_word_t* arr, unsigned sz,
266  {
267  bic_decode_u16_cm(arr, sz, lo, hi);
268  }
269 
270  void bic_decode_u16_bitset(bm::word_t* block, unsigned sz,
272  {
273  bic_decode_u16_cm_bitset(block, sz, lo, hi);
274  }
275  void bic_decode_u16_dry(unsigned sz,
277  {
278  bic_decode_u16_cm_dry(sz, lo, hi);
279  }
280 
281 
282  /// Binary Interpolative array decode
283  void bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz,
285  /// Binary Interpolative array decode
286  void bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz,
288 
289  /// Binary Interpolative array decode (32-bit)
290  void bic_decode_u32_cm(bm::word_t* arr, unsigned sz,
292 
293 
294  /// Binary Interpolative array decode into bitset (32-bit based)
295  void bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
297 
298  /// Binary Interpolative array decode into /dev/null
299  void bic_decode_u16_rg_dry(unsigned sz,
301 
302  /// Binary Interpolative array decode into bitset (32-bit based)
303  void bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz,
304  bm::gap_word_t lo,
306 
307  /// Binary Interpolative array decode into /dev/null
308  void bic_decode_u16_cm_dry(unsigned sz,
310 
311 private:
312  bit_in(const bit_in&);
313  bit_in& operator=(const bit_in&);
314 private:
315  TDecoder& src_; ///< Source of bytes
316  unsigned used_bits_; ///< Bits used in the accumulator
317  unsigned accum_; ///< read bit accumulator
318 };
319 
320 
321 /**
322  Functor for Elias Gamma encoding
323  @ingroup gammacode
324 */
325 template<typename T, typename TBitIO>
327 {
328 public:
329  gamma_encoder(TBitIO& bout) : bout_(bout)
330  {}
331 
332  /** Encode word */
333  void operator()(T value) { bout_.gamma(value); }
334 private:
336  gamma_encoder& operator=(const gamma_encoder&);
337 private:
338  TBitIO& bout_;
339 };
340 
341 
342 /**
343  Elias Gamma decoder
344  @ingroup gammacode
345 */
346 template<typename T, typename TBitIO>
348 {
349 public:
350  gamma_decoder(TBitIO& bin) : bin_(bin) {}
351 
352  /**
353  Start encoding sequence
354  */
355  void start() {}
356 
357  /**
358  Stop decoding sequence
359  */
360  void stop() {}
361 
362  /**
363  Decode word
364  */
365  T operator()(void) { return (T)bin_.gamma(); }
366 private:
368  gamma_decoder& operator=(const gamma_decoder&);
369 private:
370  TBitIO& bin_;
371 };
372 
373 
374 // ----------------------------------------------------------------
375 // Implementation details.
376 // ----------------------------------------------------------------
377 
378 /*!
379  \fn encoder::encoder(unsigned char* buf, unsigned size)
380  \brief Construction.
381  \param buf - memory buffer pointer.
382  \param size - size of the buffer
383 */
384 inline encoder::encoder(unsigned char* buf, size_t a_size) BMNOEXCEPT
385 : buf_(buf), start_(buf)
386 {
387  size_ = a_size;
388 }
389 /*!
390  \brief Encode 8-bit prefix + an array
391 */
392 inline void encoder::put_prefixed_array_32(unsigned char c,
393  const bm::word_t* w,
394  unsigned count) BMNOEXCEPT
395 {
396  put_8(c);
397  put_32(w, count);
398 }
399 
400 /*!
401  \brief Encode 8-bit prefix + an array
402 */
403 inline void encoder::put_prefixed_array_16(unsigned char c,
404  const bm::short_t* s,
405  unsigned count,
406  bool encode_count) BMNOEXCEPT
407 {
408  put_8(c);
409  if (encode_count)
410  put_16((bm::short_t) count);
411  put_16(s, count);
412 }
413 
414 
415 /*!
416  \fn void encoder::put_8(unsigned char c)
417  \brief Puts one character into the encoding buffer.
418  \param c - character to encode
419 */
421 {
422  *buf_++ = c;
423 }
424 
425 /*!
426  \fn encoder::put_16(bm::short_t s)
427  \brief Puts short word (16 bits) into the encoding buffer.
428  \param s - short word to encode
429 */
431 {
432 #if (BM_UNALIGNED_ACCESS_OK == 1)
433  ::memcpy(buf_, &s, sizeof(bm::short_t)); // optimizer takes care of it
434  buf_ += sizeof(bm::short_t);
435 #else
436  *buf_++ = (unsigned char) s;
437  s >>= 8;
438  *buf_++ = (unsigned char) s;
439 #endif
440 }
441 
442 /*!
443  \brief Method puts array of short words (16 bits) into the encoding buffer.
444 */
445 inline void encoder::put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT
446 {
447 #if (BM_UNALIGNED_ACCESS_OK == 1)
448  ::memcpy(buf_, s, sizeof(bm::short_t)*count);
449  buf_ += sizeof(bm::short_t) * count;
450 #else
451  unsigned char* buf = buf_;
452  const bm::short_t* s_end = s + count;
453  do
454  {
455  bm::short_t w16 = *s++;
456  unsigned char a = (unsigned char) w16;
457  unsigned char b = (unsigned char) (w16 >> 8);
458 
459  *buf++ = a;
460  *buf++ = b;
461 
462  } while (s < s_end);
463 
464  buf_ = (unsigned char*)buf;
465 #endif
466 }
467 
468 /*!
469  \brief copy bytes into target buffer or just rewind if src is NULL
470 */
471 inline
472 void encoder::memcpy(const unsigned char* src, size_t count) BMNOEXCEPT
473 {
474  BM_ASSERT((buf_ + count) < (start_ + size_));
475  if (src)
476  ::memcpy(buf_, src, count);
477  buf_ += count;
478 }
479 
480 
481 /*!
482  \fn unsigned encoder::size() const
483  \brief Returns size of the current encoding stream.
484 */
485 inline size_t encoder::size() const BMNOEXCEPT
486 {
487  return size_t(buf_ - start_);
488 }
489 
490 /**
491  \brief Get current memory stream position
492 */
494 {
495  return buf_;
496 }
497 
498 /**
499  \brief Set current memory stream position
500 */
502 {
503  buf_ = buf_pos;
504 }
505 
506 /*!
507  \fn void encoder::put_24(bm::word_t w)
508  \brief Puts 24 bits word into encoding buffer.
509  \param w - word to encode.
510 */
512 {
513  BM_ASSERT((w & ~(0xFFFFFFU)) == 0);
514 
515  buf_[0] = (unsigned char)w;
516  buf_[1] = (unsigned char)(w >> 8);
517  buf_[2] = (unsigned char)(w >> 16);
518  buf_ += 3;
519 }
520 
521 
522 /*!
523  \fn void encoder::put_32(bm::word_t w)
524  \brief Puts 32 bits word into encoding buffer.
525  \param w - word to encode.
526 */
528 {
529 #if (BM_UNALIGNED_ACCESS_OK == 1)
530  ::memcpy(buf_, &w, sizeof(bm::word_t));
531  buf_ += sizeof(bm::word_t);
532 #else
533  *buf_++ = (unsigned char) w;
534  *buf_++ = (unsigned char) (w >> 8);
535  *buf_++ = (unsigned char) (w >> 16);
536  *buf_++ = (unsigned char) (w >> 24);
537 #endif
538 }
539 
540 /*!
541  \fn void encoder::put_48(bm::id64_t w)
542  \brief Puts 48 bits word into encoding buffer.
543  \param w - word to encode.
544 */
546 {
547  BM_ASSERT((w & ~(0xFFFFFFFFFFFFUL)) == 0);
548  *buf_++ = (unsigned char)w;
549  *buf_++ = (unsigned char)(w >> 8);
550  *buf_++ = (unsigned char)(w >> 16);
551  *buf_++ = (unsigned char)(w >> 24);
552  *buf_++ = (unsigned char)(w >> 32);
553  *buf_++ = (unsigned char)(w >> 40);
554 }
555 
556 
557 /*!
558  \fn void encoder::put_64(bm::id64_t w)
559  \brief Puts 64 bits word into encoding buffer.
560  \param w - word to encode.
561 */
563 {
564 #if (BM_UNALIGNED_ACCESS_OK == 1)
565  ::memcpy(buf_, &w, sizeof(bm::id64_t));
566  buf_ += sizeof(bm::id64_t);
567 #else
568  *buf_++ = (unsigned char) w;
569  *buf_++ = (unsigned char) (w >> 8);
570  *buf_++ = (unsigned char) (w >> 16);
571  *buf_++ = (unsigned char) (w >> 24);
572  *buf_++ = (unsigned char) (w >> 32);
573  *buf_++ = (unsigned char) (w >> 40);
574  *buf_++ = (unsigned char) (w >> 48);
575  *buf_++ = (unsigned char) (w >> 56);
576 #endif
577 }
578 
579 
580 /*!
581  \brief Encodes array of 32-bit words
582 */
583 inline void encoder::put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT
584 {
585 #if (BM_UNALIGNED_ACCESS_OK == 1)
586  // use memcpy() because compilers now understand it as an idiom and inline
587  ::memcpy(buf_, w, sizeof(bm::word_t) * count);
588  buf_ += sizeof(bm::word_t) * count;
589 #else
590  unsigned char* buf = buf_;
591  const bm::word_t* w_end = w + count;
592  do
593  {
594  bm::word_t w32 = *w++;
595  unsigned char a = (unsigned char) w32;
596  unsigned char b = (unsigned char) (w32 >> 8);
597  unsigned char c = (unsigned char) (w32 >> 16);
598  unsigned char d = (unsigned char) (w32 >> 24);
599 
600  *buf++ = a;
601  *buf++ = b;
602  *buf++ = c;
603  *buf++ = d;
604  } while (w < w_end);
605 
606  buf_ = (unsigned char*)buf;
607 #endif
608 }
609 
610 
611 // ---------------------------------------------------------------------
612 
613 
614 /*!
615  Load bytes from the decode buffer
616 */
617 inline
618 void decoder_base::memcpy(unsigned char* dst, size_t count) BMNOEXCEPT
619 {
620  if (dst)
621  ::memcpy(dst, buf_, count);
622  buf_ += count;
623 }
624 
625 /*!
626  \fn decoder::decoder(const unsigned char* buf)
627  \brief Construction
628  \param buf - pointer to the decoding memory.
629 */
630 inline decoder::decoder(const unsigned char* buf) BMNOEXCEPT
631 : decoder_base(buf)
632 {
633 }
634 
635 /*!
636  \fn bm::short_t decoder::get_16()
637  \brief Reads 16-bit word from the decoding buffer.
638 */
640 {
641 #if (BM_UNALIGNED_ACCESS_OK == 1)
642  bm::short_t a;
643  ::memcpy(&a, buf_, sizeof(bm::short_t));
644 #else
645  bm::short_t a = (bm::short_t)(buf_[0] + ((bm::short_t)buf_[1] << 8));
646 #endif
647  buf_ += sizeof(a);
648  return a;
649 }
650 
651 /*!
652  \fn bm::word_t decoder::get_24()
653  \brief Reads 32-bit word from the decoding buffer.
654 */
656 {
657  bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
658  ((unsigned)buf_[2] << 16);
659  buf_ += 3;
660  return a;
661 }
662 
663 
664 /*!
665  \fn bm::word_t decoder::get_32()
666  \brief Reads 32-bit word from the decoding buffer.
667 */
669 {
670 #if (BM_UNALIGNED_ACCESS_OK == 1)
671  bm::word_t a;
672  ::memcpy(&a, buf_, sizeof(bm::word_t));
673 #else
674  bm::word_t a = buf_[0]+ ((unsigned)buf_[1] << 8) +
675  ((unsigned)buf_[2] << 16) + ((unsigned)buf_[3] << 24);
676 #endif
677  buf_+=sizeof(a);
678  return a;
679 }
680 
681 /*!
682  \fn bm::word_t decoder::get_48()
683  \brief Reads 64-bit word from the decoding buffer.
684 */
685 inline
687 {
688  bm::id64_t a = buf_[0] +
689  ((bm::id64_t)buf_[1] << 8) +
690  ((bm::id64_t)buf_[2] << 16) +
691  ((bm::id64_t)buf_[3] << 24) +
692  ((bm::id64_t)buf_[4] << 32) +
693  ((bm::id64_t)buf_[5] << 40);
694  buf_ += 6;
695  return a;
696 }
697 
698 /*!
699  \fn bm::word_t decoder::get_64()
700  \brief Reads 64-bit word from the decoding buffer.
701 */
702 inline
704 {
705 #if (BM_UNALIGNED_ACCESS_OK == 1)
706  bm::id64_t a;
707  ::memcpy(&a, buf_, sizeof(bm::id64_t));
708 #else
709  bm::id64_t a = buf_[0]+
710  ((bm::id64_t)buf_[1] << 8) +
711  ((bm::id64_t)buf_[2] << 16) +
712  ((bm::id64_t)buf_[3] << 24) +
713  ((bm::id64_t)buf_[4] << 32) +
714  ((bm::id64_t)buf_[5] << 40) +
715  ((bm::id64_t)buf_[6] << 48) +
716  ((bm::id64_t)buf_[7] << 56);
717 #endif
718  buf_ += sizeof(a);
719  return a;
720 }
721 
722 
723 /*!
724  \fn void decoder::get_32(bm::word_t* w, unsigned count)
725  \brief Reads block of 32-bit words from the decoding buffer.
726  \param w - pointer on memory block to read into.
727  \param count - size of memory block in words.
728 */
729 inline void decoder::get_32(bm::word_t* w, unsigned count) BMNOEXCEPT
730 {
731  if (!w)
732  {
733  seek(int(count * sizeof(bm::word_t)));
734  return;
735  }
736 #if (BM_UNALIGNED_ACCESS_OK == 1)
737  ::memcpy(w, buf_, count * sizeof(bm::word_t));
738  seek(int(count * sizeof(bm::word_t)));
739  return;
740 #else
741  const unsigned char* buf = buf_;
742  const bm::word_t* w_end = w + count;
743  do
744  {
745  bm::word_t a = buf[0]+ ((unsigned)buf[1] << 8) +
746  ((unsigned)buf[2] << 16) + ((unsigned)buf[3] << 24);
747  *w++ = a;
748  buf += sizeof(a);
749  } while (w < w_end);
750  buf_ = (unsigned char*)buf;
751 #endif
752 }
753 
754 /*!
755  \brief Reads block of 32-bit words from the decoding buffer and ORs
756  to the destination
757  \param w - pointer on memory block to read into
758  \param count - should match bm::set_block_size
759 */
760 inline
761 bool decoder::get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT
762 {
763  if (!w)
764  {
765  seek(int(count * sizeof(bm::word_t)));
766  return false;
767  }
768 #if defined(BMAVX2OPT)
769  __m256i* buf_start = (__m256i*)buf_;
770  seek(int(count * sizeof(bm::word_t)));
771  __m256i* buf_end = (__m256i*)buf_;
772 
773  return bm::avx2_or_arr_unal((__m256i*)w, buf_start, buf_end);
774 #elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
775  __m128i* buf_start = (__m128i*)buf_;
776  seek(int(count * sizeof(bm::word_t)));
777  __m128i* buf_end = (__m128i*)buf_;
778 
779  return bm::sse2_or_arr_unal((__m128i*)w, buf_start, buf_end);
780 #else
781  bm::word_t acc = 0;
782  const bm::word_t not_acc = acc = ~acc;
783 
784  for (unsigned i = 0; i < count; i+=4)
785  {
786  acc &= (w[i+0] |= get_32());
787  acc &= (w[i+1] |= get_32());
788  acc &= (w[i+2] |= get_32());
789  acc &= (w[i+3] |= get_32());
790  }
791  return acc == not_acc;
792 #endif
793 }
794 
795 /*!
796  \brief Reads block of 32-bit words from the decoding buffer and ANDs
797  to the destination
798  \param w - pointer on memory block to read into
799  \param count - should match bm::set_block_size
800 */
801 inline
802 void decoder::get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT
803 {
804  if (!w)
805  {
806  seek(int(count * sizeof(bm::word_t)));
807  return;
808  }
809 #if defined(BMAVX2OPT)
810  __m256i* buf_start = (__m256i*)buf_;
811  seek(int(count * sizeof(bm::word_t)));
812  __m256i* buf_end = (__m256i*)buf_;
813 
814  bm::avx2_and_arr_unal((__m256i*)w, buf_start, buf_end);
815 #elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
816  __m128i* buf_start = (__m128i*)buf_;
817  seek(int(count * sizeof(bm::word_t)));
818  __m128i* buf_end = (__m128i*)buf_;
819 
820  bm::sse2_and_arr_unal((__m128i*)w, buf_start, buf_end);
821 #else
822  for (unsigned i = 0; i < count; i+=4)
823  {
824  w[i+0] &= get_32();
825  w[i+1] &= get_32();
826  w[i+2] &= get_32();
827  w[i+3] &= get_32();
828  }
829 #endif
830 }
831 
832 
833 
834 /*!
835  \fn void decoder::get_16(bm::short_t* s, unsigned count)
836  \brief Reads block of 32-bit words from the decoding buffer.
837  \param s - pointer on memory block to read into.
838  \param count - size of memory block in words.
839 */
840 inline void decoder::get_16(bm::short_t* s, unsigned count) BMNOEXCEPT
841 {
842  if (!s)
843  {
844  seek(int(count * sizeof(bm::short_t)));
845  return;
846  }
847 #if (BM_UNALIGNED_ACCESS_OK == 1)
848  ::memcpy(s, buf_, sizeof(bm::short_t) * count);
849  buf_ += sizeof(bm::short_t) * count;
850 #else
851  const unsigned char* buf = buf_;
852  const bm::short_t* s_end = s + count;
853  do
854  {
855  bm::short_t a = (bm::short_t)(buf[0] + ((bm::short_t)buf[1] << 8));
856  *s++ = a;
857  buf += sizeof(a);
858  } while (s < s_end);
859  buf_ = (unsigned char*)buf;
860 #endif
861 
862 }
863 
864 
865 
866 // ---------------------------------------------------------------------
867 
868 inline decoder_little_endian::decoder_little_endian(const unsigned char* buf)
869 : decoder_base(buf)
870 {
871 }
872 
873 inline
875 {
876  bm::short_t v1 = bm::short_t(buf_[0]);
877  bm::short_t v2 = bm::short_t(buf_[1]);
878  bm::short_t a = bm::short_t((v1 << 8) + v2);
879  buf_ += sizeof(a);
880  return a;
881 }
882 
884 {
885  // TODO: validate if this is a correct for cross endian opts
886  bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
887  ((unsigned)buf_[2] << 16);
888  buf_ += 3;
889  return a;
890 }
891 
892 inline
894 {
895  bm::word_t a = ((unsigned)buf_[0] << 24)+ ((unsigned)buf_[1] << 16) +
896  ((unsigned)buf_[2] << 8) + ((unsigned)buf_[3]);
897  buf_+=sizeof(a);
898  return a;
899 }
900 
901 inline
903 {
904  bm::id64_t a = buf_[0] +
905  ((bm::id64_t)buf_[1] << 8) +
906  ((bm::id64_t)buf_[2] << 16) +
907  ((bm::id64_t)buf_[3] << 24) +
908  ((bm::id64_t)buf_[4] << 32) +
909  ((bm::id64_t)buf_[5] << 40);
910  buf_ += 6;
911  return a;
912 }
913 
914 inline
916 {
917  bm::id64_t a = buf_[0]+
918  ((bm::id64_t)buf_[1] << 56) +
919  ((bm::id64_t)buf_[2] << 48) +
920  ((bm::id64_t)buf_[3] << 40) +
921  ((bm::id64_t)buf_[4] << 32) +
922  ((bm::id64_t)buf_[5] << 24) +
923  ((bm::id64_t)buf_[6] << 16) +
924  ((bm::id64_t)buf_[7] << 8);
925  buf_+=sizeof(a);
926  return a;
927 }
928 
929 inline
931 {
932  if (!w)
933  {
934  seek(int(count * sizeof(bm::word_t)));
935  return;
936  }
937 
938  const unsigned char* buf = buf_;
939  const bm::word_t* w_end = w + count;
940  do
941  {
942  bm::word_t a = ((unsigned)buf[0] << 24)+ ((unsigned)buf[1] << 16) +
943  ((unsigned)buf[2] << 8) + ((unsigned)buf[3]);
944  *w++ = a;
945  buf += sizeof(a);
946  } while (w < w_end);
947  buf_ = (unsigned char*)buf;
948 }
949 
950 inline
952 {
953  if (!w)
954  {
955  seek(int(count * sizeof(bm::word_t)));
956  return false;
957  }
958 
959  bm::word_t acc = 0;
960  const bm::word_t not_acc = acc = ~acc;
961 
962  for (unsigned i = 0; i < count; i+=4)
963  {
964  acc &= (w[i+0] |= get_32());
965  acc &= (w[i+1] |= get_32());
966  acc &= (w[i+2] |= get_32());
967  acc &= (w[i+3] |= get_32());
968  }
969  return acc == not_acc;
970 }
971 
972 inline
974 {
975  for (unsigned i = 0; i < count; i+=4)
976  {
977  w[i+0] &= get_32();
978  w[i+1] &= get_32();
979  w[i+2] &= get_32();
980  w[i+3] &= get_32();
981  }
982 }
983 
984 
985 inline
987 {
988  if (!s)
989  {
990  seek(int(count * sizeof(bm::short_t)));
991  return;
992  }
993 
994  const unsigned char* buf = buf_;
995  const bm::short_t* s_end = s + count;
996  do
997  {
998  bm::short_t v1 = bm::short_t(buf_[0]);
999  bm::short_t v2 = bm::short_t(buf_[1]);
1000  bm::short_t a = bm::short_t((v1 << 8) + v2);
1001  *s++ = a;
1002  buf += sizeof(a);
1003  } while (s < s_end);
1004  buf_ = (unsigned char*)buf;
1005 }
1006 
1007 // ----------------------------------------------------------------------
1008 //
1009 
1010 template<typename TEncoder>
1012 {
1013  BM_ASSERT(value <= 1);
1014  accum_ |= (value << used_bits_);
1015  if (++used_bits_ == (sizeof(accum_) * 8))
1016  flush_accum();
1017 }
1018 
1019 // ----------------------------------------------------------------------
1020 
1021 template<typename TEncoder>
1022 void bit_out<TEncoder>::put_bits(unsigned value, unsigned count) BMNOEXCEPT
1023 {
1024  unsigned used = used_bits_;
1025  unsigned acc = accum_;
1026 
1027  {
1028  unsigned mask = ~0u;
1029  mask >>= (sizeof(accum_) * 8) - count;
1030  value &= mask;
1031  }
1032  for (;count;)
1033  {
1034  unsigned free_bits = unsigned(sizeof(accum_) * 8) - used;
1035  BM_ASSERT(free_bits);
1036  acc |= value << used;
1037 
1038  if (count <= free_bits)
1039  {
1040  used += count;
1041  break;
1042  }
1043  else
1044  {
1045  value >>= free_bits;
1046  count -= free_bits;
1047  dest_.put_32(acc);
1048  acc = used = 0;
1049  continue;
1050  }
1051  }
1052  if (used == (sizeof(accum_) * 8))
1053  {
1054  dest_.put_32(acc);
1055  acc = used = 0;
1056  }
1057  used_bits_ = used;
1058  accum_ = acc;
1059 }
1060 
1061 // ----------------------------------------------------------------------
1062 
1063 template<typename TEncoder>
1065 {
1066  if (++used_bits_ == (sizeof(accum_) * 8))
1067  flush_accum();
1068 }
1069 
1070 // ----------------------------------------------------------------------
1071 
1072 template<typename TEncoder>
1074 {
1075  unsigned used = used_bits_;
1076  unsigned free_bits = (sizeof(accum_) * 8) - used;
1077  if (count >= free_bits)
1078  {
1079  flush_accum();
1080  count -= free_bits;
1081  used = 0;
1082 
1083  for ( ;count >= sizeof(accum_) * 8; count -= sizeof(accum_) * 8)
1084  {
1085  dest_.put_32(0);
1086  }
1087  used += count;
1088  }
1089  else
1090  {
1091  used += count;
1092  }
1093  accum_ |= (1u << used);
1094  if (++used == (sizeof(accum_) * 8))
1095  flush_accum();
1096  else
1097  used_bits_ = used;
1098 }
1099 
1100 // ----------------------------------------------------------------------
1101 
1102 template<typename TEncoder>
1104 {
1105  BM_ASSERT(value);
1106 
1107  unsigned logv = bm::bit_scan_reverse32(value);
1108 
1109  // Put zeroes + 1 bit
1110 
1111  unsigned used = used_bits_;
1112  unsigned acc = accum_;
1113  const unsigned acc_bits = (sizeof(acc) * 8);
1114  unsigned free_bits = acc_bits - used;
1115 
1116  {
1117  unsigned count = logv;
1118  if (count >= free_bits)
1119  {
1120  dest_.put_32(acc);
1121  acc = used ^= used;
1122  count -= free_bits;
1123 
1124  for ( ;count >= acc_bits; count -= acc_bits)
1125  {
1126  dest_.put_32(0);
1127  }
1128  used += count;
1129  }
1130  else
1131  {
1132  used += count;
1133  }
1134  acc |= (1 << used);
1135  if (++used == acc_bits)
1136  {
1137  dest_.put_32(acc);
1138  acc = used ^= used;
1139  }
1140  }
1141 
1142  // Put the value bits
1143  //
1144  {
1145  unsigned mask = (~0u);
1146  mask >>= acc_bits - logv;
1147  value &= mask;
1148  }
1149  for (;logv;)
1150  {
1151  acc |= value << used;
1152  free_bits = acc_bits - used;
1153  if (logv <= free_bits)
1154  {
1155  used += logv;
1156  break;
1157  }
1158  else
1159  {
1160  value >>= free_bits;
1161  logv -= free_bits;
1162  dest_.put_32(acc);
1163  acc = used ^= used;
1164  continue;
1165  }
1166  } // for
1167 
1168  used_bits_ = used;
1169  accum_ = acc;
1170 }
1171 
1172 // ----------------------------------------------------------------------
1173 
1174 template<typename TEncoder>
1176  const bm::gap_word_t* arr,
1177  unsigned sz,
1179 {
1180  for (;sz;)
1181  {
1182  BM_ASSERT(lo <= hi);
1183  unsigned mid_idx = sz >> 1;
1184  bm::gap_word_t val = arr[mid_idx];
1185 
1186  // write the interpolated value
1187  // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1188  {
1189  unsigned r = hi - lo - sz + 1;
1190  if (r)
1191  {
1192  unsigned value = val - lo - mid_idx;
1193  unsigned logv = bm::bit_scan_reverse32(r);
1194  put_bits(value, logv+1);
1195  }
1196  }
1197 
1198  bic_encode_u16_rg(arr, mid_idx, lo, gap_word_t(val-1));
1199  // tail recursion
1200  // bic_encode_u16(arr + mid_idx + 1, sz - mid_idx - 1, gap_word_t(val+1), hi);
1201  arr += mid_idx + 1;
1202  sz -= mid_idx + 1;
1203  lo = gap_word_t(val + 1);
1204  } // for sz
1205 }
1206 
1207 // ----------------------------------------------------------------------
1208 
1209 template<typename TEncoder>
1211  unsigned sz,
1212  bm::word_t lo,
1214 {
1215  for (;sz;)
1216  {
1217  BM_ASSERT(lo <= hi);
1218  unsigned mid_idx = sz >> 1;
1219  bm::word_t val = arr[mid_idx];
1220 
1221  // write the interpolated value
1222  // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1223  {
1224  unsigned r = hi - lo - sz + 1;
1225  if (r)
1226  {
1227  unsigned value = val - lo - mid_idx;
1228 
1229  unsigned n = r + 1;
1230  unsigned logv = bm::bit_scan_reverse32(n);
1231  unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1232  int64_t half_c = c >> 1; // c / 2;
1233  int64_t half_r = r >> 1; // r / 2;
1234  int64_t lo1 = half_r - half_c;
1235  int64_t hi1 = half_r + half_c + 1;
1236  lo1 -= (n & 1);
1237  logv += (value <= lo1 || value >= hi1);
1238 
1239  put_bits(value, logv);
1240  }
1241  }
1242 
1243  bic_encode_u32_cm(arr, mid_idx, lo, val-1);
1244  // tail recursive call:
1245  // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1246  arr += mid_idx + 1;
1247  sz -= mid_idx + 1;
1248  lo = val + 1;
1249  } // for sz
1250 }
1251 
1252 // ----------------------------------------------------------------------
1253 
1254 #if 0
1255 /**
1256  Shuffle structure for BIC
1257  @internal
1258 */
1259 template<unsigned N>
1260 struct bic_encode_stack_u16
1261 {
1262  bm::gap_word_t val_[N];
1263  bm::gap_word_t mid_[N];
1264  bm::gap_word_t lo_[N];
1265  bm::gap_word_t r_[N];
1266 
1267  unsigned stack_size_ = 0;
1268 
1269  void bic_shuffle(const bm::gap_word_t* arr,
1271  {
1272  for (;sz;)
1273  {
1274  BM_ASSERT(lo <= hi);
1275  bm::gap_word_t mid_idx = sz >> 1;
1276  bm::gap_word_t val = arr[mid_idx];
1277  unsigned r = hi - lo - sz + 1;
1278  if (r)
1279  {
1280  unsigned s = stack_size_++;
1281  r_[s] = (bm::gap_word_t)r;
1282  val_[s] = val;
1283  mid_[s] = mid_idx;
1284  lo_[s] = lo;
1285  }
1286 
1287  bic_shuffle(arr, mid_idx, lo, bm::gap_word_t(val-1));
1288  // tail recursive call:
1289  // bic_shuffle(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1290  arr += mid_idx + 1;
1291  sz -= mid_idx + 1;
1292  lo = bm::gap_word_t(val + 1);
1293  } // for sz
1294  }
1295 };
1296 
1297 template<typename TEncoder>
1299  unsigned sz_i,
1300  bm::gap_word_t lo_i,
1302 {
1303  BM_ASSERT(sz_i <= 65535);
1304 
1305  bic_encode_stack_u16<bm::bie_cut_off> u16_stack;
1306  // BIC re-ordering
1307  u16_stack.bic_shuffle(arr, bm::gap_word_t(sz_i), lo_i, hi_i);
1308 
1309  BM_ASSERT(sz_i == u16_stack.stack_size_);
1310  for (unsigned i = 0; i < sz_i; ++i)
1311  {
1312  bm::gap_word_t val = u16_stack.val_[i];
1313  bm::gap_word_t mid_idx = u16_stack.mid_[i];
1314  bm::gap_word_t lo = u16_stack.lo_[i];
1315  unsigned r = u16_stack.r_[i];
1316 
1317  unsigned value = val - lo - mid_idx;
1318  unsigned n = r + 1;
1319  unsigned logv = bm::bit_scan_reverse32(n);
1320  unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1321 
1322  int64_t half_c = c >> 1; // c / 2;
1323  int64_t half_r = r >> 1; // r / 2;
1324  int64_t lo1 = half_r - half_c;
1325  int64_t hi1 = half_r + half_c + 1;
1326  lo1 -= (n & 1);
1327  logv += (value <= lo1 || value >= hi1);
1328 
1329  put_bits(value, logv);
1330  } // for i
1331 }
1332 #endif
1333 
1334 
1335 template<typename TEncoder>
1337  unsigned sz,
1338  bm::gap_word_t lo,
1340 {
1341  for (;sz;)
1342  {
1343  BM_ASSERT(lo <= hi);
1344  unsigned mid_idx = sz >> 1;
1345  bm::gap_word_t val = arr[mid_idx];
1346 
1347  // write the interpolated value
1348  // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1349  {
1350  unsigned r = hi - lo - sz + 1;
1351  if (r)
1352  {
1353  unsigned value = val - lo - mid_idx;
1354 
1355  unsigned n = r + 1;
1356  unsigned logv = bm::bit_scan_reverse32(n);
1357  unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1358  unsigned half_c = c >> 1; // c / 2;
1359  unsigned half_r = r >> 1; // r / 2;
1360  int64_t lo1 = (int64_t(half_r) - half_c - (n & 1u));
1361  unsigned hi1 = (half_r + half_c);
1362  logv += (value <= lo1 || value > hi1);
1363 
1364  put_bits(value, logv);
1365  }
1366  }
1367 
1368  bic_encode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1369  // tail recursive call:
1370  // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1371  arr += ++mid_idx;
1372  sz -= mid_idx;
1373  lo = bm::gap_word_t(val + 1);
1374  } // for sz
1375 }
1376 
1377 
1378 
1379 
1380 
1381 
1382 // ----------------------------------------------------------------------
1383 //
1384 // ----------------------------------------------------------------------
1385 
1386 
1387 template<class TDecoder>
1389  bm::gap_word_t lo,
1391 {
1392  for (;sz;)
1393  {
1394  BM_ASSERT(lo <= hi);
1395 
1396  unsigned val;
1397  // read the value
1398  {
1399  unsigned r = hi - lo - sz + 1;
1400  if (r)
1401  {
1402  unsigned logv = bm::bit_scan_reverse32(r) + 1;
1403  val = get_bits(logv);
1404  BM_ASSERT(val <= r);
1405  }
1406  else
1407  {
1408  val = 0;
1409  }
1410  }
1411  unsigned mid_idx = sz >> 1;
1412  val += lo + mid_idx;
1413 
1414  BM_ASSERT(val < 65536);
1415  BM_ASSERT(mid_idx < 65536);
1416 
1417  arr[mid_idx] = bm::gap_word_t(val);
1418  if (sz == 1)
1419  return;
1420  bic_decode_u16_rg(arr, mid_idx, lo, bm::gap_word_t(val - 1));
1421  //bic_decode_u16(arr + mid_idx + 1, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
1422  arr += mid_idx + 1;
1423  sz -= mid_idx + 1;
1424  lo = bm::gap_word_t(val + 1);
1425  } // for sz
1426 }
1427 
1428 // ----------------------------------------------------------------------
1429 
1430 template<class TDecoder>
1432  bm::word_t lo,
1434 {
1435  for (;sz;)
1436  {
1437  BM_ASSERT(lo <= hi);
1438 
1439  unsigned val;
1440 
1441  // read the interpolated value
1442  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1443  {
1444  unsigned r = hi - lo - sz + 1;
1445  if (r)
1446  {
1447  unsigned logv = bm::bit_scan_reverse32(r+1);
1448 
1449  unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1450  int64_t half_c = c >> 1; // c / 2;
1451  int64_t half_r = r >> 1; // r / 2;
1452  int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1453  int64_t hi1 = half_r + half_c + 1;
1454  val = get_bits(logv);
1455  if (val <= lo1 || val >= hi1)
1456  val += (get_bits(1) << logv);
1457  BM_ASSERT(val <= r);
1458  }
1459  else
1460  {
1461  val = 0;
1462  }
1463  }
1464 
1465  unsigned mid_idx = sz >> 1;
1466  val += lo + mid_idx;
1467  arr[mid_idx] = val;
1468  if (sz == 1)
1469  return;
1470 
1471  bic_decode_u32_cm(arr, mid_idx, lo, val-1);
1472  // tail recursive call:
1473  // bic_decode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1474  arr += mid_idx + 1;
1475  sz -= mid_idx + 1;
1476  lo = val + 1;
1477  } // for sz
1478 }
1479 
1480 // ----------------------------------------------------------------------
1481 
1482 template<class TDecoder>
1484  bm::gap_word_t lo,
1486 {
1487  for (;sz;)
1488  {
1489  BM_ASSERT(lo <= hi);
1490 
1491  unsigned val;
1492 
1493  // read the interpolated value
1494  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1495  {
1496  unsigned r = hi - lo - sz + 1;
1497  if (r)
1498  {
1499  unsigned logv = bm::bit_scan_reverse32(r+1);
1500 
1501  unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1502  int64_t half_c = c >> 1; // c / 2;
1503  int64_t half_r = r >> 1; // r / 2;
1504  int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1505  int64_t hi1 = half_r + half_c + 1;
1506  val = get_bits(logv);
1507  if (val <= lo1 || val >= hi1)
1508  val += (get_bits(1) << logv);
1509  BM_ASSERT(val <= r);
1510  }
1511  else
1512  {
1513  val = 0;
1514  }
1515  }
1516 
1517  unsigned mid_idx = sz >> 1;
1518  val += lo + mid_idx;
1519  arr[mid_idx] = bm::gap_word_t(val);
1520  if (sz == 1)
1521  return;
1522 
1523  bic_decode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1524  // tail recursive call:
1525  // bic_decode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1526  arr += mid_idx + 1;
1527  sz -= mid_idx + 1;
1528  lo = bm::gap_word_t(val + 1);
1529  } // for sz
1530 }
1531 
1532 // ----------------------------------------------------------------------
1533 
1534 template<class TDecoder>
1536  bm::gap_word_t lo,
1538 {
1539  for (;sz;)
1540  {
1541  BM_ASSERT(lo <= hi);
1542 
1543  unsigned val;
1544 
1545  // read the interpolated value
1546  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1547  {
1548  unsigned r = hi - lo - sz + 1;
1549  if (r)
1550  {
1551  unsigned logv = bm::bit_scan_reverse32(r+1);
1552 
1553  unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1554  int64_t half_c = c >> 1; // c / 2;
1555  int64_t half_r = r >> 1; // r / 2;
1556  int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1557  int64_t hi1 = half_r + half_c + 1;
1558  val = get_bits(logv);
1559  if (val <= lo1 || val >= hi1)
1560  val += (get_bits(1) << logv);
1561  BM_ASSERT(val <= r);
1562  }
1563  else
1564  {
1565  val = 0;
1566  }
1567  }
1568 
1569  unsigned mid_idx = sz >> 1;
1570  val += lo + mid_idx;
1571 
1572  // set bit in the target block
1573  {
1574  unsigned nword = (val >> bm::set_word_shift);
1575  block[nword] |= (1u << (val & bm::set_word_mask));
1576  }
1577 
1578  if (sz == 1)
1579  return;
1580 
1581  bic_decode_u16_cm_bitset(block, mid_idx, lo, bm::gap_word_t(val-1));
1582  // tail recursive call:
1583  // bic_decode_u32_cm(block, sz - mid_idx - 1, val + 1, hi);
1584  sz -= mid_idx + 1;
1585  lo = bm::gap_word_t(val + 1);
1586  } // for sz
1587 }
1588 
1589 // ----------------------------------------------------------------------
1590 
1591 template<class TDecoder>
1593  bm::gap_word_t lo,
1595 {
1596  for (;sz;)
1597  {
1598  BM_ASSERT(lo <= hi);
1599 
1600  unsigned val;
1601 
1602  // read the interpolated value
1603  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1604  {
1605  unsigned r = hi - lo - sz + 1;
1606  if (r)
1607  {
1608  unsigned logv = bm::bit_scan_reverse32(r+1);
1609 
1610  unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1611  int64_t half_c = c >> 1; // c / 2;
1612  int64_t half_r = r >> 1; // r / 2;
1613  int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1614  int64_t hi1 = half_r + half_c + 1;
1615  val = get_bits(logv);
1616  if (val <= lo1 || val >= hi1)
1617  val += (get_bits(1) << logv);
1618  BM_ASSERT(val <= r);
1619  }
1620  else
1621  {
1622  val = 0;
1623  }
1624  }
1625 
1626  unsigned mid_idx = sz >> 1;
1627  val += lo + mid_idx;
1628 
1629  if (sz == 1)
1630  return;
1631 
1632  bic_decode_u16_cm_dry(mid_idx, lo, bm::gap_word_t(val-1));
1633  // tail recursive call:
1634  // bic_decode_u32_cm_dry(sz - mid_idx - 1, val + 1, hi);
1635  sz -= mid_idx + 1;
1636  lo = bm::gap_word_t(val + 1);
1637  } // for sz
1638 }
1639 
1640 
1641 // ----------------------------------------------------------------------
1642 
1643 template<class TDecoder>
1645  bm::gap_word_t lo,
1647 {
1648  for (;sz;)
1649  {
1650  BM_ASSERT(lo <= hi);
1651 
1652  unsigned val;
1653  // read the value
1654  {
1655  unsigned r = hi - lo - sz + 1;
1656  if (r)
1657  {
1658  unsigned logv = bm::bit_scan_reverse32(r) + 1;
1659  val = get_bits(logv);
1660  BM_ASSERT(val <= r);
1661  }
1662  else
1663  {
1664  val = 0;
1665  }
1666  }
1667  unsigned mid_idx = sz >> 1;
1668  val += lo + mid_idx;
1669  BM_ASSERT(val < 65536);
1670  BM_ASSERT(mid_idx < 65536);
1671 
1672  // set bit in the target block
1673  {
1674  unsigned nword = (val >> bm::set_word_shift);
1675  block[nword] |= (1u << (val & bm::set_word_mask));
1676  }
1677 
1678  if (sz == 1)
1679  return;
1680  bic_decode_u16_rg_bitset(block, mid_idx, lo, bm::gap_word_t(val - 1));
1681  // tail recursion of:
1682  //bic_decode_u16_bitset(block, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
1683  sz -= mid_idx + 1;
1684  lo = bm::gap_word_t(val + 1);
1685  } // for sz
1686 }
1687 
1688 // ----------------------------------------------------------------------
1689 
1690 template<class TDecoder>
1692  bm::gap_word_t lo,
1694 {
1695  for (;sz;)
1696  {
1697  BM_ASSERT(lo <= hi);
1698 
1699  unsigned val;
1700  // read the value
1701  {
1702  unsigned r = hi - lo - sz + 1;
1703  if (r)
1704  {
1705  unsigned logv = bm::bit_scan_reverse32(r) + 1;
1706  val = get_bits(logv);
1707  BM_ASSERT(val <= r);
1708  }
1709  else
1710  {
1711  val = 0;
1712  }
1713  }
1714  unsigned mid_idx = sz >> 1;
1715  val += lo + mid_idx;
1716  BM_ASSERT(val < 65536);
1717  BM_ASSERT(mid_idx < 65536);
1718 
1719  if (sz == 1)
1720  return;
1721  bic_decode_u16_rg_dry(mid_idx, lo, bm::gap_word_t(val - 1));
1722  sz -= mid_idx + 1;
1723  lo = bm::gap_word_t(val + 1);
1724  } // for sz
1725 }
1726 
1727 
1728 
1729 // ----------------------------------------------------------------------
1730 
1731 template<class TDecoder>
1733 {
1734  unsigned acc = accum_;
1735  unsigned used = used_bits_;
1736 
1737  if (used == (sizeof(acc) * 8))
1738  {
1739  acc = src_.get_32();
1740  used ^= used;
1741  }
1742  unsigned zero_bits = 0;
1743  while (true)
1744  {
1745  if (acc == 0)
1746  {
1747  zero_bits = unsigned(zero_bits +(sizeof(acc) * 8) - used);
1748  used = 0;
1749  acc = src_.get_32();
1750  continue;
1751  }
1752  unsigned first_bit_idx =
1753  #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
1754  bm::bsf_asm32(acc);
1755  #else
1756  bm::bit_scan_fwd(acc);
1757  #endif
1758  acc >>= first_bit_idx;
1759  zero_bits += first_bit_idx;
1760  used += first_bit_idx;
1761  break;
1762  } // while
1763 
1764  // eat the border bit
1765  //
1766  if (used == (sizeof(acc) * 8))
1767  {
1768  acc = src_.get_32();
1769  used = 1;
1770  }
1771  else
1772  {
1773  ++used;
1774  }
1775  acc >>= 1;
1776 
1777  // get the value
1778  unsigned current;
1779 
1780  unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1781  if (zero_bits <= free_bits)
1782  {
1783  take_accum:
1784  current =
1785  (acc & block_set_table<true>::_left[zero_bits]) | (1 << zero_bits);
1786  acc >>= zero_bits;
1787  used += zero_bits;
1788  goto ret;
1789  }
1790 
1791  if (used == (sizeof(acc) * 8))
1792  {
1793  acc = src_.get_32();
1794  used ^= used;
1795  goto take_accum;
1796  }
1797 
1798  // take the part
1799  current = acc;
1800  // read the next word
1801  acc = src_.get_32();
1802  used = zero_bits - free_bits;
1803  current |=
1804  ((acc & block_set_table<true>::_left[used]) << free_bits) |
1805  (1 << zero_bits);
1806 
1807  acc >>= used;
1808 ret:
1809  accum_ = acc;
1810  used_bits_ = used;
1811  return current;
1812 }
1813 
1814 // ----------------------------------------------------------------------
1815 
1816 template<class TDecoder>
1817 unsigned bit_in<TDecoder>::get_bits(unsigned count) BMNOEXCEPT
1818 {
1819  BM_ASSERT(count);
1820  const unsigned maskFF = ~0u;
1821  unsigned acc = accum_;
1822  unsigned used = used_bits_;
1823 
1824  unsigned value = 0;
1825  unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1826  if (count <= free_bits)
1827  {
1828  take_accum:
1829  value = acc & (maskFF >> (32 - count));
1830  acc >>= count;
1831  used += count;
1832  goto ret;
1833  }
1834  if (used == (sizeof(acc) * 8))
1835  {
1836  acc = src_.get_32();
1837  used = 0;
1838  goto take_accum;
1839  }
1840  value = acc;
1841  acc = src_.get_32();
1842  used = count - free_bits;
1843  value |= ((acc & (maskFF >> (32 - used))) << free_bits);
1844  acc >>= used;
1845 ret:
1846  accum_ = acc;
1847  used_bits_ = used;
1848  return value;
1849 }
1850 
1851 // ----------------------------------------------------------------------
1852 
1853 } // namespace bm
1854 
1855 #ifdef _MSVC_VER
1856 #pragma warning(pop)
1857 #endif
1858 
1859 #endif
bm::bit_in::bic_decode_u16_rg_dry
void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into /dev/null.
Definition: encoding.h:1691
bm::bit_in::bic_decode_u16_rg
void bic_decode_u16_rg(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition: encoding.h:1388
bm::bit_in::bic_decode_u32_cm
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit)
Definition: encoding.h:1431
bm::bit_in::gamma
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
Definition: encoding.h:1732
bm::encoder::put_32
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
Definition: encoding.h:527
bm::gamma_decoder::gamma_decoder
gamma_decoder(TBitIO &bin)
Definition: encoding.h:350
bm::decoder_little_endian::get_32_OR
bool get_32_OR(bm::word_t *w, unsigned count)
Definition: encoding.h:951
bm::decoder::get_24
bm::word_t get_24() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:655
bm::gamma_decoder::stop
void stop()
Stop decoding sequence.
Definition: encoding.h:360
bm::decoder::get_48
bm::id64_t get_48() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:686
bm::decoder_base::get_pos
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
Definition: encoding.h:101
bm::decoder_little_endian::get_64
bm::id64_t get_64()
Definition: encoding.h:915
bm::encoder::put_64
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
Definition: encoding.h:562
bm::bit_in::bic_decode_u16_cm
void bic_decode_u16_cm(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition: encoding.h:1483
bm::gamma_decoder::operator()
T operator()(void)
Decode word.
Definition: encoding.h:365
bm::decoder_little_endian
Class for decoding data from memory buffer.
Definition: encoding.h:151
bm::bit_out
Byte based writer for un-aligned bit streaming.
Definition: encoding.h:174
bm::bit_in::bic_decode_u16_bitset
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition: encoding.h:270
bm::bit_out::bic_encode_u16_rg
void bic_encode_u16_rg(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 16-bit ints)
Definition: encoding.h:1175
bm::encoder::put_prefixed_array_32
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Definition: encoding.h:392
bm::bit_out::bic_encode_u32_cm
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
Definition: encoding.h:1210
bm::encoder::encoder
encoder(unsigned char *buf, size_t size) BMNOEXCEPT
Construction.
Definition: encoding.h:384
bm::id64_t
unsigned long long int id64_t
Definition: bmconst.h:34
bm::gamma_encoder::operator()
void operator()(T value)
Encode word.
Definition: encoding.h:333
bm::encoder::size
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
Definition: encoding.h:485
bm::gamma_encoder::gamma_encoder
gamma_encoder(TBitIO &bout)
Definition: encoding.h:329
bm::set_word_shift
const unsigned set_word_shift
Definition: bmconst.h:71
bm::decoder_base::memcpy
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
Definition: encoding.h:618
bm::decoder::get_32
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:668
bm::decoder_little_endian::get_16
bm::short_t get_16()
Definition: encoding.h:874
bm::gamma_decoder::start
void start()
Start encoding sequence.
Definition: encoding.h:355
bm::decoder_base::set_pos
void set_pos(const unsigned char *pos) BMNOEXCEPT
Set current buffer pointer.
Definition: encoding.h:104
bm::decoder_base::get_8
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
Definition: encoding.h:89
bm::short_t
unsigned short short_t
Definition: bmconst.h:39
bm::decoder_base
Base class for all decoding functionality.
Definition: encoding.h:83
bm::bit_in::bic_decode_u16_cm_dry
void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into /dev/null.
Definition: encoding.h:1592
bm::decoder_little_endian::get_32_AND
void get_32_AND(bm::word_t *w, unsigned count)
Definition: encoding.h:973
bm::decoder_big_endian
decoder decoder_big_endian
Class for decoding data from memory buffer.
Definition: encoding.h:140
bm::encoder::put_8
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
Definition: encoding.h:420
bm::bit_in
Byte based reader for un-aligned bit streaming.
Definition: encoding.h:248
bm::decoder::get_64
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:703
bm::encoder::set_pos
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
Definition: encoding.h:501
bm::sse2_and_arr_unal
unsigned sse2_and_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end)
AND array elements against another array (unaligned) dst &= *src.
Definition: bmsse_util.h:175
bm::decoder_little_endian::decoder_little_endian
decoder_little_endian(const unsigned char *buf)
Definition: encoding.h:868
BMNOEXCEPT
#define BMNOEXCEPT
Definition: bmdef.h:79
bm::encoder::put_16
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
Definition: encoding.h:430
bm::gap_word_t
unsigned short gap_word_t
Definition: bmconst.h:77
bm::bit_out::put_bits
void put_bits(unsigned value, unsigned count) BMNOEXCEPT
issue count bits out of value
Definition: encoding.h:1022
BM_ASSERT
#define BM_ASSERT
Definition: bmdef.h:130
bm::decoder_little_endian::get_24
bm::word_t get_24()
Definition: encoding.h:883
bm::block_set_table
Structure keeps all-left/right ON bits masks.
Definition: bmconst.h:342
bm::gamma_decoder
Elias Gamma decoder.
Definition: encoding.h:347
bm::bit_in::bic_decode_u16_cm_bitset
void bic_decode_u16_cm_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into bitset (32-bit based)
Definition: encoding.h:1535
bm::bit_out::put_zero_bits
void put_zero_bits(unsigned count) BMNOEXCEPT
issue specified number of 0s
Definition: encoding.h:1073
bm::bit_out::flush
void flush() BMNOEXCEPT
Flush the incomplete 32-bit accumulator word.
Definition: encoding.h:222
bm::decoder::get_32_AND
void get_32_AND(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ANDs to the destination.
Definition: encoding.h:802
bm::bit_in::bic_decode_u16_rg_bitset
void bic_decode_u16_rg_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into bitset (32-bit based)
Definition: encoding.h:1644
bm::bit_out::bit_out
bit_out(TEncoder &dest)
Definition: encoding.h:177
bmutil.h
Bit manipulation primitives (internal)
bm::bit_scan_reverse32
unsigned bit_scan_reverse32(unsigned value) BMNOEXCEPT
Definition: bmutil.h:301
bm::bit_out::put_bit
void put_bit(unsigned value) BMNOEXCEPT
issue single bit into encode bit-stream
Definition: encoding.h:1011
bm::bit_in::get_bits
unsigned get_bits(unsigned count) BMNOEXCEPT
read number of bits out of the stream
Definition: encoding.h:1817
bm::bit_in::bic_decode_u16_dry
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition: encoding.h:275
bm::decoder_little_endian::get_32
bm::word_t get_32()
Definition: encoding.h:893
bm::encoder::put_24
void put_24(bm::word_t w) BMNOEXCEPT
Puts 24 bits word into encoding buffer.
Definition: encoding.h:511
bm::decoder_base::buf_
const unsigned char * buf_
Definition: encoding.h:106
bm::encoder::put_48
void put_48(bm::id64_t w) BMNOEXCEPT
Puts 48 bits word into encoding buffer.
Definition: encoding.h:545
BMFORCEINLINE
#define BMFORCEINLINE
Definition: bmdef.h:203
bm
Definition: bm.h:76
bm::decoder_base::decoder_base
decoder_base(const unsigned char *buf) BMNOEXCEPT
Definition: encoding.h:86
bm::encoder::get_pos
unsigned char * get_pos() const BMNOEXCEPT
Get current memory stream position.
Definition: encoding.h:493
bm::bit_in::bit_in
bit_in(TDecoder &decoder) BMNOEXCEPT
Definition: encoding.h:251
bm::gamma_encoder
Functor for Elias Gamma encoding.
Definition: encoding.h:326
bm::decoder_base::size
size_t size() const BMNOEXCEPT
Returns size of the current decoding stream.
Definition: encoding.h:92
bm::set_word_mask
const unsigned set_word_mask
Definition: bmconst.h:72
bm::decoder::get_16
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
Definition: encoding.h:639
bm::word_t
unsigned int word_t
Definition: bmconst.h:38
bm::decoder::get_32_OR
bool get_32_OR(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ORs to the destination.
Definition: encoding.h:761
bm::encoder::put_prefixed_array_16
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Definition: encoding.h:403
bm::bit_out::put_zero_bit
void put_zero_bit() BMNOEXCEPT
issue 0 into output stream
Definition: encoding.h:1064
bm::decoder_base::start_
const unsigned char * start_
Definition: encoding.h:107
bm::decoder::decoder
decoder(const unsigned char *buf) BMNOEXCEPT
Construction.
Definition: encoding.h:630
bm::bit_out::bic_encode_u16_cm
void bic_encode_u16_cm(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 16-bit ints) cm - "center-minimal".
Definition: encoding.h:1336
bm::encoder
Memory encoding.
Definition: encoding.h:49
bm::bit_scan_fwd
T bit_scan_fwd(T v) BMNOEXCEPT
Definition: bmutil.h:294
bm::decoder_little_endian::get_48
bm::id64_t get_48()
Definition: encoding.h:902
bm::decoder
Class for decoding data from memory buffer.
Definition: encoding.h:117
bm::decoder_base::seek
void seek(int delta) BMNOEXCEPT
change current position
Definition: encoding.h:95
bm::sse2_or_arr_unal
bool sse2_or_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end)
OR array elements against another array (unaligned) dst |= *src.
Definition: bmsse_util.h:342
bm::encoder::memcpy
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
Definition: encoding.h:472
bm::bit_out::gamma
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
Definition: encoding.h:1103
bm::bit_out::~bit_out
~bit_out()
Definition: encoding.h:181
bm::encoder::position_type
unsigned char * position_type
Definition: encoding.h:52