Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Data Structures | Directories | File List | Namespace Members | Data Fields | Globals | Examples

bmalgo.h

Go to the documentation of this file.
00001 /*
00002 Copyright(c) 2002-2005 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
00003 
00004 Permission is hereby granted, free of charge, to any person 
00005 obtaining a copy of this software and associated documentation 
00006 files (the "Software"), to deal in the Software without restriction, 
00007 including without limitation the rights to use, copy, modify, merge, 
00008 publish, distribute, sublicense, and/or sell copies of the Software, 
00009 and to permit persons to whom the Software is furnished to do so, 
00010 subject to the following conditions:
00011 
00012 The above copyright notice and this permission notice shall be included 
00013 in all copies or substantial portions of the Software.
00014 
00015 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
00016 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
00017 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
00018 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
00019 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
00020 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
00021 OTHER DEALINGS IN THE SOFTWARE.
00022 
00023 For more information please visit:  http://bmagic.sourceforge.net
00024 
00025 */
00026 
00027 #ifndef BMALGO__H__INCLUDED__
00028 #define BMALGO__H__INCLUDED__
00029 
00030 #include "bm.h"
00031 #include "bmfunc.h"
00032 #include "bmdef.h"
00033 
00034 namespace bm
00035 {
00036 
00037 /*! \defgroup setalgo Set algorithms 
00038  *  Set algorithms 
00039  *  \ingroup bmagic
00040  */
00041 
00042 /*! \defgroup distance Distance metrics 
00043  *  Algorithms to compute binary distance metrics
00044  *  \ingroup setalgo
00045  */
00046 
00047 
00048 /*! 
00049     \brief    Distance metrics codes defined for vectors A and B
00050     \ingroup  distance
00051 */
00052 enum distance_metric
00053 {
00054     COUNT_AND,       //!< (A & B).count()
00055     COUNT_XOR,       //!< (A ^ B).count()
00056     COUNT_OR,        //!< (A | B).count()
00057     COUNT_SUB_AB,    //!< (A - B).count()
00058     COUNT_SUB_BA,    //!< (B - A).count()
00059     COUNT_A,         //!< A.count()
00060     COUNT_B          //!< B.count()
00061 };
00062 
00063 /*! 
00064     \brief Distance metric descriptor, holds metric code and result.
00065     \sa distance_operation
00066 */
00067 
00068 struct distance_metric_descriptor
00069 {
00070      distance_metric   metric;
00071      bm::id_t          result;
00072      
00073      distance_metric_descriptor(distance_metric m)
00074      : metric(m),
00075        result(0)
00076     {}
00077     distance_metric_descriptor()
00078     : metric(bm::COUNT_XOR),
00079       result(0)
00080     {}
00081     
00082     /*! 
00083         \brief Sets metric result to 0
00084     */
00085     void reset()
00086     {
00087         result = 0;
00088     }
00089 };
00090 
00091 
00092 /*!
00093     \brief Internal function computes different distance metrics.
00094     \internal 
00095     \ingroup  distance
00096      
00097 */
00098 inline
00099 void combine_count_operation_with_block(const bm::word_t* blk,
00100                                         unsigned gap,
00101                                         const bm::word_t* arg_blk,
00102                                         int arg_gap,
00103                                         bm::word_t* temp_blk,
00104                                         distance_metric_descriptor* dmit,
00105                                         distance_metric_descriptor* dmit_end)
00106                                             
00107 {
00108      gap_word_t* res=0;
00109      
00110      gap_word_t* g1 = BMGAP_PTR(blk);
00111      gap_word_t* g2 = BMGAP_PTR(arg_blk);
00112      
00113      if (gap) // first block GAP-type
00114      {
00115          if (arg_gap)  // both blocks GAP-type
00116          {
00117              gap_word_t tmp_buf[bm::gap_max_buff_len * 3]; // temporary result
00118              
00119              for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00120              {
00121                  distance_metric_descriptor& dmd = *it;
00122                  
00123                  switch (dmd.metric)
00124                  {
00125                  case bm::COUNT_AND:
00126                      res = gap_operation_and(g1, g2, tmp_buf);
00127                      break;
00128                  case bm::COUNT_OR:
00129                      res = gap_operation_or(g1, g2, tmp_buf);
00130                      break;
00131                  case bm::COUNT_SUB_AB:
00132                      res = gap_operation_sub(g1, g2, tmp_buf); 
00133                      break;
00134                  case bm::COUNT_SUB_BA:
00135                      res = gap_operation_sub(g2, g1, tmp_buf); 
00136                      break;
00137                  case bm::COUNT_XOR:
00138                      res = gap_operation_xor(g1, g2, tmp_buf); 
00139                     break;
00140                  case bm::COUNT_A:
00141                     res = g1;
00142                     break;
00143                  case bm::COUNT_B:
00144                     res = g2;
00145                     break;
00146                  } // switch
00147                  
00148                  if (res)
00149                      dmd.result += gap_bit_count(res);
00150                     
00151              } // for it
00152              
00153              return;
00154 
00155          }
00156          else // first block - GAP, argument - BITSET
00157          {
00158              for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00159              {
00160                  distance_metric_descriptor& dmd = *it;
00161                  
00162                  switch (dmd.metric)
00163                  {
00164                  case bm::COUNT_AND:
00165                      if (arg_blk)
00166                         dmd.result += gap_bitset_and_count(arg_blk, g1);
00167                      break;
00168                  case bm::COUNT_OR:
00169                      if (!arg_blk)
00170                         dmd.result += gap_bit_count(g1);
00171                      else
00172                         dmd.result += gap_bitset_or_count(arg_blk, g1); 
00173                      break;
00174                  case bm::COUNT_SUB_AB:
00175                      gap_convert_to_bitset((bm::word_t*) temp_blk, g1);
00176                      dmd.result += 
00177                        bit_operation_sub_count((bm::word_t*)temp_blk, 
00178                           ((bm::word_t*)temp_blk) + bm::set_block_size,
00179                            arg_blk);
00180                  
00181                      break;
00182                  case bm::COUNT_SUB_BA:
00183                      dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
00184                      combine_count_operation_with_block(arg_blk,
00185                                                         arg_gap,
00186                                                         blk,
00187                                                         gap,
00188                                                         temp_blk,
00189                                                         it, it+1);
00190                      dmd.metric = bm::COUNT_SUB_BA; // restore status quo
00191                      break;
00192                  case bm::COUNT_XOR:
00193                      if (!arg_blk)
00194                         dmd.result += gap_bit_count(g1);
00195                      else
00196                         dmd.result += gap_bitset_xor_count(arg_blk, g1);
00197                      break;
00198                  case bm::COUNT_A:
00199                     if (g1)
00200                         dmd.result += gap_bit_count(g1);
00201                     break;
00202                  case bm::COUNT_B:
00203                     if (arg_blk)
00204                     {
00205                         dmd.result += 
00206                           bit_block_calc_count(arg_blk, 
00207                                                arg_blk + bm::set_block_size);
00208                     }
00209                     break;
00210                  } // switch
00211                                      
00212              } // for it
00213              
00214              return;
00215          
00216          }
00217      } 
00218      else // first block is BITSET-type
00219      {     
00220          if (arg_gap) // second argument block is GAP-type
00221          {
00222              for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00223              {
00224                  distance_metric_descriptor& dmd = *it;
00225                  
00226                  switch (dmd.metric)
00227                  {
00228                  case bm::COUNT_AND:
00229                      if (blk) 
00230                         dmd.result += gap_bitset_and_count(blk, g2);                         
00231                      break;
00232                  case bm::COUNT_OR:
00233                      if (!blk)
00234                         dmd.result += gap_bit_count(g2);
00235                      else
00236                         dmd.result += gap_bitset_or_count(blk, g2);
00237                      break;
00238                  case bm::COUNT_SUB_AB:
00239                      if (blk)
00240                         dmd.result += gap_bitset_sub_count(blk, g2);
00241                      break;
00242                  case bm::COUNT_SUB_BA:
00243                      dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
00244                      combine_count_operation_with_block(arg_blk,
00245                                                         arg_gap,
00246                                                         blk,
00247                                                         gap,
00248                                                         temp_blk,
00249                                                         it, it+1);
00250                      dmd.metric = bm::COUNT_SUB_BA; // restore status quo
00251                      break;
00252                  case bm::COUNT_XOR:
00253                      if (!blk)
00254                         dmd.result += gap_bit_count(g2);
00255                      else
00256                         dmd.result += gap_bitset_xor_count(blk, g2); 
00257                     break;
00258                  case bm::COUNT_A:
00259                     if (blk)
00260                     {
00261                         dmd.result += 
00262                             bit_block_calc_count(blk, 
00263                                                  blk + bm::set_block_size);
00264                     }
00265                     break;
00266                  case bm::COUNT_B:
00267                     if (g2)
00268                         dmd.result += gap_bit_count(g2);
00269                     break;
00270                  } // switch
00271                                      
00272              } // for it
00273              
00274              return;
00275          }
00276      }
00277 
00278      // --------------------------------------------
00279      //
00280      // Here we combine two plain bitblocks 
00281 
00282      const bm::word_t* blk_end;
00283      const bm::word_t* arg_end;
00284 
00285      blk_end = blk + (bm::set_block_size);
00286      arg_end = arg_blk + (bm::set_block_size);
00287 
00288 
00289      for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
00290      {
00291          distance_metric_descriptor& dmd = *it;
00292 
00293          switch (dmd.metric)
00294          {
00295          case bm::COUNT_AND:
00296              dmd.result += 
00297                 bit_operation_and_count(blk, blk_end, arg_blk);
00298              break;
00299          case bm::COUNT_OR:
00300              dmd.result += 
00301                 bit_operation_or_count(blk, blk_end, arg_blk);
00302              break;
00303          case bm::COUNT_SUB_AB:
00304              dmd.result += 
00305                 bit_operation_sub_count(blk, blk_end, arg_blk);
00306              break;
00307          case bm::COUNT_SUB_BA:
00308              dmd.result += 
00309                 bit_operation_sub_count(arg_blk, arg_end, blk);
00310              break;
00311          case bm::COUNT_XOR:
00312              dmd.result += 
00313                 bit_operation_xor_count(blk, blk_end, arg_blk);
00314              break;
00315          case bm::COUNT_A:
00316             if (blk)
00317                 dmd.result += bit_block_calc_count(blk, blk_end);
00318             break;
00319          case bm::COUNT_B:
00320             if (arg_blk)
00321                 dmd.result += bit_block_calc_count(arg_blk, arg_end);
00322             break;
00323          } // switch
00324 
00325      } // for it
00326      
00327      
00328 
00329 }
00330 
00331 /*!
00332     \brief Distance computing template function.
00333 
00334     Function receives two bitvectors and an array of distance metrics
00335     (metrics pipeline). Function computes all metrics saves result into
00336     corresponding pipeline results (distance_metric_descriptor::result)
00337     An important detail is that function reuses metric descriptors, 
00338     incrementing received values. It allows you to accumulate results 
00339     from different calls in the pipeline.
00340     
00341     \param bv1      - argument bitvector 1 (A)
00342     \param bv2      - argument bitvector 2 (B)
00343     \param dmit     - pointer to first element of metric descriptors array
00344                       Input-Output parameter, receives metric code as input,
00345                       computation is added to "result" field
00346     \param dmit_end - pointer to (last+1) element of metric descriptors array
00347     \ingroup  distance
00348     
00349 */
00350 
00351 template<class BV>
00352 void distance_operation(const BV& bv1, 
00353                         const BV& bv2, 
00354                         distance_metric_descriptor* dmit,
00355                         distance_metric_descriptor* dmit_end)
00356 {
00357     const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
00358     const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
00359     
00360     bm::word_t* temp_blk = 0;
00361     
00362     {
00363         for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
00364         {
00365             if (it->metric == bm::COUNT_SUB_AB || 
00366                 it->metric == bm::COUNT_SUB_BA)
00367             {
00368                 temp_blk = bv1.allocate_tempblock();
00369                 break;
00370             }
00371         }
00372     }
00373     
00374   
00375     bm::word_t*** blk_root = bman1.get_rootblock();
00376     unsigned block_idx = 0;
00377     unsigned i, j;
00378     
00379     const bm::word_t* blk;
00380     const bm::word_t* arg_blk;
00381     bool  blk_gap;
00382     bool  arg_gap;
00383 
00384     BM_SET_MMX_GUARD
00385 
00386     for (i = 0; i < bman1.top_block_size(); ++i)
00387     {
00388         bm::word_t** blk_blk = blk_root[i];
00389 
00390         if (blk_blk == 0) // not allocated
00391         {
00392             const bm::word_t* const* bvbb = bman2.get_topblock(i);
00393             if (bvbb == 0) 
00394             {
00395                 block_idx += bm::set_array_size;
00396                 continue;
00397             }
00398 
00399             blk = 0;
00400             blk_gap = false;
00401 
00402             for (j = 0; j < bm::set_array_size; ++j,++block_idx)
00403             {                
00404                 arg_blk = bman2.get_block(i, j);
00405                 arg_gap = BM_IS_GAP(bman2, arg_blk, block_idx);
00406 
00407                 if (!arg_blk) 
00408                     continue;
00409                 combine_count_operation_with_block(blk, blk_gap,
00410                                                    arg_blk, arg_gap,
00411                                                    temp_blk,
00412                                                    dmit, dmit_end);
00413             } // for j
00414             continue;
00415         }
00416 
00417         for (j = 0; j < bm::set_array_size; ++j, ++block_idx)
00418         {
00419             blk = blk_blk[j];
00420             arg_blk = bman2.get_block(i, j);
00421 
00422             if (blk == 0 && arg_blk == 0)
00423                 continue;
00424                 
00425             arg_gap = BM_IS_GAP(bman2, arg_blk, block_idx);
00426             blk_gap = BM_IS_GAP(bman1, blk, block_idx);
00427             
00428             combine_count_operation_with_block(blk, blk_gap,
00429                                                arg_blk, arg_gap,
00430                                                temp_blk,
00431                                                dmit, dmit_end);
00432             
00433 
00434         } // for j
00435 
00436     } // for i
00437     
00438     if (temp_blk)
00439     {
00440         bv1.free_tempblock(temp_blk);
00441     }
00442 
00443 }
00444 
00445 
00446 
00447 /*!
00448    \brief Computes bitcount of AND operation of two bitsets
00449    \param bv1 - Argument bit-vector.
00450    \param bv2 - Argument bit-vector.
00451    \return bitcount of the result
00452    \ingroup  distance
00453 */
00454 template<class BV>
00455 bm::id_t count_and(const BV& bv1, const BV& bv2)
00456 {
00457     distance_metric_descriptor dmd(bm::COUNT_AND);
00458     
00459     distance_operation(bv1, bv2, &dmd, &dmd+1);
00460     return dmd.result;
00461 }
00462 
00463 
00464 /*!
00465    \brief Computes bitcount of XOR operation of two bitsets
00466    \param bv1 - Argument bit-vector.
00467    \param bv2 - Argument bit-vector.
00468    \return bitcount of the result
00469    \ingroup  distance
00470 */
00471 template<class BV>
00472 bm::id_t count_xor(const BV& bv1, const BV& bv2)
00473 {
00474     distance_metric_descriptor dmd(bm::COUNT_XOR);
00475     
00476     distance_operation(bv1, bv2, &dmd, &dmd+1);
00477     return dmd.result;
00478 }
00479 
00480 
00481 /*!
00482    \brief Computes bitcount of SUB operation of two bitsets
00483    \param bv1 - Argument bit-vector.
00484    \param bv2 - Argument bit-vector.
00485    \return bitcount of the result
00486    \ingroup  distance
00487 */
00488 template<class BV>
00489 bm::id_t count_sub(const BV& bv1, const BV& bv2)
00490 {
00491     distance_metric_descriptor dmd(bm::COUNT_SUB_AB);
00492     
00493     distance_operation(bv1, bv2, &dmd, &dmd+1);
00494     return dmd.result;
00495 }
00496 
00497 
00498 /*!    
00499    \brief Computes bitcount of OR operation of two bitsets
00500    \param bv1 - Argument bit-vector.
00501    \param bv2 - Argument bit-vector.
00502    \return bitcount of the result
00503    \ingroup  distance
00504 */
00505 template<class BV>
00506 bm::id_t count_or(const BV& bv1, const BV& bv2)
00507 {
00508     distance_metric_descriptor dmd(bm::COUNT_OR);
00509     
00510     distance_operation(bv1, bv2, &dmd, &dmd+1);
00511     return dmd.result;
00512 }
00513 
00514 
00515 /**
00516     \brief Internal algorithms scans the input for the block range limit
00517     \internal
00518 */
00519 template<class It>
00520 It block_range_scan(It  first, It last, unsigned nblock, unsigned* max_id)
00521 {
00522     It right;
00523     for (right = first; right != last; ++right)
00524     {
00525         unsigned v = unsigned(*right);
00526         BM_ASSERT(v < bm::id_max);
00527         if (v >= *max_id)
00528             *max_id = v;
00529         unsigned nb = v >> bm::set_block_shift;
00530         if (nb != nblock)
00531             break;
00532     }
00533     return right;
00534 }
00535 
00536 /**
00537     \brief OR Combine bitvector and the iterable sequence 
00538 
00539     Algorithm combines bvector with sequence of integers 
00540     (represents another set). When the agrument set is sorted 
00541     this method can give serious increase in performance.
00542     
00543     \param bv     - destination bitvector
00544     \param first  - first element of the iterated sequence
00545     \param last   - last element of the iterated sequence
00546     
00547     \ingroup setalgo
00548 */
00549 template<class BV, class It>
00550 void combine_or(BV& bv, It  first, It last)
00551 {
00552     typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
00553     unsigned max_id = bv.size();
00554 
00555     while (first < last)
00556     {
00557         unsigned nblock = unsigned((*first) >> bm::set_block_shift);     
00558         It right = block_range_scan(first, last, nblock, &max_id);
00559 
00560         if (max_id >= bv.size())
00561             bv.resize(max_id + 1);
00562 
00563         // now we have one in-block array of bits to set
00564         
00565         label1:
00566         
00567         int block_type;
00568         bm::word_t* blk = 
00569             bman.check_allocate_block(nblock, 
00570                                       true, 
00571                                       bv.get_new_blocks_strat(), 
00572                                       &block_type);
00573         if (!blk) 
00574             continue;
00575                         
00576         if (block_type == 1) // gap
00577         {            
00578             bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
00579             unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
00580             
00581             for (; first < right; ++first)
00582             {
00583                 unsigned is_set;
00584                 unsigned nbit   = (*first) & bm::set_block_mask; 
00585                 
00586                 unsigned new_block_len =
00587                     gap_set_value(true, gap_blk, nbit, &is_set);
00588                 if (new_block_len > threshold) 
00589                 {
00590                     bman.extend_gap_block(nblock, gap_blk);
00591                     ++first;
00592                     goto label1;  // block pointer changed - goto reset
00593                 }
00594             }
00595         }
00596         else // bit block
00597         {
00598             for (; first < right; ++first)
00599             {
00600                 unsigned nbit   = unsigned(*first & bm::set_block_mask); 
00601                 unsigned nword  = unsigned(nbit >> bm::set_word_shift); 
00602                 nbit &= bm::set_word_mask;
00603                 blk[nword] |= (bm::word_t)1 << nbit;
00604             } // for
00605         }
00606     } // while
00607     
00608     bv.forget_count();
00609 }
00610 
00611 
00612 /**
00613     \brief XOR Combine bitvector and the iterable sequence 
00614 
00615     Algorithm combines bvector with sequence of integers 
00616     (represents another set). When the agrument set is sorted 
00617     this method can give serious increase in performance.
00618     
00619     \param bv     - destination bitvector
00620     \param first  - first element of the iterated sequence
00621     \param last   - last element of the iterated sequence
00622     
00623     \ingroup setalgo
00624 */
00625 template<class BV, class It>
00626 void combine_xor(BV& bv, It  first, It last)
00627 {
00628     typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
00629     unsigned max_id = bv.size();
00630 
00631     while (first < last)
00632     {
00633         unsigned nblock = unsigned((*first) >> bm::set_block_shift);     
00634         It right = block_range_scan(first, last, nblock, &max_id);
00635 
00636         if (max_id >= bv.size())
00637             bv.resize(max_id + 1);
00638 
00639         // now we have one in-block array of bits to set
00640         
00641         label1:
00642         
00643         int block_type;
00644         bm::word_t* blk = 
00645             bman.check_allocate_block(nblock, 
00646                                       true, 
00647                                       bv.get_new_blocks_strat(), 
00648                                       &block_type,
00649                                       false /* no null return */);
00650         BM_ASSERT(blk); 
00651                         
00652         if (block_type == 1) // gap
00653         {
00654             bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
00655             unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
00656             
00657             for (; first < right; ++first)
00658             {
00659                 unsigned is_set;
00660                 unsigned nbit   = (*first) & bm::set_block_mask; 
00661                 
00662                 is_set = gap_test(gap_blk, nbit);
00663                 BM_ASSERT(is_set <= 1);
00664                 is_set ^= 1; 
00665                 
00666                 unsigned new_block_len =
00667                     gap_set_value(is_set, gap_blk, nbit, &is_set);
00668                 if (new_block_len > threshold) 
00669                 {
00670                     bman.extend_gap_block(nblock, gap_blk);
00671                     ++first;
00672                     goto label1;  // block pointer changed - goto reset
00673                 }
00674             }
00675         }
00676         else // bit block
00677         {
00678             for (; first < right; ++first)
00679             {
00680                 unsigned nbit   = unsigned(*first & bm::set_block_mask); 
00681                 unsigned nword  = unsigned(nbit >> bm::set_word_shift); 
00682                 nbit &= bm::set_word_mask;
00683                 blk[nword] ^= (bm::word_t)1 << nbit;
00684             } // for
00685         }
00686     } // while
00687     
00688     bv.forget_count();
00689 }
00690 
00691 
00692 
00693 /**
00694     \brief SUB Combine bitvector and the iterable sequence 
00695 
00696     Algorithm combines bvector with sequence of integers 
00697     (represents another set). When the agrument set is sorted 
00698     this method can give serious increase in performance.
00699     
00700     \param bv     - destination bitvector
00701     \param first  - first element of the iterated sequence
00702     \param last   - last element of the iterated sequence
00703     
00704     \ingroup setalgo
00705 */
00706 template<class BV, class It>
00707 void combine_sub(BV& bv, It  first, It last)
00708 {
00709     typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
00710     unsigned max_id = bv.size();
00711 
00712     while (first < last)
00713     {
00714         unsigned nblock = unsigned((*first) >> bm::set_block_shift);     
00715         It right = block_range_scan(first, last, nblock, &max_id);
00716 
00717         if (max_id >= bv.size())
00718             bv.resize(max_id + 1);
00719 
00720         // now we have one in-block array of bits to set
00721         
00722         label1:
00723         
00724         int block_type;
00725         bm::word_t* blk = 
00726             bman.check_allocate_block(nblock, 
00727                                       false, 
00728                                       bv.get_new_blocks_strat(), 
00729                                       &block_type);
00730 
00731         if (!blk)
00732             continue;
00733                         
00734         if (block_type == 1) // gap
00735         {
00736             bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
00737             unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
00738             
00739             for (; first < right; ++first)
00740             {
00741                 unsigned is_set;
00742                 unsigned nbit   = (*first) & bm::set_block_mask; 
00743                 
00744                 is_set = gap_test(gap_blk, nbit);
00745                 if (!is_set)
00746                     continue;
00747                 
00748                 unsigned new_block_len =
00749                     gap_set_value(false, gap_blk, nbit, &is_set);
00750                 if (new_block_len > threshold) 
00751                 {
00752                     bman.extend_gap_block(nblock, gap_blk);
00753                     ++first;
00754                     goto label1;  // block pointer changed - goto reset
00755                 }
00756             }
00757         }
00758         else // bit block
00759         {
00760             for (; first < right; ++first)
00761             {
00762                 unsigned nbit   = unsigned(*first & bm::set_block_mask); 
00763                 unsigned nword  = unsigned(nbit >> bm::set_word_shift); 
00764                 nbit &= bm::set_word_mask;
00765                 blk[nword] &= ~((bm::word_t)1 << nbit);
00766             } // for
00767         }
00768     } // while
00769     
00770     bv.forget_count();
00771 }
00772 
00773 /**
00774     \brief AND Combine bitvector and the iterable sequence 
00775 
00776     Algorithm combines bvector with sequence of integers 
00777     (represents another set). When the agrument set is sorted 
00778     this method can give serious increase in performance.
00779     
00780     \param bv     - destination bitvector
00781     \param first  - first element of the iterated sequence
00782     \param last   - last element of the iterated sequence
00783     
00784     \ingroup setalgo
00785 */
00786 template<class BV, class It>
00787 void combine_and(BV& bv, It  first, It last)
00788 {
00789     BV bv_tmp;
00790     combine_or(bv_tmp, first, last);
00791     bv &= bv_tmp;
00792 }
00793 
00794 /*!
00795     \brief Compute number of bit intervals (GAPs) in the bitvector
00796     
00797     Algorithm traverses bit vector and count number of uninterrupted
00798     intervals of 1s and 0s.
00799     <pre>
00800     For example: 
00801       00001111100000 - gives us 3 intervals
00802       10001111100000 - 4 intervals
00803       00000000000000 - 1 interval
00804       11111111111111 - 1 interval
00805     </pre>
00806     \return Number of intervals
00807     \ingroup setalgo
00808 */
00809 template<class BV>
00810 bm::id_t count_intervals(const BV& bv)
00811 {
00812     const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
00813 
00814     bm::word_t*** blk_root = bman.get_rootblock();
00815     typename BV::blocks_manager_type::block_count_change_func func(bman);
00816     for_each_block(blk_root, bman.top_block_size(), bm::set_array_size, func);
00817 
00818     return func.count();        
00819 }
00820 
00821 /*!
00822     \brief Export bitset from an array of binary data representing
00823     the bit vector. 
00824 
00825     Input array can be array of ints, chars or other native C types.
00826     Method works with iterators, which makes it compatible with 
00827     STL containers and C arrays
00828 
00829     \param bv     - destination bitvector
00830     \param first  - first element of the iterated sequence
00831     \param last   - last element of the iterated sequence
00832 
00833     \ingroup setalgo
00834 */
00835 template<class BV, class It>
00836 void export_array(BV& bv, It first, It last)
00837 {
00838     typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
00839     unsigned inp_word_size = sizeof(*first);
00840     size_t array_size = last - first;
00841     size_t bit_cnt = array_size * inp_word_size * 8;
00842     int block_type;
00843     bm::word_t tmp;
00844     unsigned b1, b2, b3, b4;
00845 
00846     if (bit_cnt >= bv.size())
00847         bv.resize((bm::id_t)bit_cnt + 1);
00848     else 
00849         bv.set_range((bm::id_t)bit_cnt, bv.size() - 1, false);
00850     
00851     switch (inp_word_size)
00852     {
00853     case 1:
00854         {
00855             size_t word_cnt = array_size / 4;
00856             for (unsigned i = 0; i < bm::set_total_blocks; ++i)
00857             {
00858                 bm::word_t* blk = 
00859                     bman.check_allocate_block(i, 
00860                                               false, 
00861                                               BM_BIT, 
00862                                               &block_type,
00863                                               false /*no NULL ret*/);
00864                 if (block_type == 1) // gap
00865                 {
00866                     blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
00867                 }
00868                 
00869                 bm::word_t* wrd_ptr = blk;
00870                 if (word_cnt >= bm::set_block_size) {
00871                     bm::word_t* wrd_end = blk + bm::set_block_size;
00872                     do {
00873                         b1 = *first++; b2 = *first++;
00874                         b3 = *first++; b4 = *first++;
00875                         tmp = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);
00876                         *wrd_ptr++ = tmp;
00877                     } while (wrd_ptr < wrd_end);
00878                     word_cnt -= bm::set_block_size;
00879                 } 
00880                 else 
00881                 {
00882                     size_t to_convert = last - first;
00883                     for (size_t j = 0; j < to_convert / 4; ++j)
00884                     {
00885                         b1 = *first++; b2 = *first++;
00886                         b3 = *first++; b4 = *first++;
00887                         tmp = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);
00888                         *wrd_ptr++ = tmp;
00889                     }
00890                     while (1)
00891                     {
00892                         if (first == last) break;
00893                         *wrd_ptr = *first++;
00894                         if (first == last) break;
00895                         *wrd_ptr |= (*first++) << 8;
00896                         if (first == last) break;
00897                         *wrd_ptr |= (*first++) << 16;
00898                         if (first == last) break;
00899                         *wrd_ptr |= (*first++) << 24;
00900                         ++wrd_ptr;
00901                     }
00902                 }
00903                 if (first == last) break;
00904             } // for
00905         }
00906         break;
00907     case 2:
00908         {
00909             size_t word_cnt = array_size / 2;
00910             for (unsigned i = 0; i < bm::set_total_blocks; ++i)
00911             {
00912                 bm::word_t* blk = 
00913                     bman.check_allocate_block(i, 
00914                                               false, 
00915                                               BM_BIT, 
00916                                               &block_type,
00917                                               false /*no NULL ret*/);
00918                 if (block_type == 1) // gap
00919                 {
00920                     blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
00921                 }
00922                 
00923                 bm::word_t* wrd_ptr = blk;
00924                 if (word_cnt >= bm::set_block_size) {
00925                     bm::word_t* wrd_end = blk + bm::set_block_size;
00926                     do {
00927                         b1 = *first++; b2 = *first++;
00928                         tmp = b1 | (b2 << 16);
00929                         *wrd_ptr++ = tmp;
00930                     } while (wrd_ptr < wrd_end);
00931                     word_cnt -= bm::set_block_size;
00932                 } 
00933                 else 
00934                 {
00935                     size_t to_convert = last - first;
00936                     for (unsigned j = 0; j < to_convert / 2; ++j)
00937                     {
00938                         b1 = *first++; b2 = *first++;
00939                         tmp = b1 | (b2 << 16);
00940                         *wrd_ptr++ = tmp;
00941                     }
00942                     while (1)
00943                     {
00944                         if (first == last) break;
00945                         *wrd_ptr = *first++;
00946                         if (first == last) break;
00947                         *wrd_ptr |= (*first++) << 16;
00948                         ++wrd_ptr;
00949                     }
00950                 }
00951                 if (first == last) break;
00952             } // for
00953         }
00954         break;
00955     case 4:
00956         {
00957             size_t word_cnt = array_size;
00958             for (unsigned i = 0; i < bm::set_total_blocks; ++i)
00959             {
00960                 bm::word_t* blk = 
00961                     bman.check_allocate_block(i, 
00962                                               false, 
00963                                               BM_BIT, 
00964                                               &block_type,
00965                                               false /*no NULL ret*/);
00966                 if (block_type == 1) // gap
00967                 {
00968                     blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
00969                 }
00970                 
00971                 bm::word_t* wrd_ptr = blk;
00972                 if (word_cnt >= bm::set_block_size) {
00973                     bm::word_t* wrd_end = blk + bm::set_block_size;
00974                     do {
00975                         *wrd_ptr++ = *first++;
00976                     } while (wrd_ptr < wrd_end);
00977                     word_cnt -= bm::set_block_size;
00978                 } 
00979                 else 
00980                 {
00981                     while (1)
00982                     {
00983                         if (first == last) break;
00984                         *wrd_ptr = *first++;
00985                         ++wrd_ptr;
00986                     }
00987                 }
00988                 if (first == last) break;
00989             } // for
00990         }
00991         break;
00992     default:
00993         BM_ASSERT(0);
00994     } // switch
00995 
00996 }
00997 
00998 
00999 } // namespace bm
01000 
01001 #include "bmundef.h"
01002 
01003 #endif

Generated on Thu Apr 20 13:28:46 2006 for BitMagic by  doxygen 1.4.1