00001 /* Copyright (C) 1991, 1993, 1996, 1997, 1999, 2000, 2003, 2004 Free 00002 Software Foundation, Inc. 00003 00004 Based on strlen implementation by Torbjorn Granlund (tege@sics.se), 00005 with help from Dan Sahlin (dan@sics.se) and 00006 commentary by Jim Blandy (jimb@ai.mit.edu); 00007 adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), 00008 and implemented by Roland McGrath (roland@ai.mit.edu). 00009 00010 NOTE: The canonical source of this file is maintained with the GNU C Library. 00011 Bugs can be reported to bug-glibc@prep.ai.mit.edu. 00012 00013 This program is free software; you can redistribute it and/or modify it 00014 under the terms of the GNU Lesser General Public License as published by the 00015 Free Software Foundation; either version 2.1, or (at your option) any 00016 later version. 00017 00018 This program is distributed in the hope that it will be useful, 00019 but WITHOUT ANY WARRANTY; without even the implied warranty of 00020 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00021 GNU Lesser General Public License for more details. 00022 00023 You should have received a copy of the GNU Lesser General Public License 00024 along with this program; if not, write to the Free Software Foundation, 00025 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 00026 00027 #ifdef HAVE_CONFIG_H 00028 # include <config.h> 00029 #endif 00030 00031 #include <string.h> 00032 00033 #include <stddef.h> 00034 00035 #if defined _LIBC 00036 # include <memcopy.h> 00037 #else 00038 # define reg_char char 00039 #endif 00040 00041 #include <limits.h> 00042 00043 #if HAVE_BP_SYM_H || defined _LIBC 00044 # include <bp-sym.h> 00045 #else 00046 # define BP_SYM(sym) sym 00047 #endif 00048 00049 #undef memchr 00050 #undef __memchr 00051 00052 /* Search no more than N bytes of S for C. */ 00053 void * 00054 __memchr (void const *s, int c_in, size_t n) 00055 { 00056 const unsigned char *char_ptr; 00057 const unsigned long int *longword_ptr; 00058 unsigned long int longword, magic_bits, charmask; 00059 unsigned reg_char c; 00060 int i; 00061 00062 c = (unsigned char) c_in; 00063 00064 /* Handle the first few characters by reading one character at a time. 00065 Do this until CHAR_PTR is aligned on a longword boundary. */ 00066 for (char_ptr = (const unsigned char *) s; 00067 n > 0 && (size_t) char_ptr % sizeof longword != 0; 00068 --n, ++char_ptr) 00069 if (*char_ptr == c) 00070 return (void *) char_ptr; 00071 00072 /* All these elucidatory comments refer to 4-byte longwords, 00073 but the theory applies equally well to any size longwords. */ 00074 00075 longword_ptr = (const unsigned long int *) char_ptr; 00076 00077 /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits 00078 the "holes." Note that there is a hole just to the left of 00079 each byte, with an extra at the end: 00080 00081 bits: 01111110 11111110 11111110 11111111 00082 bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD 00083 00084 The 1-bits make sure that carries propagate to the next 0-bit. 00085 The 0-bits provide holes for carries to fall into. */ 00086 00087 /* Set MAGIC_BITS to be this pattern of 1 and 0 bits. 00088 Set CHARMASK to be a longword, each of whose bytes is C. */ 00089 00090 magic_bits = 0xfefefefe; 00091 charmask = c | (c << 8); 00092 charmask |= charmask << 16; 00093 #if 0xffffffffU < ULONG_MAX 00094 magic_bits |= magic_bits << 32; 00095 charmask |= charmask << 32; 00096 if (8 < sizeof longword) 00097 for (i = 64; i < sizeof longword * 8; i *= 2) 00098 { 00099 magic_bits |= magic_bits << i; 00100 charmask |= charmask << i; 00101 } 00102 #endif 00103 magic_bits = (ULONG_MAX >> 1) & (magic_bits | 1); 00104 00105 /* Instead of the traditional loop which tests each character, 00106 we will test a longword at a time. The tricky part is testing 00107 if *any of the four* bytes in the longword in question are zero. */ 00108 while (n >= sizeof longword) 00109 { 00110 /* We tentatively exit the loop if adding MAGIC_BITS to 00111 LONGWORD fails to change any of the hole bits of LONGWORD. 00112 00113 1) Is this safe? Will it catch all the zero bytes? 00114 Suppose there is a byte with all zeros. Any carry bits 00115 propagating from its left will fall into the hole at its 00116 least significant bit and stop. Since there will be no 00117 carry from its most significant bit, the LSB of the 00118 byte to the left will be unchanged, and the zero will be 00119 detected. 00120 00121 2) Is this worthwhile? Will it ignore everything except 00122 zero bytes? Suppose every byte of LONGWORD has a bit set 00123 somewhere. There will be a carry into bit 8. If bit 8 00124 is set, this will carry into bit 16. If bit 8 is clear, 00125 one of bits 9-15 must be set, so there will be a carry 00126 into bit 16. Similarly, there will be a carry into bit 00127 24. If one of bits 24-30 is set, there will be a carry 00128 into bit 31, so all of the hole bits will be changed. 00129 00130 The one misfire occurs when bits 24-30 are clear and bit 00131 31 is set; in this case, the hole at bit 31 is not 00132 changed. If we had access to the processor carry flag, 00133 we could close this loophole by putting the fourth hole 00134 at bit 32! 00135 00136 So it ignores everything except 128's, when they're aligned 00137 properly. 00138 00139 3) But wait! Aren't we looking for C, not zero? 00140 Good point. So what we do is XOR LONGWORD with a longword, 00141 each of whose bytes is C. This turns each byte that is C 00142 into a zero. */ 00143 00144 longword = *longword_ptr++ ^ charmask; 00145 00146 /* Add MAGIC_BITS to LONGWORD. */ 00147 if ((((longword + magic_bits) 00148 00149 /* Set those bits that were unchanged by the addition. */ 00150 ^ ~longword) 00151 00152 /* Look at only the hole bits. If any of the hole bits 00153 are unchanged, most likely one of the bytes was a 00154 zero. */ 00155 & ~magic_bits) != 0) 00156 { 00157 /* Which of the bytes was C? If none of them were, it was 00158 a misfire; continue the search. */ 00159 00160 const unsigned char *cp = (const unsigned char *) (longword_ptr - 1); 00161 00162 if (cp[0] == c) 00163 return (void *) cp; 00164 if (cp[1] == c) 00165 return (void *) &cp[1]; 00166 if (cp[2] == c) 00167 return (void *) &cp[2]; 00168 if (cp[3] == c) 00169 return (void *) &cp[3]; 00170 if (4 < sizeof longword && cp[4] == c) 00171 return (void *) &cp[4]; 00172 if (5 < sizeof longword && cp[5] == c) 00173 return (void *) &cp[5]; 00174 if (6 < sizeof longword && cp[6] == c) 00175 return (void *) &cp[6]; 00176 if (7 < sizeof longword && cp[7] == c) 00177 return (void *) &cp[7]; 00178 if (8 < sizeof longword) 00179 for (i = 8; i < sizeof longword; i++) 00180 if (cp[i] == c) 00181 return (void *) &cp[i]; 00182 } 00183 00184 n -= sizeof longword; 00185 } 00186 00187 char_ptr = (const unsigned char *) longword_ptr; 00188 00189 while (n-- > 0) 00190 { 00191 if (*char_ptr == c) 00192 return (void *) char_ptr; 00193 else 00194 ++char_ptr; 00195 } 00196 00197 return 0; 00198 } 00199 #ifdef weak_alias 00200 weak_alias (__memchr, BP_SYM (memchr)) 00201 #endif