正文
关于strlen
小程序:扫一扫查出行
【扫一扫了解最新限行尾号】
复制小程序
【扫一扫了解最新限行尾号】
复制小程序
strlen的实现是通过4个字节4个字节进行枚举,然后通过位运算来判断这4个字节中是否有一个字节含有0,这样的话,效率就提高了4倍。
这个效率的提高,是建立在a&b&c&d与a&b的效率差不多这一前提之上的。
那用8字节8字节来偏移的话,是不是更快呢?在32位机上不会,在64位机上会再提高一倍。原因是:32位机的寄存器只有32位,一个8字节的操作数要分高位和低位MOV两次;而在64位机上,a&b可以一次处理8个字节,所以速度能提高一倍。
本来是想实验a&b&c&d与a&b的速度的。经实验验证,这两者的效率确实差不多;然后去看汇编、数指令条数,发现在没有使用-O优化的情况下,指令条数的差别与运算符个数的倍数相同,这就让我感到疑惑了。
下面附上实验的代码:
#include <stdint.h>
#include <time.h>

#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
using namespace std;

/*
 * Benchmark workload.
 * NOTE(review): the original figures are missing from this listing; the two
 * values below are placeholders. Tune them for the machine under test.
 */
static const int kDataLength = 1000000; /* characters in the test string */
static const int kIterations = 1000;    /* calls made per timed function */

/*
 * Length of the NUL-terminated string `str`, examined 4 bytes per step.
 *
 * For a word d, `(d - low) & ~d & high` is non-zero exactly when at least
 * one byte of d is zero: subtracting 0x01 from a zero byte borrows and sets
 * its top bit, while `& ~d` discards bytes whose top bit was already set.
 * (For pure 7-bit input, `(d - low) & high` alone would be enough.)
 *
 * Assumes `unsigned int` is 4 bytes wide, matching the constants.
 * NOTE(review): reading chars through an `unsigned int *` is formally a
 * strict-aliasing violation; this is the usual trade-off of this technique.
 *
 * Returns the length as int (so strings longer than INT_MAX are not
 * supported); -1 is a defensive fallback that is not expected to be reached.
 */
int _strlen(const char *str) {
    const unsigned int low = 0x01010101;
    const unsigned int high = 0x80808080;

    /* Advance byte-wise to a word boundary first: aligned loads are always
       valid for the cast pointer and never straddle a page, so reading a
       few bytes beyond the terminator inside the same word is harmless. */
    const char *s = str;
    while (((uintptr_t) s & (sizeof(unsigned int) - 1)) != 0) {
        if (*s == 0) {
            return (int)(s - str);
        }
        s++;
    }

    const unsigned int *p = (const unsigned int *) s;
    while (true) {
        unsigned int d = *p++;
        if (((d - low) & ~d & high) != 0) { // handles all byte values [0...256)
            break;
        }
    }

    /* p is one word past the word that holds the terminator. */
    const char *q = (const char *)(p - 1);
    for (int i = 0; i < (int)sizeof(unsigned int); i++) {
        if (q[i] == 0) {
            return (int)(q - str + i);
        }
    }
    return -1;
}

/*
 * Reference implementation: length of `str`, one byte per step.
 */
int _strlen2(const char *str) {
    const char *p = str;
    while (*p != 0) {
        p++;
    }
    return (int)(p - str);
}

/*
 * Length of the NUL-terminated string `str`, examined 8 bytes per step.
 * Same zero-byte test as _strlen, widened to `unsigned long long`
 * (assumed to be 8 bytes wide, matching the constants).
 *
 * Returns the length as int; -1 is a defensive fallback that is not
 * expected to be reached.
 */
int _strlen3(const char *str) {
    const unsigned long long low = 0x0101010101010101ULL;
    const unsigned long long high = 0x8080808080808080ULL;

    /* Byte-wise prologue up to an 8-byte boundary (see _strlen). */
    const char *s = str;
    while (((uintptr_t) s & (sizeof(unsigned long long) - 1)) != 0) {
        if (*s == 0) {
            return (int)(s - str);
        }
        s++;
    }

    const unsigned long long *p = (const unsigned long long *) s;
    while (true) {
        unsigned long long d = *p++;
        if (((d - low) & ~d & high) != 0) { // handles all byte values [0...256)
            break;
        }
    }

    const char *q = (const char *)(p - 1);
    for (int i = 0; i < (int)sizeof(unsigned long long); i++) {
        if (q[i] == 0) {
            return (int)(q - str + i);
        }
    }
    return -1;
}

/*
 * Length of `str`, examined one `unsigned long` per step (4 or 8 bytes,
 * depending on the platform).
 * NOTE(review): this appears to follow the classic glibc strlen layout —
 * confirm attribution before redistributing.
 */
size_t _strlen4(const char *str)
{
    const char *char_ptr;
    const unsigned long int *longword_ptr;
    unsigned long int longword, himagic, lomagic;

    /* Handle the first few characters one at a time, until char_ptr is
       aligned on a longword boundary. uintptr_t keeps the pointer value
       intact on targets where `unsigned long` is narrower than a pointer. */
    for (char_ptr = str;
         ((uintptr_t) char_ptr & (sizeof (longword) - 1)) != 0;
         ++char_ptr)
        if (*char_ptr == '\0')
            return (size_t)(char_ptr - str);

    longword_ptr = (const unsigned long int *) char_ptr;

    /* himagic has the top bit of every byte set, lomagic the bottom bit. */
    himagic = 0x80808080L;
    lomagic = 0x01010101L;
    if (sizeof (longword) > 4)
    {
        /* 64-bit version of the magic. */
        /* Do the shift in two steps to avoid a warning if long has 32 bits. */
        himagic = ((himagic << 16) << 16) | himagic;
        lomagic = ((lomagic << 16) << 16) | lomagic;
    }

    /* Instead of testing each character, test a longword at a time; the
       expression below is non-zero when some byte of the longword is zero. */
    for (;;)
    {
        longword = *longword_ptr++;

        if (((longword - lomagic) & ~longword & himagic) != 0)
        {
            /* Which of the bytes was the zero? */
            const char *cp = (const char *) (longword_ptr - 1);

            for (size_t i = 0; i < sizeof (longword); ++i)
                if (cp[i] == 0)
                    return (size_t)(cp - str) + i;
            /* No zero byte found: keep scanning. */
        }
    }
}

/*
 * Builds the benchmark input: kDataLength copies of 'a'.
 */
string gen_data() {
    return string(kDataLength, 'a');
}

/*
 * Shared timing loop behind both get_run_time overloads.
 * The argument is re-read through a volatile pointer and every result is
 * stored into a volatile sink, so that an optimizing build can neither drop
 * the calls nor hoist them out of the loop.
 */
template <typename Fn>
static double time_calls(Fn fp, const char *str, int count) {
    const char *volatile input = str;
    volatile size_t sink = 0;

    clock_t start = clock();
    for (int i = 0; i < count; i++) {
        sink = (size_t) fp(input);
    }
    clock_t end = clock();

    (void) sink;
    return (double)(end - start) / CLOCKS_PER_SEC;
}

/*
 * Calls fp(str) `count` times and returns the elapsed processor time in
 * seconds as measured by clock(). Overload for int-returning functions.
 */
double get_run_time(int(*fp)(const char *), const char *str, int count) {
    return time_calls(fp, str, count);
}

/*
 * Same as above, for size_t-returning functions (strlen, _strlen4).
 */
double get_run_time(size_t(*fp)(const char *), const char *str, int count) {
    return time_calls(fp, str, count);
}

/*
 * Prints the length reported by each implementation as a sanity check,
 * then the time in seconds each one needs for kIterations calls.
 */
int main() {
    string a = gen_data();

    printf("%d\n", _strlen(a.c_str()));
    printf("%d\n", _strlen2(a.c_str()));
    printf("%d\n", _strlen3(a.c_str()));
    printf("%d\n", (int)strlen(a.c_str()));

    double t1 = get_run_time(&_strlen, a.c_str(), kIterations);
    printf("%f\n", t1);
    double t2 = get_run_time(&_strlen2, a.c_str(), kIterations);
    printf("%f\n", t2);
    double t3 = get_run_time(&_strlen3, a.c_str(), kIterations);
    printf("%f\n", t3);
    double t4 = get_run_time(&strlen, a.c_str(), kIterations);
    printf("%f\n", t4);
    double t5 = get_run_time(&_strlen4, a.c_str(), kIterations);
    printf("%f\n", t5);
}