Created
August 29, 2019 20:20
-
-
Save alex/c628afea3b801195c27947f6a42c3534 to your computer and use it in GitHub Desktop.
Revisions
-
alex created this gist
Aug 29, 2019 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,83 @@ package foo import ( "bytes" "testing" ) var Data1 = bytes.Repeat([]byte("a"), 1) var Data10 = bytes.Repeat([]byte("a"), 10) var Data100 = bytes.Repeat([]byte("a"), 100) var Data1000 = bytes.Repeat([]byte("a"), 1000) var Data10000 = bytes.Repeat([]byte("a"), 10000) func BenchmarkLinear_1(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { linearFindFirstMultiByteChar(Data1) } } func BenchmarkVectorize_1(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { vectorizeFindFirstMultiByteChar(Data1) } } func BenchmarkLinear_10(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { linearFindFirstMultiByteChar(Data10) } } func BenchmarkVectorize_10(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { vectorizeFindFirstMultiByteChar(Data10) } } func BenchmarkLinear_100(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { linearFindFirstMultiByteChar(Data100) } } func BenchmarkVectorize_100(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { vectorizeFindFirstMultiByteChar(Data100) } } func BenchmarkLinear_1000(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { linearFindFirstMultiByteChar(Data1000) } } func BenchmarkVectorize_1000(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { vectorizeFindFirstMultiByteChar(Data1000) } } func BenchmarkLinear_10000(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { linearFindFirstMultiByteChar(Data10000) } } func BenchmarkVectorize_10000(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { vectorizeFindFirstMultiByteChar(Data10000) } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,15 @@ alex@penguin /t/yy> go test -bench=. goos: linux goarch: amd64 pkg: foo BenchmarkLinear_1-4 500000000 3.53 ns/op BenchmarkVectorize_1-4 300000000 4.33 ns/op BenchmarkLinear_10-4 200000000 9.82 ns/op BenchmarkVectorize_10-4 200000000 6.70 ns/op BenchmarkLinear_100-4 20000000 77.2 ns/op BenchmarkVectorize_100-4 100000000 14.2 ns/op BenchmarkLinear_1000-4 2000000 690 ns/op BenchmarkVectorize_1000-4 20000000 98.7 ns/op BenchmarkLinear_10000-4 200000 7754 ns/op BenchmarkVectorize_10000-4 2000000 891 ns/op PASS This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,42 @@ package foo import ( "unsafe" ) func linearFindFirstMultiByteChar(bytes []byte) int { for bytesIdx, b := range bytes { // We have a multi-byte codepoint, we need to allocate // codepointIndices if b&0x80 == 0x80 { return bytesIdx } } return len(bytes) } const wordSize = int(unsafe.Sizeof(uintptr(0))) func vectorizeFindFirstMultiByteChar(bytes []byte) int { // TODO: check if unaligned is allowed // Attempt to do a vectorized scan. if len(bytes) > wordSize { w := len(bytes) / wordSize bytesw := *(*[]uintptr)(unsafe.Pointer(&bytes)) for i := 0; i < w; i++ { if bytesw[i]&uintptr(0x8080808080808080) != 0 { return i * wordSize } } } for i := len(bytes) - (len(bytes) % wordSize); i < len(bytes); i++ { if bytes[i]&0x80 == 0x80 { return i } } return len(bytes) }