Open
Description
Consider the program below. The program runs slowly. Part of it is due to encoding and decoding between strings and runes. Roughly 1860ms out of 4050ms is spent doing this:
(pprof) top
4050ms of 9270ms total (43.69%)
Dropped 87 nodes (cum <= 46.35ms)
Showing top 10 nodes out of 138 (cum >= 580ms)
flat flat% sum% cum cum%
630ms 6.80% 6.80% 630ms 6.80% runtime.encoderune
600ms 6.47% 13.27% 2250ms 24.27% github.com/sergi/go-diff/diffmatchpatch.(*DiffMatchPatch).diffBisect
560ms 6.04% 19.31% 1230ms 13.27% runtime.slicerunetostring
400ms 4.31% 23.62% 500ms 5.39% runtime.semrelease
360ms 3.88% 27.51% 1120ms 12.08% runtime.pcvalue
310ms 3.34% 30.85% 310ms 3.34% runtime.readvarint
310ms 3.34% 34.20% 620ms 6.69% runtime.step
300ms 3.24% 37.43% 300ms 3.24% github.com/sergi/go-diff/diffmatchpatch.runesEqual
300ms 3.24% 40.67% 410ms 4.42% runtime.lock
280ms 3.02% 43.69% 580ms 6.26% github.com/sergi/go-diff/diffmatchpatch.runesIndex
It should be possible to work on one representation during the algorithm to avoid this overhead.
(Note: This has non-deterministic behavior that was reported in #75.)
package main
import (
"flag"
"fmt"
"log"
"os"
"runtime/pprof"
"sync"
"sync/atomic"
"github.com/sergi/go-diff/diffmatchpatch"
)
var (
cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
dmp = diffmatchpatch.New()
)
const (
num = 50000
expect = "[{1 licensed } {0 under the apache license, version 2.0 (the} {-1 #} {0 'license'); you may not use this file except in compliance } {-1 # } {0 with the license. you may obtain a copy of the license at } {-1 # # } {0 http://www.apache.org/licenses/license-2.0 } {-1 # # } {0 unless required by applicable law or agreed to in writing, } {-1 # } {0 software distributed under the license is distributed on an} {-1 #} {0 'as is'basis, without warranties or conditions of any} {-1 #} {0 kind, either express or implied. see the license for the } {-1 # } {0 specific language governing permissions and limitations} {-1 #} {0 under the license.}]"
unknown = "under the apache license, version 2.0 (the #'license'); you may not use this file except in compliance # with the license. you may obtain a copy of the license at # # http://www.apache.org/licenses/license-2.0 # # unless required by applicable law or agreed to in writing, # software distributed under the license is distributed on an #'as is'basis, without warranties or conditions of any # kind, either express or implied. see the license for the # specific language governing permissions and limitations # under the license."
known = "licensed under the apache license, version 2.0 (the'license'); you may not use this file except in compliance with the license. you may obtain a copy of the license at http://www.apache.org/licenses/license-2.0 unless required by applicable law or agreed to in writing, software distributed under the license is distributed on an'as is'basis, without warranties or conditions of any kind, either express or implied. see the license for the specific language governing permissions and limitations under the license."
)
func main() {
flag.Parse()
if *cpuprofile != "" {
f, err := os.Create(*cpuprofile)
if err != nil {
log.Fatal(err)
}
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
}
var matched, missed int32
var wg sync.WaitGroup
wg.Add(num)
for i := 0; i < num; i++ {
go func(i int) {
defer wg.Done()
diffs := dmp.DiffMain(unknown, known, false)
s := fmt.Sprintf("%v", diffs)
if s != expect {
//fmt.Fprintf(os.Stderr, "MISMATCH(%d):\n%s\n", i, s)
atomic.AddInt32(&missed, 1)
} else {
atomic.AddInt32(&matched, 1)
}
}(i)
}
wg.Wait()
fmt.Fprintf(os.Stderr, "NUMBER MATCHING: %d\n", matched)
fmt.Fprintf(os.Stderr, "NUMBER MISMATCHING: %d\n", missed)
}
Metadata
Metadata
Assignees
Labels
No labels