Skip to content

Converting between string and []rune takes a long time #77

Open
@bwendling

Description

@bwendling

Consider the program below. The program runs slowly. Part of it is due to encoding and decoding between strings and runes. Roughly 1860ms out of 4050ms is spent doing this:

(pprof) top
4050ms of 9270ms total (43.69%)
Dropped 87 nodes (cum <= 46.35ms)
Showing top 10 nodes out of 138 (cum >= 580ms)
      flat  flat%   sum%        cum   cum%
     630ms  6.80%  6.80%      630ms  6.80%  runtime.encoderune
     600ms  6.47% 13.27%     2250ms 24.27%  github.com/sergi/go-diff/diffmatchpatch.(*DiffMatchPatch).diffBisect
     560ms  6.04% 19.31%     1230ms 13.27%  runtime.slicerunetostring
     400ms  4.31% 23.62%      500ms  5.39%  runtime.semrelease
     360ms  3.88% 27.51%     1120ms 12.08%  runtime.pcvalue
     310ms  3.34% 30.85%      310ms  3.34%  runtime.readvarint
     310ms  3.34% 34.20%      620ms  6.69%  runtime.step
     300ms  3.24% 37.43%      300ms  3.24%  github.com/sergi/go-diff/diffmatchpatch.runesEqual
     300ms  3.24% 40.67%      410ms  4.42%  runtime.lock
     280ms  3.02% 43.69%      580ms  6.26%  github.com/sergi/go-diff/diffmatchpatch.runesIndex

It should be possible to work on one representation during the algorithm to avoid this overhead.

(Note: This has non-deterministic behavior that was reported in #75.)

package main

import (
        "flag"
        "fmt"
        "log"
        "os"
        "runtime/pprof"
        "sync"
        "sync/atomic"

        "github.com/sergi/go-diff/diffmatchpatch"
)

var (
        cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
        dmp        = diffmatchpatch.New()
)

const (
        num    = 50000
        expect = "[{1 licensed } {0 under the apache license, version 2.0 (the} {-1  #} {0 'license'); you may not use this file except in compliance } {-1 # } {0 with the license. you may obtain a copy of the license at } {-1 # # } {0 http://www.apache.org/licenses/license-2.0 } {-1 # # } {0 unless required by applicable law or agreed to in writing, } {-1 # } {0 software distributed under the license is distributed on an} {-1  #} {0 'as is'basis, without warranties or conditions of any} {-1  #} {0  kind, either express or implied. see the license for the } {-1 # } {0 specific language governing permissions and limitations} {-1  #} {0  under the license.}]"

        unknown = "under the apache license, version 2.0 (the #'license'); you may not use this file except in compliance # with the license. you may obtain a copy of the license at # # http://www.apache.org/licenses/license-2.0 # # unless required by applicable law or agreed to in writing, # software distributed under the license is distributed on an #'as is'basis, without warranties or conditions of any # kind, either express or implied. see the license for the # specific language governing permissions and limitations # under the license."

        known = "licensed under the apache license, version 2.0 (the'license'); you may not use this file except in compliance with the license. you may obtain a copy of the license at http://www.apache.org/licenses/license-2.0 unless required by applicable law or agreed to in writing, software distributed under the license is distributed on an'as is'basis, without warranties or conditions of any kind, either express or implied. see the license for the specific language governing permissions and limitations under the license."
)

func main() {
        flag.Parse()
        if *cpuprofile != "" {
                f, err := os.Create(*cpuprofile)
                if err != nil {
                        log.Fatal(err)
                }
                pprof.StartCPUProfile(f)
                defer pprof.StopCPUProfile()
        }

        var matched, missed int32
        var wg sync.WaitGroup
        wg.Add(num)
        for i := 0; i < num; i++ {
                go func(i int) {
                        defer wg.Done()
                        diffs := dmp.DiffMain(unknown, known, false)
                        s := fmt.Sprintf("%v", diffs)
                        if s != expect {
                                //fmt.Fprintf(os.Stderr, "MISMATCH(%d):\n%s\n", i, s)
                                atomic.AddInt32(&missed, 1)
                        } else {
                                atomic.AddInt32(&matched, 1)
                        }
                }(i)
        }
        wg.Wait()
        fmt.Fprintf(os.Stderr, "NUMBER MATCHING: %d\n", matched)
        fmt.Fprintf(os.Stderr, "NUMBER MISMATCHING: %d\n", missed)
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions