Merge pull request #3253 from weaveworks/optimise-merge-subset

Optimise merge where one side is a subset of the other
This commit is contained in:
Bryan Boreham
2018-07-13 08:11:16 +01:00
committed by GitHub
11 changed files with 244 additions and 73 deletions

View File

@@ -2,6 +2,7 @@ package app
import (
"flag"
"math/rand"
"net/http"
"net/url"
"os"
@@ -65,6 +66,9 @@ func BenchmarkReportUpgrade(b *testing.B) {
func BenchmarkReportMerge(b *testing.B) {
reports := upgradeReports(readReportFiles(b, *benchReportPath))
rand.Shuffle(len(reports), func(i, j int) {
reports[i], reports[j] = reports[j], reports[i]
})
merger := NewFastMerger()
b.ResetTimer()
for i := 0; i < b.N; i++ {

View File

@@ -74,27 +74,54 @@ function generate_latest_map() {
return len(m)
}
// Merge produces a fresh ${latest_map_type} containing the keys from both inputs.
// Merge produces a ${latest_map_type} containing the keys from both inputs.
// When both inputs contain the same key, the newer value is used.
// Tries to return one of its inputs, if that already holds the correct result.
func (m ${latest_map_type}) Merge(n ${latest_map_type}) ${latest_map_type} {
switch {
case m == nil:
case len(m) == 0:
return n
case n == nil:
case len(n) == 0:
return m
}
l := len(m)
if len(n) > l {
l = len(n)
if len(n) > len(m) {
m, n = n, m //swap so m is always at least as long as n
} else if len(n) == len(m) && m[0].Timestamp.Before(n[0].Timestamp) {
// Optimise common case where we merge two nodes with the same contents
// sampled at different times.
m, n = n, m // swap equal-length arrays so first element of m is newer
}
out := make([]${entry_type}, 0, l)
i, j := 0, 0
loop:
for i < len(m) {
switch {
case j >= len(n) || m[i].key < n[j].key:
out = append(out, m[i])
case j >= len(n):
return m
case m[i].key == n[j].key:
if m[i].Timestamp.Before(n[j].Timestamp) {
break loop
}
i++
j++
case m[i].key < n[j].key:
i++
default:
break loop
}
}
if i >= len(m) && j >= len(n) {
return m
}
out := make([]${entry_type}, i, len(m))
copy(out, m[:i])
for i < len(m) {
switch {
case j >= len(n):
out = append(out, m[i:]...)
return out
case m[i].key == n[j].key:
if m[i].Timestamp.Before(n[j].Timestamp) {
out = append(out, n[j])
@@ -103,6 +130,9 @@ function generate_latest_map() {
}
i++
j++
case m[i].key < n[j].key:
out = append(out, m[i])
i++
default:
out = append(out, n[j])
j++

View File

@@ -27,10 +27,12 @@ func (r DNSRecords) Merge(other DNSRecords) DNSRecords {
cp := r.Copy()
for k, v := range other {
if v2, ok := cp[k]; ok {
cp[k] = DNSRecord{
Forward: v.Forward.Merge(v2.Forward),
Reverse: v.Reverse.Merge(v2.Reverse),
fMerged, fUnchanged := v.Forward.Merge(v2.Forward)
rMerged, rUnchanged := v.Reverse.Merge(v2.Reverse)
if fUnchanged && rUnchanged {
continue
}
cp[k] = DNSRecord{Forward: fMerged, Reverse: rMerged}
} else {
cp[k] = v
}

View File

@@ -23,7 +23,8 @@ func (a IDList) Add(ids ...string) IDList {
// Merge all elements from a and b into a new list
func (a IDList) Merge(b IDList) IDList {
return IDList(StringSet(a).Merge(StringSet(b)))
merged, _ := StringSet(a).Merge(StringSet(b))
return IDList(merged)
}
// Contains returns true if id is in the list.

View File

@@ -42,27 +42,54 @@ func (m StringLatestMap) Size() int {
return len(m)
}
// Merge produces a fresh StringLatestMap containing the keys from both inputs.
// Merge produces a StringLatestMap containing the keys from both inputs.
// When both inputs contain the same key, the newer value is used.
// Tries to return one of its inputs, if that already holds the correct result.
func (m StringLatestMap) Merge(n StringLatestMap) StringLatestMap {
switch {
case m == nil:
case len(m) == 0:
return n
case n == nil:
case len(n) == 0:
return m
}
l := len(m)
if len(n) > l {
l = len(n)
if len(n) > len(m) {
m, n = n, m //swap so m is always at least as long as n
} else if len(n) == len(m) && m[0].Timestamp.Before(n[0].Timestamp) {
// Optimise common case where we merge two nodes with the same contents
// sampled at different times.
m, n = n, m // swap equal-length arrays so first element of m is newer
}
out := make([]stringLatestEntry, 0, l)
i, j := 0, 0
loop:
for i < len(m) {
switch {
case j >= len(n) || m[i].key < n[j].key:
out = append(out, m[i])
case j >= len(n):
return m
case m[i].key == n[j].key:
if m[i].Timestamp.Before(n[j].Timestamp) {
break loop
}
i++
j++
case m[i].key < n[j].key:
i++
default:
break loop
}
}
if i >= len(m) && j >= len(n) {
return m
}
out := make([]stringLatestEntry, i, len(m))
copy(out, m[:i])
for i < len(m) {
switch {
case j >= len(n):
out = append(out, m[i:]...)
return out
case m[i].key == n[j].key:
if m[i].Timestamp.Before(n[j].Timestamp) {
out = append(out, n[j])
@@ -71,6 +98,9 @@ func (m StringLatestMap) Merge(n StringLatestMap) StringLatestMap {
}
i++
j++
case m[i].key < n[j].key:
out = append(out, m[i])
i++
default:
out = append(out, n[j])
j++
@@ -264,27 +294,54 @@ func (m NodeControlDataLatestMap) Size() int {
return len(m)
}
// Merge produces a fresh NodeControlDataLatestMap containing the keys from both inputs.
// Merge produces a NodeControlDataLatestMap containing the keys from both inputs.
// When both inputs contain the same key, the newer value is used.
// Tries to return one of its inputs, if that already holds the correct result.
func (m NodeControlDataLatestMap) Merge(n NodeControlDataLatestMap) NodeControlDataLatestMap {
switch {
case m == nil:
case len(m) == 0:
return n
case n == nil:
case len(n) == 0:
return m
}
l := len(m)
if len(n) > l {
l = len(n)
if len(n) > len(m) {
m, n = n, m //swap so m is always at least as long as n
} else if len(n) == len(m) && m[0].Timestamp.Before(n[0].Timestamp) {
// Optimise common case where we merge two nodes with the same contents
// sampled at different times.
m, n = n, m // swap equal-length arrays so first element of m is newer
}
out := make([]nodeControlDataLatestEntry, 0, l)
i, j := 0, 0
loop:
for i < len(m) {
switch {
case j >= len(n) || m[i].key < n[j].key:
out = append(out, m[i])
case j >= len(n):
return m
case m[i].key == n[j].key:
if m[i].Timestamp.Before(n[j].Timestamp) {
break loop
}
i++
j++
case m[i].key < n[j].key:
i++
default:
break loop
}
}
if i >= len(m) && j >= len(n) {
return m
}
out := make([]nodeControlDataLatestEntry, i, len(m))
copy(out, m[:i])
for i < len(m) {
switch {
case j >= len(n):
out = append(out, m[i:]...)
return out
case m[i].key == n[j].key:
if m[i].Timestamp.Before(n[j].Timestamp) {
out = append(out, n[j])
@@ -293,6 +350,9 @@ func (m NodeControlDataLatestMap) Merge(n NodeControlDataLatestMap) NodeControlD
}
i++
j++
case m[i].key < n[j].key:
out = append(out, m[i])
i++
default:
out = append(out, n[j])
j++

View File

@@ -93,13 +93,6 @@ func TestLatestMapMerge(t *testing.T) {
want: MakeStringLatestMap().
Set("foo", now, "bar"),
},
"Empty b": {
a: MakeStringLatestMap().
Set("foo", now, "bar"),
b: MakeStringLatestMap(),
want: MakeStringLatestMap().
Set("foo", now, "bar"),
},
"Disjoint a & b": {
a: MakeStringLatestMap().
Set("foo", now, "bar"),
@@ -117,10 +110,26 @@ func TestLatestMapMerge(t *testing.T) {
want: MakeStringLatestMap().
Set("foo", now, "bar"),
},
"Longer": {
a: MakeStringLatestMap().
Set("PID", now, "23128").
Set("Name", now, "curl"),
b: MakeStringLatestMap().
Set("PID", then, "0").
Set("Name", now, "curl").
Set("Domain", now, "node-a.local"),
want: MakeStringLatestMap().
Set("PID", now, "23128").
Set("Name", now, "curl").
Set("Domain", now, "node-a.local"),
},
} {
if have := c.a.Merge(c.b); !reflect.DeepEqual(c.want, have) {
t.Errorf("%s:\n%s", name, test.Diff(c.want, have))
}
if have := c.b.Merge(c.a); !reflect.DeepEqual(c.want, have) {
t.Errorf("%s:\n%s", name, test.Diff(c.want, have))
}
}
}

View File

@@ -531,13 +531,12 @@ func (r Report) upgradeDNSRecords() Report {
if ok && (foundS || foundR) {
// Add address and names to report-level map
if existing, found := dns[addr]; found {
// Optimise the expected case that they are equal
if existing.Forward.Equal(snoopedNames) && existing.Reverse.Equal(reverseNames) {
var sUnchanged, rUnchanged bool
snoopedNames, sUnchanged = snoopedNames.Merge(existing.Forward)
reverseNames, rUnchanged = reverseNames.Merge(existing.Reverse)
if sUnchanged && rUnchanged {
continue
}
// Not equal - merge this node's data into existing data,
snoopedNames = snoopedNames.Merge(existing.Forward)
reverseNames = reverseNames.Merge(existing.Reverse)
}
dns[addr] = DNSRecord{Forward: snoopedNames, Reverse: reverseNames}
}

View File

@@ -35,7 +35,11 @@ func (s Sets) Add(key string, value StringSet) Sets {
s = emptySets
}
if existingValue, ok := s.psMap.Lookup(key); ok {
value = value.Merge(existingValue.(StringSet))
var unchanged bool
value, unchanged = existingValue.(StringSet).Merge(value)
if unchanged {
return s
}
}
return Sets{
psMap: s.psMap.Set(key, value),
@@ -94,7 +98,11 @@ func (s Sets) Merge(other Sets) Sets {
iter.ForEach(func(key string, value interface{}) {
set := value.(StringSet)
if existingSet, ok := result.Lookup(key); ok {
set = set.Merge(existingSet.(StringSet))
var unchanged bool
set, unchanged = existingSet.(StringSet).Merge(set)
if unchanged {
return
}
}
result = result.Set(key, set)
})

View File

@@ -1,12 +1,44 @@
package report_test
import (
"fmt"
"testing"
"github.com/weaveworks/scope/report"
"github.com/weaveworks/scope/test/reflect"
)
func TestSetsAdd(t *testing.T) {
for _, testcase := range []struct {
a report.Sets
want map[string][]string
}{
{
report.MakeSets().Add("a", report.MakeStringSet("b")),
map[string][]string{"a": {"b"}},
},
{
report.MakeSets().Add("a", report.MakeStringSet("b")).Add("a", report.MakeStringSet("c")),
map[string][]string{"a": {"b", "c"}},
},
{
report.MakeSets().Add("a", report.MakeStringSet("b", "c")).Add("a", report.MakeStringSet("c")),
map[string][]string{"a": {"b", "c"}},
},
{
report.MakeSets().Add("a", report.MakeStringSet("c")).Add("a", report.MakeStringSet("b", "c")),
map[string][]string{"a": {"b", "c"}},
},
{
report.MakeSets().Add("a", report.MakeStringSet("1")).Add("b", report.MakeStringSet("2")).
Add("c", report.MakeStringSet("3")).Add("b", report.MakeStringSet("3")),
map[string][]string{"a": {"1"}, "b": {"2", "3"}, "c": {"3"}},
},
} {
check(t, "Add", testcase.a, testcase.want)
}
}
func TestSetsMerge(t *testing.T) {
for _, testcase := range []struct {
a, b report.Sets
@@ -29,15 +61,19 @@ func TestSetsMerge(t *testing.T) {
map[string][]string{"a": {"1"}, "b": {"2", "3"}, "c": {"3"}},
},
} {
haveSets := testcase.a.Merge(testcase.b)
have := map[string][]string{}
keys := haveSets.Keys()
for _, k := range keys {
have[k], _ = haveSets.Lookup(k)
}
if !reflect.DeepEqual(testcase.want, have) {
t.Errorf("%+v.Merge(%+v): want %+v, have %+v", testcase.a, testcase.b, testcase.want, have)
}
check(t, fmt.Sprintf("%+v.Merge(%+v)", testcase.a, testcase.b), testcase.a.Merge(testcase.b), testcase.want)
check(t, fmt.Sprintf("%+v.Merge(%+v)", testcase.b, testcase.a), testcase.b.Merge(testcase.a), testcase.want)
}
}
func check(t *testing.T, desc string, haveSets report.Sets, want map[string][]string) {
have := map[string][]string{}
keys := haveSets.Keys()
for _, k := range keys {
have[k], _ = haveSets.Lookup(k)
}
if !reflect.DeepEqual(want, have) {
t.Errorf("%s: want %+v, have %+v", desc, want, have)
}
}

View File

@@ -81,32 +81,54 @@ func (s StringSet) Add(strs ...string) StringSet {
}
// Merge combines the two StringSets and returns a new result.
func (s StringSet) Merge(other StringSet) StringSet {
// Second return value is true if the return value is s
func (s StringSet) Merge(other StringSet) (StringSet, bool) {
switch {
case len(other) <= 0: // Optimise special case, to avoid allocating
return s // (note unit test DeepEquals breaks if we don't do this)
return s, true // (note unit test DeepEquals breaks if we don't do this)
case len(s) <= 0:
return other
return other, false
}
result := make(StringSet, len(s)+len(other))
for i, j, k := 0, 0, 0; ; k++ {
i, j := 0, 0
loop:
for i < len(s) {
switch {
case i >= len(s):
copy(result[k:], other[j:])
return result[:k+len(other)-j]
case j >= len(other):
copy(result[k:], s[i:])
return result[:k+len(s)-i]
case s[i] < other[j]:
result[k] = s[i]
return s, true
case s[i] == other[j]:
i++
case s[i] > other[j]:
result[k] = other[j]
j++
default: // equal
result[k] = s[i]
case s[i] < other[j]:
i++
default:
break loop
}
}
if i >= len(s) && j >= len(other) {
return s, true
}
result := make(StringSet, i, len(s)+len(other))
copy(result, s[:i])
for i < len(s) {
switch {
case j >= len(other):
result = append(result, s[i:]...)
return result, false
case s[i] == other[j]:
result = append(result, s[i])
i++
j++
case s[i] < other[j]:
result = append(result, s[i])
i++
default:
result = append(result, other[j])
j++
}
}
result = append(result, other[j:]...)
return result, false
}

View File

@@ -61,8 +61,8 @@ func TestStringSetMerge(t *testing.T) {
{input: report.MakeStringSet("a", "c"), other: report.MakeStringSet("a", "b"), want: report.MakeStringSet("a", "b", "c")},
{input: report.MakeStringSet("b"), other: report.MakeStringSet("a"), want: report.MakeStringSet("a", "b")},
} {
if want, have := testcase.want, testcase.input.Merge(testcase.other); !reflect.DeepEqual(want, have) {
t.Errorf("%v + %v: want %v, have %v", testcase.input, testcase.other, want, have)
if have, _ := testcase.input.Merge(testcase.other); !reflect.DeepEqual(testcase.want, have) {
t.Errorf("%v + %v: want %v, have %v", testcase.input, testcase.other, testcase.want, have)
}
}
}