/* * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // This package includes multiple probabalistic data structures needed for // admission/eviction metadata. Most are Counting Bloom Filter variations, but // a caching-specific feature that is also required is a "freshness" mechanism, // which basically serves as a "lifetime" process. This freshness mechanism // was described in the original TinyLFU paper [1], but other mechanisms may // be better suited for certain data distributions. // // [1]: https://arxiv.org/abs/1512.00727 package ristretto import ( "fmt" "math/rand" "time" ) // cmSketch is a Count-Min sketch implementation with 4-bit counters, heavily // based on Damian Gryski's CM4 [1]. // // [1]: https://github.com/dgryski/go-tinylfu/blob/master/cm4.go type cmSketch struct { rows [cmDepth]cmRow seed [cmDepth]uint64 mask uint64 } const ( // cmDepth is the number of counter copies to store (think of it as rows). cmDepth = 4 ) func newCmSketch(numCounters int64) *cmSketch { if numCounters == 0 { panic("cmSketch: bad numCounters") } // Get the next power of 2 for better cache performance. numCounters = next2Power(numCounters) sketch := &cmSketch{mask: uint64(numCounters - 1)} // Initialize rows of counters and seeds. source := rand.New(rand.NewSource(time.Now().UnixNano())) for i := 0; i < cmDepth; i++ { sketch.seed[i] = source.Uint64() sketch.rows[i] = newCmRow(numCounters) } return sketch } // Increment increments the count(ers) for the specified key. func (s *cmSketch) Increment(hashed uint64) { for i := range s.rows { s.rows[i].increment((hashed ^ s.seed[i]) & s.mask) } } // Estimate returns the value of the specified key. func (s *cmSketch) Estimate(hashed uint64) int64 { min := byte(255) for i := range s.rows { val := s.rows[i].get((hashed ^ s.seed[i]) & s.mask) if val < min { min = val } } return int64(min) } // Reset halves all counter values. func (s *cmSketch) Reset() { for _, r := range s.rows { r.reset() } } // Clear zeroes all counters. func (s *cmSketch) Clear() { for _, r := range s.rows { r.clear() } } // cmRow is a row of bytes, with each byte holding two counters. type cmRow []byte func newCmRow(numCounters int64) cmRow { return make(cmRow, numCounters/2) } func (r cmRow) get(n uint64) byte { return byte(r[n/2]>>((n&1)*4)) & 0x0f } func (r cmRow) increment(n uint64) { // Index of the counter. i := n / 2 // Shift distance (even 0, odd 4). s := (n & 1) * 4 // Counter value. v := (r[i] >> s) & 0x0f // Only increment if not max value (overflow wrap is bad for LFU). if v < 15 { r[i] += 1 << s } } func (r cmRow) reset() { // Halve each counter. for i := range r { r[i] = (r[i] >> 1) & 0x77 } } func (r cmRow) clear() { // Zero each counter. for i := range r { r[i] = 0 } } func (r cmRow) string() string { s := "" for i := uint64(0); i < uint64(len(r)*2); i++ { s += fmt.Sprintf("%02d ", (r[(i/2)]>>((i&1)*4))&0x0f) } s = s[:len(s)-1] return s } // next2Power rounds x up to the next power of 2, if it's not already one. func next2Power(x int64) int64 { x-- x |= x >> 1 x |= x >> 2 x |= x >> 4 x |= x >> 8 x |= x >> 16 x |= x >> 32 x++ return x }