Source Code Analysis
// A header for a Go map.
type hmap struct {
	// Note: the format of the Hmap is encoded in ../../cmd/internal/gc/reflect.go and
	// ../reflect/type.go. Don't change this structure without also changing that code!
	count     int    // # live cells == size of map. Must be first (used by len() builtin)
	flags     uint8
	B         uint8  // log_2 of # of buckets (can hold up to loadFactor * 2^B items)
	noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details
	hash0     uint32 // hash seed

	buckets    unsafe.Pointer // array of 2^B Buckets. may be nil if count==0.
	oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing
	nevacuate  uintptr        // progress counter for evacuation (buckets less than this have been evacuated)
	// If both key and value do not contain pointers and are inline, then we mark bucket
	// type as containing no pointers. This avoids scanning such maps.
	// However, bmap.overflow is a pointer. In order to keep overflow buckets
	// alive, we store pointers to all overflow buckets in hmap.overflow.
	// Overflow is used only if key and value do not contain pointers.
	// overflow[0] contains overflow buckets for hmap.buckets.
	// overflow[1] contains overflow buckets for hmap.oldbuckets.
	// The first indirection allows us to reduce static size of hmap.
	// The second indirection allows to store a pointer to the slice in hiter.
	overflow *[2]*[]*bmap
}

// A bucket for a Go map.
type bmap struct {
	// Note: tophash caches the top 8 bits of the hash of each of the bucket's 8 keys,
	// so a lookup can skip non-matching cells without running the full key comparison.
	// tophash generally contains the top byte of the hash value
	// for each key in this bucket. If tophash[0] < minTopHash,
	// tophash[0] is a bucket evacuation state instead.
	tophash [bucketCnt]uint8
	// Followed by bucketCnt keys and then bucketCnt values.
	// NOTE: packing all the keys together and then all the values together makes the
	// code a bit more complicated than alternating key/value/key/value/... but it allows
	// us to eliminate padding which would be needed for, e.g., map[int64]int8.
	// Followed by an overflow pointer.
}

As the comment says, grouping all keys before all values saves padding: in a map[int64]int8 the eight int8 values can sit next to each other in the same machine words, whereas an interleaved key/value layout would pad every int8 out to 8 bytes so that the following int64 key stays aligned.
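To make the padding argument concrete, here is a minimal standalone sketch (my own illustration, not runtime code) comparing the two layouts for a bucket of 8 map[int64]int8 entries:

package main

import (
	"fmt"
	"unsafe"
)

// Interleaved layout: each int8 value is followed by 7 bytes of compiler
// padding so the next int64 key stays 8-byte aligned.
type interleaved struct {
	k int64
	v int8
}

// Grouped layout, as in bmap: all 8 keys first, then all 8 values.
type grouped struct {
	k [8]int64
	v [8]int8
}

func main() {
	fmt.Println(8 * unsafe.Sizeof(interleaved{})) // 128 bytes for 8 key/value pairs
	fmt.Println(unsafe.Sizeof(grouped{}))         // 72 bytes for the same 8 pairs
}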
Map growth
The default load factor is 6.5. When the number of entries exceeds loadFactor * 2^B, the map grows and the new bucket array is twice the size of the old one. (The code below also shows a same-size grow, triggered by tooManyOverflowBuckets, which keeps B unchanged and only reorganizes the buckets.)

When growth starts, hashGrow leaves the original array reachable through the oldbuckets pointer and points buckets at the freshly allocated array. Both arrays coexist during the transition, so a given hash temporarily corresponds to two candidate buckets, one old and one new.

Old buckets are not migrated all at once. Each write that lands on a bucket while the map is growing calls growWork, which evacuates the corresponding old bucket by rehashing its elements into the new array. As assignments and deletes keep touching buckets, every old bucket is eventually moved over.
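Because the bucket count is always a power of two and indexing uses a mask, a key in old bucket i can only end up in new bucket i or i + 2^oldB after a doubling grow; a single extra hash bit decides which. A small sketch with a made-up hash value (illustration only, not runtime code):

package main

import "fmt"

func main() {
	const oldB = 3              // 2^3 = 8 buckets before growing
	hash := uint64(0b1011_0101) // pretend this is alg.hash(key, hash0)

	oldIdx := hash & (1<<oldB - 1)     // index with the old, smaller mask
	newIdx := hash & (1<<(oldB+1) - 1) // index after the bucket count doubles

	// Evacuation only has to split each old bucket between two targets:
	// the same index, or the same index plus 2^oldB.
	fmt.Println(oldIdx, newIdx, newIdx == oldIdx || newIdx == oldIdx+1<<oldB)
}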
get
// mapaccess1 returns a pointer to h[key]. Never returns nil, instead
// it will return a reference to the zero object for the value type if
// the key is not in the map.
// NOTE: The returned pointer may keep the whole map live, so don't
// hold onto it for very long.
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	...
	if h == nil || h.count == 0 {
		return unsafe.Pointer(&zeroVal[0])
	}
	if h.flags&hashWriting != 0 {
		// a concurrent writer is active: read/write race
		throw("concurrent map read and map write")
	}
	// compute the hash of the key
	alg := t.key.alg
	hash := alg.hash(key, uintptr(h.hash0))
	m := uintptr(1)<<h.B - 1
	b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
	if c := h.oldbuckets; c != nil {
		// if the old bucket has not been evacuated yet, look it up there
		if !h.sameSizeGrow() {
			// There used to be half as many buckets; mask down one more power of two.
			m >>= 1
		}
		oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize)))
		if !evacuated(oldb) {
			b = oldb
		}
	}
	top := uint8(hash >> (sys.PtrSize*8 - 8))
	if top < minTopHash {
		top += minTopHash
	}
	for {
		for i := uintptr(0); i < bucketCnt; i++ {
			// compare the top 8 bits of the hash first
			if b.tophash[i] != top {
				continue
			}
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			if t.indirectkey {
				k = *((*unsafe.Pointer)(k))
			}
			// key equals k: found it
			if alg.equal(key, k) {
				v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
				if t.indirectvalue {
					v = *((*unsafe.Pointer)(v))
				}
				return v
			}
		}
		b = b.overflow(t)
		if b == nil {
			return unsafe.Pointer(&zeroVal[0])
		}
	}
}
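The zeroVal branches above are what make a lookup on a missing key return the value type's zero value rather than fail; a quick usage example:

package main

import "fmt"

func main() {
	m := map[string]int{"a": 1}

	// A missing key falls through to the zeroVal return, so the result is 0, not a panic.
	fmt.Println(m["missing"]) // 0

	// The comma-ok form additionally reports whether the key was present.
	v, ok := m["missing"]
	fmt.Println(v, ok) // 0 false
}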
set
// Like mapaccess, but allocates a slot for the key if it is not present in the map.
func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	if h == nil {
		panic(plainError("assignment to entry in nil map"))
	}
	...
	if h.flags&hashWriting != 0 {
		// another goroutine is already writing
		throw("concurrent map writes")
	}
	h.flags |= hashWriting

	alg := t.key.alg
	hash := alg.hash(key, uintptr(h.hash0))

	if h.buckets == nil {
		// lazily allocate the bucket array on first write
		h.buckets = newarray(t.bucket, 1)
	}

again:
	bucket := hash & (uintptr(1)<<h.B - 1)
	if h.growing() {
		// evacuate the corresponding old bucket into the new array
		growWork(t, h, bucket)
	}
	b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
	top := uint8(hash >> (sys.PtrSize*8 - 8))
	if top < minTopHash {
		top += minTopHash
	}

	var inserti *uint8
	var insertk unsafe.Pointer
	var val unsafe.Pointer
	for {
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				if b.tophash[i] == empty && inserti == nil {
					inserti = &b.tophash[i]
					insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
					val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
				}
				continue
			}
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			if t.indirectkey {
				k = *((*unsafe.Pointer)(k))
			}
			if !alg.equal(key, k) {
				continue
			}
			// already have a mapping for key. Update it.
			if t.needkeyupdate {
				typedmemmove(t.key, k, key)
			}
			val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
			goto done
		}
		ovf := b.overflow(t)
		if ovf == nil {
			break
		}
		b = ovf
	}

	// Did not find mapping for key. Allocate new cell & add entry.

	// If we hit the max load factor or we have too many overflow buckets,
	// and we're not already in the middle of growing, start growing.
	if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
		hashGrow(t, h)
		goto again // Growing the table invalidates everything, so try again
	}

	if inserti == nil {
		// all current buckets are full, allocate a new one.
		newb := (*bmap)(newobject(t.bucket))
		h.setoverflow(t, b, newb)
		inserti = &newb.tophash[0]
		insertk = add(unsafe.Pointer(newb), dataOffset)
		val = add(insertk, bucketCnt*uintptr(t.keysize))
	}

	// store new key/value at insert position
	if t.indirectkey {
		kmem := newobject(t.key)
		*(*unsafe.Pointer)(insertk) = kmem
		insertk = kmem
	}
	if t.indirectvalue {
		vmem := newobject(t.elem)
		*(*unsafe.Pointer)(val) = vmem
	}
	typedmemmove(t.key, insertk, key)
	*inserti = top
	h.count++

done:
	if h.flags&hashWriting == 0 {
		throw("concurrent map writes")
	}
	h.flags &^= hashWriting
	if t.indirectvalue {
		val = *((*unsafe.Pointer)(val))
	}
	return val
}
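The h == nil guard at the top of mapassign is also why writing to a nil map panics while reading from one is fine; for example:

package main

import "fmt"

func main() {
	var m map[string]int // nil map, no buckets allocated

	// Reads go through mapaccess and just return the zero value.
	fmt.Println(m["x"]) // 0

	// Writes hit the nil check in mapassign and panic with
	// "assignment to entry in nil map".
	defer func() { fmt.Println("recovered:", recover()) }()
	m["x"] = 1
}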
del
empty = 0 // cell is empty
func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
	...
	if h == nil || h.count == 0 {
		return
	}
	if h.flags&hashWriting != 0 {
		throw("concurrent map writes")
	}
	h.flags |= hashWriting

	alg := t.key.alg
	hash := alg.hash(key, uintptr(h.hash0))
	bucket := hash & (uintptr(1)<<h.B - 1)
	if h.growing() {
		growWork(t, h, bucket)
	}
	b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
	top := uint8(hash >> (sys.PtrSize*8 - 8))
	if top < minTopHash {
		top += minTopHash
	}
	for {
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				continue
			}
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			k2 := k
			if t.indirectkey {
				k2 = *((*unsafe.Pointer)(k2))
			}
			if !alg.equal(key, k2) {
				continue
			}
			if t.indirectkey {
				*(*unsafe.Pointer)(k) = nil
			} else {
				typedmemclr(t.key, k)
			}
			v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*uintptr(t.keysize) + i*uintptr(t.valuesize))
			if t.indirectvalue {
				*(*unsafe.Pointer)(v) = nil
			} else {
				typedmemclr(t.elem, v)
			}
			// mark the deleted cell as empty
			// delete does not free the objects directly; the key/value just become
			// unreachable and are reclaimed later by the GC
			b.tophash[i] = empty
			h.count--
			goto done
		}
		b = b.overflow(t)
		if b == nil {
			goto done
		}
	}

done:
	if h.flags&hashWriting == 0 {
		throw("concurrent map writes")
	}
	h.flags &^= hashWriting
}