Source Code Analysis
// A header for a Go map.
type hmap struct {
	// Note: the format of the Hmap is encoded in ../../cmd/internal/gc/reflect.go and
	// ../reflect/type.go. Don't change this structure without also changing that code!
	count     int    // # live cells == size of map. Must be first (used by len() builtin)
	flags     uint8
	B         uint8  // log_2 of # of buckets (can hold up to loadFactor * 2^B items)
	noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details
	hash0     uint32 // hash seed

	buckets    unsafe.Pointer // array of 2^B Buckets. may be nil if count==0.
	oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing
	nevacuate  uintptr        // progress counter for evacuation (buckets less than this have been evacuated)
	// If both key and value do not contain pointers and are inline, then we mark bucket
	// type as containing no pointers. This avoids scanning such maps.
	// However, bmap.overflow is a pointer. In order to keep overflow buckets
	// alive, we store pointers to all overflow buckets in hmap.overflow.
	// Overflow is used only if key and value do not contain pointers.
	// overflow[0] contains overflow buckets for hmap.buckets.
	// overflow[1] contains overflow buckets for hmap.oldbuckets.
	// The first indirection allows us to reduce static size of hmap.
	// The second indirection allows to store a pointer to the slice in hiter.
	overflow *[2]*[]*bmap
}

// A bucket for a Go map.
type bmap struct {
	// Note: tophash caches the top 8 bits of the hash of each of the bucket's 8 keys,
	// so a lookup can skip non-matching cells without running the full key comparison.
	// tophash generally contains the top byte of the hash value
	// for each key in this bucket. If tophash[0] < minTopHash,
	// tophash[0] is a bucket evacuation state instead.
	tophash [bucketCnt]uint8
	// Followed by bucketCnt keys and then bucketCnt values.
	// NOTE: packing all the keys together and then all the values together makes the
	// code a bit more complicated than alternating key/value/key/value/... but it allows
	// us to eliminate padding which would be needed for, e.g., map[int64]int8.
	// Followed by an overflow pointer.
}

As the comment says, grouping all keys before all values saves padding: in a map[int64]int8 the eight int8 values can sit next to each other in the same machine words, whereas an interleaved key/value layout would pad every int8 out to 8 bytes so that the following int64 key stays aligned.
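To make the padding argument concrete, here is a minimal standalone sketch (my own illustration, not runtime code) comparing the two layouts for a bucket of 8 map[int64]int8 entries:

package main

import (
	"fmt"
	"unsafe"
)

// Interleaved layout: each int8 value is followed by 7 bytes of compiler
// padding so the next int64 key stays 8-byte aligned.
type interleaved struct {
	k int64
	v int8
}

// Grouped layout, as in bmap: all 8 keys first, then all 8 values.
type grouped struct {
	k [8]int64
	v [8]int8
}

func main() {
	fmt.Println(8 * unsafe.Sizeof(interleaved{})) // 128 bytes for 8 key/value pairs
	fmt.Println(unsafe.Sizeof(grouped{}))         // 72 bytes for the same 8 pairs
}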
Map growth
The default load factor is 6.5. When the number of entries exceeds loadFactor * 2^B, the map grows and the new bucket array is twice the size of the old one. (The code below also shows a same-size grow, triggered by tooManyOverflowBuckets, which keeps B unchanged and only reorganizes the buckets.)

When growth starts, hashGrow leaves the original array reachable through the oldbuckets pointer and points buckets at the freshly allocated array. Both arrays coexist during the transition, so a given hash temporarily corresponds to two candidate buckets, one old and one new.

Old buckets are not migrated all at once. Each write that lands on a bucket while the map is growing calls growWork, which evacuates the corresponding old bucket by rehashing its elements into the new array. As assignments and deletes keep touching buckets, every old bucket is eventually moved over.
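Because the bucket count is always a power of two and indexing uses a mask, a key in old bucket i can only end up in new bucket i or i + 2^oldB after a doubling grow; a single extra hash bit decides which. A small sketch with a made-up hash value (illustration only, not runtime code):

package main

import "fmt"

func main() {
	const oldB = 3              // 2^3 = 8 buckets before growing
	hash := uint64(0b1011_0101) // pretend this is alg.hash(key, hash0)

	oldIdx := hash & (1<<oldB - 1)     // index with the old, smaller mask
	newIdx := hash & (1<<(oldB+1) - 1) // index after the bucket count doubles

	// Evacuation only has to split each old bucket between two targets:
	// the same index, or the same index plus 2^oldB.
	fmt.Println(oldIdx, newIdx, newIdx == oldIdx || newIdx == oldIdx+1<<oldB)
}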
get
// mapaccess1 returns a pointer to h[key]. Never returns nil, instead
// it will return a reference to the zero object for the value type if
// the key is not in the map.
// NOTE: The returned pointer may keep the whole map live, so don't
// hold onto it for very long.
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	...
	if h == nil || h.count == 0 {
		return unsafe.Pointer(&zeroVal[0])
	}
	if h.flags&hashWriting != 0 {
		// a concurrent writer is active: read/write race
		throw("concurrent map read and map write")
	}
	// compute the hash of the key
	alg := t.key.alg
	hash := alg.hash(key, uintptr(h.hash0))
	m := uintptr(1)<<h.B - 1
	b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
	if c := h.oldbuckets; c != nil {
		// if the old bucket has not been evacuated yet, look it up there
		if !h.sameSizeGrow() {
			// There used to be half as many buckets; mask down one more power of two.
			m >>= 1
		}
		oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize)))
		if !evacuated(oldb) {
			b = oldb
		}
	}
	top := uint8(hash >> (sys.PtrSize*8 - 8))
	if top < minTopHash {
		top += minTopHash
	}
	for {
		for i := uintptr(0); i < bucketCnt; i++ {
			// compare the top 8 bits of the hash first
			if b.tophash[i] != top {
				continue
			}
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			if t.indirectkey {
				k = *((*unsafe.Pointer)(k))
			}
			// key equals k: found it
			if alg.equal(key, k) {
				v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
				if t.indirectvalue {
					v = *((*unsafe.Pointer)(v))
				}
				return v
			}
		}
		b = b.overflow(t)
		if b == nil {
			return unsafe.Pointer(&zeroVal[0])
		}
	}
}
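The zeroVal branches above are what make a lookup on a missing key return the value type's zero value rather than fail; a quick usage example:

package main

import "fmt"

func main() {
	m := map[string]int{"a": 1}

	// A missing key falls through to the zeroVal return, so the result is 0, not a panic.
	fmt.Println(m["missing"]) // 0

	// The comma-ok form additionally reports whether the key was present.
	v, ok := m["missing"]
	fmt.Println(v, ok) // 0 false
}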
set
// Like mapaccess, but allocates a slot for the key if it is not present in the map.
func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	if h == nil {
		panic(plainError("assignment to entry in nil map"))
	}
	...
	if h.flags&hashWriting != 0 {
		// another goroutine is already writing
		throw("concurrent map writes")
	}
	h.flags |= hashWriting

	alg := t.key.alg
	hash := alg.hash(key, uintptr(h.hash0))

	if h.buckets == nil {
		// lazily allocate the bucket array on first write
		h.buckets = newarray(t.bucket, 1)
	}

again:
	bucket := hash & (uintptr(1)<<h.B - 1)
	if h.growing() {
		// evacuate the corresponding old bucket into the new array
		growWork(t, h, bucket)
	}
	b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
	top := uint8(hash >> (sys.PtrSize*8 - 8))
	if top < minTopHash {
		top += minTopHash
	}

	var inserti *uint8
	var insertk unsafe.Pointer
	var val unsafe.Pointer
	for {
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				if b.tophash[i] == empty && inserti == nil {
					inserti = &b.tophash[i]
					insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
					val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
				}
				continue
			}
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			if t.indirectkey {
				k = *((*unsafe.Pointer)(k))
			}
			if !alg.equal(key, k) {
				continue
			}
			// already have a mapping for key. Update it.
			if t.needkeyupdate {
				typedmemmove(t.key, k, key)
			}
			val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
			goto done
		}
		ovf := b.overflow(t)
		if ovf == nil {
			break
		}
		b = ovf
	}

	// Did not find mapping for key. Allocate new cell & add entry.

	// If we hit the max load factor or we have too many overflow buckets,
	// and we're not already in the middle of growing, start growing.
	if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
		hashGrow(t, h)
		goto again // Growing the table invalidates everything, so try again
	}

	if inserti == nil {
		// all current buckets are full, allocate a new one.
		newb := (*bmap)(newobject(t.bucket))
		h.setoverflow(t, b, newb)
		inserti = &newb.tophash[0]
		insertk = add(unsafe.Pointer(newb), dataOffset)
		val = add(insertk, bucketCnt*uintptr(t.keysize))
	}

	// store new key/value at insert position
	if t.indirectkey {
		kmem := newobject(t.key)
		*(*unsafe.Pointer)(insertk) = kmem
		insertk = kmem
	}
	if t.indirectvalue {
		vmem := newobject(t.elem)
		*(*unsafe.Pointer)(val) = vmem
	}
	typedmemmove(t.key, insertk, key)
	*inserti = top
	h.count++

done:
	if h.flags&hashWriting == 0 {
		throw("concurrent map writes")
	}
	h.flags &^= hashWriting
	if t.indirectvalue {
		val = *((*unsafe.Pointer)(val))
	}
	return val
}
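The h == nil guard at the top of mapassign is also why writing to a nil map panics while reading from one is fine; for example:

package main

import "fmt"

func main() {
	var m map[string]int // nil map, no buckets allocated

	// Reads go through mapaccess and just return the zero value.
	fmt.Println(m["x"]) // 0

	// Writes hit the nil check in mapassign and panic with
	// "assignment to entry in nil map".
	defer func() { fmt.Println("recovered:", recover()) }()
	m["x"] = 1
}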
del
empty = 0 // cell is empty
func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
	...
	if h == nil || h.count == 0 {
		return
	}
	if h.flags&hashWriting != 0 {
		throw("concurrent map writes")
	}
	h.flags |= hashWriting

	alg := t.key.alg
	hash := alg.hash(key, uintptr(h.hash0))
	bucket := hash & (uintptr(1)<<h.B - 1)
	if h.growing() {
		growWork(t, h, bucket)
	}
	b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
	top := uint8(hash >> (sys.PtrSize*8 - 8))
	if top < minTopHash {
		top += minTopHash
	}
	for {
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				continue
			}
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			k2 := k
			if t.indirectkey {
				k2 = *((*unsafe.Pointer)(k2))
			}
			if !alg.equal(key, k2) {
				continue
			}
			if t.indirectkey {
				*(*unsafe.Pointer)(k) = nil
			} else {
				typedmemclr(t.key, k)
			}
			v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*uintptr(t.keysize) + i*uintptr(t.valuesize))
			if t.indirectvalue {
				*(*unsafe.Pointer)(v) = nil
			} else {
				typedmemclr(t.elem, v)
			}
			// mark the deleted cell as empty
			// delete does not free the objects directly; the key/value just become
			// unreachable and are reclaimed later by the GC
			b.tophash[i] = empty
			h.count--
			goto done
		}
		b = b.overflow(t)
		if b == nil {
			goto done
		}
	}

done:
	if h.flags&hashWriting == 0 {
		throw("concurrent map writes")
	}
	h.flags &^= hashWriting
}