HashMap源码解析

简介

HashMap是基于Map接口的实现，最多允许一个key为null，允许多个value为null。它根据键的hashCode值存储数据，大多数情况下可以直接定位到它的值，因而具有很快的访问速度，但不保证有序（即不保证遍历顺序与插入顺序一致），也不保证顺序不随时间变化而变化。HashMap非线程安全：若同一时刻有多个线程同时写HashMap，可能导致数据不一致。如果需要满足线程安全，可以用Collections的synchronizedMap方法使HashMap具有线程安全的能力，或者使用ConcurrentHashMap。

数据结构

HashMap采用数组 + 链表 + 红黑树的存储结构（当某个桶中链表长度超过8，且数组容量不小于64时，该桶改用红黑树存储）

hashMap数据结构

源码解析

Node()

/**
 * A single key/value entry that can be chained to a successor through
 * {@code next}. The hash and key are fixed at construction; only the value
 * and the successor link are mutable.
 */
static class Node<K,V> implements Map.Entry<K,V> {
    /** Hash recorded for this entry at construction time. */
    final int hash;
    /** Key of this entry. */
    final K key;
    /** Current value; replaced through {@link #setValue}. */
    V value;
    /** Successor in the same chain, or {@code null} when this is the tail. */
    Node<K,V> next;

    Node(int hash, K key, V value, Node<K,V> next) {
        this.hash = hash;
        this.key = key;
        this.value = value;
        this.next = next;
    }

    @Override
    public final K getKey() {
        return key;
    }

    @Override
    public final V getValue() {
        return value;
    }

    /** Renders the entry as {@code key=value}. */
    @Override
    public final String toString() {
        return key + "=" + value;
    }

    /** XOR of the null-safe hash codes of key and value. */
    @Override
    public final int hashCode() {
        final int keyHash = Objects.hashCode(key);
        final int valueHash = Objects.hashCode(value);
        return keyHash ^ valueHash;
    }

    /**
     * Replaces the value held by this entry.
     *
     * @param newValue value to store
     * @return the value that was held before the call
     */
    @Override
    public final V setValue(V newValue) {
        final V previous = value;
        value = newValue;
        return previous;
    }

    /**
     * Two entries are equal when both their keys and their values are equal
     * (null-safe); any {@code Map.Entry} implementation may match.
     */
    @Override
    public final boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (!(o instanceof Map.Entry)) {
            return false;
        }
        final Map.Entry<?,?> other = (Map.Entry<?,?>) o;
        return Objects.equals(key, other.getKey())
            && Objects.equals(value, other.getValue());
    }
}

变量属性

// Default initial capacity: 16.
static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16
// Maximum capacity: 1 << 30, i.e. 2^30 (not 2^31 -- 1 << 31 would overflow to a negative int).
static final int MAXIMUM_CAPACITY = 1 << 30;
// Default load factor.
static final float DEFAULT_LOAD_FACTOR = 0.75f;
// Chain-length threshold used by putVal: once a bucket's chain grows past this many nodes,
// the bucket is handed to treeifyBin for conversion to a red-black tree.
static final int TREEIFY_THRESHOLD = 8;
// Per the original note: a tree bucket that shrinks below this many nodes is converted back
// to a linked list (the code using this constant is not shown in this excerpt).
static final int UNTREEIFY_THRESHOLD = 6;
// Per the original note: minimum table capacity for treeification; below it the table is
// resized instead of converting the chain to a tree (the code using this is not shown here).
static final int MIN_TREEIFY_CAPACITY = 64;
// The bucket array; its length is always a power of two.
transient Node<K,V>[] table;
// Set of the map's entries (populated elsewhere; not shown in this excerpt).
transient Set<Map.Entry<K,V>> entrySet;
// Number of key/value mappings stored -- not the same thing as the array length.
transient int size;
// Counter of structural modifications; putVal increments it when a new mapping is inserted,
// but not when the value of an existing key is overwritten.
transient int modCount;
// Resize threshold (capacity * load factor): once size exceeds it the table is resized.
// Before the table is allocated it temporarily holds the initial capacity.
int threshold;
// Load factor.
final float loadFactor;

构造方法

public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0)
        throw new IllegalArgumentException("Illegal initial capacity: " +
                                           initialCapacity);
    if (initialCapacity > MAXIMUM_CAPACITY)
        initialCapacity = MAXIMUM_CAPACITY;
    if (loadFactor <= 0 || Float.isNaN(loadFactor))
        throw new IllegalArgumentException("Illegal load factor: " +
                                           loadFactor);
    this.loadFactor = loadFactor;
    //初始容量值通过此方法创建，并不是传入的initialCapacity
    this.threshold = tableSizeFor(initialCapacity);
}

tableSizeFor()

/**
 * Rounds a requested capacity up to a power of two.
 *
 * @param cap requested capacity
 * @return the smallest power of two that is greater than or equal to
 *         {@code cap} (for example 9 gives 16, 16 gives 16), 1 when
 *         {@code cap} is 0 or 1, and never more than {@code MAXIMUM_CAPACITY}
 */
static final int tableSizeFor(int cap) {
    // Start from cap - 1 so that an exact power of two maps to itself.
    int bits = cap - 1;
    // ">>>" is the unsigned right shift. OR-ing in shifts of 1, 2, 4, 8 and 16
    // copies the highest set bit into every lower position.
    for (int shift = 1; shift <= 16; shift <<= 1) {
        bits |= bits >>> shift;
    }
    if (bits < 0) {
        return 1;
    }
    return bits >= MAXIMUM_CAPACITY ? MAXIMUM_CAPACITY : bits + 1;
}

hash()

/**
 * Computes the hash used for bucket selection: the key's {@code hashCode()}
 * XOR-ed with its own upper 16 bits, so the high bits influence the low bits.
 *
 * @param key the key, may be {@code null}
 * @return 0 for a {@code null} key, otherwise the spread hash code
 */
static final int hash(Object key) {
    if (key == null) {
        return 0;
    }
    final int code = key.hashCode();
    return code ^ (code >>> 16);
}

(key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16) 高位运算主要是从速度、功效、质量来考虑的，这么做可以在数组table的length比较小的时候，也能保证考虑到高低Bit都参与到Hash的计算中，同时不会有太大的开销。

下面举例说明下，n为table的长度。

hashMap哈希算法例图

put()

/**
 * Associates {@code value} with {@code key}, overwriting any value already
 * stored for that key.
 *
 * @param key   key of the mapping
 * @param value value to store
 * @return the value previously stored for {@code key}, or {@code null} if a
 *         new mapping was inserted
 */
public V put(K key, V value) {
    final int keyHash = hash(key);
    return putVal(keyHash, key, value, false, true);
}
/**
 * Inserts or updates the mapping for {@code key}.
 *
 * @param hash         hash of the key (as produced by hash(key))
 * @param key          the key
 * @param value        the value to store
 * @param onlyIfAbsent if true, an existing non-null value is left unchanged
 * @param evict        forwarded unchanged to afterNodeInsertion
 * @return the previous value when the key was already present, otherwise null
 */
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
               boolean evict) {
    Node<K,V>[] tab; Node<K,V> p; int n, i;
    // If the table is null or has length 0, allocate it via resize().
    if ((tab = table) == null || (n = tab.length) == 0)
        n = (tab = resize()).length;
    // Target bucket is empty: wrap the pair in a new node and store it there.
    if ((p = tab[i = (n - 1) & hash]) == null)
        tab[i] = newNode(hash, key, value, null);
    // The bucket already holds at least one node.
    else {
        Node<K,V> e; K k;
        // The bucket's first node has the same key: remember it so its value is replaced below.
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            e = p;
        // The first node is a TreeNode (red-black tree bucket): delegate to putTreeVal.
        else if (p instanceof TreeNode)
            e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
        else {
            // Plain chain: walk it looking for the key; binCount counts the nodes passed.
            for (int binCount = 0; ; ++binCount) {
                // Reached the tail without finding the key.
                if ((e = p.next) == null) {
                    // Append a new node at the tail.
                    p.next = newNode(hash, key, value, null);
                    // binCount >= TREEIFY_THRESHOLD - 1 means the chain held at least 8 nodes
                    // before this append, so the bucket is handed to treeifyBin.
                    if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                        treeifyBin(tab, hash);
                    break;
                }
                // Found a node with the same key: stop, e refers to it.
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    break;
                p = e;
            }
        }
        // The key was already present in the map.
        if (e != null) { // existing mapping for key
            V oldValue = e.value;
            // Overwrite unless onlyIfAbsent is set and a non-null value is already stored.
            if (!onlyIfAbsent || oldValue == null)
                e.value = value;
            // Callback defined elsewhere (not shown in this excerpt).
            afterNodeAccess(e);
            // Early return: modCount and size are not touched for an overwrite.
            return oldValue;
        }
    }
    // A new node was added: record the structural change and grow if size exceeds threshold.
    ++modCount;
    if (++size > threshold)
        resize();
    // Callback defined elsewhere (not shown in this excerpt).
    afterNodeInsertion(evict);
    return null;
}

tab[i = (n - 1) & hash] 可抽象为公式 int index = hash & (length - 1)，其中hash是经过hash()方法扰动后的值，index表示节点所在的桶在table数组中的下标（而不是在链表中的位置）。由于length总是2的幂，该运算的效果相当于对length取模，且结果恒为非负。

get()

/**
 * Looks up the value stored for {@code key}.
 *
 * @param key the key to look up
 * @return the stored value, or {@code null} when no node is found for the
 *         key (a found node may itself hold a {@code null} value)
 */
public V get(Object key) {
    // Hash the key, then let getNode() locate the matching node.
    final Node<K,V> found = getNode(hash(key), key);
    return found == null ? null : found.value;
}
/**
 * Finds the node holding {@code key}.
 *
 * @param hash hash of the key (as produced by hash(key))
 * @param key  the key to look up
 * @return the matching node, or {@code null} when the table is empty or no
 *         node in the target bucket matches
 */
final Node<K,V> getNode(int hash, Object key) {
    final Node<K,V>[] buckets = table;
    if (buckets == null || buckets.length == 0) {
        return null;
    }
    final Node<K,V> head = buckets[(buckets.length - 1) & hash];
    if (head == null) {
        return null;
    }
    // The first node of the bucket is always checked directly.
    final K headKey = head.key;
    if (head.hash == hash
            && (headKey == key || (key != null && key.equals(headKey)))) {
        return head;
    }
    final Node<K,V> second = head.next;
    if (second == null) {
        return null;
    }
    // Tree bucket: delegate the search to getTreeNode.
    if (head instanceof TreeNode) {
        return ((TreeNode<K,V>) head).getTreeNode(hash, key);
    }
    // Plain chain: scan the remaining nodes in order.
    for (Node<K,V> cursor = second; cursor != null; cursor = cursor.next) {
        final K candidate = cursor.key;
        if (cursor.hash == hash
                && (candidate == key || (key != null && key.equals(candidate)))) {
            return cursor;
        }
    }
    return null;
}

resize()

/**
 * Allocates the table on first use or doubles its capacity, then moves every
 * existing node into the new table.
 *
 * @return the table now in use (the old table when it is already at
 *         MAXIMUM_CAPACITY, otherwise the newly allocated one)
 */
final Node<K,V>[] resize() {
    Node<K,V>[] oldTab = table;
    int oldCap = (oldTab == null) ? 0 : oldTab.length;
    int oldThr = threshold;
    int newCap, newThr = 0;
    // A table already exists.
    if (oldCap > 0) {
        // Already at the maximum capacity: stop growing and raise the threshold to
        // Integer.MAX_VALUE so the size check can no longer trigger a resize.
        if (oldCap >= MAXIMUM_CAPACITY) {
            threshold = Integer.MAX_VALUE;
            return oldTab;
        }
        // Otherwise double the capacity. The threshold is doubled as well only when the new
        // capacity is below MAXIMUM_CAPACITY and the old one was at least
        // DEFAULT_INITIAL_CAPACITY; if not, newThr stays 0 and is recomputed below.
        else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                 oldCap >= DEFAULT_INITIAL_CAPACITY)
            newThr = oldThr << 1; // double threshold
    }
    // No table yet, but the constructor stored the initial capacity in threshold:
    // use that value as the table capacity.
    else if (oldThr > 0) // initial capacity was placed in threshold
        newCap = oldThr;
    // Both old capacity and old threshold are 0: fall back to the default capacity and the
    // default threshold (presumably the no-arg constructor path; that constructor is not shown here).
    else {               // zero initial threshold signifies using defaults
        newCap = DEFAULT_INITIAL_CAPACITY;
        newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
    }
    // Threshold not decided yet: derive it from newCap * loadFactor, falling back to
    // Integer.MAX_VALUE when either value reaches MAXIMUM_CAPACITY.
    if (newThr == 0) {
        float ft = (float)newCap * loadFactor;
        newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                  (int)ft : Integer.MAX_VALUE);
    }
    // Publish the new threshold.
    threshold = newThr;
    @SuppressWarnings({"rawtypes","unchecked"})
    // Allocate the new table array.
    Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
    table = newTab;
    // If an old table exists, migrate its contents into the new table.
    if (oldTab != null) {
        for (int j = 0; j < oldCap; ++j) {
            Node<K,V> e;
            // Take the first node of the j-th bucket of the old table, if any.
            if ((e = oldTab[j]) != null) {
                oldTab[j] = null;// clear the old slot so the old array no longer references the bin
                // Single node with no successor: recompute its index for the new capacity
                // and store it directly in the new table.
                if (e.next == null)
                    // New index = stored hash masked with (newCap - 1).
                    newTab[e.hash & (newCap - 1)] = e;
                // Tree bucket: delegate to TreeNode.split (defined elsewhere, not shown here).
                else if (e instanceof TreeNode)
                    ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                // Linked chain: split it into a "lo" list that stays at index j and a "hi"
                // list that moves to index j + oldCap, keeping the nodes' relative order.
                else { // preserve order
                    Node<K,V> loHead = null, loTail = null;
                    Node<K,V> hiHead = null, hiTail = null;
                    Node<K,V> next;
                    do {
                        next = e.next;
                        // The bit selected by oldCap is the one extra bit the new mask looks at:
                        // 0 -> index unchanged (lo list), 1 -> index + oldCap (hi list).
                        if ((e.hash & oldCap) == 0) {
                            if (loTail == null)
                                loHead = e;
                            else
                                loTail.next = e;
                            loTail = e;
                        }
                        else {
                            if (hiTail == null)
                                hiHead = e;
                            else
                                hiTail.next = e;
                            hiTail = e;
                        }
                    } while ((e = next) != null);
                    // Terminate each list and install it in its bucket of the new table.
                    if (loTail != null) {
                        loTail.next = null;
                        newTab[j] = loHead;
                    }
                    if (hiTail != null) {
                        hiTail.next = null;
                        newTab[j + oldCap] = hiHead;
                    }
                }
            }
        }
    }
    return newTab;
}

resize()主要包括三个关键内容：

（1）老table的置为null，方便gc回收释放

（2）新table申请

（3）重新计算记录在新table中的下标（hash值本身不会重新计算，直接复用节点中保存的hash），以将键值对迁移到新table中

上文中已经提到，table的长度确保是2的n次方，那么有意思的是，每次扩容容量变为原来的两倍，那么一个记录在新table中的位置要么就和原来一样，要么就需要迁移到(oldCap + index)的位置上。

两个元素A和B，哈希值分别为3和47，在table长度为4的情况下，因为 length - 1 = 3 = (11)₂，所以A和B的哈希值只有最低两位参与运算来
获得index，A和B的二进制分别为：
3 ： 11
47： 101111

在table的length为4的前提下：
3-> 11 & 11 = 3
47-> 101111 & 000011 = 3

在扩容后，length变为8：
3-> 011 & 111 = 3
47-> 101111 & 000111 = 7
对于3来说，新增的参与运算的位为0，所以index不变，而对于47来说，新增的参与运算的位为1，所以
index需要变为(index + oldCap)