结构差异

  1. dictEntry,该结构中union里面多了double类型d
1
2
3
4
5
6
7
8
9
10
typedef struct dictEntry {
void *key;
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
struct dictEntry *next;
} dictEntry;
  1. dict,多了一个迭代标志
1
2
3
4
5
6
7
typedef struct dict {
dictType *type;
void *privdata;
dictht ht[2];
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
unsigned long iterators; /* number of iterators currently running */
} dict;
  1. 不知道是不是多出来的迭代器(adlist也有)

    迭代器,大多用在rdb,aof,cluster场景中
    有safe的差异

    1
    2
    3
    4
    5
    6
    7
    8
    typedef struct dictIterator {
    dict *d;
    long index;
    int table, safe;
    dictEntry *entry, *nextEntry;
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint; // 在非安全模式模式下,迭代的时候生成一个指纹,在后续再次处理相应键的时候,对比指纹是否一致,判断键值是否被修改
    } dictIterator;

dict一些参数

  1. hash用的是siphash
  2. 强制负载因子 dict_force_resize_ratio = 5
  3. 最小的size是 DICT_HT_INITIAL_SIZE = 4(大小是2^n)
  4. dict_can_resize,在bgsave等情况下会设置成0

dictExpand

在rehash 过程中,dict是不会再次扩充的,那么有没有可能dict填满了,而还没有rehash完?
不会,dicths的table是二维链表,size只是外层bucket的数量,幷不限制每个bucket的存储数量

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/* Expand or create the hash table */
int dictExpand(dict *d, unsigned long size)
{
/* the size is invalid if it is smaller than the number of
* elements already inside the hash table */

// dictIsRehashing 返回 rehashidx > -1
if (dictIsRehashing(d) || d->ht[0].used > size)
return DICT_ERR;

dictht n; /* the new hash table */
// 返回下一个2^n,使得它大于等于size(最小是 DICT_HT_INITIAL_SIZE = 4)
unsigned long realsize = _dictNextPower(size);

/* Rehashing to the same table size is not useful. */
// 如果当前hashtable的使用长度到realsize相同,相当于没有扩充,是没有意义的
if (realsize == d->ht[0].size) return DICT_ERR;

/* Allocate the new hash table and initialize all pointers to NULL */
// 分配新结构内存
n.size = realsize;
n.sizemask = realsize-1;
n.table = zcalloc(realsize*sizeof(dictEntry*));
n.used = 0;

/* Is this the first initialization? If so it's not really a rehashing
* we just set the first hash table so that it can accept keys. */
if (d->ht[0].table == NULL) {
d->ht[0] = n;
return DICT_OK;
}

/* Prepare a second hash table for incremental rehashing */
d->ht[1] = n;
d->rehashidx = 0;
return DICT_OK;
}

dictRehash

  1. 渐进式rehash过程
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
// n是rehash的bucket数量(不包含空的)
int dictRehash(dict *d, int n) {
// 最大访问空的bucket数量
int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0;

while(n-- && d->ht[0].used != 0) {
dictEntry *de, *nextde;

/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
// 因为是从ht[0]迁移到ht[1]的过程,所以rehashidx自然要小于ht[0]的size
assert(d->ht[0].size > (unsigned long)d->rehashidx);
// 访问到空bucket
while(d->ht[0].table[d->rehashidx] == NULL) {
d->rehashidx++;
if (--empty_visits == 0) return 1;
}
// de,非空bucket的(head)dictEntry
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
uint64_t h;

nextde = de->next;
/* Get the index in the new hash table */
// 获取新bucket的索引,值超过sizemask后不是取余而是取与
h = dictHashKey(d, de->key) & d->ht[1].sizemask;
// 将当前 de放到新bucket的头,相当于对应的实现了一个'倒序'
de->next = d->ht[1].table[h];
d->ht[1].table[h] = de;
d->ht[0].used--;
d->ht[1].used++;
de = nextde;
}
d->ht[0].table[d->rehashidx] = NULL;
d->rehashidx++;
}

/* Check if we already rehashed the whole table... */
// 如果used直接是0了,说明直接可以结束rehash了
if (d->ht[0].used == 0) {
zfree(d->ht[0].table);
d->ht[0] = d->ht[1];
_dictReset(&d->ht[1]);
d->rehashidx = -1;
return 0;
}

/* More to rehash... */
return 1;
}
  1. rehash机制dictRehashMilliseconds

    这个返回的rehashes的bucket数量不一定准确,因为rehash完成了就不会把最后一次的具体数量加进去,然后这个毫秒时间差异也不是精确的

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    int dictRehashMilliseconds(dict *d, int ms) {
    long long start = timeInMilliseconds();
    int rehashes = 0;

    while(dictRehash(d,100)) {
    rehashes += 100;
    if (timeInMilliseconds()-start > ms) break;
    }
    return rehashes;
    }
  2. rehash机制_dictRehashStep

    这个就简单了,迭代一个一个bucket去rehash

    1
    2
    3
    static void _dictRehashStep(dict *d) {
    if (d->iterators == 0) dictRehash(d,1);
    }

dict操作

  1. 比如dictAdd

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76

    int dictAdd(dict *d, void *key, void *val)
    {
    dictEntry *entry = dictAddRaw(d,key,NULL);

    if (!entry) return DICT_ERR;
    dictSetVal(d, entry, val);
    return DICT_OK;
    }

    // 查找和替换都通过这个实现
    dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
    {
    long index;
    dictEntry *entry;
    dictht *ht;

    // 如果正在rehash,遇到add就执行一个bucket的rehash
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Get the index of the new element, or -1 if
    * the element already exists. */
    // 这里是获取index的过程
    // 如果existing不为nil,会将找到的值放进去然后直接返回
    // 在addOrFind 和replace的时候,existing不是nil
    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
    return NULL;

    /* Allocate the memory and store the new entry.
    * Insert the element in top, with the assumption that in a database
    * system it is more likely that recently added entries are accessed
    * more frequently. */
    // 如在正在rehash就直接加到新ht里去
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;

    /* Set the hash entry fields. */
    // 涉及dict的type和keyDup
    dictSetKey(d, entry, key);
    return entry;
    }

    // existing如果有值,那么用existing存储找到的值,幷不再返回索引
    static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
    {
    unsigned long idx, table;
    dictEntry *he;
    if (existing) *existing = NULL;

    /* Expand the hash table if needed */
    // 每次都会检查是否要expand
    // 如果正在rehash,返回OK
    // 如果还没初始化,调用dictExpand初始化
    // 在used>size的情况下,如果dict_can_resize或者used/size>dict_force_resize_ratio,进行resize
    if (_dictExpandIfNeeded(d) == DICT_ERR)
    return -1;
    // 这个过程,如果dict不在rehash,就遍历一次
    // 如果rehash了就要遍历两次
    for (table = 0; table <= 1; table++) {
    idx = hash & d->ht[table].sizemask;
    /* Search if this slot does not already contain the given key */
    he = d->ht[table].table[idx];
    while(he) {
    if (key==he->key || dictCompareKeys(d, key, he->key)) {
    if (existing) *existing = he;
    return -1;
    }
    he = he->next;
    }
    if (!dictIsRehashing(d)) break;
    }
    return idx;
    }
  2. dictGenericDelete

    很简单,就是找到幷删除,然后在删除的时候区分是否释放内存
    如果是dictDelete的实现,是要释放内存的
    如果是dictUnlink,则不释放内存,在之后用dictFreeUnlinkedEntry释放内存,用在释放前,需要对它做其他操作而不影响其他增删改查.比如数据库server->db->dict对键dbAsyncDelete,zse里的del

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    uint64_t h, idx;
    dictEntry *he, *prevHe;
    int table;

    if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;

    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);

    for (table = 0; table <= 1; table++) {
    idx = h & d->ht[table].sizemask;
    he = d->ht[table].table[idx];
    prevHe = NULL;
    while(he) {
    if (key==he->key || dictCompareKeys(d, key, he->key)) {
    /* Unlink the element from the list */
    if (prevHe)
    prevHe->next = he->next;
    else
    d->ht[table].table[idx] = he->next;
    if (!nofree) {
    dictFreeKey(d, he);
    dictFreeVal(d, he);
    zfree(he);
    }
    d->ht[table].used--;
    return he;
    }
    prevHe = he;
    he = he->next;
    }
    if (!dictIsRehashing(d)) break;
    }
    return NULL; /* not found */
    }

dict的type和privdata

  1. type,类型特定的函数,差不多是类函数的意思,因为dict有多种使用场景,其中的复制,对比,销毁函数有不同的实现,有点接口的意思
  2. privdata,保存了需要传递给dict三个函数所需要的参数.比如,在dictReplace,如果key存在,执行dictSetVal,就是将旧值取出来,然后在privdata找到相关参数直接赋值过去
    1
    2
    3
    4
    5
    6
    #define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
    (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \
    else \
    (entry)->v.val = (_val_); \
    } while(0)

其他

  1. 随机获取,公平随机()
  2. **dictscan,实现了一个算法,最小消耗的动态迭代(可能有重复,单不会遗漏)