Open vSwitch系列之数据结构解析深入分析Hmap

作为Open vSwitch系列的第一篇文章，选择分析哪个数据结构我思考很久，最后还是选择比较常见而且很基础的结构hmap。在Open vSwitch世界中很多地方都是由hmap组织、关联起来的，因此我们将这部分分析透彻是很有必要的，而且这部分功能相对独立，也可以做日后一个技术积累，最后会有一个demo提供给大家。

/* A hash map. */
struct hmap {
struct hmap_node **buckets; /* Must point to 'one' iff 'mask' == 0. 哈希桶 其实就是哈希数组的意思.指针数组. 当mask=0始终指向成员one的地址*/
 struct hmap_node *one; /* 此变量只有在mask=0的时候才存储数据。如果mask非0则one存储的是0。 */
    size_t mask; /* 数组大小 从0开始 1、用于获取数组下标(mask+1为数组大小)，2、hash操作时确定数组下标方式 mask &amp; hash（mask和hash值进行与操作） */
size_t n; /* 保存该hash-map中的所有hmap_node节点个数 */
};
/* A hash map node, to be embedded inside the data structure being mapped. */
struct hmap_node {
size_t hash;                /* Hash value. 哈希值*/
 struct hmap_node *next;     /* Next in linked list. 单向链表*/
};

1、初始化hmap的函数

/* Initializes 'hmap' as an empty hash table. */
void
hmap_init(struct hmap *hmap)
{
    hmap->buckets = &hmap->one; /* mask=0 将buckets指向one的地址 注意：如果mask一直都是0那么buckets保存的值始终是one的地址。*/
    hmap->one = NULL;
    hmap->mask = 0;
    hmap->n = 0;
}

以全局变量static struct hmap raw_instance_map为例进行讲解，经过上面函数处理之后，就会形成下面数据结构图：

右侧的0xbfffee98，0xbfffee9c，代表内存块的地址，左侧的buckets，one，mask，n代表内存名称。由内存结构中可知，one=null，buckets存储的是one的地址。 2、插入函数

#define hmap_insert(HMAP, NODE, HASH) 
hmap_insert_at(HMAP, NODE, HASH, SOURCE_LOCATOR)
//插入hmap   
static inline void
hmap_insert_at(struct hmap *hmap, struct hmap_node *node, size_t hash,
const char *where)
{
hmap_insert_fast(hmap, node, hash); // 快速插入，在链表头插入。新节点作为链表头
    if (hmap->n / 2 > hmap->mask) {// if成立则需要调整hmap的大小，即hash桶buckets指针数组大小。 注：因为除2操作，保证链表个数最大为2。
hmap_expand_at(hmap, where);
    }
}   
//快速插入
static inline void
hmap_insert_fast(struct hmap *hmap, struct hmap_node *node, size_t hash)
{
struct hmap_node **bucket = &hmap->buckets[hash & hmap->mask]; // buckets是二级指针并且是指针数组，取下标为hash & hmap->mask数组元素的地址
    node->hash = hash;
    node->next = *bucket; // 调整链表
    *bucket = node; //调整链表
    hmap->n++;
}
// 扩充hmap大小
void
hmap_expand_at(struct hmap *hmap, const char *where)
{
    size_t new_mask = calc_mask(hmap->n); //重新生成mask
    if (new_mask > hmap->mask) {
        COVERAGE_INC(hmap_expand);
        resize(hmap, new_mask, where); // 以新的mask调整hmap
    }
}
static void
resize(struct hmap *hmap, size_t new_mask, const char *where)
{
    struct hmap tmp;
    size_t i;
    ovs_assert(is_pow2(new_mask + 1));
    //初始化临时hmap
    hmap_init(&tmp);
    if (new_mask) {
        // (sizeof(*tmp.buckets))*(new_mask + 1) 创建new_mask+1个大小数组，数组存储内容是指针(struct hmap_node *)
tmp.buckets = xmalloc(sizeof *tmp.buckets * (new_mask + 1))
        tmp.mask = new_mask;
        for (i = 0; i <= tmp.mask; i++) {
            tmp.buckets[i] = NULL;
        }
    }
//将已经存在hmap中元素迁移到临时hmap tmp中
    for (i = 0; i <= hmap->mask; i++) {
        struct hmap_node *node, *next;
        int count = 0;
        for (node = hmap->buckets[i]; node; node = next) {
            next = node->next;
            hmap_insert_fast(&tmp, node, node->hash);
            count++;
        }
        if (count > 5) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
            COVERAGE_INC(hmap_pathological);
            VLOG_DBG_RL(&rl, "%s: %d nodes in bucket (%"PRIuSIZE" nodes, %"PRIuSIZE" buckets)",
            where, count, hmap->n, hmap->mask + 1);
        }
    }
    //交换两个hmap的结构并且释放到临时hmap，其实在释放临时的tmp的时候其实释放的是hmap，因为交换了嘛！！！
    hmap_swap(hmap, &tmp);
    hmap_destroy(&tmp);
}

通过函数hmap_insert_fast，插入一个节点。

1）struct hmap_node **bucket = &hmap->buckets[hash & hmap->mask]; 获取数组存储单元的地址。由于此时mask=0，因此此处临时变量bucket的值是one的地址，即0xbfffee9c。

2）临时变量bucket进行*操作后，取值为0，即为NULL。

3）代码*bucket = node; 则将one置为0xbffff03c。

注：当mask=0时，临时变buckets保存的始终是成员变量one的地址。

当插入两条记录的时候，在链表头部进行插入并且修改one指向。在函数hmap_insert_at中执行完插入操作，又以一个判断，是否需要调整hmap的哈希桶的大小，如下：

if (hmap->n / 2 > hmap->mask) {
        hmap_expand_at(hmap, where);
    }

当插入完第二条数据之后，开始进行调整，即函数hmap_expand_at。

1）计算新的new_mask值。 2）调用函数resize进行调整。

此函数主体思想是：首先，申请一个临时变量，并创建new_mask+1个大小的数组(用于存储struct hmap_node *)。其次，然后现有的hmap中的节点链表一个一个的移动到临时tmp，并且以new_mask做映射。再次，将现有hmap与临时hmap进行替换，并且释放临时hmap。

最后内存存储结构如下图所示：

注意：这个地方需要注意下，这个两行代码，

hmap_swap(hmap, &tmp); //将hmap结构中对应数据进行交换
hmap_destroy(&tmp);// 这个释放其实原先hmap中内存。 但是首次buckets存储的内容是one的地址，因此不会调用free函数。

以上就是hmap核心内容，当然hmap中还有很多内容，但是都是辅助内容，这里不再介绍。具体介绍opevswitch数据结构可参考这篇文章OpenvSwitch数据结构解析。具体demo下载。