简述
最近研究redis hash中的ziplist数据结构,网上大部分文章都说如果value值变更不大(不超过254字节)则ziplist性能也不错。但是个人有个疑问:ziplist明明是使用了连续内存和去掉双指针来减少额外内存使用,所以如果只要value更新了,不会进行内存重新分配操作吗,如果涉及到内存分配,性能肯定收到影响的。所以带着疑问,我直接clone https://github.com/redis/redis.git
看了C的代码😀
ziplist提供的接口
#ifndef _ZIPLIST_H
#define _ZIPLIST_H
#define ZIPLIST_HEAD 0
#define ZIPLIST_TAIL 1
unsigned char *ziplistNew(void);
unsigned char *ziplistMerge(unsigned char **first, unsigned char **second);
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where);
unsigned char *ziplistIndex(unsigned char *zl, int index);
unsigned char *ziplistNext(unsigned char *zl, unsigned char *p);
unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p);
unsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval);
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p);
unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num);
unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen);
unsigned char *ziplistFind(unsigned char *zl, unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip);
unsigned int ziplistLen(unsigned char *zl);
size_t ziplistBlobLen(unsigned char *zl);
void ziplistRepr(unsigned char *zl);
typedef int (*ziplistValidateEntryCB)(unsigned char* p, void* userdata);
int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
ziplistValidateEntryCB entry_cb, void *cb_userdata);
#ifdef REDIS_TEST
int ziplistTest(int argc, char *argv[]);
#endif
#endif /* _ZIPLIST_H */
可以看到,redis的ziplist是没有带有更新方法的。个人推测,hash是调用了ziplistFind,ziplistDelete和ziplistPush来进行更新操作。
入口:t_hash.c
void hsetCommand(client *c) {
int i, created = 0;
robj *o;
if ((c->argc % 2) == 1) {
addReplyErrorFormat(c, "wrong number of arguments for '%s' command", c->cmd->name);
return;
}
if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return;
hashTypeTryConversion(o, c->argv, 2, c->argc - 1);
for (i = 2; i < c->argc; i += 2)
/*重点这里,进行hash set*/
created += !hashTypeSet(o, c->argv[i]->ptr, c->argv[i + 1]->ptr, HASH_SET_COPY);
/* HMSET (deprecated) and HSET return value is different. */
char *cmdname = c->argv[0]->ptr;
if (cmdname[1] == 's' || cmdname[1] == 'S') {
/* HSET */
addReplyLongLong(c, created);
} else {
/* HMSET */
addReply(c, shared.ok);
}
signalModifiedKey(c, c->db, c->argv[1]);
notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id);
server.dirty += (c->argc - 2) / 2;
}
上面是hsetCommand代码,我们现在研究对hash进行set操作
hset操作
int hashTypeSet(robj *o, sds field, sds value, int flags) {
int update = 0;
if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl, *fptr, *vptr;
zl = o->ptr;
/* Returns an offset to use for iterating with ziplistNext. When the given
* index is negative, the list is traversed back to front. When the list
* doesn't contain an element at the provided index, NULL is returned.
* 返回ziplistNext进行迭代的偏移量*/
fptr = ziplistIndex(zl, ZIPLIST_HEAD);
if (fptr != NULL) {
/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries
* between every comparison. Returns NULL when the field could not be found.
* 拿到指向想查询的数据的指针*/
fptr = ziplistFind(zl, fptr, (unsigned char *) field, sdslen(field), 1);
if (fptr != NULL) {
/* Grab pointer to the value (fptr points to the field) */
vptr = ziplistNext(zl, fptr);
serverAssert(vptr != NULL);
update = 1;
/* Delete value.每次del都会调用ziplistResize,重新分配内存 */
zl = ziplistDelete(zl, &vptr);
/* Insert new value.每次插入都会调用ziplistResize,重新分配内存 */
zl = ziplistInsert(zl, vptr, (unsigned char *) value,
sdslen(value));
}
}
if (!update) {
/* Push new field/value pair onto the tail of the ziplist
* 如果没有更新则插入ziplist的尾部.先插入field,再插入value,分两步 */
zl = ziplistPush(zl, (unsigned char *) field, sdslen(field),
ZIPLIST_TAIL);
zl = ziplistPush(zl, (unsigned char *) value, sdslen(value),
ZIPLIST_TAIL);
}
o->ptr = zl;
/* Check if the ziplist needs to be converted to a hash table
* 如果entry总数量大于redis设置的hash_max_ziplist_entries,则数据结构需要变更为hashTable*/
if (hashTypeLength(o) > server.hash_max_ziplist_entries)
hashTypeConvert(o, OBJ_ENCODING_HT);
} else if (o->encoding == OBJ_ENCODING_HT) {
/*下面是hashTable操作*/
/* 找到需要查询的数据的指针*/
dictEntry *de = dictFind(o->ptr, field);
/*如果存在,则释放指针,标记为需要更新*/
if (de) {
sdsfree(dictGetVal(de));
if (flags & HASH_SET_TAKE_VALUE) {
dictGetVal(de) = value;
value = NULL;
} else {
/*本来数据指向 new sds string*/
dictGetVal(de) = sdsdup(value);
}
update = 1;
} else {
sds f, v;
if (flags & HASH_SET_TAKE_FIELD) {
f = field;
field = NULL;
} else {
f = sdsdup(field);
}
if (flags & HASH_SET_TAKE_VALUE) {
v = value;
value = NULL;
} else {
v = sdsdup(value);
}
/*向目标的hash表添加元素*/
dictAdd(o->ptr, f, v);
}
} else {
serverPanic("Unknown hash encoding");
}
/* Free SDS strings we did not referenced elsewhere if the flags
* want this function to be responsible. */
if (flags & HASH_SET_TAKE_FIELD && field) sdsfree(field);
if (flags & HASH_SET_TAKE_VALUE && value) sdsfree(value);
return update;
}
以上是hset代码,代码中我把注释写好了。
总结
我看了网络上很多对ziplist分析的代码,其实大部分都说得对,只是没有在对内存重新分配上过多解释。ziplist每次delete/insert操作,都会对内存重新分配,而且对于hash使用ziplist来说,update一次性能其实很差的,进行多次内存分配,数据重排处理。所以个人认为,如果你是用hash的ziplist存储数据,是可以的,但是要注意:
数据不能频繁更新,插入以及删除
。特别对于更新,需要ziplistIndex,ziplistFind,ziplistDelete以及ziplistInsert,删除和插入都需要进行内存分配。如果只是进行插入,则直接插入到ziplist的尾部,先插入field,再插入value。
评论区