1 前言
SkipList由William Pugh在他的论文《Skip Lists: A Probabilistic Alternative to Balanced Trees》中提出,是一种可以用来代替平衡树的数据结构。它采取了概率平衡而不是严格强制的平衡,因此插入和删除的算法比平衡树的等效算法简单得多,速度也快得多。
1.1 空间开销
SkipList的底层是单向链表,区别在于每个节点有多层指向后继节点的指针,而每个节点的层数由随机层数算法(以常数p为参数)决定,因此,SkipList的额外空间开销与p相关。
根据随机层数算法,得出每个节点的平均层数为1/(1-p),则含n个节点的SkipList的总体空间开销(forward指针总数)约为n/(1-p)。
1.2 时间复杂度
SkipList的查找、插入和删除的平均(期望)时间复杂度为O(log(n)),最坏情况下为O(n)。
2 Redis中的实现
在Redis中,SkipList主要作为Sorted Set的底层数据结构,它的实现几乎是William Pugh在论文中描述的原始算法的C语言版的翻译,只在以下三个方面进行了修改:
- 该实现允许重复的分数(score)。
- 比较节点时不仅比较键(即score),在score相同时还会比较对应的元素(ele)。
- 有一个后退(backward)指针,且只存在于第一层,所以第一层是双向链表,以此允许从尾到头的遍历。
2.1 数据结构
/* Skip list node. */
typedef struct zskiplistNode {
// Element stored in the node; used as the tie-breaker (via sdscmp) when scores are equal.
sds ele;
// Primary sort key of the node.
double score;
struct zskiplistNode *backward; // backward pointer, maintained for the first level only
struct zskiplistLevel {
struct zskiplistNode *forward; // forward pointer of this level
unsigned long span; // distance (in nodes) to the next node on this level
} level[];
} zskiplistNode;
/* Skip list. */
typedef struct zskiplist {
struct zskiplistNode *header; // dummy head node, created by zslCreate() with ZSKIPLIST_MAXLEVEL levels
struct zskiplistNode *tail; // last node, NULL while the list is empty
// Number of nodes in the list (zslInsert increments it, zslDeleteNode decrements it).
unsigned long length;
// Highest level currently in use; starts at 1.
int level;
} zskiplist;
2.2 SkipList核心API
2.2.1 初始化
/* 创建SkipList */
zskiplist *zslCreate(void) {
int j;
zskiplist *zsl;
zsl = zmalloc(sizeof(*zsl));
zsl->level = 1; // 初始化最高层数为1
zsl->length = 0; // 初始化长度为0
/* 头节点为哑节点,初始化MAX_LEVEL层 */
zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
zsl->header->level[j].forward = NULL;
zsl->header->level[j].span = 0;
}
zsl->header->backward = NULL;
zsl->tail = NULL; // 尾节点初始化为null
return zsl;
}
2.2.2 添加
/* Random level generator for a new node.
 *
 * Starts at level 1 and adds one more level for as long as the low 16 bits
 * of random() are below ZSKIPLIST_P * 0xFFFF. The result is capped, so the
 * return value never exceeds ZSKIPLIST_MAXLEVEL. */
int zslRandomLevel(void) {
    int height = 1;

    for (;;) {
        if (!((random() & 0xFFFF) < (ZSKIPLIST_P * 0xFFFF)))
            break;
        height += 1;
    }
    if (height < ZSKIPLIST_MAXLEVEL)
        return height;
    return ZSKIPLIST_MAXLEVEL;
}
/* Insert a new node holding (score, ele) into the skip list and return it.
 *
 * The element is assumed NOT to be in the list already: duplicated scores
 * are allowed, but re-inserting the same element should never happen, since
 * the caller of zslInsert() is expected to test in the hash table whether
 * the element is already inside or not.
 *
 * score must not be NaN (asserted). ele is handed to zslCreateNode() as is. */
zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) {
    /* update[i]: the predecessor of the new node on level i. */
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL];
    /* rank[i]: number of nodes crossed from the header to reach update[i].
     * It accumulates zskiplistLevel.span values, so it uses the same type
     * (unsigned long) to avoid truncating ranks on very large lists. */
    unsigned long rank[ZSKIPLIST_MAXLEVEL];
    zskiplistNode *x;
    int i, level;

    serverAssert(!isnan(score));

    x = zsl->header;
    /* Each iteration moves one level down, like following a "down" pointer. */
    for (i = zsl->level-1; i >= 0; i--) {
        /* Store the rank that is crossed to reach the insert position:
         * start from what was already accumulated on the level above. */
        rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
        /* Keep moving forward while the next node exists and sorts before
         * the new one: smaller score, or equal score and smaller element. */
        while (x->level[i].forward &&
                (x->level[i].forward->score < score ||
                    (x->level[i].forward->score == score &&
                    sdscmp(x->level[i].forward->ele,ele) < 0)))
        {
            rank[i] += x->level[i].span;
            x = x->level[i].forward;
        }
        /* When the loop stops, x is the predecessor on this level. */
        update[i] = x;
    }

    level = zslRandomLevel();
    /* If the new node is taller than the list, the header becomes its
     * predecessor on the newly used levels, with a span covering the whole
     * list for now (it is split in the loop below). */
    if (level > zsl->level) {
        for (i = zsl->level; i < level; i++) {
            rank[i] = 0;
            update[i] = zsl->header;
            update[i]->level[i].span = zsl->length;
        }
        zsl->level = level;
    }

    x = zslCreateNode(level,score,ele);
    for (i = 0; i < level; i++) {
        /* Link x right after update[i] on this level. */
        x->level[i].forward = update[i]->level[i].forward;
        update[i]->level[i].forward = x;

        /* Update the span covered by update[i] as x is inserted here:
         * (rank[0] - rank[i]) nodes lie between update[i] and x. */
        x->level[i].span = update[i]->level[i].span - (rank[0] - rank[i]);
        update[i]->level[i].span = (rank[0] - rank[i]) + 1;
    }

    /* Increment span for untouched levels: their links now jump over x. */
    for (i = level; i < zsl->level; i++) {
        update[i]->level[i].span++;
    }

    /* Maintain the first-level backward pointers and the tail. */
    x->backward = (update[0] == zsl->header) ? NULL : update[0];
    if (x->level[0].forward)
        x->level[0].forward->backward = x;
    else
        zsl->tail = x;
    zsl->length++;
    return x;
}
2.2.3 删除
/* Unlink node x from the skip list (x itself is not freed here).
 * Internal function used by zslDelete, zslDeleteRangeByScore and
 * zslDeleteRangeByRank.
 *
 * update[i] must be the predecessor of x on level i, for every level in use
 * by the list. */
void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) {
    for (int lvl = 0; lvl < zsl->level; lvl++) {
        zskiplistNode *prev = update[lvl];

        if (prev->level[lvl].forward != x) {
            /* x does not reach this level (it has fewer levels than the
             * list), so the link from prev jumps over x: nothing to relink,
             * the jump just covers one node less. */
            prev->level[lvl].span -= 1;
            continue;
        }
        /* prev links directly to x: absorb x's span and bypass x. */
        prev->level[lvl].span += x->level[lvl].span - 1;
        prev->level[lvl].forward = x->level[lvl].forward;
    }

    /* Fix the first-level backward chain, or the tail if x was last. */
    zskiplistNode *succ = x->level[0].forward;
    if (succ != NULL)
        succ->backward = x->backward;
    else
        zsl->tail = x->backward;

    /* Drop top levels that became empty, always keeping at least one. */
    int top = zsl->level;
    while (top > 1 && zsl->header->level[top - 1].forward == NULL)
        top--;
    zsl->level = top;

    zsl->length--;
}
/* Delete the element with matching score/element from the skip list.
 * Returns 1 if the node was found and deleted, otherwise 0.
 *
 * If 'node' is NULL the deleted node is freed by zslFreeNode(); otherwise it
 * is only unlinked and *node is set to the node pointer, so that the caller
 * can reuse the node (including the referenced SDS string at node->ele).
 * When nothing is found, *node is left untouched. */
int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL];
    zskiplistNode *cur = zsl->header;

    /* Record, for every level, the last node sorting before (score, ele). */
    for (int lvl = zsl->level - 1; lvl >= 0; lvl--) {
        for (;;) {
            zskiplistNode *next = cur->level[lvl].forward;
            if (next == NULL)
                break;
            if (!(next->score < score ||
                  (next->score == score && sdscmp(next->ele, ele) < 0)))
                break;
            cur = next;
        }
        update[lvl] = cur;
    }

    /* Several elements may share the same score: the candidate must match
     * both the score and the element. */
    zskiplistNode *candidate = cur->level[0].forward;
    if (candidate == NULL || score != candidate->score ||
        sdscmp(candidate->ele, ele) != 0)
        return 0; /* not found */

    zslDeleteNode(zsl, candidate, update);
    if (node != NULL)
        *node = candidate;
    else
        zslFreeNode(candidate);
    return 1;
}
2.2.4 范围判断
/* Quick test of whether some part of the list may fall inside 'range'.
 *
 * Returns 0 when the range itself is always empty, when the list is empty,
 * when the tail's score fails zslValueGteMin() (presumably: last score is
 * below the lower bound), or when the first node's score fails
 * zslValueLteMax() (presumably: first score is above the upper bound).
 * Returns 1 otherwise. */
int zslIsInRange(zskiplist *zsl, zrangespec *range) {
    /* Ranges that can never contain anything. */
    if (range->min > range->max)
        return 0;
    if (range->min == range->max && (range->minex || range->maxex))
        return 0;

    /* The largest score must reach the lower bound. */
    zskiplistNode *last = zsl->tail;
    if (last == NULL)
        return 0;
    if (!zslValueGteMin(last->score, range))
        return 0;

    /* The smallest score must not exceed the upper bound. */
    zskiplistNode *first = zsl->header->level[0].forward;
    if (first == NULL)
        return 0;
    if (!zslValueLteMax(first->score, range))
        return 0;

    return 1;
}
/* Find the first node that is contained in the specified range.
 * Returns NULL when no element is contained in the range. */
zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range) {
    /* If everything is out of range, return early. */
    if (!zslIsInRange(zsl, range))
        return NULL;

    zskiplistNode *cur = zsl->header;
    for (int lvl = zsl->level - 1; lvl >= 0; lvl--) {
        /* Go forward while the next node is still *OUT* of range, i.e. it
         * does not pass the lower-bound test yet. */
        zskiplistNode *next;
        while ((next = cur->level[lvl].forward) != NULL &&
               !zslValueGteMin(next->score, range))
            cur = next;
    }

    /* zslIsInRange() succeeded, so the next node cannot be NULL. */
    cur = cur->level[0].forward;
    serverAssert(cur != NULL);

    /* The candidate must also pass the upper-bound test. */
    return zslValueLteMax(cur->score, range) ? cur : NULL;
}
/* Find the last node that is contained in the specified range.
 * Returns NULL when no element is contained in the range. */
zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range) {
    /* If everything is out of range, return early. */
    if (!zslIsInRange(zsl, range))
        return NULL;

    zskiplistNode *cur = zsl->header;
    for (int lvl = zsl->level - 1; lvl >= 0; lvl--) {
        /* Go forward while the next node still passes the upper-bound
         * test, i.e. while it is *IN* range with respect to max. */
        for (;;) {
            zskiplistNode *next = cur->level[lvl].forward;
            if (next == NULL || !zslValueLteMax(next->score, range))
                break;
            cur = next;
        }
    }

    /* The walk started from the header, so this node cannot be NULL. */
    serverAssert(cur != NULL);

    /* The candidate must also pass the lower-bound test. */
    if (zslValueGteMin(cur->score, range))
        return cur;
    return NULL;
}
2.2.5 范围删除
/* Delete all the elements whose score lies between range->min and
 * range->max from the skip list and return how many were removed.
 *
 * Each bound is inclusive unless the matching flag (range->minex or
 * range->maxex) is set, in which case it is exclusive. A node is deleted
 * when its score satisfies BOTH the lower and the upper bound.
 *
 * Note that this function takes the reference to the hash table view of the
 * sorted set, in order to remove the elements from the hash table too. */
unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, dict *dict) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL];
    zskiplistNode *cur = zsl->header;
    unsigned long removed = 0;

    /* Skip every node below the lower bound, remembering the predecessor on
     * each level so that the nodes after it can be unlinked. */
    for (int lvl = zsl->level - 1; lvl >= 0; lvl--) {
        for (;;) {
            zskiplistNode *next = cur->level[lvl].forward;
            if (next == NULL)
                break;
            int below_min = range->minex ? next->score <= range->min
                                         : next->score < range->min;
            if (!below_min)
                break;
            cur = next;
        }
        update[lvl] = cur;
    }

    /* cur is the last node with score < (or <=) min: start right after it. */
    cur = cur->level[0].forward;

    /* Delete nodes while they satisfy the upper bound. */
    while (cur != NULL) {
        int within_max = range->maxex ? cur->score < range->max
                                      : cur->score <= range->max;
        if (!within_max)
            break;

        /* Save the successor before the node is unlinked and released. */
        zskiplistNode *next = cur->level[0].forward;
        zslDeleteNode(zsl, cur, update);
        dictDelete(dict, cur->ele);
        zslFreeNode(cur); /* Here is where cur->ele is actually released. */
        removed++;
        cur = next;
    }
    return removed;
}
/* Delete all the elements with rank between start and end from the skip
 * list and return how many were removed.
 * Start and end are inclusive. Note that start and end need to be 1-based.
 * Every removed element is also deleted from 'dict'. */
unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, dict *dict) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL];
    zskiplistNode *cur = zsl->header;
    unsigned long pos = 0;      /* rank of cur (0 for the header) */
    unsigned long removed = 0;

    /* Advance while the next node's rank is still below start, recording
     * the predecessor on each level for the deletions that follow. */
    for (int lvl = zsl->level - 1; lvl >= 0; lvl--) {
        while (cur->level[lvl].forward != NULL &&
               (pos + cur->level[lvl].span) < start) {
            pos += cur->level[lvl].span;
            cur = cur->level[lvl].forward;
        }
        update[lvl] = cur;
    }

    /* Step onto the first candidate; pos now tracks the candidate's rank. */
    pos++;
    cur = cur->level[0].forward;

    /* Delete while the rank is within end. The same update[] stays valid:
     * each deleted node is the direct successor of update[0], and span is
     * the distance to the NEXT node, so later nodes are not affected. */
    while (cur != NULL && pos <= end) {
        zskiplistNode *next = cur->level[0].forward;
        zslDeleteNode(zsl, cur, update);
        dictDelete(dict, cur->ele);
        zslFreeNode(cur);
        removed++;
        pos++;
        cur = next;
    }
    return removed;
}
2.2.6 排名
/* Find the rank for an element by both score and key.
 * Returns 0 when the element cannot be found, rank otherwise.
 * Note that the rank is 1-based due to the span of zsl->header to the
 * first element.
 *
 * The search walks by (score, ele); the final match test compares only the
 * element. */
unsigned long zslGetRank(zskiplist *zsl, double score, sds ele) {
    zskiplistNode *cur = zsl->header;
    unsigned long crossed = 0;   /* sum of the spans followed so far */

    for (int lvl = zsl->level - 1; lvl >= 0; lvl--) {
        /* Move forward while the next node sorts before or at (score, ele),
         * accumulating the span of every link that is followed. */
        for (;;) {
            zskiplistNode *next = cur->level[lvl].forward;
            if (next == NULL)
                break;
            if (!(next->score < score ||
                  (next->score == score && sdscmp(next->ele, ele) <= 0)))
                break;
            crossed += cur->level[lvl].span;
            cur = next;
        }

        /* cur might be zsl->header, so test that its element is non-NULL. */
        if (cur->ele != NULL && sdscmp(cur->ele, ele) == 0)
            return crossed;
    }
    return 0;
}
/* Finds an element by its rank. The rank argument needs to be 1-based.
 * Returns NULL when no node has that rank.
 *
 * NOTE(review): with rank == 0 the `pos == rank` test already holds on the
 * header before any link with a non-zero span is followed, so the dummy
 * header would be returned; callers are expected to pass rank >= 1. */
zskiplistNode* zslGetElementByRank(zskiplist *zsl, unsigned long rank) {
    zskiplistNode *cur = zsl->header;
    unsigned long pos = 0;   /* number of nodes traversed so far */

    for (int lvl = zsl->level - 1; lvl >= 0; lvl--) {
        /* Follow links on this level as long as they do not overshoot. */
        while (cur->level[lvl].forward != NULL) {
            unsigned long reach = pos + cur->level[lvl].span;
            if (reach > rank)
                break;
            pos = reach;
            cur = cur->level[lvl].forward;
        }
        if (pos == rank)
            return cur;
    }
    return NULL;
}
3 参考文献
[1] William Pugh, Skip Lists: A Probabilistic Alternative to Balanced Trees
https://15721.courses.cs.cmu....
[2] Redis-6.0.6 source codes
https://github.com/redis/redi...
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。