顺风车运营研发团队 熊浩含
sdiff
SDIFF key [key ...]
返回一个集合的全部成员,该集合是所有给定集合之间的差集。
准确来说,是返回第一个集合与其它集合并集的差集,即有SDIFF A B1 B2 B3...Bn,最终返回A-(B1∪B2∪...∪Bn)
不存在的 key 被视为空集。
原理
M(A 集合元素个数)
N(其余集合元素总数)
n(集合总数)
算法一
将A集合元素在B1...Bn集合间一一查找,将查找不到的元素加入结果集。复杂度=M*n
算法二
先将A集合加入结果集,再将B1...Bn集合中的所有元素在结果集中一一查找,将查到的元素从结果集中删除。复杂度=N
在算法选择时,大的逻辑是:如果A集合比较小,遍历A更划算;反之遍历其余集合更划算
源码
void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,robj *dstkey, int op) {
robj **sets = zmalloc(sizeof(robj*)*setnum);//sets是集合指针的指针,setnum是集合总数
setTypeIterator *si;//迭代器
robj *dstset = NULL;//目标集合,最终返回
sds ele;
int j, cardinality = 0;//临时变量
int diff_algo = 1;//算法标识
for (j = 0; j < setnum; j++) {//循环keys,把取到的set放入sets中
robj *setobj = dstkey ?
lookupKeyWrite(c->db,setkeys[j]) :
lookupKeyRead(c->db,setkeys[j]);
if (!setobj) {//取出set,如果key不存在,跳过
sets[j] = NULL;
continue;
}
if (checkType(c,setobj,OBJ_SET)) {//校验obj的类型,如果不为OBG_SET,错误,直接返回
zfree(sets);
return;
}
sets[j] = setobj;//把取到的set放入sets中
}
/* Select what DIFF algorithm to use.
*
* Algorithm 1 is O(N*M) where N is the size of the element first set
* and M the total number of sets.
*
* Algorithm 2 is O(N) where N is the total number of elements in all
* the sets.
*
* We compute what is the best bet with the current input here. */
if (op == SET_OP_DIFF && sets[0]) {
long long algo_one_work = 0, algo_two_work = 0;
for (j = 0; j < setnum; j++) {
if (sets[j] == NULL) continue;
algo_one_work += setTypeSize(sets[0]);//待求差集的目标集合元素数量*总集合数
algo_two_work += setTypeSize(sets[j]);//所有集合中的元素数量总数
}
/* Algorithm 1 has better constant times and performs less operations
* if there are elements in common. Give it some advantage. */
algo_one_work /= 2;
diff_algo = (algo_one_work <= algo_two_work) ? 1 : 2;
if (diff_algo == 1 && setnum > 1) {
/* 使用的是算法 1 ,对 sets[0] 以外的其他集合进行排序,这样有助于优化算法的性能?*/
qsort(sets+1,setnum-1,sizeof(robj*),
qsortCompareSetsByRevCardinality);
}
}
/* 使用一个临时集合来保存结果集,如果程序执行的是 SUNIONSTORE 命令, 那么这个结果将会成为将来的集合值对象。*/
dstset = createIntsetObject();
if (op == SET_OP_UNION) {
/* 遍历所有集合,将元素添加到结果集里就可以了*/
for (j = 0; j < setnum; j++) {
if (!sets[j]) continue; /* non existing keys are like empty sets */
si = setTypeInitIterator(sets[j]);
while((ele = setTypeNextObject(si)) != NULL) {
if (setTypeAdd(dstset,ele)) cardinality++;
sdsfree(ele);
}
setTypeReleaseIterator(si);
}
} else if (op == SET_OP_DIFF && sets[0] && diff_algo == 1) {
/* DIFF Algorithm 1:
*程序遍历 sets[0] 集合中的所有元素,并将这个元素和其他集合的所有元素进行对比,只有这个元素不存在于其他所有集合时,才将这个元素添加到结果集。
* N*M 操作, N是第一个集合的元素个数,M是其余集合的数量
*/
si = setTypeInitIterator(sets[0]);
while((ele = setTypeNextObject(si)) != NULL) {
for (j = 1; j < setnum; j++) {
if (!sets[j]) continue; /* no key is an empty set. */
if (sets[j] == sets[0]) break; /* same set! */
if (setTypeIsMember(sets[j],ele)) break;
}
if (j == setnum) {
/* There is no other set with this element. Add it. */
setTypeAdd(dstset,ele);
cardinality++;
}
sdsfree(ele);
}
setTypeReleaseIterator(si);
} else if (op == SET_OP_DIFF && sets[0] && diff_algo == 2) {
/* DIFF Algorithm 2:
* 将 sets[0] 的所有元素都添加到结果集中,然后遍历其他所有集合,将相同的元素从结果集中删除。
* 算法复杂度为 O(N) ,N 为所有集合的基数之和。
*/
for (j = 0; j < setnum; j++) {
if (!sets[j]) continue; /* non existing keys are like empty sets */
si = setTypeInitIterator(sets[j]);
while((ele = setTypeNextObject(si)) != NULL) {
if (j == 0) {
if (setTypeAdd(dstset,ele)) cardinality++;
} else {
if (setTypeRemove(dstset,ele)) cardinality--;
}
sdsfree(ele);
}
setTypeReleaseIterator(si);
/* Exit if result set is empty as any additional removal
* of elements will have no effect. */
if (cardinality == 0) break;
}
}
/* Output the content of the resulting set, if not in STORE mode */
if (!dstkey) {
addReplyMultiBulkLen(c,cardinality);
si = setTypeInitIterator(dstset);
while((ele = setTypeNextObject(si)) != NULL) {
addReplyBulkCBuffer(c,ele,sdslen(ele));
sdsfree(ele);
}
setTypeReleaseIterator(si);
decrRefCount(dstset);
} else {
/* If we have a target key where to store the resulting set
* create this key with the result set inside */
int deleted = dbDelete(c->db,dstkey);
if (setTypeSize(dstset) > 0) {
dbAdd(c->db,dstkey,dstset);
addReplyLongLong(c,setTypeSize(dstset));
notifyKeyspaceEvent(NOTIFY_SET,
op == SET_OP_UNION ? "sunionstore" : "sdiffstore",
dstkey,c->db->id);
} else {
decrRefCount(dstset);
addReply(c,shared.czero);
if (deleted)
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
dstkey,c->db->id);
}
signalModifiedKey(c->db,dstkey);
server.dirty++;
}
zfree(sets);
}
这里有个疑问,为啥在算法一中,redis将B1...Bn集合按元素个数从大到小排序?一种解释是:查询A中元素是否在其余集合中时,大集合更容易命中,命中后就不用遍历其余集合了,提升了效率。
但由于每次dictfind都是o(1)的复杂度,这样排一下序效率提升很有限,而且排序本身也有额外的花销,感觉没有必要。
sunion
SUNION key [key ...]
返回一个集合的全部成员,该集合是所有给定集合的并集。
不存在的 key 被视为空集。
原理
将所有集合依次插入结果集,插一遍自然去重,返回结果集即可
源码
sunion底层调用的方法和sdiff一样,详见sdiff。
sinter
SINTER key [key ...]
返回一个集合的全部成员,该集合是所有给定集合的交集。
不存在的 key 被视为空集。
当给定集合当中有一个空集时,结果也为空集(根据集合运算定律)。
原理
找到元素个数最小的集合A
将A中元素一一遍历其它集合,若其它集合都包含该元素,保留该元素
源码
void sinterGenericCommand(client *c, robj **setkeys,unsigned long setnum, robj *dstkey) {
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
robj *dstset = NULL;
sds elesds;
int64_t intobj;
void *replylen = NULL;
unsigned long j, cardinality = 0;
int encoding;
for (j = 0; j < setnum; j++) {
robj *setobj = dstkey ?
lookupKeyWrite(c->db,setkeys[j]) :
lookupKeyRead(c->db,setkeys[j]);
if (!setobj) {
zfree(sets);
if (dstkey) {
if (dbDelete(c->db,dstkey)) {
signalModifiedKey(c->db,dstkey);
server.dirty++;
}
addReply(c,shared.czero);
} else {
addReply(c,shared.emptymultibulk);
}
return;
}
if (checkType(c,setobj,OBJ_SET)) {
zfree(sets);
return;
}
sets[j] = setobj;
}
/* Sort sets from the smallest to largest, this will improve our
* algorithm's performance */
qsort(sets,setnum,sizeof(robj*),qsortCompareSetsByCardinality);
/* The first thing we should output is the total number of elements...
* since this is a multi-bulk write, but at this stage we don't know
* the intersection set size, so we use a trick, append an empty object
* to the output list and save the pointer to later modify it with the
* right length */
if (!dstkey) {
replylen = addDeferredMultiBulkLength(c);
} else {
/* If we have a target key where to store the resulting set
* create this key with an empty set inside */
dstset = createIntsetObject();
}
/* Iterate all the elements of the first (smallest) set, and test
* the element against all the other sets, if at least one set does
* not include the element it is discarded */
/*遍历第一个集合A,并在其它集合中查询A中的所有元素,对于A中的元素,只要有一个集合中不存在该元素,删去该元素*/
si = setTypeInitIterator(sets[0]);
while((encoding = setTypeNext(si,&elesds,&intobj)) != -1) {
for (j = 1; j < setnum; j++) {
if (sets[j] == sets[0]) continue;
if (encoding == OBJ_ENCODING_INTSET) {
/* intset with intset is simple... and fast */
if (sets[j]->encoding == OBJ_ENCODING_INTSET &&
!intsetFind((intset*)sets[j]->ptr,intobj))
{
break;
/* in order to compare an integer with an object we
* have to use the generic function, creating an object
* for this */
} else if (sets[j]->encoding == OBJ_ENCODING_HT) {
elesds = sdsfromlonglong(intobj);
if (!setTypeIsMember(sets[j],elesds)) {
sdsfree(elesds);
break;
}
sdsfree(elesds);
}
} else if (encoding == OBJ_ENCODING_HT) {
if (!setTypeIsMember(sets[j],elesds)) {
break;
}
}
}
/* Only take action when all sets contain the member */
if (j == setnum) {
if (!dstkey) {
if (encoding == OBJ_ENCODING_HT)
addReplyBulkCBuffer(c,elesds,sdslen(elesds));
else
addReplyBulkLongLong(c,intobj);
cardinality++;
} else {
if (encoding == OBJ_ENCODING_INTSET) {
elesds = sdsfromlonglong(intobj);
setTypeAdd(dstset,elesds);
sdsfree(elesds);
} else {
setTypeAdd(dstset,elesds);
}
}
}
}
setTypeReleaseIterator(si);
if (dstkey) {
/* Store the resulting set into the target, if the intersection
* is not an empty set. */
int deleted = dbDelete(c->db,dstkey);
if (setTypeSize(dstset) > 0) {
dbAdd(c->db,dstkey,dstset);
addReplyLongLong(c,setTypeSize(dstset));
notifyKeyspaceEvent(NOTIFY_SET,"sinterstore",
dstkey,c->db->id);
} else {
decrRefCount(dstset);
addReply(c,shared.czero);
if (deleted)
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
dstkey,c->db->id);
}
signalModifiedKey(c->db,dstkey);
server.dirty++;
} else {
setDeferredMultiBulkLength(c,replylen,cardinality);
}
zfree(sets);
}
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。