每天一个lodash方法-difference中的优化探索

这一篇，来探究下difference的优化尝试

涉及优化的部分代码逻辑

  if (comparator) {
   // #1
    includes = arrayIncludesWith;
    isCommon = false;
  }
  else if (values.length >= LARGE_ARRAY_SIZE) {
   // #2
    includes = cacheHas;
    isCommon = false;
    values = new SetCache(values);
  }
  outer:
  while (++index < length) {
    var value = array[index],
        computed = iteratee ? iteratee(value) : value;

    value = (comparator || value !== 0) ? value : 0;
    ...省略
    else if (!includes(values, computed, comparator)) { //如果在缓存里有相关，就移除，否则就push
      result.push(value);
    }
  }
  return result;

comparator优化部分

difference的api虽然不需要传入comparator函数，只需要传入array和values，但是_.differenceWith与difference共用同一套底层实现（baseDifference），所以这套实现需要兼容传入comparator的情况。

_.differenceWith(array, [values], [comparator])

这个方法类似_.difference，区别在于它额外接受一个 comparator，并调用它来比较array和values中的元素。结果值从第一个数组中选取。comparator 调用时接收两个参数：(arrVal, othVal)。

// Demo: `_.isEqual` is passed as the comparator argument of `_.differenceWith`.
var objects = [{ 'x': 1, 'y': 2 }, { 'x': 2, 'y': 1 }];
 
_.differenceWith(objects, [{ 'x': 1, 'y': 2 }], _.isEqual);
// => [{ 'x': 2, 'y': 1 }]

arrayIncludesWith

/**
 * Checks whether `value` matches any element of `array`, using `comparator`
 * to decide what counts as a match.
 *
 * @private
 * @param {Array} [array] The array to inspect; a falsy value is treated as empty.
 * @param {*} value The value to search for.
 * @param {Function} comparator Invoked as `comparator(value, element)` for each
 *  element in order, stopping at the first truthy result.
 * @returns {boolean} Returns `true` if some element matched, else `false`.
 */
function arrayIncludesWith(array, value, comparator) {
  // Read the length once, up front, so a falsy `array` simply skips the loop.
  var size = array ? array.length : 0;

  for (var position = 0; position < size; position++) {
    var isMatch = comparator(value, array[position]);
    if (isMatch) {
      return true;
    }
  }
  return false;
}

而在baseDifference中，includes(values, computed, comparator)是非正常模式校验的核心代码，此时includes就是arrayIncludesWith：它会用comparator把computed与values中的每个元素逐一比较（即comparator(computed, values[i])），只要有一次返回真值，就认为computed存在于values中。

缓存优化部分

SetCache部分

/**
 * Creates a cache holding each element of `values`.
 *
 * Storage is delegated to a `MapCache` instance kept on `this.__data__`; every
 * element is inserted through `this.add`.
 *
 * @param {Array} [values] The values to cache; a falsy value adds nothing.
 */
function SetCache(values) {
  // Upper bound for the insertion loop below.
  var total = values ? values.length : 0;

  // `new MapCache()` is equivalent to the paren-less `new MapCache`.
  this.__data__ = new MapCache();
  for (var i = 0; i < total; i++) {
    this.add(values[i]);
  }
}
// this.__data__ 用来存放数据，它指向的是一个MapCache实例。

看下边的代码，this.add等同于this.__data__.set(value, HASH_UNDEFINED),而SetCache下的this.__data__指向的是一个MapCache实例。

思考，此处为何再抽象出一个MapCache类

// `add` and `push` are two names for the same method, `setCacheAdd`.
SetCache.prototype.add = SetCache.prototype.push = setCacheAdd;
// `has` is implemented by `setCacheHas`.
SetCache.prototype.has = setCacheHas;
/**
 * Adds `value` to the set cache.
 *
 * The value becomes a key in the backing store on `this.__data__`, paired with
 * the `HASH_UNDEFINED` placeholder. `HASH_UNDEFINED` is defined elsewhere; the
 * original note gives it as '__lodash_hash_undefined__', a stand-in for an
 * undefined hash value.
 *
 * @param {*} value The value to cache.
 * @returns {Object} Returns `this`.
 */
function setCacheAdd(value) {
  var backingStore = this.__data__;
  backingStore.set(value, HASH_UNDEFINED);
  return this;
}

MapCache部分

(同样处理了代码结构,看着舒服点)
MapCache类,map可以理解为一组key,value的映射。es6中也引入了Map类型。lodash的作者实现了一个MapCache类。

/**
 * Creates a map cache object to store key-value pairs.
 *
 * @param {Array} [entries] The `[key, value]` pairs to preload; a falsy value
 *  preloads nothing.
 */
function MapCache(entries) {
  var count = entries ? entries.length : 0;

  // Start from empty stores before loading the initial entries.
  this.clear();
  for (var i = 0; i < count; i++) {
    var pair = entries[i];
    this.set(pair[0], pair[1]);
  }
}

/**
 * Stores `value` under `key` in whichever store `getMapData` selects for `key`.
 *
 * @param {*} key The key to set.
 * @param {*} value The value to store.
 * @returns {Object} Returns the cache instance.
 */
MapCache.prototype.set = function mapCacheSet(key, value) {
  var store = getMapData(this, key);
  store.set(key, value);
  return this;
};

/**
 * Removes `key` from the store `getMapData` selects for it.
 *
 * @param {*} key The key to remove.
 * @returns {*} Returns whatever the selected store's `delete` returns.
 */
MapCache.prototype.delete = function mapCacheDelete(key) {
  var store = getMapData(this, key);
  return store['delete'](key);
};

/**
 * Reads the value stored under `key`.
 *
 * @param {*} key The key to look up.
 * @returns {*} Returns whatever the selected store's `get` returns.
 */
MapCache.prototype.get = function mapCacheGet(key) {
  var store = getMapData(this, key);
  return store.get(key);
};

/**
 * Checks whether `key` is present.
 *
 * @param {*} key The key to check.
 * @returns {*} Returns whatever the selected store's `has` returns.
 */
MapCache.prototype.has = function mapCacheHas(key) {
  var store = getMapData(this, key);
  return store.has(key);
};

/**
 * Resets the cache to three fresh, empty stores: `hash`, `map` and `string`.
 */
MapCache.prototype.clear = function mapCacheClear() {
  var freshStores = {
    'hash': new Hash(),
    // Use `Map` when it is truthy, otherwise fall back to `ListCache`.
    'map': new (Map || ListCache)(),
    'string': new Hash()
  };

  this.__data__ = freshStores;
};

new MapCache得到的MapCache实例结构如下，它是由初始化阶段的this.clear()创建的

{
    __data__:{
        hash: {
            __data__:{
            }
        },
        map:{
            __data__:{}
        },
        string:{
            __data__:{
            }
        }
    }
}

lodash中的缓存策略存在3种类型：hash、map、string。对于map类型，优先使用es6的Map，不存在便使用自实现的ListCache。而具体采用哪种方式缓存，取决于isKeyable的返回值以及key的类型。

/**
 * Gets the store of `map` that is responsible for `key`.
 *
 * Keys rejected by `isKeyable` go to the `map` store. Accepted keys go to the
 * `string` store when they are strings and to the `hash` store otherwise.
 *
 * @param {Object} map The cache whose `__data__` holds the three stores.
 * @param {*} key The key being looked up.
 * @returns {*} Returns the selected store.
 */
function getMapData(map, key) {
  var stores = map.__data__;

  if (!isKeyable(key)) {
    return stores.map;
  }
  if (typeof key === 'string') {
    return stores.string;
  }
  return stores.hash;
}

从上边这个函数来看，isKeyable的作用是用来判断是否采用hash（或string）缓存的模式。

/**
 * Decides whether `value` is accepted as a key for the hash-style stores.
 *
 * Strings, numbers, symbols and booleans are accepted unless the value is the
 * string '__proto__'. Of all other values, only `null` is accepted.
 *
 * @param {*} value The candidate key.
 * @returns {boolean} Returns `true` if `value` is accepted, else `false`.
 */
function isKeyable(value) {
  switch (typeof value) {
    case 'string':
    case 'number':
    case 'symbol':
    case 'boolean':
      return value !== '__proto__';
    default:
      // `typeof null` is 'object', so `null` has to be checked explicitly.
      return value === null;
  }
}

isKeyable先判断typeof value的返回值：

(type == 'string' || type == 'number' || type == 'symbol' || type == 'boolean')，如果是上边的几种类型之一
- 再判断 value !== '__proto__'，不等于就返回true，采用hash（key为字符串时采用string）；等于'__proto__'则返回false，采用Map
- 如果不是上边的几种类型，则只有 value === null 时才返回true。这里很好理解，因为 typeof null == 'object'，所以null需要单独判断；其余只要不是null，都采用Map

getMapData：回想上文MapCache的实例结构，getMapData会根据isKeyable(key)的返回值以及key的类型，从下面三种缓存中选出一种并返回

 {
            [hash | string | map]: { __data__: {} }
 }

接下来我们会发现，无论是Hash还是ListCache或者Map对外都维护了统一的接口。拿Hash为例

Hash源码部分

原来的代码有些难看，我适当调整了一下

Hash
- delete
- get
- has
- set
- clear

/**
 * Creates a hash object: a key-value store backed by a plain object kept on
 * `this.__data__`.
 *
 * @param {Array} [entries] The `[key, value]` pairs to preload; a falsy value
 *  preloads nothing.
 */
function Hash(entries) {
  var index = -1,
      length = entries ? entries.length : 0;

  // Start from an empty backing object before loading the initial entries.
  this.clear();
  while (++index < length) {
    var entry = entries[index];
    this.set(entry[0], entry[1]);
  }
}

/**
 * Removes every entry by replacing the backing object.
 *
 * When `nativeCreate` is available (`Object.create`, per the original note) the
 * backing object is created with a `null` prototype; otherwise a plain `{}` is
 * used.
 */
Hash.prototype.clear = function hashClear() {
  this.__data__ = nativeCreate ? nativeCreate(null) : {};
};

/**
 * Removes `key` and its value.
 *
 * @param {string} key The key to remove.
 * @returns {boolean} Returns `true` if the entry existed and was removed,
 *  else `false`.
 */
Hash.prototype.delete = function hashDelete(key) {
  return this.has(key) && delete this.__data__[key];
};

/**
 * Reads the value stored under `key`.
 *
 * @param {string} key The key to look up.
 * @returns {*} Returns the stored value, or `undefined` when there is none.
 */
Hash.prototype.get = function hashGet(key) {
  var data = this.__data__;
  if (nativeCreate) {
    var result = data[key];
    // `set` stores the HASH_UNDEFINED placeholder for `undefined`; undo that here.
    return result === HASH_UNDEFINED ? undefined : result;
  }
  // Plain `{}` backing object: ignore anything inherited from the prototype.
  return hasOwnProperty.call(data, key) ? data[key] : undefined;
};

/**
 * Checks whether `key` has an entry.
 *
 * @param {string} key The key to check.
 * @returns {boolean} Returns `true` if an entry exists, else `false`.
 */
Hash.prototype.has = function hashHas(key) {
  var data = this.__data__;
  // With a prototype-less backing object a plain read is enough, because `set`
  // never stores a raw `undefined` in that mode.
  return nativeCreate ? data[key] !== undefined : hasOwnProperty.call(data, key);
};

/**
 * Stores `value` under `key`.
 *
 * @param {string} key The key to set.
 * @param {*} value The value to store.
 * @returns {Object} Returns the hash instance.
 */
Hash.prototype.set = function hashSet(key, value) {
  var data = this.__data__;
  // Store a placeholder for `undefined` so `has` can still detect the entry.
  data[key] = (nativeCreate && value === undefined) ? HASH_UNDEFINED : value;
  return this;
};

Hash的代码中为何会有两种判断值存在的方式（即下文那行代码中的两个分支）？原因如下

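这是因为Object.create(null)创建了一个没有原型、因而没有任何继承属性的真正的空对象，此时data[key]不会取到原型链上的值，直接用data[key] !== undefined判断即可。

而在不支持Object.create、只能退回使用普通对象{}的环境中，data[key]可能取到原型链上继承来的属性（例如toString），这时就需要hasOwnProperty方法：它会返回一个布尔值，指示对象自身属性中是否具有指定的属性，忽略原型链上的属性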


nativeCreate ? data[key] !== undefined : hasOwnProperty.call(data, key);

ListCache部分

/**
 * Creates a list cache: a key-value store backed by an array of
 * `[key, value]` pairs kept on `this.__data__`.
 *
 * @param {Array} [entries] The `[key, value]` pairs to preload; a falsy value
 *  preloads nothing.
 */
function ListCache(entries) {
  var count = entries ? entries.length : 0;

  // Start from an empty pair list before loading the initial entries.
  this.clear();
  for (var i = 0; i < count; i++) {
    var pair = entries[i];
    this.set(pair[0], pair[1]);
  }
}

/**
 * Removes every entry by replacing the pair list with a new empty array.
 */
ListCache.prototype.clear = function listCacheClear() {
  this.__data__ = [];
};

/**
 * Removes `key` and its value.
 *
 * @param {*} key The key to remove.
 * @returns {boolean} Returns `true` if the entry was removed, `false` if
 *  `assocIndexOf` did not find it.
 */
ListCache.prototype['delete'] = function listCacheDelete(key) {
  var pairs = this.__data__;
  var position = assocIndexOf(pairs, key);

  if (position < 0) {
    return false;
  }
  // The last pair can simply be popped; any other position needs a splice.
  if (position == pairs.length - 1) {
    pairs.pop();
  } else {
    splice.call(pairs, position, 1);
  }
  return true;
};

/**
 * Reads the value stored under `key`.
 *
 * @param {*} key The key to look up.
 * @returns {*} Returns the stored value, or `undefined` when `assocIndexOf`
 *  does not find the key.
 */
ListCache.prototype.get = function listCacheGet(key) {
  var pairs = this.__data__;
  var position = assocIndexOf(pairs, key);

  if (position < 0) {
    return undefined;
  }
  return pairs[position][1];
};

/**
 * Checks whether `key` has an entry.
 *
 * @param {*} key The key to check.
 * @returns {boolean} Returns `true` if `assocIndexOf` finds the key.
 */
ListCache.prototype.has = function listCacheHas(key) {
  var position = assocIndexOf(this.__data__, key);
  return position > -1;
};

/**
 * Stores `value` under `key`, appending a new pair or overwriting the value of
 * the existing one.
 *
 * @param {*} key The key to set.
 * @param {*} value The value to store.
 * @returns {Object} Returns the list cache instance.
 */
ListCache.prototype.set = function listCacheSet(key, value) {
  var pairs = this.__data__;
  var position = assocIndexOf(pairs, key);

  if (position >= 0) {
    pairs[position][1] = value;
  } else {
    pairs.push([key, value]);
  }
  return this;
};