实现 O(n) 时间的高斯模糊算法

访问我的个人博客网站，获取更好的阅读体验

本文主要是针对高斯模糊算法进行优化，最后在线性时间内实现高斯模糊效果。当然，该算法并非本人原创，实现过程中也借鉴了一些文章和论文，相关链接都在文末贴出

搭配阅读，我写了一个简单的 Demo， Demo 链接， Demo 代码地址，可以在 Demo 中测试各种模糊效果及其耗时

高斯模糊

高斯模糊可以实现非常平滑的模糊效果，对于每一个像素，它会取它本身及其周围一定范围内的像素，去计算模糊后的效果，当然，每个像素在计算过程中所占的权重并不相同，距离其本身越近，权重越高 —— 这当然是合理的。权重的计算公式如下：

公式中的 σ(sigma) 是人为定值的，值越大，生成的图像越模糊

当然，并非所有像素都会互相影响，以当前像素为圆心，取一个半径，在这个半径以内的像素才会影响该像素的模糊效果。这个半径与 sigma 成正比，不过没有统一的公式，在这个提问下，有回答说 NVidia 采用的是 radius=sigma*3 的计算方式，我在这也使用了这个公式

下面是其基本实现，时间复杂度为 O (nr^2)，n 为图片大小

function gaussianBlur(src, dest, width, height, sigma) {
  const radius = Math.round(sigma * 3) // kernel size
  for (let i = 0; i < width; i++) {
    for (let j = 0; j < height; j++) {
      let accumulation = 0
      let weightSum = 0
      for (let dx = -radius; dx <= radius; dx++) {
        for (let dy = -radius; dy <= radius; dy++) {
          const x = Math.min(width - 1, Math.max(0, i + dx))
          const y = Math.min(height - 1, Math.max(0, j + dy))
          // calc weight
          const weight =
            Math.exp(
              -(Math.pow(dx, 2) + Math.pow(dy, 2)) / (2 * Math.pow(sigma, 2))
            ) /
            (Math.PI * 2 * Math.pow(sigma, 2))
          accumulation += src[y * width + x] * weight
          weightSum += weight
        }
      }
      dest[j * width + i] = Math.round(accumulation / weightSum)
    }
  }
}

效果如下，也可以去 Demo 中尝试 (高斯模糊可能耗时较久，耐心等待吧)

原图：

高斯模糊后 (sigma=5)，5411ms (MBP, 13-inch, 2017, 8G)：

方框模糊

最简单的方框模糊 (box blur) 没有权重，只要在半径内，权重都是相同的

function simpleBoxBlur(src, dest, width, height, radius) {
  for (let i = 0; i < width; i++) {
    for (let j = 0; j < height; j++) {
      let accumulation = 0
      for (let dx = -radius; dx <= radius; dx++) {
        for (let dy = -radius; dy <= radius; dy++) {
          const x = Math.min(width - 1, Math.max(0, i + dx))
          const y = Math.min(height - 1, Math.max(0, j + dy))
          accumulation += src[y * width + x]
        }
      }
      dest[j * width + i] = Math.round(
        accumulation / Math.pow(2 * radius + 1, 2)
      )
    }
  }
}

radius=15 的效果 (775ms)：
简单方框模糊

可以看出，虽然耗时较高斯模糊短，但其效果是很不平滑的。那有没有办法让方框模糊拥有高斯模糊一样的品质呢，这篇论文提出了方案

根据论文中所述，可以通过多次的方框模糊实现高斯模糊，而这多次方框模糊的核由以下步骤得出：

由下方公式 (n 为模糊次数) 计算得出 wideal，再计算 wl 和 wu，wl 为第一个小于等于 wideal 的奇数，wu 是第一个大于等于 wideal 的奇数 (很明显 wu = wl+2)(要求奇数的原因是 size=radius*2+1)
计算 m：
前 m 次用 wl 作核的大小，其余的 n-m 次用 wu 作核的大小

化作代码：

function genKernelsForGaussian(sigma, n) {
  const wIdeal = Math.sqrt((12 * Math.pow(sigma, 2)) / n + 1)
  let wl = Math.floor(wIdeal)
  if (wl % 2 === 0) {
    wl--
  }
  const wu = wl + 2
  let m =
    (12 * Math.pow(sigma, 2) - n * Math.pow(wl, 2) - 4 * n * wl - 3 * n) /
    (-4 * wl - 4)
  m = Math.round(m)
  const sizes = []
  for (let i = 0; i < n; i++) {
    sizes.push(i < m ? wl : wu)
  }
  return sizes
}

function boxBlur(src, dest, width, height, sigma) {
  const kernels = genKernelsForGaussian(sigma, 3)
  // radius * 2 + 1 = kernel size
  simpleBoxBlur(src, dest, width, height, (kernels[0] - 1) / 2)
  // 注意这里要颠倒 src 和 dest 的顺序
  simpleBoxBlur(dest, src, width, height, (kernels[1] - 1) / 2)
  simpleBoxBlur(src, dest, width, height, (kernels[2] - 1) / 2)
}

效果和高斯模糊基本一致 (227ms)：
box
虽然时间复杂度与高斯模糊相同，但得到的半径更小，在 sigma=5，n=3 的情况下，radius=[4, 4, 5]，高斯模糊则是 15；同时还不用计算复杂的 weight，所以从实际结果上看，速度要优于比高斯模糊

水平 & 垂直模糊

在这篇文章中提到方框模糊可以用水平模糊 + 垂直模糊替代实现。要实现水平 / 垂直模糊很简单，只需考虑水平 / 垂直线上的像素即可：

// horizontal motion blur
function hMotionBlur(src, dest, width, height, radius) {
  for (let i = 0; i < width; i++) {
    for (let j = 0; j < height; j++) {
      let accumulation = 0
      for (let dx = -radius; dx <= radius; dx++) {
        const x = Math.min(width - 1, Math.max(0, i + dx))
        accumulation += src[j * width + x]
      }
      dest[j * width + i] = Math.round(accumulation / (2 * radius + 1))
    }
  }
}

// vertical motion blur
function vMotionBlur(src, dest, width, height, radius) {
  for (let i = 0; i < width; i++) {
    for (let j = 0; j < height; j++) {
      let accumulation = 0
      for (let dy = -radius; dy <= radius; dy++) {
        const y = Math.min(height - 1, Math.max(0, j + dy))
        accumulation += src[y * width + i]
      }
      dest[j * width + i] = Math.round(accumulation / (2 * radius + 1))
    }
  }
}

应用此优化的模糊算法，时间复杂度可以优化到 O (nr)：

function _mutantBoxBlur(src, dest, width, height, radius) {
  hMotionBlur(dest, src, width, height, radius)
  vMotionBlur(src, dest, width, height, radius)
}

function mutantBoxBlur(src, dest, width, height, sigma) {
  const boxes = genKernelsForGaussian(sigma, 3)
  for (let i = 0; i < src.length; i++) {
    dest[i] = src[i]
  }
  _mutantBoxBlur(src, dest, width, height, (boxes[0] - 1) / 2)
  _mutantBoxBlur(src, dest, width, height, (boxes[1] - 1) / 2)
  _mutantBoxBlur(src, dest, width, height, (boxes[2] - 1) / 2)
}

效果如下 (82ms)：
fast

终极优化

以水平模糊为例，相邻两个像素点计算公式（这里为了方便理解，使用了二位数组，实际计算中是一唯的；并且没有考虑边界情况）：

公式3

公式4

可以看出，两个公式的差别很小：desti 的公式多了一个 srci，desti 的多了一个 srci

那么就有
公式5

所以，在计算模糊像素时，不必再遍历半径 r 内的像素，而是直接与上一像素关联，节省了 O (r) 的时间复杂度。可以借此优化将时间复杂度由 O (nr) 降到 O (n)，最终的代码:

// horizontal fast motion blur
function hFastMotionBlur(src, dest, width, height, radius) {
  for (let i = 0; i < height; i++) {
    let accumulation = radius * src[i * width]
    for (let j = 0; j <= radius; j++) {
      accumulation += src[i * width + j]
    }
    dest[i * width] = Math.round(accumulation / (2 * radius + 1))
    for (let j = 1; j < width; j++) {
      const left = Math.max(0, j - radius - 1)
      const right = Math.min(width - 1, j + radius)
      accumulation =
        accumulation + (src[i * width + right] - src[i * width + left])
      dest[i * width + j] = Math.round(accumulation / (2 * radius + 1))
    }
  }
}

// vertical fast motion blur
function vFastMotionBlur(src, dest, width, height, radius) {
  for (let i = 0; i < width; i++) {
    let accumulation = radius * src[i]
    for (let j = 0; j <= radius; j++) {
      accumulation += src[j * width + i]
    }
    dest[i] = Math.round(accumulation / (2 * radius + 1))
    for (let j = 1; j < height; j++) {
      const top = Math.max(0, j - radius - 1)
      const bottom = Math.min(height - 1, j + radius)
      accumulation =
        accumulation + src[bottom * width + i] - src[top * width + i]
      dest[j * width + i] = Math.round(accumulation / (2 * radius + 1))
    }
  }
}

function _fastBlur(src, dest, width, height, radius) {
  hFastMotionBlur(dest, src, width, height, radius)
  vFastMotionBlur(src, dest, width, height, radius)
}

function fastBlur(src, dest, width, height, sigma) {
  const boxes = genKernelsForGaussian(sigma, 3)
  for (let i = 0; i < src.length; i++) {
    dest[i] = src[i]
  }
  _fastBlur(src, dest, width, height, (boxes[0] - 1) / 2)
  _fastBlur(src, dest, width, height, (boxes[1] - 1) / 2)
  _fastBlur(src, dest, width, height, (boxes[2] - 1) / 2)
}

效果 (20ms)：
fast

总结

先后通过多次方框模糊、水平 + 垂直模糊代替方框模糊、对水平 / 垂直模糊计算过程的优化，逐步将高斯模糊的耗时降低，最后得到了只与图片大小相关的 O (n) 时间复杂度算法

实现 O(n) 时间的高斯模糊算法

高斯模糊

方框模糊

水平 & 垂直模糊

终极优化

总结

参考

Bowen

引用和评论

使用 regex-vis.com 可视化正则表达式

2025年最新反编译微信小程序的教程及工具

手写一个动态海洋和天空效果的vue hooks

你可能不知道的图片加载相关知识

原生JS大揭秘—JS代码执行原理解刨

原生electron起步-从零到一完成构建和打包

LRU算法，你别跑，我就要吃透你