主要应用场景

content-diff 主要配合 tiptap 富文本内容渲染组件 custom-editor-render 使用,用于对比两份富文本内容的差异并高亮展示。

需要安装的外部依赖(此外,index.ts 中的文本对比还用到了 diff 库,同样需要安装):

"similarity":"^1.2.1",

"hex-rgb":"^5.0.0",

"lodash-es":"^4.17.21",

整体demo如下:

<script setup lang="ts">
import { diff, DiffPatches } from './html-diff'
import {
  createEmptyDom,
  handleAddElement,
  handleChangeAttribute,
  handleRemoveElement,
  handleReplaceElement,
  handleTextAdded,
  handleTextModified,
  handleTextRemoved,
  formatMixedDom,
  handleChangeTag,
} from '.'

// Component props: the two HTML strings to compare and a toggle for the legend.
const props = withDefaults(
  defineProps<{
    originContent: string
    changedContent: string
    /**
     * Whether to show the colour legend in the top-right corner.
     */
    showFlag?: boolean
  }>(),
  { showFlag: true },
)

// Turn each prop into a ref so it can be read as `.value` below and in the template.
const { originContent, changedContent, showFlag } = toRefs(props)

/**
 * HTML of the changed content, annotated with diff markers.
 *
 * Both inputs are parsed into detached containers, diffed, and the resulting
 * patches are applied to the changed container in two passes: patches that
 * only annotate or insert run first, patches whose handler replaces the
 * patched node run afterwards.
 */
const diffContent = computed(() => {
  const $origin = createEmptyDom('div')
  $origin.innerHTML = originContent.value
  const $changed = createEmptyDom('div')
  $changed.innerHTML = changedContent.value

  console.time('diff compare')
  const patches = diff($origin, $changed)
  console.timeEnd('diff compare')

  // Patches that replace the current node; they need post-processing.
  const deferred: DiffPatches[] = []

  for (const patch of patches) {
    if (patch.action === 'addElement') {
      handleAddElement(patch)
    } else if (
      patch.action === 'addAttribute' ||
      patch.action === 'modifyAttribute' ||
      patch.action === 'removeAttribute'
    ) {
      handleChangeAttribute(patch)
    } else if (patch.action === 'tagChanged') {
      handleChangeTag(patch)
    } else if (patch.action === 'removeElement') {
      handleRemoveElement(patch)
    } else if (patch.action === 'removeTextElement') {
      handleTextRemoved(patch)
    } else if (
      patch.action === 'replaceElement' ||
      patch.action === 'addTextElement' ||
      patch.action === 'modifyTextElement'
    ) {
      deferred.push(patch)
    } else {
      console.info('Unknown diff type: ', patch)
    }
  }

  for (const patch of deferred) {
    if (patch.action === 'replaceElement') {
      // Replaces the current node.
      handleReplaceElement(patch)
    } else if (patch.action === 'addTextElement') {
      // Replaces the current node.
      handleTextAdded(patch)
    } else if (patch.action === 'modifyTextElement') {
      // May replace the current node.
      handleTextModified(patch)
    }
  }

  formatMixedDom($changed)
  return $changed.innerHTML
})
</script>

<template>
  <div class="wrapper">
    <div v-if="showFlag" class="diff-content diff-desc">
      <span>标识:</span>
      <ins>该行被添加。</ins>  <del>该行被删除。</del> 
      <span data-diff-change>格式已经改变。</span>
    </div>
    <y-editor-renderer :enable-fixed-header="false" :content="diffContent" class="diff-content">
    </y-editor-renderer>
  </div>
</template>

<style lang="scss" scoped>
// Positioning context for the absolutely positioned legend.
.wrapper {
  position: relative;
}

// Legend placed above the top-right corner of the wrapper.
.diff-desc {
  position: absolute;
  top: -23px;
  right: 0;

  * {
    display: inline-block;
    font-size: 12px;
    padding: 1px 5px !important;
  }
}

.diff-content {
  // Handle content that contains line breaks.
  white-space: inherit;

  // Deleted content: red background.
  :deep(del),
  :deep(del table tr),
  :deep(del table td),
  :deep(*[data-del]) {
    background-color: #ffe7e7 !important;
  }

  // Inserted content: green background, no underline.
  :deep(ins),
  :deep(ins table tr),
  :deep(ins table td),
  :deep(*[data-ins]) {
    background-color: #ddfade !important;
    text-decoration: none;
  }

  // Images are marked with a coloured border.
  :deep(img[data-del]) {
    border: 8px solid #ffe7e7;
    box-sizing: content-box;
  }

  :deep(img[data-ins]) {
    border: 8px solid #ddfade;
    box-sizing: content-box;
  }

  // Format-only changes: blue background.
  :deep(*[data-diff-change]) {
    background-color: #d6f0ff !important;
  }

  :deep(ins),
  :deep(del),
  :deep(*[data-diff-change]) {
    padding: 0 3px;
  }
}
</style>

相关函数抽离

index.ts


import { Change, diffChars, diffWordsWithSpace } from 'diff'
import { DiffPatches, NodeTypes } from './html-diff'

// Matches a string made up only of ASCII letters, digits and , . ? ' " _ ( ).
// NOTE(review): whitespace is not in the character class, so text containing
// spaces never matches — confirm that is intended for the word-level diff.
const ENGLISH_WORDS_REG = /^[a-z0-9,.?'"_()]+$/i

/**
 * Creates a new, detached element with the given tag name.
 *
 * @param tag Tag name passed to `document.createElement`.
 * @returns The newly created element.
 */
export const createEmptyDom = (tag: string) => document.createElement(tag)

/**
 * Marks an added text node by replacing it with an `<ins>` element that holds
 * the same text.
 *
 * @param diff Patch describing the added text node.
 */
export const handleTextAdded = (diff: DiffPatches & { action: 'addTextElement' }) => {
  const $ins = createEmptyDom('ins')
  // nodeValue is plain text; assigning it as text (not as HTML) keeps literal
  // `<`, `>` and `&` characters from being re-parsed as markup.
  $ins.textContent = diff.element.nodeValue || ''
  diff.element.replaceWith($ins)
}

/**
 * Re-inserts a removed text node into the changed document as a `<del>`
 * element tagged with `data-del="remove-text"`.
 *
 * The anchor is chosen in this order: after `diff.after`, before
 * `diff.before`, appended to `diff.in`. If none is present the text is only
 * logged.
 *
 * @param diff Patch describing the removed text node.
 */
export const handleTextRemoved = (diff: DiffPatches & { action: 'removeTextElement' }) => {
  const $marker = createEmptyDom('del')
  $marker.innerText = diff.element.nodeValue || ''
  $marker.dataset.del = 'remove-text'

  const { after, before, in: $container } = diff

  if (after) {
    after.parentNode?.insertBefore($marker, after.nextSibling)
    return
  }

  if (before) {
    before.parentNode?.insertBefore($marker, before)
    return
  }

  if ($container) {
    $container.appendChild($marker)
    return
  }

  console.log('Removed text lost: ', diff.element.textContent)
}

/**
 * Diffs two strings, word by word (whitespace-aware) when the old text
 * matches ENGLISH_WORDS_REG and character by character otherwise.
 *
 * @param oldText Text before the change.
 * @param newText Text after the change.
 * @returns The list of changes produced by the chosen diff function.
 */
const getTextDiff = (oldText: string, newText: string) => {
  if (ENGLISH_WORDS_REG.test(oldText)) {
    return diffWordsWithSpace(oldText, newText)
  }
  return diffChars(oldText, newText)
}

/**
 * Decides whether a text diff is too fragmented to show inline.
 *
 * A diff is "complex" when it has more than one added chunk, more than one
 * removed chunk, or exactly one of each that are not adjacent in the list.
 *
 * @param patches Change list of a text diff.
 * @returns `true` when the diff is complex.
 */
const isComplexParagraph = (patches: Change[]) => {
  let additions = 0
  let removals = 0
  let lastAddedAt = 0
  let lastRemovedAt = 0

  for (const [position, patch] of patches.entries()) {
    if (patch.added) {
      additions += 1
      lastAddedAt = position
    } else if (patch.removed) {
      removals += 1
      lastRemovedAt = position
    }
  }

  if (additions > 1 || removals > 1) return true

  return additions === 1 && removals === 1 && Math.abs(lastAddedAt - lastRemovedAt) > 1
}

/**
 * Replaces a modified text node with markup that shows what changed.
 *
 * A complex diff is rendered as the whole old text in `<del>` followed by the
 * whole new text in `<ins>`. Otherwise each added/removed chunk is wrapped in
 * `<ins>`/`<del>` (tagged `modify-text`) and unchanged chunks stay plain text.
 *
 * @param diff Patch holding the old and new text and the node to replace.
 */
export const handleTextModified = (diff: DiffPatches & { action: 'modifyTextElement' }) => {
  const changes = getTextDiff(diff.oldValue, diff.newValue)
  const $result = document.createDocumentFragment()

  if (isComplexParagraph(changes)) {
    const $del = createEmptyDom('del')
    $del.innerText = diff.oldValue
    const $ins = createEmptyDom('ins')
    $ins.innerText = diff.newValue
    $result.append($del, $ins)
  } else {
    for (const change of changes) {
      if (!change.added && !change.removed) {
        $result.append(change.value)
        continue
      }

      const tagName = change.added ? 'ins' : 'del'
      const $chunk = createEmptyDom(tagName)
      $chunk.dataset[tagName] = 'modify-text'
      $chunk.innerText = change.value
      $result.appendChild($chunk)
    }
  }

  diff.element.replaceWith($result)
}

/**
 * Flags an added element by setting `data-ins="add-element"` on it.
 *
 * @param diff Patch describing the added element.
 */
export const handleAddElement = (diff: DiffPatches & { action: 'addElement' }) => {
  const { element } = diff
  element.dataset.ins = 'add-element'
}

/**
 * Re-inserts a removed element into the changed document, flagged as deleted.
 *
 * The anchor is chosen in this order: after `diff.after`, before
 * `diff.before`, appended to the `in` container. If none is present the
 * element is only logged.
 *
 * @param diff Patch describing the removed element.
 */
export const handleRemoveElement = (diff: DiffPatches & { action: 'removeElement' }) => {
  const $deleted = formatDeletedHtml(diff.element)
  // The differ attaches an `in` container to removals whose new parent has no
  // children left, for elements as well as text. The removeElement variant
  // does not declare that field, so it is read through a widened type.
  const $container = (diff as typeof diff & { in?: HTMLElement }).in

  if (diff.after) {
    diff.after.parentNode?.insertBefore($deleted, diff.after.nextSibling)
  } else if (diff.before) {
    diff.before.parentNode?.insertBefore($deleted, diff.before)
  } else if ($container) {
    $container.appendChild($deleted)
  } else {
    console.log('Removed element lost: ', diff.element.outerHTML)
  }
}

/**
 * Renders a replacement of one node by another.
 *
 * Two special cases are tried first, both for a bare text node swapped with a
 * `<span>` that wraps a single text node (in either direction): when the text
 * diff is not complex, the new node is replaced by a `<span>` wrapper that
 * marks the individual chunks. In every other case the old node is inserted
 * before the new one as deleted content and the new node is flagged as
 * inserted.
 *
 * @param diff Patch holding the old node and the new node.
 */
export const handleReplaceElement = (diff: DiffPatches & { action: 'replaceElement' }) => {
  let $current = diff.newValue
  const $prev = diff.oldValue

  if (
    $current.nodeType === NodeTypes.text &&
    $prev.nodeName === 'SPAN' &&
    $prev.childNodes.length === 1 &&
    $prev.firstChild!.nodeType === NodeTypes.text
  ) {
    // Old: <span>text</span>, new: bare text.
    // NOTE(review): getTextDiff receives the new text first and the old text
    // second, so `added` chunks are those found only in the old text —
    // confirm this ordering is intended.
    const textPatches = getTextDiff($current.nodeValue!, $prev.firstChild!.nodeValue!)
    if (!isComplexParagraph(textPatches)) {
      const $wrapper = document.createElement('span')
      textPatches.forEach((change) => {
        if (change.added) {
          const $tmp = document.createElement('ins')
          $tmp.innerText = change.value
          $wrapper.appendChild($tmp)
        } else if (change.removed) {
          // Shallow clone keeps the old span's attributes without its children.
          const $tmp = $prev.cloneNode() as HTMLSpanElement
          $tmp.innerText = change.value
          $tmp.dataset.diffChange = 'tag'
          $wrapper.appendChild($tmp)
        } else {
          $wrapper.appendChild(document.createTextNode(change.value))
        }
      })
      $current.replaceWith($wrapper)
      return
    }
  } else if (
    $current.nodeName === 'SPAN' &&
    $current.childNodes.length === 1 &&
    $current.firstChild!.nodeType === NodeTypes.text &&
    $prev.nodeType === NodeTypes.text
  ) {
    // Old: bare text, new: <span>text</span>.
    // NOTE(review): same argument order as above (new text first) — confirm.
    const textPatches = getTextDiff($current.firstChild!.nodeValue!, $prev.nodeValue!)
    if (!isComplexParagraph(textPatches)) {
      const $wrapper = document.createElement('span')
      textPatches.forEach((change) => {
        if (change.added) {
          const $tmp = $current.cloneNode() as HTMLSpanElement
          $tmp.innerText = change.value
          $tmp.dataset.ins = 'tag'
          $wrapper.appendChild($tmp)
        } else if (change.removed) {
          const $tmp = document.createElement('del')
          $tmp.innerText = change.value
          $wrapper.appendChild($tmp)
        } else {
          const $tmp = $current.cloneNode() as HTMLSpanElement
          $tmp.innerText = change.value
          $wrapper.appendChild($tmp)
        }
      })
      $current.replaceWith($wrapper)
      return
    }
  }

  // Generic case: show the old node as deleted right before the new node.
  const $deleted = formatDeletedHtml(diff.oldValue)

  if ($current.nodeType === NodeTypes.text) {
    const $ins = createEmptyDom('ins')
    // nodeValue is plain text; assigning it as text (not as HTML) keeps
    // literal `<`, `>` and `&` characters from being re-parsed as markup.
    $ins.textContent = $current.nodeValue || ''
    $current.replaceWith($ins)
    $current = $ins
  } else {
    $current.dataset.ins = 'replace-element'
  }

  $current.parentNode?.insertBefore($deleted, $current)
}

/**
 * Flags an element whose attribute was added, removed or modified.
 *
 * Sets `data-diff-change="change-attr"` and stores the attribute name in
 * `data-diff-change-name`.
 *
 * @param diff Attribute patch carrying the element and the attribute name.
 */
export const handleChangeAttribute = (
  diff: DiffPatches & {
    action: 'addAttribute' | 'removeAttribute' | 'modifyAttribute'
  },
) => {
  const { dataset } = diff.element
  dataset.diffChange = 'change-attr'
  dataset.diffChangeName = diff.name
}

/**
 * Flags an element whose tag changed.
 *
 * Sets `data-diff-change="change-attr"` and stores `"<oldTag> -> <newTag>"`
 * in `data-diff-change-name`.
 *
 * @param diff Tag-change patch carrying the element and both tag names.
 */
export const handleChangeTag = (diff: DiffPatches & { action: 'tagChanged' }) => {
  const { element, oldTag, newTag } = diff
  element.dataset.diffChange = 'change-attr'
  element.dataset.diffChangeName = `${oldTag} -> ${newTag}`
}

/**
 * Pushes the diff markers down to the text level inside `$wrapper`.
 *
 * Runs three passes in order: inserted content, deleted content, format
 * changes. For inserted/deleted content the `data-*` flag is removed, every
 * text node below the item is wrapped by `deepFindText`, and an item that is
 * itself an `<ins>`/`<del>` is then unwrapped. For format changes the text is
 * wrapped in `<span data-diff-change="…">` carrying the recorded change name,
 * without descending into table structure or existing `<ins>`/`<del>`.
 *
 * @param $wrapper Root element holding the annotated document.
 */
export const formatMixedDom = ($wrapper: HTMLElement) => {
  const pushDownMarker = (tag: 'ins' | 'del') => {
    $wrapper.querySelectorAll<HTMLElement>(`${tag},[data-${tag}]`).forEach(($item) => {
      $item.removeAttribute(`data-${tag}`)
      deepFindText($item, tag)
      if ($item.tagName.toLowerCase() === tag) {
        $item.outerHTML = $item.innerHTML
      }
    })
  }

  pushDownMarker('ins')
  pushDownMarker('del')

  const changeStopTags = ['table', 'thead', 'tbody', 'tr', 'ins', 'del']
  $wrapper.querySelectorAll<HTMLElement>('[data-diff-change]').forEach(($item) => {
    $item.removeAttribute('data-diff-change')
    const createChangeFlag = () => {
      const $span = createEmptyDom('span')
      $span.dataset.diffChange = $item.dataset.diffChangeName
      return $span
    }
    deepFindText($item, 'diffChange', changeStopTags, createChangeFlag)
  })
}

/**
 * Prepares a removed node for display.
 *
 * @param $deleted The removed node.
 * @returns For a text node, a new `<del>` holding its text; otherwise the
 * node itself, flagged with `data-del="be-deleted"`.
 */
const formatDeletedHtml = ($deleted: HTMLElement) => {
  if ($deleted.nodeType !== NodeTypes.text) {
    $deleted.dataset.del = 'be-deleted'
    return $deleted
  }

  const $del = createEmptyDom('del')
  $del.innerText = $deleted.nodeValue || ''
  return $del
}

/**
 * Recursively walks `$wrapper` and wraps every text node it reaches in a flag
 * element.
 *
 * @param $wrapper Node to process.
 * @param tag Tag name of the default flag element; also used as the dataset
 * key when an image is flagged.
 * @param stopTags Lower-case tag names that are neither processed nor descended into.
 * @param createFlag Optional factory for the flag element; when omitted a
 * `<tag>` element is created.
 */
const deepFindText = (
  $wrapper: HTMLElement,
  tag: string,
  stopTags: string[] = [],
  createFlag?: () => HTMLElement,
) => {
  // Text and comment nodes have no tagName, so fall back to nodeName.
  const tagName = ($wrapper.tagName || $wrapper.nodeName).toLowerCase()
  if ($wrapper.nodeType === NodeTypes.comment) return
  if (stopTags.includes(tagName)) return
  // Spans with the `el-checkbox` class are left untouched.
  if ($wrapper.tagName === 'SPAN' && $wrapper.classList.contains('el-checkbox')) return

  if ($wrapper.nodeType === NodeTypes.text) {
    // Replace the text node with a flag element holding the same text.
    const nodeValue = $wrapper.nodeValue || ''
    const $flag = createFlag ? createFlag() : createEmptyDom(tag)
    $flag.innerText = nodeValue
    $wrapper.replaceWith($flag)
    return
  }

  // Images have no text to wrap; flag the element itself via its dataset.
  if ($wrapper.tagName.toLowerCase() === 'img') {
    $wrapper.dataset[tag] = 'img'
    return
  }

  // Give an empty element a single space so the loop below wraps it in a flag.
  if ($wrapper.childNodes.length === 0) {
    $wrapper.innerHTML = ' '
  }

  $wrapper.childNodes.forEach(($item: HTMLElement) => {
    deepFindText($item, tag, stopTags, createFlag)
  })
}

富文本对比

html-diff.ts

import similarity from 'similarity'
import { intersection } from 'lodash-es'
import hexRgb from 'hex-rgb'

/** Values compared against `Node.nodeType` throughout the diff code. */
export enum NodeTypes {
  // Element node.
  node = 1,
  // Text node.
  text = 3,
  // Comment node.
  comment = 8,
}

/** One change produced by the HTML diff, discriminated by `action`. */
export type DiffPatches =
  | {
      // Text was added.
      action: 'addTextElement'
      element: HTMLElement
    }
  | {
      // Text was modified.
      action: 'modifyTextElement'
      oldValue: string
      newValue: string
      element: HTMLElement
    }
  | {
      // Text was removed; before/after/in describe where it can be re-inserted.
      action: 'removeTextElement'
      element: HTMLElement
      before?: HTMLElement | null
      after?: HTMLElement | null
      in?: HTMLElement
    }
  | {
      // An element was added.
      action: 'addElement'
      element: HTMLElement
    }
  | {
      // An element was removed; before/after describe where it can be re-inserted.
      action: 'removeElement'
      before?: HTMLElement | null
      after?: HTMLElement | null
      element: HTMLElement
    }
  | {
      // An element was replaced by another one.
      action: 'replaceElement'
      newValue: HTMLElement
      oldValue: HTMLElement
    }
  | {
      // An attribute was removed.
      action: 'removeAttribute'
      // Attribute name.
      name: string
      value: string
      element: HTMLElement
    }
  | {
      // An attribute was added.
      action: 'addAttribute'
      // Attribute name.
      name: string
      value: string
      element: HTMLElement
    }
  | {
      // An attribute value was modified.
      action: 'modifyAttribute'
      // Attribute name.
      name: string
      oldValue: string
      newValue: string
      element: HTMLElement
    }
  | {
      // The tag of an element changed.
      action: 'tagChanged'
      oldTag: string
      newTag: string
      element: HTMLElement
    }

/** Options accepted by `diff`. */
interface DiffOptions {
  // Attribute names that are skipped when attribute patches are collected.
  ignoredAttributes?: string[]
}

// Matches a string that consists only of whitespace / line breaks.
export const BREAKING_LINE_REG = /^[\n\s]+$/

// Default minimum score a candidate needs to count as a similar node.
const MINIMUM_PRIORITY = 65
// Stricter threshold used for the cross-match check on the first unmatched origin node.
const HIGH_PRIORITY = 80
// Weight of attribute similarity in the score.
const ATTR_RATIO = 0.3
// Weight of child-node similarity in the score.
const CHILDREN_RATIO = 0.5
// Weight of text similarity in the score.
const TEXT_RATIO = 0.6
// Amount added to or subtracted from the score for identity (node name / id) matches and mismatches.
const IDENTITY_PRIORITY = 70

// Tag name -> attribute: when this attribute differs between two nodes of that
// tag, the change is reported as a replacement of the whole element.
const PREFER_AS_REPLACE_ELEMENT = {
  IMG: 'src',
  A: 'href',
}

/**
 * Computes the patches that turn `from` into `to`.
 *
 * String inputs are parsed into a fresh `<div>`; element inputs are used as
 * they are. Both trees are normalized before diffing. By default the
 * attributes `data-highlight-id`, `colwidth`, `rowspan` and `colspan` are
 * ignored; `options` is spread over these defaults.
 *
 * @param from Original content (element or HTML string).
 * @param to Changed content (element or HTML string).
 * @param options Diff options.
 * @returns The list of patches.
 */
export const diff = (
  from: HTMLElement | string,
  to: HTMLElement | string,
  options: DiffOptions = {},
) => {
  const toContainer = (input: HTMLElement | string): HTMLElement => {
    if (typeof input !== 'string') return input
    const $holder = document.createElement('div')
    $holder.innerHTML = input
    return $holder
  }

  const $origin = toContainer(from)
  const $changed = toContainer(to)

  $origin.normalize()
  $changed.normalize()

  const defaults = {
    ignoredAttributes: ['data-highlight-id', 'colwidth', 'rowspan', 'colspan'],
  }
  return diffInternal($origin, $changed, { ...defaults, ...options })
}

/**
 * Diffs the direct children of `$origin` against those of `$changed` and
 * recurses into matched pairs.
 *
 * First pass: every changed child is paired with a similar origin child (or
 * reported as added). Second pass: every origin child left unpaired is
 * reported as removed, together with an anchor (`before` / `after` / `in`)
 * telling where it can be re-inserted in the changed tree.
 *
 * Mark tags are stripped from both parents before comparing, so both trees
 * are mutated.
 *
 * @param $origin Parent node from the original document.
 * @param $changed Parent node from the changed document.
 * @param options Diff options with all fields resolved.
 * @returns The patches for this subtree.
 */
const diffInternal = (
  $origin: HTMLElement,
  $changed: HTMLElement,
  options: Required<DiffOptions>,
) => {
  let patches: DiffPatches[] = []

  // Indexes of origin children already paired, kept sorted in descending order.
  const usedOriginChildIndex: number[] = []
  // Origin child index -> changed node it was paired with.
  const usedMaps: { [originIndex: string]: HTMLElement } = {}
  // Storage for reverse matches (changed node -> origin node reserved for it).
  const cachedSimilarNodes = new Map<HTMLElement, HTMLElement>()
  removeMarkTags($origin)
  removeMarkTags($changed)
  const originChildNodes = getUsefulChildNodes($origin)
  const changedChildNodes = getUsefulChildNodes($changed)

  changedChildNodes.forEach(($to, index) => {
    // Only origin children after the highest already-used index are candidates.
    const originSliceFrom = (usedOriginChildIndex[0] ?? -1) + 1
    const notComparedFromNodes = originChildNodes.slice(originSliceFrom)
    let $similar: HTMLElement | null
    let isFirstCrossMatched = false

    if (changedChildNodes.length === 1 && originChildNodes.length === 1) {
      // With exactly one child on each side, treat them as a pair directly.
      $similar = originChildNodes[0]
    } else if (cachedSimilarNodes.has($to)) {
      $similar = cachedSimilarNodes.get($to)!
    } else {
      const { node, priority } = findSimilarNode($to, notComparedFromNodes)
      $similar = node
      let reverseMatch = true

      // If the matched node is not the first candidate, check for cross matches.
      if ($similar && $similar !== notComparedFromNodes[0]) {
        const maybeCrossMatchedNodesLength = originChildNodes.findIndex((item) => item === $similar)
        const notComparedChangedNodes = changedChildNodes.slice(index + 1)
        for (let i = originSliceFrom; i < maybeCrossMatchedNodesLength; ++i) {
          if (findSimilarNode(originChildNodes[i], notComparedChangedNodes, priority + 1).node) {
            if (i > originSliceFrom) {
              // The first origin candidate matched no later changed node, so it can be used directly.
              $similar = originChildNodes[originSliceFrom]
              reverseMatch = false
            } else {
              $similar = null
              isFirstCrossMatched = true
            }
            break
          }
        }
      }

      // Reverse check for a better match (two similar nodes next to each other are easy to mix up).
      if (reverseMatch && $similar && priority < 95 && index + 1 < changedChildNodes.length) {
        const betterMatched = findSimilarNode($similar, [changedChildNodes[index + 1]], 100)

        if (betterMatched.node) {
          cachedSimilarNodes.set(betterMatched.node, $similar)
          $similar = null
          isFirstCrossMatched = true
        }
      }
    }

    // Nothing was found: if the first origin candidate has no partner among the
    // remaining changed nodes, assign it to the current node.
    if (!isFirstCrossMatched && $similar === null && notComparedFromNodes.length > 0) {
      const notComparedChangedNodes = changedChildNodes.slice(index + 1)
      const crossMatch = findSimilarNode(
        notComparedFromNodes[0],
        notComparedChangedNodes,
        HIGH_PRIORITY,
      )
      if (!crossMatch.node) {
        $similar = notComparedFromNodes[0]
      }
    }

    let currentIndex = $similar
      ? notComparedFromNodes.findIndex(($node) => $node === $similar) + originSliceFrom
      : -1

    if (currentIndex >= 0) {
      usedOriginChildIndex.push(currentIndex)
      // Must be re-sorted right away because the top of the loop reads element 0.
      usedOriginChildIndex.sort((a, b) => b - a)
      usedMaps[currentIndex] = $to
    }

    if (!$similar) {
      // Added node.
      patches.push({
        action: $to.nodeType === NodeTypes.text ? 'addTextElement' : 'addElement',
        element: $to,
      })
      return
    }

    if ($to.nodeName === $similar.nodeName) {
      patches.push(...getPatchBetweenSameTagNodes($similar, $to, options))
    } else if (preferAsSameTag($to, $similar)) {
      patches.push({
        action: 'tagChanged',
        oldTag: $similar.nodeName,
        newTag: $to.nodeName,
        element: $to,
      })
      patches.push(...getPatchBetweenSameTagNodes($similar, $to, options))
    } else {
      if (
        // Text colour or stored data needs a wrapping tag to carry the attribute.
        ($similar.nodeType === NodeTypes.text || $to.nodeType === NodeTypes.text) &&
        getInnerText($similar) === getInnerText($to)
      ) {
        const currentNodeName = $to.nodeName
        if ($to.nodeType === NodeTypes.text) {
          // Wrap the text node in a span so the tag change can be flagged on an element.
          const $span = document.createElement('span')
          $span.innerText = $to.nodeValue!
          $to.replaceWith($span)
          $to = $span
          usedMaps[currentIndex] = $to
        }
        patches.push({
          action: 'tagChanged',
          oldTag: $similar.nodeName,
          newTag: currentNodeName,
          element: $to,
        })
      } else {
        // Replacement.
        patches.push({
          action: 'replaceElement',
          newValue: $to,
          oldValue: $similar,
        })
      }
    }
  })

  originChildNodes.forEach(($node, index) => {
    if (usedOriginChildIndex.includes(index)) return
    // Look for the nearest following origin child that was paired; its partner
    // in the changed tree becomes the `before` anchor.
    let insertBefore: HTMLElement | null = null
    let nextIndex = index + 1
    while (nextIndex < originChildNodes.length) {
      if (usedMaps[nextIndex] !== undefined) {
        insertBefore = usedMaps[nextIndex]
        break
      }
      nextIndex += 1
    }

    const actionName: DiffPatches['action'] =
      $node.nodeType === NodeTypes.text ? 'removeTextElement' : 'removeElement'
    let removePatch: DiffPatches

    if (insertBefore) {
      patches.push(
        (removePatch = {
          action: actionName,
          element: $node,
          before: insertBefore,
        }),
      )
    } else if (!changedChildNodes.length) {
      // The changed parent has no children: anchor on the parent itself unless it is the root.
      const isRootDiv = !$changed.parentElement
      patches.unshift(
        (removePatch = {
          action: actionName,
          element: $node,
          ...(isRootDiv ? {} : { in: $changed }),
        }),
      )
    } else {
      // Nearest preceding paired origin child (the index list is sorted descending).
      const siblingPrev = usedOriginChildIndex.find((item) => item < index)

      if (siblingPrev === undefined) {
        patches.push(
          (removePatch = {
            action: actionName,
            element: $node,
            before: changedChildNodes[0],
          }),
        )
      } else {
        patches.unshift(
          (removePatch = {
            action: actionName,
            element: $node,
            after: usedMaps[siblingPrev]!,
          }),
        )
      }
    }

    // Try to pair a removal with an adjacent addition and diff the two instead.
    if (removePatch.action === 'removeElement') {
      const $fromElement = removePatch.element
      const $toElements = (
        removePatch.before
          ? removePatch.before.previousSibling
          : removePatch.after
            ? removePatch.after.nextSibling
            : null
      ) as HTMLElement | null
      if ($toElements) {
        const addPatch = patches.find((item): item is DiffPatches & { action: 'addElement' } => {
          return item.action === 'addElement' && $toElements === item.element
        })
        if (addPatch) {
          patches = patches.filter((item) => item !== addPatch && item !== removePatch)
          patches.push(...getPatchBetweenSameTagNodes($fromElement, addPatch.element, options))
        }
      }
    }
  })

  return patches
}

/**
 * Builds the patches between two nodes that are treated as the same tag.
 *
 * - Text nodes: one `modifyTextElement` patch when the values differ.
 * - Tags listed in PREFER_AS_REPLACE_ELEMENT whose key attribute differs:
 *   one `replaceElement` patch.
 * - Otherwise: attribute patches, plus the patches of the children when the
 *   inner HTML differs.
 *
 * @param $similar Node from the original document.
 * @param $to Node from the changed document.
 * @param options Diff options with all fields resolved.
 * @returns The patches between the two nodes.
 */
const getPatchBetweenSameTagNodes = (
  $similar: HTMLElement,
  $to: HTMLElement,
  options: Required<DiffOptions>,
): DiffPatches[] => {
  if ($to.nodeType === NodeTypes.text) {
    if ($to.nodeValue === $similar.nodeValue) return []
    return [
      {
        action: 'modifyTextElement',
        oldValue: $similar.nodeValue!,
        newValue: $to.nodeValue!,
        element: $to,
      },
    ]
  }

  const keyedEntry = Object.entries(PREFER_AS_REPLACE_ELEMENT).find(
    ([tag]) => tag === $to.nodeName,
  )
  if (keyedEntry && $similar.getAttribute(keyedEntry[1]) !== $to.getAttribute(keyedEntry[1])) {
    return [
      {
        action: 'replaceElement',
        oldValue: $similar,
        newValue: $to,
      },
    ]
  }

  const patches: DiffPatches[] = [...collectAttrPatches($similar, $to, options)]
  if ($similar.innerHTML !== $to.innerHTML) {
    patches.push(...diffInternal($similar, $to, options))
  }
  return patches
}

/**
 * Counts the siblings that precede `$node`, i.e. its index among all child
 * nodes of its parent.
 *
 * @param $node Node to locate.
 * @returns Number of previous siblings.
 */
const getNodeIndex = ($node: HTMLElement) => {
  let position = 0
  for (
    let cursor: ChildNode | null = $node.previousSibling;
    cursor;
    cursor = cursor.previousSibling
  ) {
    position += 1
  }
  return position
}

/**
 * Returns the text of a node: `nodeValue` (or an empty string) for a text
 * node, `innerText` for anything else.
 *
 * @param $dom Node to read.
 */
const getInnerText = ($dom: HTMLElement) => {
  return $dom.nodeType === NodeTypes.text ? $dom.nodeValue || '' : $dom.innerText
}

// Separator between declarations in a style attribute.
const stylesSeparatorReg = /;\s*/
// A declaration that ends right after the colon, i.e. has no value.
const emptyStyleSeparatorReg = /:\s*$/
// Separator between property name and value.
const styleSeparatorReg = /:\s*/
// A declaration whose value is a 3- or 6-digit hex colour. Hex digits include
// a-f in either case, so the class must not be limited to \d.
const hexColorReg = /:\s*(#[0-9a-f]{3}|#[0-9a-f]{6})$/i

/**
 * Reads an attribute in a form suitable for comparison.
 *
 * For `style`, declarations without a value are dropped, hex colours are
 * rewritten as `rgb(r, g, b)` and the declarations are re-joined with `'; '`,
 * so that equivalent styles written differently compare equal. Any other
 * attribute is returned unchanged; a missing attribute yields `''`.
 *
 * @param $node Element to read from.
 * @param attrKey Attribute name.
 * @returns The normalized attribute value.
 */
const formatAttribute = ($node: HTMLElement, attrKey: string) => {
  const value = $node.getAttribute(attrKey) || ''
  if (attrKey === 'style') {
    return value
      .split(stylesSeparatorReg)
      .map((style) => {
        if (emptyStyleSeparatorReg.test(style)) return ''
        if (hexColorReg.test(style)) {
          const colorEntries = style.split(styleSeparatorReg)
          const { red, green, blue } = hexRgb(colorEntries[1])
          return `${colorEntries[0]}: rgb(${red}, ${green}, ${blue})`
        }
        return style
      })
      .filter(Boolean)
      .join('; ')
  }

  return value
}

/**
 * Collects the attribute patches between two elements.
 *
 * Attributes listed in `options.ignoredAttributes` are skipped. Patches are
 * returned in this order: removed, added, modified. Removed/added attributes
 * whose normalized value is empty are not reported; common attributes are
 * compared by their normalized value.
 *
 * @param $from Element from the original document.
 * @param $to Element from the changed document (every patch targets it).
 * @param options Diff options with all fields resolved.
 * @returns The attribute patches.
 */
const collectAttrPatches = (
  $from: HTMLElement,
  $to: HTMLElement,
  options: Required<DiffOptions>,
) => {
  const isTracked = (name: string) => !options.ignoredAttributes.includes(name)

  const fromAttrs = $from.getAttributeNames().filter(isTracked)
  const toAttrs = $to.getAttributeNames().filter(isTracked)
  const commonAttrs = intersection(fromAttrs, toAttrs)

  const removed = fromAttrs
    .filter((attr) => !commonAttrs.includes(attr) && !!formatAttribute($from, attr))
    .map(
      (attr): DiffPatches => ({
        action: 'removeAttribute',
        name: attr,
        value: $from.getAttribute(attr) || '',
        element: $to,
      }),
    )

  const added = toAttrs
    .filter((attr) => !commonAttrs.includes(attr) && !!formatAttribute($to, attr))
    .map(
      (attr): DiffPatches => ({
        action: 'addAttribute',
        name: attr,
        value: $to.getAttribute(attr) || '',
        element: $to,
      }),
    )

  const modified = commonAttrs.flatMap((attr): DiffPatches[] => {
    const fromAttr = formatAttribute($from, attr)
    const toAttr = formatAttribute($to, attr)
    if (fromAttr === toAttr) return []
    return [
      {
        action: 'modifyAttribute',
        name: attr,
        newValue: toAttr,
        oldValue: fromAttr,
        element: $to,
      },
    ]
  })

  const patches: DiffPatches[] = [...removed, ...added, ...modified]
  return patches
}

/**
 * Tells whether a node is a mark tag: a `<span>` that either carries a
 * `data-highlight-id` or has the `inline-comment-marker` class.
 *
 * @param $node Node to test.
 * @returns A truthy value (the highlight id or `true`) for a mark tag, a falsy one otherwise.
 */
const isMarkTag = ($node: HTMLElement) => {
  if ($node.nodeName !== 'SPAN') return false
  return $node.dataset.highlightId || $node.classList.contains('inline-comment-marker')
}

/**
 * Unwraps a mark tag in place: the span is replaced by all of its child
 * nodes, and nested mark tags among those children are unwrapped too. A mark
 * tag without children is removed. Nodes that are not mark tags are left
 * untouched.
 *
 * @param $node Node to unwrap if it is a mark tag.
 */
const replaceMarkTagToText = ($node: HTMLElement) => {
  if (!isMarkTag($node)) return

  // Snapshot first: moving the children out empties the live childNodes list.
  const children = Array.from($node.childNodes)
  if (children.length === 0) {
    $node.remove()
    return
  }

  // Keep every child, not just the first, so no content is lost when the
  // mark wraps more than one node.
  $node.replaceWith(...children)
  children.forEach(($child) => replaceMarkTagToText($child as HTMLElement))
}

/**
 * Unwraps every mark tag among the direct children of `$parent`, then merges
 * the adjacent text nodes this leaves behind.
 *
 * @param $parent Element whose direct children are processed.
 */
const removeMarkTags = ($parent: Element) => {
  // Iterate over a snapshot: childNodes is live, and a child removed during
  // iteration would shift the list and make the loop skip the next sibling.
  Array.from($parent.childNodes).forEach(($child) => {
    replaceMarkTagToText($child as HTMLElement)
  })
  // Merge adjacent text nodes.
  $parent.normalize()
}

/**
 * 删除无效的子节点
 */
/**
 * Returns the child nodes of `$parent` that take part in the comparison:
 * everything except comment nodes and `<colgroup>` elements.
 *
 * @param $parent Element whose children are listed.
 * @returns The remaining child nodes, in document order.
 */
export const getUsefulChildNodes = ($parent: Element) => {
  const useful: ChildNode[] = []
  for (const $child of Array.from($parent.childNodes)) {
    const isIgnored = $child.nodeType === NodeTypes.comment || $child.nodeName === 'COLGROUP'
    if (!isIgnored) useful.push($child)
  }
  return useful as HTMLElement[]
}

/**
 * Finds the candidate in `compareNodes` that is most similar to `$to`.
 *
 * Every candidate starts at 100 and is scored on node name, id, attributes,
 * children and text, in that order; the attribute, children and text steps
 * are only applied while the score is still at or above `minimumPriority`.
 *
 * @param $to Node to find a partner for.
 * @param compareNodes Candidate nodes.
 * @param minimumPriority Lowest score accepted as a match.
 * @returns The best candidate and its score, or `{ node: null, priority: 0 }`
 * when there are no candidates or the best score is below the minimum.
 */
export const findSimilarNode = (
  $to: HTMLElement,
  compareNodes: HTMLElement[],
  minimumPriority = MINIMUM_PRIORITY,
): { node: null | HTMLElement; priority: number } => {
  if (!compareNodes.length) return { node: null, priority: 0 }

  const nodeResult: {
    index: number
    priority: number
    element: HTMLElement
  }[] = []

  compareNodes.forEach(($from: HTMLElement, index) => {
    let priority = 100

    // Different node names are penalised unless the pair is treated as the same tag.
    if ($to.nodeName !== $from.nodeName && !preferAsSameTag($to, $from)) {
      priority -= IDENTITY_PRIORITY
    }

    priority = getIdentityPriority($from, $to, priority)

    if (priority >= minimumPriority) {
      const attrPriority = getAttributePriority($from, $to) / 100
      priority = priority * (1 - ATTR_RATIO) + priority * ATTR_RATIO * attrPriority
    }

    if (priority >= minimumPriority) {
      const childrenPriority = getChildrenPriority($from, $to) / 100
      priority = priority * (1 - CHILDREN_RATIO) + priority * CHILDREN_RATIO * childrenPriority
    }

    if (priority >= minimumPriority) {
      const textPriority = getTextPriority($from, $to) / 100
      priority = priority * (1 - TEXT_RATIO) + priority * TEXT_RATIO * textPriority
    }

    nodeResult.push({ index, priority, element: $from })
  })

  // Best score first.
  nodeResult.sort((a, b) => b.priority - a.priority)

  // If the best match is far away, prefer a close runner-up: when the second
  // candidate scores at least 85 and is within 10 points of the first, the one
  // whose sibling index is nearer to that of `$to` wins.
  if (
    nodeResult.length >= 2 &&
    nodeResult[1].priority >= 85 &&
    nodeResult[0].priority - nodeResult[1].priority <= 10
  ) {
    const firstOriginNodeIndex = getNodeIndex(nodeResult[0].element)
    const secondOriginNodeIndex = getNodeIndex(nodeResult[1].element)
    const changedNodeIndex = getNodeIndex($to)
    if (
      Math.abs(secondOriginNodeIndex - changedNodeIndex) <
      Math.abs(firstOriginNodeIndex - changedNodeIndex)
    ) {
      ;[nodeResult[0], nodeResult[1]] = [nodeResult[1], nodeResult[0]]
    }
  }

  return nodeResult[0].priority >= minimumPriority
    ? {
        node: compareNodes[nodeResult[0].index],
        priority: nodeResult[0].priority,
      }
    : { node: null, priority: 0 }
}

/**
 * Describes the children of an element as a space-separated list of node
 * names, used to compare child structure.
 *
 * Whitespace-only text nodes are skipped, mark tags count as `#text`, and
 * runs of consecutive `#text` entries are collapsed into one.
 *
 * @param $element Element whose children are described.
 * @returns The signature string.
 */
const getChildrenNames = ($element: HTMLElement) => {
  const names: string[] = []

  for (const $node of getUsefulChildNodes($element)) {
    const isBlankText =
      $node.nodeType === NodeTypes.text && BREAKING_LINE_REG.test($node.nodeValue || '')
    if (isBlankText) continue

    const name = isMarkTag($node) ? '#text' : $node.nodeName
    if (name) names.push(name)
  }

  return names.join(' ').replace(/#text(\s#text)+/g, '#text')
}

/**
 * Scores how alike the child structures of two nodes are: 100 times the
 * similarity of their child-name signatures.
 *
 * @param $from Node from the original document.
 * @param $to Node from the changed document.
 */
const getChildrenPriority = ($from: HTMLElement, $to: HTMLElement): number => {
  const fromSignature = getChildrenNames($from)
  const toSignature = getChildrenNames($to)
  return 100 * similarity(fromSignature, toSignature)
}

/**
 * Adjusts a score according to the `id` attributes of two nodes.
 *
 * The score is returned unchanged when either node is a text node or when
 * neither has a usable id. Equal non-empty ids add IDENTITY_PRIORITY;
 * differing ids subtract it.
 *
 * @param $from Node from the original document.
 * @param $to Node from the changed document.
 * @param originPriority Score before the adjustment.
 * @returns The adjusted score.
 */
export const getIdentityPriority = (
  $from: HTMLElement,
  $to: HTMLElement,
  originPriority: number,
): number => {
  const involvesText = $from.nodeType === NodeTypes.text || $to.nodeType === NodeTypes.text
  if (involvesText) return originPriority

  const fromId = $from.getAttribute('id')
  const toId = $to.getAttribute('id')

  if (fromId !== toId) return originPriority - IDENTITY_PRIORITY
  return fromId ? originPriority + IDENTITY_PRIORITY : originPriority
}

/**
 * Scores how alike the attributes of two nodes are, starting from 100.
 *
 * Text nodes always score 100. Otherwise each attribute present on only one
 * side costs 3 points (at most 15 in total), and the remaining score is
 * shared evenly among the common attributes, each losing its share in
 * proportion to how different the two values are (case-sensitive).
 *
 * @param $from Node from the original document.
 * @param $to Node from the changed document.
 */
const getAttributePriority = ($from: HTMLElement, $to: HTMLElement): number => {
  if ($from.nodeType === NodeTypes.text || $to.nodeType === NodeTypes.text) return 100

  const fromAttrs = $from.getAttributeNames().sort()
  const toAttrs = $to.getAttributeNames().sort()
  const commonAttrs = intersection(fromAttrs, toAttrs)

  const unsharedCount = fromAttrs.length + toAttrs.length - commonAttrs.length * 2
  const base = 100 - Math.min(15, 3 * unsharedCount)
  const sharePerAttr = base / commonAttrs.length

  return commonAttrs.reduce((score, attrName) => {
    const fromValue = $from.getAttribute(attrName) || ''
    const toValue = $to.getAttribute(attrName) || ''
    return score - sharePerAttr * (1 - similarity(fromValue, toValue, { sensitive: true }))
  }, base)
}

/**
 * Scores how alike the texts of two nodes are: 100 times their similarity.
 *
 * @param $from Node from the original document.
 * @param $to Node from the changed document.
 */
const getTextPriority = ($from: HTMLElement, $to: HTMLElement): number => {
  const fromContent = getInnerText($from)
  const toContent = getInnerText($to)
  return 100 * similarity(fromContent, toContent)
}

/**
 * Tells whether two nodes with different names should still be treated as the
 * same tag. This holds only for a `TH`/`TD` pair, in either order.
 *
 * @param $a First node.
 * @param $b Second node.
 */
const preferAsSameTag = ($a: Node, $b: Node) => {
  const cellTags = ['TH', 'TD']
  return (
    $a.nodeName !== $b.nodeName &&
    cellTags.includes($a.nodeName) &&
    cellTags.includes($b.nodeName)
  )
}

同步标记

sync-mark.ts

import { NodeTypes, findSimilarNode, getUsefulChildNodes } from './html-diff'

// CSS selector for highlight mark spans.
const selector = 'span[data-highlight-id]'

/**
 * 把某段文本的标记同步到其他文本的相同位置
 * @param sourceContent 原始文本内容
 * @param destContent  需要被同步的文本内容
 */
/**
 * Copies the highlight marks of one piece of content to the same positions in
 * another piece of content.
 *
 * Only marks whose `data-highlight-id` does not already exist in the
 * destination are synchronized. When both contents are identical, or the
 * source has no marks, the destination is returned unchanged.
 *
 * @param sourceContent HTML of the content that carries the marks.
 * @param destContent HTML of the content that should receive the marks.
 * @returns The destination HTML after synchronization.
 */
export const syncMark = (sourceContent: string, destContent: string): string => {
  if (sourceContent === destContent) return destContent

  const parse = (html: string) => {
    const $box = document.createElement('div')
    $box.innerHTML = html
    $box.normalize()
    return $box
  }

  const $source = parse(sourceContent)
  const $dest = parse(destContent)

  const sourceMarks = $source.querySelectorAll(selector)
  if (sourceMarks.length === 0) return destContent

  const destIds = new Set(
    Array.from($dest.querySelectorAll(selector), ($destMark) =>
      $destMark.getAttribute('data-highlight-id'),
    ),
  )

  const newHighlightIds: string[] = []
  for (const $mark of Array.from(sourceMarks)) {
    const id = $mark.getAttribute('data-highlight-id')
    if (id && !destIds.has(id)) {
      newHighlightIds.push(id)
    }
  }

  internalSyncMark($source, $dest, newHighlightIds)
  return $dest.innerHTML
}

/**
 * Synchronizes new highlight marks from `$source` into `$dest`, mutating `$dest`.
 *
 * When `$source` has a highlight span among its direct children, each new
 * mark is located by counting how many times its inner HTML occurs before it
 * in the source, and the occurrence with the same ordinal in the destination
 * text is wrapped with the mark's outer HTML. Otherwise the element children
 * of source and destination are paired by similarity, and the function
 * recurses into pairs that share a node name and contain a new mark.
 *
 * @param $source Element that carries the marks.
 * @param $dest Element that receives the marks.
 * @param newHighlightIds Ids of the marks that still have to be synchronized.
 */
const internalSyncMark = ($source: HTMLElement, $dest: HTMLElement, newHighlightIds: string[]) => {
  const sourceChildNodes = getUsefulChildNodes($source)
  const destChildNodes = getUsefulChildNodes($dest)

  if (
    // The source has a highlight mark as a direct child, so this is already the paragraph level.
    sourceChildNodes.some(($child) => {
      return $child.nodeName === 'SPAN' && $child.hasAttribute('data-highlight-id')
    })
  ) {
    const sourceHighlightNodes = Array.from($source.querySelectorAll(selector)).filter(($child) => {
      const id = $child.getAttribute('data-highlight-id')
      return id && newHighlightIds.includes(id)
    })

    if (!sourceHighlightNodes.length) return

    sourceHighlightNodes.forEach(($child) => {
      const highlightOuterHtml = $child.outerHTML
      const highlightInnerHtml = $child.innerHTML
      const sourceArr = $source.innerHTML.split(highlightOuterHtml)
      // Number of occurrences of the marked content that precede the mark in the source.
      const markIndex = sourceArr[0].split(highlightInnerHtml).length - 1

      // Running count of occurrences seen in the destination; -1 means none yet.
      let destMarkIndex = -1
      ;(function loop($dest: Element) {
        const destNodes = getUsefulChildNodes($dest)
        for (let i = 0; i < destNodes.length; ++i) {
          const $destChild = destNodes[i]

          if ($destChild.nodeType === NodeTypes.text) {
            const currentMarkCount = $destChild.nodeValue!.split(highlightInnerHtml).length - 1
            destMarkIndex += currentMarkCount
            if (destMarkIndex >= markIndex) {
              // Rebuild the text with the target occurrence wrapped in the mark
              // and parse it in a scratch element.
              const $div = document.createElement('div')
              $div.innerHTML = $destChild
                .nodeValue!.split(highlightInnerHtml)
                .reduce((carry, item, itemIndex, destArr) => {
                  const separator =
                    destArr.length - 1 === itemIndex
                      ? ''
                      : itemIndex === markIndex
                        ? highlightOuterHtml
                        : highlightInnerHtml
                  return carry + item + separator
                }, '')

              $div.childNodes.forEach(($node) => {
                // Must clone, otherwise the node is moved (cut and paste) instead of copied.
                $dest.insertBefore($node.cloneNode(true), $destChild)
              })
              $destChild.remove()
            }
          } else if ($destChild.nodeType === NodeTypes.node) {
            const currentMarkCount = $destChild.innerHTML.split(highlightInnerHtml).length - 1
            if (destMarkIndex + currentMarkCount >= markIndex) {
              if ($destChild.nodeName === 'SPAN' && $destChild.hasAttribute('data-highlight-id')) {
                // Occurrences inside an existing highlight span are counted but not descended into.
                destMarkIndex += currentMarkCount
              } else {
                loop($destChild)
              }
            }
          }

          if (destMarkIndex >= markIndex) break
        }
      })($dest)
    })

    return
  }

  /**
   * Source -> Dest
   */
  const usedMaps: {
    [source: string]: HTMLElement
  } = {}

  for (let index = 0; index < sourceChildNodes.length; ++index) {
    const $node = sourceChildNodes[index]!
    // Only element children are paired.
    if ($node.nodeType !== NodeTypes.node) continue

    const matchedNodes = Object.values(usedMaps)
    let $similar: HTMLElement | null

    if (sourceChildNodes.length === 1 && destChildNodes.length === 1) {
      $similar = destChildNodes[0]
    } else {
      // Search only among destination children that have not been paired yet.
      $similar = findSimilarNode(
        $node,
        destChildNodes.filter(($item) => !matchedNodes.includes($item)),
      ).node
    }

    if (!$similar) continue

    usedMaps[index] = $similar

    if ($node.nodeName !== $similar.nodeName) continue

    const hasNewHighlightId = Array.from($node.querySelectorAll(selector)).some(($mark) => {
      const id = $mark.getAttribute('data-highlight-id')
      return id && newHighlightIds.includes(id)
    })

    if (hasNewHighlightId) {
      internalSyncMark($node, $similar, newHighlightIds)
    }
  }
}

愚者
12 声望3 粉丝