前言:有一天,产品经理贱兮兮的跟你说:“我们看到一个挺好的搜谱小程序,我们来仿一个吧,爬出他们的数据来,放到我们库里”
没办法,搞吧!!!!!!
爬数据
分析小程序接口
首先安利一个好用的iOS免费抓包软件: Stream
通过分析目标小程序的接口发现,有一个接口可以获取单个乐谱的详情,并且每个乐谱的id是自增的,啧啧!这就好说了啊
上代码
const shell = require('shelljs')
const fs = require('fs')
/**
 * Fetch one score record by id through curl and append it to `sopu.txt`
 * as a single comma-separated line: id,title,createTime,firstImage.
 *
 * Nothing is written when the response is not valid JSON, when `code` is
 * not 200, or when the payload carries no `data.id`. A record without any
 * `details` entry is still written, with an empty image column.
 *
 * @param {number} id - Score id to request.
 * @returns {void}
 */
const getDetil = (id) => {
  // Build the curl command (headers copied from the captured request)
  const curl = `curl 'https://api.quxuege.com/search/one?id=${id}' -H 'Host: api.quxuege.com' -H 'Accept: */*' -H 'Content-Type: application/x-www-form-urlencoded' -H 'Accept-Encoding: gzip, deflate, br' -H 'Connection: keep-alive' -H 'Cookie: ' -H 'User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/7.0.15(0x17000f31) NetType/WIFI Language/zh_CN' -H 'Referer: https://servicewechat.com/wx21c9c829a9ecfc04/8/page-frame.html' -H 'token: ' -H 'Accept-Language: zh-cn'`
  const output = shell.exec(curl).stdout

  // One failed request (empty or non-JSON output) must not abort the whole crawl.
  let res
  try {
    res = JSON.parse(output)
  } catch (err) {
    console.error(`id ${id}: response is not valid JSON, skipped (${err.message})`)
    return
  }
  if (!res || res.code !== 200) {
    return
  }

  const p = res.data
  if (!p || !p.id) {
    return
  }

  // `details` may be missing or empty; fall back to an empty image column.
  const firstDetail = Array.isArray(p.details) ? p.details[0] : undefined
  const image = firstDetail ? firstDetail.image : ''
  const t = [p.id, p.title, p.createTime, image]
  fs.appendFileSync('sopu.txt', `${t.join(',')}\n`)
}
// Crawl the auto-increment id range one record at a time.
const FIRST_ID = 101042
const END_ID = 199999 // exclusive upper bound
for (let id = FIRST_ID; id < END_ID; id += 1) {
  getDetil(id)
}
id爬到20万左右就没有了,总共爬了9万条数据
分析数据
经过分析发现,每张乐谱都有竞品小程序二维码,这样的肯定不行
这样就不能搞一个脚本在固定位置贴二维码了!!!
opencvjs识别二维码
不说了,上opencv识别二维码。先整个html测试下,具体的opencvjs识别原理我计划稍后单独写一篇文章解释
引入opencvjs
<script async src="opencv.js" onload="onOpenCvReady();" type="text/javascript"></script>
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Hello OpenCV.js</title>
<script async src="opencv.js" onload="onOpenCvReady();" type="text/javascript"></script>
</head>
<body>
<h2>Hello OpenCV.js</h2>
<p id="status">OpenCV.js is loading...</p>
<div>
<div class="inputoutput">
<img id="imageSrc" alt="No Image" />
<div class="caption">imageSrc <input type="file" id="fileInput" name="file" /></div>
</div>
<div class="inputoutput">
<canvas id="canvasOutput"></canvas>
<div class="caption">canvasOutput</div>
</div>
<div class="inputoutput2">
<canvas id="canvasOutput2"></canvas>
<div class="caption">canvasOutput2</div>
</div>
</div>
<script type="text/javascript">
// The <img> that displays the chosen file, and the file picker that feeds it.
const imgElement = document.getElementById('imageSrc');
const inputElement = document.getElementById('fileInput');
// Object URL currently assigned to the image; revoked when it is replaced.
let currentObjectUrl = null;
inputElement.addEventListener('change', (e) => {
  const file = e.target.files && e.target.files[0];
  // Cancelling the file dialog fires `change` with an empty file list.
  if (!file) {
    return;
  }
  // Release the previous blob URL so repeated uploads do not accumulate.
  if (currentObjectUrl) {
    URL.revokeObjectURL(currentObjectUrl);
  }
  currentObjectUrl = URL.createObjectURL(file);
  imgElement.src = currentObjectUrl;
}, false);
// Runs each time a newly chosen image finishes loading.
// Pipeline: resize to 800x1000 -> grayscale -> blur -> Otsu threshold ->
// contour tree -> keep the outer contour of every pair of consecutive
// contours that both have a child (candidate QR finder patterns).
// All contours are drawn on 'canvasOutput'; the kept ones on 'canvasOutput2'.
imgElement.onload = function (e) {
  // OpenCV.js objects must be released explicitly with delete(); each one
  // allocated below is registered here and freed in `finally`, so nothing
  // leaks even if a cv.* call throws.
  const owned = [];
  const track = (obj) => {
    owned.push(obj);
    return obj;
  };

  try {
    const src = track(cv.imread(imgElement));
    const srcClone = track(cv.imread(imgElement));
    const dsize = new cv.Size(800, 1000);
    // You can try more different parameters
    cv.resize(src, src, dsize);
    cv.resize(srcClone, srcClone, dsize);
    const dst = track(cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3));
    cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
    const ksize = new cv.Size(1, 1);
    // You can try more different parameters
    cv.blur(src, src, ksize);
    cv.threshold(src, src, 0, 255, cv.THRESH_OTSU);
    const contours = track(new cv.MatVector());
    const contours2 = track(new cv.MatVector());
    const hierarchy = track(new cv.Mat());
    // You can try more different parameters
    cv.findContours(src, contours, hierarchy, cv.RETR_TREE, cv.CHAIN_APPROX_NONE);

    // Contour filtering.
    // ic counts consecutive contours that have a child; parentIdx remembers
    // the first contour of the current run.
    let ic = 0;
    let parentIdx = -1;
    for (let i = 0; i < contours.size(); i++) {
      // Hierarchy entry layout: [next, previous, first_child, parent].
      const firstChild = hierarchy.intPtr(0, i)[2];
      if (firstChild === -1) {
        // A contour without children breaks the run.
        ic = 0;
        parentIdx = -1;
        continue;
      }
      if (ic === 0) {
        parentIdx = i;
      }
      ic++;
      // Two nested levels in a row: record the outer contour as a finder pattern.
      if (ic === 2) {
        // get() returns a new Mat handle that has to be released as well.
        const finder = contours.get(parentIdx);
        contours2.push_back(finder);
        finder.delete();
        ic = 0;
        parentIdx = -1;
      }
    }
    console.log(contours2.size());

    // Outline every detected contour on the resized copy of the input.
    const outlineColor = new cv.Scalar(255, 0, 0, 255);
    for (let i = 0; i < contours.size(); i++) {
      cv.drawContours(srcClone, contours, i, outlineColor, 1);
    }
    cv.imshow('canvasOutput', srcClone);

    // Draw each finder-pattern candidate in a random color on a black canvas.
    for (let i = 0; i < contours2.size(); i++) {
      const color = new cv.Scalar(
        Math.round(Math.random() * 255),
        Math.round(Math.random() * 255),
        Math.round(Math.random() * 255)
      );
      cv.drawContours(dst, contours2, i, color, 1);
    }
    cv.imshow('canvasOutput2', dst);
  } finally {
    owned.forEach((obj) => obj.delete());
  }
};
/**
 * Callback referenced by the opencv.js <script> tag's onload attribute:
 * replaces the text of the #status element once the library has loaded.
 */
function onOpenCvReady() {
  const statusLine = document.getElementById('status');
  statusLine.innerHTML = 'OpenCV.js is ready.';
}
</script>
</body>
</html>
启动
选择一个乐谱上传
看到三个“回”字形定位点,说明识别成功。
接下来就是下载图片、定位二维码位置、用canvas贴上我们自己的二维码,然后入库~
好啦~,产品经理的需求就这么愉快的搞定了!
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。