Python实现的小红书无水印图片解析下载

摘要

在小红书中长按保存的图片是带有水印的，如何获得无水印的图片？当然是通过网页解析！

代码

from flask import Flask, request, jsonify
import requests
import re
 
# Flask application instance; the route below is registered on it.
app = Flask(__name__)
 
@app.route('/')
def index():
    """Resolve the note link passed as the ``url`` query parameter.

    Returns a JSON response: the result of ``get_image_urls`` when ``url`` is
    present and non-empty, otherwise an object with an ``error`` message.
    """
    note_link = request.args.get('url')
    # Guard clause: a missing or empty parameter short-circuits to the error.
    if not note_link:
        return jsonify({'error': 'Missing URL parameter'})
    return jsonify(get_image_urls(note_link))
 
def get_html(url, timeout=10):
    """Fetch *url* with browser-like request headers and return the body text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would let one
            stalled upstream connection hang the calling request handler.

    Returns:
        The response body decoded to ``str`` (``response.text``).

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests.get``
            on connection failures or when *timeout* elapses.
    """
    headers = {
        "authority": "www.xiaohongshu.com",
        "cache-control": "max-age=0",
        "sec-ch-ua": '"Chromium";v="21", " Not;A Brand";v="99"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "sec-fetch-site": "same-origin",
        "sec-fetch-mode": "navigate",
        "sec-fetch-user": "?1",
        "sec-fetch-dest": "document",
        "accept-language": "zh-CN,zh;q=0.9",
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
 
def json_content(html):
    """Extract the text assigned to ``window.__INITIAL_STATE__`` in *html*.

    Returns the text between ``<script>window.__INITIAL_STATE__=`` and the
    first following ``</script>`` (matched case-insensitively), or ``None``
    when the page contains no such script tag.
    """
    found = re.search(
        r"<script>window\.__INITIAL_STATE__=(.*?)</script>", html, re.I
    )
    return found.group(1) if found else None
 
def get_image_urls(url):
    """Collect the image URLs embedded in the page at *url*.

    The page is downloaded with ``get_html``, its ``__INITIAL_STATE__`` script
    text is extracted with ``json_content``, escaped slashes (``\\u002F``) are
    turned back into ``/``, and every value that follows a ``url":"`` key,
    starts with ``http://`` and ends in ``wm_1`` is collected.

    Returns:
        ``{'image_urls': [...]}``; the list is empty when the page carries no
        ``__INITIAL_STATE__`` script or no URL matches.
    """
    html = get_html(url)
    state = json_content(html)
    if state is None:
        # json_content() found no state script (e.g. an error or login page).
        # Calling .replace() on None would raise AttributeError, so report
        # "no images" in the usual result shape instead.
        return {'image_urls': []}

    state = state.replace(r"\u002F", r"/")
    all_urls = re.findall(r'url":"(http://[^\":\{\}\[\]]*?wm_1)"', state)

    return {'image_urls': all_urls}
 
# Start Flask's built-in server only when this file is executed as a script.
# NOTE(review): debug=True enables Flask's debug mode; that is acceptable for
# a local demo but should be turned off before the service is exposed publicly.
if __name__ == "__main__":
    app.run(debug=True)

使用 Python 的 Flask 框架实现一个简单的 HTTP 服务器，然后在请求地址后加上 ?url=小红书链接 参数进行访问，就可以解析出小红书无水印的图片地址。

演示

http://127.0.0.1:5000/?url=http://xhslink.com/wZh4pw

PHP版

<?php
 
    /**
     * Fetch $url with a fixed set of browser-like request headers.
     *
     * @param string $url Address to request.
     * @return string|false Whatever file_get_contents() returns for the request.
     */
    function get_html($url) {
        $request_headers = array(
            "authority" => "www.xiaohongshu.com",
            "cache-control" => "max-age=0",
            "sec-ch-ua" => '"Chromium";v="21", " Not;A Brand";v="99"',
            "sec-ch-ua-mobile" => "?0",
            "sec-ch-ua-platform" => '"Windows"',
            "upgrade-insecure-requests" => "1",
            "user-agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
            "accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "sec-fetch-site" => "same-origin",
            "sec-fetch-mode" => "navigate",
            "sec-fetch-user" => "?1",
            "sec-fetch-dest" => "document",
            "accept-language" => "zh-CN,zh;q=0.9",
        );

        // Flatten the name => value map into "name:value" lines, in order.
        $header_lines = array();
        foreach ($request_headers as $name => $value) {
            $header_lines[] = $name . ':' . $value;
        }

        // The stream context carries the header lines joined with CRLF.
        $context = stream_context_create(array(
            'http' => array(
                'header' => implode("\r\n", $header_lines),
            ),
        ));

        return file_get_contents($url, false, $context);
    }
     
    /**
     * Extract the text assigned to window.__INITIAL_STATE__ in a page.
     *
     * @param string $html Page markup to search.
     * @return string|null Text between "<script>window.__INITIAL_STATE__=" and
     *                     the first following "</script>" (case-insensitive),
     *                     or null when no such script tag is present.
     */
    function json_content($html) {
        $pattern = '/<script>window\.__INITIAL_STATE__=(.*?)<\/script>/i';
        preg_match($pattern, $html, $found);

        // $found is an empty array (falsy) when nothing matched.
        return $found ? $found[1] : null;
    }
     
    /**
     * Collect the image URLs embedded in the page at $url.
     *
     * The page's __INITIAL_STATE__ script text is extracted, escaped slashes
     * (\u002F) are turned back into "/", and every "url" value that starts
     * with http:// and ends in wm_1 is collected.
     *
     * @param string $url Page address to fetch and scan.
     * @return array array('image_urls' => string[]); the list is empty when the
     *               download fails, the page has no state script, or nothing
     *               matches.
     */
    function get_image_urls($url) {
        $html = get_html($url);

        // file_get_contents() yields false on failure; skip parsing in that case.
        $js = ($html === false) ? null : json_content($html);
        if ($js === null) {
            // Passing null on to str_replace() is deprecated since PHP 8.1 and
            // the resulting notice could end up in the JSON output, so return
            // the (identical) empty result explicitly.
            return array('image_urls' => array());
        }

        $js = str_replace("\\u002F", "/", $js);

        preg_match_all('/"url":"(http:\/\/[^":\{\}\[\]]*?wm_1)"/', $js, $all_urls);

        return array('image_urls' => $all_urls[1]);
    }
     
    // Entry point: answer GET requests with a JSON document.
    if ($_SERVER['REQUEST_METHOD'] === 'GET') {
        // isset() avoids the undefined-key warning when ?url= is absent; that
        // warning would be emitted before header() and could break the response.
        $url = isset($_GET['url']) ? $_GET['url'] : '';

        // Both branches reply with JSON, so send the header once, before any work.
        header('Content-Type: application/json');

        if ($url) {
            echo json_encode(get_image_urls($url));
        } else {
            echo json_encode(array('error' => 'Missing URL parameter'));
        }
    }
     
?>

作者

TANKING

Python实现的小红书无水印图片解析下载

摘要

代码

演示

PHP版

作者

TANKING

引用和评论

2025年最新反编译微信小程序的教程及工具

python与nodejs哪个性能高

Anaconda安装教程以及Anaconda和pip配置国内镜像

如何减少跨团队交付摩擦？——基于 DevOps 与敏捷的最佳实践

Python 描述符

科学计算编程涉及到的技术栈简介

使用 chardet 判断文件编码需要注意的坑——过大的文件会导致高耗时