明明有这个文件为什么还报错？

Question

明明有这个文件为什么还报错？

import io
import random
from pathlib import Path
from time import sleep, perf_counter

import requests
from PIL import Image
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService

from bulk_rename import bulk_rename


def next_image() -> None:
    try:
        sleep(random.random())
        driver.execute_script("document.getElementsByClassName('_afxw')[0].click();")
        sleep(2 + random.random())
    except Exception as e:
        print(f'Exception: {e}')

def get_image_urls() -> list[str]:
    """Get all image urls in the current page"""
    return driver.execute_script(
        "let urls = []; \
        let n = document.getElementsByClassName('x5yr21d xu96u03 x10l6tqk x13vifvy x87ps6o xh8yej3'); \
        urls.push(n[0].getAttribute('src'), n[1].getAttribute('src'), n[2].getAttribute('src')); \
        return urls;"
    )

def download_image(url: str, name: str, download_path: str | Path) -> None:
    """Download image to the folder"""
    try:
        image_content = requests.get(url).content
        image_file = io.BytesIO(image_content)
        image = Image.open(image_file)
        file_path = Path(download_path, name)
        with open(file_path, 'wb') as f:
            image.save(f, 'JPEG')
    except Exception as e:
        print(f"Can't save image {name}, {url = }")
        print(f'Exception: {e}')

# Initialize Selenium
time_start = perf_counter()
options = webdriver.ChromeOptions() 
options.add_argument('--start-maximized')
driver = webdriver.Chrome(
    service=ChromeService(ChromeDriverManager().install()),
    options=options
)

# Make folder 'Downloaded' if not exist
folder_name = 'Downloaded'
download_folder = Path(folder_name)
if not download_folder.exists():
    download_folder.mkdir()

with open('urls.txt', 'r') as f:
    data = f.read()

urls = [url for url in data.splitlines() if url]

# Main program
bulk_rename(folder_name)  # Bulk-rename to prevent overwrite other images
name = len(list(download_folder.glob('*'))) + 1
for url in urls:
    image_urls_in_post = set()
    driver.get(url)
    sleep(random.random()*2 + 5)  # Wait for fully loaded
    n_images = driver.execute_script("return document.getElementsByClassName('_acnb').length;")

    try:
        sleep(random.random()*1 + 2)
        if n_images == 0:  # Handle when the post has 1 image only
            image_urls = get_image_urls()
            download_image(image_urls[0], f'{name}.jpg', download_folder)
            name += 1
        elif n_images <= 3:
            if n_images == 3:  # All images will be loaded if go to the next image
                next_image()

            image_urls = get_image_urls()
            for i in range(n_images):
                download_image(image_urls[i], f'{name+i}.jpg', download_folder)
            name += n_images 
        else:
            # TODO - Fix: sometimes downloaded irrelevent images
            image_urls_in_post = set()
            for i in range(n_images - 1):
                next_image()
                image_urls_in_post.update(get_image_urls())
            
            for url in image_urls_in_post:
                download_image(url, f'{name}.jpg', download_folder)
                name += 1

    except Exception as e:
        print(f'Exception: {e}')
        continue

print(f'Done, took {(perf_counter() - time_start) / 60:4} mins.')
driver.quit()

python

阅读 359

1 个回答

得票最新

Seven

1.2k127

发布于
12 月 17 日广东

✓ 已被采纳

你这个问题，有三种可能
第一种是绝对路径，个人建议可以把项目拷贝到指定盘目录，然后用英文不要用中文
C:\Users\a5735\OneDrive\桌面\python\instagram-downloader\urls.txt这个路径不太友好

with open(r'C:\Users\a5735\OneDrive\桌面\python\instagram-downloader\urls.txt', 'r') as f:

第二种的话是文件权限问题

确认程序有权限读取该文件。

第三种编码问题

with open('urls.txt', 'r', encoding='utf-8') as f:

撰写回答

你尚未登录，登录后可以

和开发者交流问题的细节
关注并接收问题和回答的更新提醒
参与内容的编辑和改进，让解决方法与时俱进

推荐问题

明明有这个文件为什么还报错？

你尚未登录，登录后可以

请问： Python中是否有方式可以像前端的TSLint一样进行代码的自动风格格式检查？

为什么 pypi 的页面上的新版本在通过 pip 获取不到？

请问一下Python 可以进行强类型开发吗？

python中最好的单元测试是使用的什么呢？

duckdb 的 python sdk 读取 csv 的时候，如何指定列的字段类型？

Python类属性与实例属性自增行为差异？

可以打印全局命名空间：`globals()` 如何打印内置命名空间呢？