import csv
import json
from time import sleep, time
from webbrowser import get

from bs4 import BeautifulSoup as Bs4
from selenium import webdriver
from selenium.webdriver import ActionChains
# Output file for scraped products. It is opened in append mode so repeated
# runs accumulate rows; newline='' leaves line-ending handling to the csv
# module.
f = open('product.csv', mode='a', encoding='utf-8', newline='')
fieldnames = ['store_name', 'store_info', 'price', 'cost', 'image',
              'recommend_image', 'slider_image']
csv_writer = csv.DictWriter(f, fieldnames=fieldnames)
# Because the file is appended to, only emit the header when the file is
# still empty; writing it unconditionally would insert a duplicate header
# row in the middle of the data on every run after the first.
if f.tell() == 0:
    csv_writer.writeheader()

# Start Chrome with the 'enable-automation' and 'enable-logging' default
# switches excluded.
options = webdriver.ChromeOptions()
options.add_experimental_option(
    "excludeSwitches", ['enable-automation', 'enable-logging'])
browser = webdriver.Chrome(
    executable_path='C:/Users/bruce/Desktop/ccc/img/chromedriver_win32/chromedriver.exe',
    options=options)
def write_csv(url='http://www.alaibao.cn/ProductDetail-83.html'):
    """Scrape one product page and append it to the CSV as a single row.

    Loads ``url`` in the module-level ``browser``, reads the product title,
    the parameter text, the price and the thumbnail image URLs, and writes
    them through the module-level ``csv_writer``.

    The ``slider_image`` column is written as JSON array text with every
    forward slash escaped as ``\\/`` (e.g. ``["http:\\/\\/host\\/a.jpg"]``).
    ``image`` and ``recommend_image`` both hold the first thumbnail URL
    unescaped, or an empty string when the page has no thumbnails.

    The browser is shut down before the function returns (also on failure),
    so it can be called only once per browser session.

    Args:
        url: Address of the product detail page to scrape.

    Raises:
        ValueError: If the ``#hdprice`` value is not a valid number.
    """
    try:
        browser.get(url)
        browser.implicitly_wait(10)

        store_name = browser.find_element_by_css_selector('#ProductTitle1').text
        store_info = browser.find_element_by_css_selector(
            '#ProductMainParam').text
        price = browser.find_element_by_css_selector(
            '#hdprice').get_attribute('value')
        # Cost is recorded as half of the listed price.
        cost = float(price) * 0.5

        # Every thumbnail entry except the video thumbnail.
        thumbnails = browser.find_elements_by_css_selector(
            '#J_UlThumb li:not(#J_VideoThumb)')
        image_urls = []
        for thumbnail in thumbnails:
            # Read the attribute once: each call is a round-trip to the driver.
            image_url = thumbnail.find_element_by_css_selector(
                '.tb-pic img').get_attribute('bimg')
            if not image_url:
                # No 'bimg' attribute on this thumbnail; nothing to record.
                continue
            print(image_url)
            image_urls.append(image_url)

        image = image_urls[0] if image_urls else ''

        # Serialise the list ourselves. Handing a Python list to the csv
        # writer stores str(list), i.e. the repr of each string, which shows
        # every backslash doubled ("http:\\/\\/"). Escape the slashes only
        # after json.dumps so that the backslashes are not escaped again;
        # the csv module's quoting then round-trips the text unchanged.
        slider_image = json.dumps(
            image_urls, ensure_ascii=False).replace('/', r'\/')
        print(slider_image)

        item = {
            'store_name': store_name,
            'store_info': store_info,
            'price': price,
            'cost': cost,
            'image': image,
            'recommend_image': image,
            'slider_image': slider_image,
        }
        csv_writer.writerow(item)
    finally:
        # Always release the browser and its driver process.
        browser.quit()
# Run the scrape, then close product.csv so the row is flushed to disk and
# the file handle is released even if scraping fails part-way.
try:
    write_csv()
finally:
    f.close()
这是完整代码。生成的路径都是 http:\\/\\/,而我想要的是 http:\/\/。
.replace('/', '\/') 的结果没问题,但写法不规范:反斜杠在字符串中用于转义特殊字符,反斜杠本身也应转义,所以 '\/' 应改为 '\\/',或者使用原始字符串 r'\/'。
截图的内容是 Python 列表转换成字符串的结果,列表里的字符串是以字面量(repr)形式显示的,因此每个反斜杠都会显示成两个。可以看出,字符串 a 本身没问题,所以只要正确写入和读取即可。参考代码:
test.csv
控制台