import requests
from lxml import etree
import csv
import time

# Requires the third-party packages: pip install requests lxml
# Page that lists the sales ranking to scrape.
url = 'https://www.dongchedi.com/sales'

# Request headers that make the request look like it comes from a desktop browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,*/*;q=0.8'
    ),
    'Accept-Language': (
        'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,'
        'en-US;q=0.3,en;q=0.2'
    ),
}

def _first_text(nodes: list, default: str) -> str:
    """Return the first XPath text result with whitespace stripped, or *default* if nothing matched."""
    return nodes[0].strip() if nodes else default


# Main script: download the sales page, extract one row per listed car and
# save the rows to a CSV file. Request failures and any other error are
# reported on stdout instead of raising.
try:
    # Fetch the page. Without a timeout, requests waits indefinitely on a
    # stalled connection, so bound the wait explicitly (seconds).
    print("正在获取网页内容...")
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()  # turn HTTP 4xx/5xx into an exception
    html = etree.HTML(response.text)

    # Each <li> under this list is expected to hold one car entry.
    car_items = html.xpath('//*[@id="__next"]/div[1]/div[2]/div/div[4]/div/div/ol/li')
    print(f"找到 {len(car_items)} 条车辆信息")

    if not car_items:
        # Nothing matched the XPath: do not truncate an existing CSV and do
        # not report success for an empty result.
        print("未找到任何车辆信息,页面结构可能已变化,未写入文件")
    else:
        # newline='' lets the csv module control line endings itself.
        with open("懂车帝车价.csv", 'w', encoding='utf-8', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["序号", "名称", "车型描述", "价格", "销量"])

            # Rows are numbered from 1 in page order.
            for index, item in enumerate(car_items, 1):
                # Pull the individual fields out of the entry; the fallback
                # placeholders are used when a field is missing.
                name = _first_text(item.xpath('.//div[3]/div[1]/a/text()'), "未知名称")
                description = _first_text(item.xpath('.//div[3]/div[1]/span/text()'), "车型描述")
                price = _first_text(item.xpath('.//div[3]/p/text()'), "价格未知")
                sales = _first_text(item.xpath('.//div[4]/div/p/text()'), "销量")

                # Column order matches the header row written above.
                writer.writerow([index, name, description, price, sales])
                print(f"已写入 {index}: {name} - {price}")

        print("数据已成功保存到懂车帝车价.csv")

except requests.exceptions.RequestException as e:
    # Network problems, timeouts and non-2xx responses end up here.
    print(f"请求出错: {e}")
except Exception as e:
    # Top-level boundary of the script: report anything else (parse errors,
    # file-system errors) rather than crash with a traceback.
    print(f"发生错误: {e}")

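# NOTE: the remaining lines are web-page text captured along with the script; they are not code.
# Logo
#
# 加入社区!打开量化的大门,首批课程上线啦!
#
# 更多推荐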