企查查——未完待续

import time
from selenium import webdriver
from selenium.webdriver.common.by import By


def get_company_info(driver, corp):
    """Search for a company by name and return the link of the matching result.

    Args:
        driver: Selenium WebDriver currently showing a page that contains the
            ``searchKey`` input.
        corp: Company name typed into the search box.

    Returns:
        The ``href`` attribute of the first element matching
        ``//a[@class="title copy-value"]`` after the search is submitted, or
        ``None`` when any step raises (the error is printed, not re-raised).
    """
    try:
        box_locator = (By.ID, 'searchKey')
        # The input is looked up once per action: once to clear it, once to type.
        driver.find_element(*box_locator).clear()
        driver.find_element(*box_locator).send_keys(corp)

        search_button = driver.find_element(
            By.XPATH,
            '/html/body/div/div[1]/div/div[1]/div/div/div/div/span',
        )
        # The click is dispatched through JavaScript instead of WebElement.click().
        driver.execute_script("arguments[0].click();", search_button)

        # Fixed pause before reading the page — presumably to let the search
        # results load; confirm against the live site.
        time.sleep(5)

        result_link = driver.find_element(By.XPATH, '//a[@class="title copy-value"]')
        link = result_link.get_attribute("href")
        print('====================', corp, '==================')
        print(link)
    except Exception as err:
        # Best effort: report the failure and let the caller skip this company.
        print(f'未找到 {corp} 的信息:', err)
        return None
    return link


def get_basic_info(driver, href, wait_seconds=20):
    """Open a company detail page and scrape its basic-information fields.

    Each stage (page load, every table row, the brief introduction) is guarded
    separately, so a single malformed row or a missing introduction no longer
    discards the data that could be read.

    Args:
        driver: Selenium WebDriver used to open and read the page.
        href: URL of the company detail page.
        wait_seconds: Fixed delay, in seconds, between navigating and reading
            the page. Defaults to 20, the value that used to be hard-coded.

    Returns:
        A list of strings with one entry per row of the ``#cominfo`` table:
        the second space-separated token of the row text, or ``''`` when the
        row has no second token or cannot be read. The brief introduction
        (text after the first colon, spaces removed) is appended last when it
        can be read. The list is empty when the page or the table lookup
        fails. Errors are printed, never raised.
    """
    basic_info = []

    try:
        driver.get(href)
        time.sleep(wait_seconds)
        rows = driver.find_elements(By.XPATH, '//*[@id="cominfo"]/div[2]/table/tr')
    except Exception as e:
        print("获取基本信息时出现异常:", e)
        return basic_info

    for row in rows:
        try:
            tokens = row.text.split(' ')
        except Exception as e:
            # Reading .text can fail on its own; keep going with the next row.
            print("获取基本信息时出现异常:", e)
            tokens = []
        # Always append exactly one entry per row so positions stay aligned
        # with the table even when a row has no second token.
        basic_info.append(tokens[1] if len(tokens) > 1 else '')

    try:
        intro_text = driver.find_element(
            By.XPATH,
            '/html/body/div[1]/div[2]/div[2]/div/div[1]/div[2]/div[2]/div[3]/div[4]/span',
        ).text
        # Prefer the ASCII colon; fall back to the full-width colon in case
        # the page labels the field that way (not verifiable from this file).
        separator = ':' if ':' in intro_text else ':'
        # maxsplit=1 keeps the whole introduction even if it contains colons.
        brief_intro = intro_text.split(separator, 1)[1].replace(' ', '')
        basic_info.append(brief_intro)
    except Exception as e:
        print("获取基本信息时出现异常:", e)

    return basic_info


def main():
    """Scrape basic information for a fixed list of companies and print it.

    Starts Chrome, opens the login page, waits for the user to log in by hand
    and press Enter, then looks up each company and collects its basic info.
    The browser is always closed, even if a step raises or the user
    interrupts the run.
    """
    corps = ["广州高新区投资集团有限公司", "广州开发区控股集团有限公司"]
    data_f = []

    # Launch the browser.
    driver = webdriver.Chrome()
    try:
        driver.get('https://www.qichacha.com/user_login')

        # Block until the user has finished logging in manually.
        input("请在登录完成后按 Enter 继续...")

        for corp in corps:
            href = get_company_info(driver, corp)
            # Companies whose search produced no link are skipped.
            if href:
                basic_info = get_basic_info(driver, href)
                data_f.append(basic_info)
    finally:
        # Without this, any exception above would leave the browser running.
        driver.quit()

    print(data_f)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

 

版权声明:
作者:夜阑
链接:http://yelan.xyz/index.php/2024/04/30/%e4%bc%81%e6%9f%a5%e6%9f%a5-%e6%9c%aa%e5%ae%8c%e5%be%85%e7%bb%ad/
来源:夜阑的小站
文章版权归作者所有,未经允许请勿转载。

THE END
分享
二维码
< <上一篇
下一篇>>