企查查——未完待续
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
def get_company_info(driver, corp: str, wait_seconds: float = 5):
    """Search the current page for *corp* and return the first hit's URL.

    Types the company name into the ``searchKey`` input, clicks the search
    button, pauses, and then reads the ``href`` of the first link whose
    ``class`` attribute is exactly ``"title copy-value"``.

    Args:
        driver: Selenium WebDriver instance. Presumably already logged in and
            showing a page that contains the search box -- verify against
            the caller.
        corp: Company name to search for.
        wait_seconds: Seconds to pause after submitting the search before
            reading the result list. Defaults to the original fixed 5 s.

    Returns:
        The ``href`` of the first matching result link, or ``None`` when any
        browser step fails (the error is printed, not raised).
    """
    try:
        # Locate the search box once and reuse it for clear + type.
        search_box = driver.find_element(By.ID, 'searchKey')
        search_box.clear()
        search_box.send_keys(corp)
        search_btn = driver.find_element(
            By.XPATH, '/html/body/div/div[1]/div/div[1]/div/div/div/div/span')
        # Click through JavaScript instead of a native click.
        # NOTE(review): presumably because the native click gets intercepted
        # by an overlay -- confirm.
        driver.execute_script("arguments[0].click();", search_btn)
        time.sleep(wait_seconds)
        href = driver.find_element(
            By.XPATH, '//a[@class="title copy-value"]').get_attribute("href")
    except Exception as e:
        # Deliberate best-effort boundary: a failed lookup for one company
        # must not stop the caller from processing the others.
        print(f'未找到 {corp} 的信息:', e)
        return None
    print('====================', corp, '==================')
    print(href)
    return href
def get_basic_info(driver, href: str, wait_seconds: float = 20) -> list:
    """Open a company detail page and scrape its basic-information fields.

    For every ``<tr>`` of the table under ``#cominfo`` the second
    space-separated token of the row text is collected; the company's brief
    introduction (text after the first ``:``, spaces removed) is appended
    last.

    Args:
        driver: Selenium WebDriver instance.
        href: URL of the company detail page.
        wait_seconds: Seconds to pause after loading the page before
            scraping. Defaults to the original fixed 20 s.

    Returns:
        A list of strings: one entry per table row followed by the brief
        introduction. Sections that fail to scrape are reported on stdout
        and omitted, so the list may be shorter or empty.
    """
    basic_info = []
    try:
        driver.get(href)
        time.sleep(wait_seconds)
    except Exception as e:
        print("获取基本信息时出现异常:", e)
        return basic_info

    # The table and the intro are scraped independently so that a failure in
    # one does not discard the other.
    try:
        rows = driver.find_elements(
            By.XPATH, '//*[@id="cominfo"]/div[2]/table/tr')
        for row in rows:
            tokens = row.text.split(' ')
            # A row without a second token gets an empty placeholder instead
            # of raising IndexError, keeping one entry per table row.
            basic_info.append(tokens[1] if len(tokens) > 1 else '')
    except Exception as e:
        print("获取基本信息时出现异常:", e)

    try:
        intro_text = driver.find_element(
            By.XPATH,
            '/html/body/div[1]/div[2]/div[2]/div/div[1]/div[2]/div[2]/div[3]/div[4]/span',
        ).text
        # maxsplit=1 keeps everything after the label, even when the intro
        # itself contains a colon.
        # NOTE(review): splits on the ASCII ':' only; if the page uses a
        # full-width colon this raises and the intro is skipped -- confirm.
        basic_info.append(intro_text.split(':', 1)[1].replace(' ', ''))
    except Exception as e:
        print("获取基本信息时出现异常:", e)
    return basic_info
def main():
    """Scrape basic information for a fixed list of companies and print it.

    Opens Chrome on the login page, blocks until the operator confirms on
    stdin that the manual login is done, then looks up each company and
    collects the result of ``get_basic_info`` for every company whose detail
    URL was found. The browser is closed even if an error or Ctrl-C
    interrupts the run.
    """
    corps = ["广州高新区投资集团有限公司", "广州开发区控股集团有限公司"]
    data_f = []
    # Launch the browser
    driver = webdriver.Chrome()
    try:
        driver.get('https://www.qichacha.com/user_login')
        # Wait for the manual login to finish
        input("请在登录完成后按 Enter 继续...")
        for corp in corps:
            href = get_company_info(driver, corp)
            if href:
                # Only companies with a detail URL contribute a record.
                data_f.append(get_basic_info(driver, href))
    finally:
        # Always release the browser process, including on failure.
        driver.quit()
    print(data_f)
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
版权声明:
作者:夜阑
链接:http://yelan.xyz/index.php/2024/04/30/%e4%bc%81%e6%9f%a5%e6%9f%a5-%e6%9c%aa%e5%ae%8c%e5%be%85%e7%bb%ad/
来源:夜阑的小站
文章版权归作者所有,未经允许请勿转载。
THE END
二维码
共有 0 条评论