Python selenium 爬取cnvd(国家信息安全漏洞共享平台)剩余部分

时间:2021-08-10
本文向大家介绍如何使用 Python selenium 爬取 cnvd(国家信息安全漏洞共享平台)的剩余部分,主要包括使用实例、应用技巧、基本知识点总结和需要注意的事项,具有一定的参考价值,需要的朋友可以参考一下。
# coding=utf-8
# @author :今夕
# @Time :2021.08.10 09:22
# @file :main2.py
# @software :PyCharm

import time
from selenium import webdriver
from bs4 import BeautifulSoup
import re
import pymysql
import random
def main():
    """Visit every CNVD detail link stored in MySQL and save the scraped fields.

    Reads the ``link`` column of the ``cnvd`` table, opens each link in a
    Chrome session driven by Selenium, parses the page with ``Parse`` and
    writes the extracted fields back with ``update``.

    The database connection used to load the links is closed before crawling
    starts, and the browser is always shut down, even when a page fails.
    """
    # Page sources shorter than this many characters trigger a long pause and
    # one more refresh before parsing.
    # NOTE(review): presumably this is the size of a throttled/placeholder
    # page rather than a real detail page - confirm against the site.
    min_page_length = 16975

    # Load all links up front so no database connection is held while crawling.
    db = pymysql.connect(user='root', password='123456', host='localhost', database='cnvd')
    try:
        cursor = db.cursor()
        try:
            cursor.execute("select link from cnvd")
            links = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # The original called db.cursor() here, which left the connection open.
        db.close()

    driver = webdriver.Chrome()
    try:
        # Injected into every new document: makes navigator.webdriver read as
        # undefined.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                })
            """
        })

        for i, row in enumerate(links):
            link = row[0]
            print("第%s条" % i)
            driver.get(link)
            time.sleep(3)
            driver.refresh()
            html = driver.page_source
            print(len(html))
            if len(html) < min_page_length:
                # Back off, then reload once and use whatever comes back.
                print("休眠120秒")
                time.sleep(120)
                print("休眠结束")
                driver.refresh()
                html = driver.page_source
            dat = Parse(html)
            update(dat, link)
    finally:
        # Always release the browser process, including on errors.
        driver.quit()
def Parse(html):
    """Return the cleaned text of every ``<tr>`` element in *html*.

    Each row's text has newlines, tabs, spaces and single quotes removed.
    The result is a list of strings, one per ``<tr>``, in document order.
    """
    unwanted = ("\n", "\t", " ", "'")

    def squash(text):
        # Remove each unwanted character in turn.
        for token in unwanted:
            text = text.replace(token, "")
        return text

    # Use the "html.parser" backend for BeautifulSoup.
    document = BeautifulSoup(html, "html.parser")
    return [squash(row.text) for row in document.find_all('tr')]
def update(dat,url):
    """Store the fields scraped from one detail page in the ``cnvd`` table.

    Args:
        dat: List of row strings for the page; each used row is expected to
            contain its Chinese label followed by the value.
        url: The page link; selects the row to update (``where link=...``).

    Raises:
        IndexError: If an expected row is missing from ``dat`` or does not
            contain its label.

    The statement is parameterized so scraped text is never spliced into the
    SQL, and the connection is closed on every path. The function sleeps for
    seven seconds after a successful update.
    """
    # (index into dat, label preceding the value, target column)
    fields = (
        (3, "影响产品", "Affectproduct"),                # affected products
        (4, "CVEID", "CVEID"),                           # CVE ID
        (5, "漏洞描述", "VulnerabilityDescribes"),       # description
        (6, "漏洞类型", "HoleType"),                     # vulnerability type
        (7, "参考链接", "referenceLinking"),             # reference link
        (8, "漏洞解决方案", "solution"),                 # solution
        (9, "厂商补丁", "ManufacturersPatch"),           # vendor patch
        (10, "验证信息", "VerificationInformation"),     # verification info
        (14, "漏洞附件", "Vulnerabilityaccessories"),    # attachments
    )

    # Extract before connecting so a malformed page cannot leak a connection.
    values = [dat[index].split(label)[1] for index, label, _ in fields]

    assignments = ",".join(column + "=%s" for _, _, column in fields)
    sql = "update cnvd set " + assignments + " where link=%s"
    params = values + [url]

    db = pymysql.connect(user='root', password='123456', host='localhost', database='cnvd')
    try:
        cursor = db.cursor()
        try:
            # Show the statement exactly as it will be sent, with escaping.
            print(cursor.mogrify(sql, params))
            cursor.execute(sql, params)
            db.commit()
        finally:
            cursor.close()
    finally:
        # The original called db.cursor() here, leaking one connection per call.
        db.close()

    # Pause after each successful update.
    time.sleep(7)
# Script entry point: run the crawl, then report completion.
if __name__ == "__main__":
    main()
    print("爬取完成")

原文地址:https://www.cnblogs.com/jinxi7021/p/15122988.html