python爬虫实现火山周报表格程序

时间:2020-01-18
本文介绍如何用 Python 爬虫抓取全球火山周报并生成表格程序,主要包括使用实例、应用技巧、基本知识点总结和需要注意的事项,具有一定的参考价值,需要的朋友可以参考一下。
import requests,bs4,re,xlwt
from bs4 import BeautifulSoup
def getHTMLText(url, timeout=30):
    """Fetch ``url`` with HTTP GET and return the response body as text.

    The response encoding is set from ``apparent_encoding`` before the body
    is decoded.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unresponsive server would block the script forever.

    Returns:
        The decoded body, or ``None`` when the request failed (connection
        error, timeout, or an HTTP error status). A "request failed" message
        is printed in that case.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are handled here; programming errors and
        # KeyboardInterrupt must not be swallowed.
        print("request failed")
        return None
    r.encoding = r.apparent_encoding
    return r.text

def _parse_location(html):
    """Extract ``[latitude, longitude, elevation]`` strings from page text."""
    # A coordinate is a number (optional decimal part) followed by "°" and a
    # hemisphere letter, e.g. "6.102°S". The dot is escaped so it only matches
    # a literal decimal point.
    coords = re.findall(r"(\d+(?:\.\d+)?)°([NSEW])", html)
    # Pick by hemisphere letter instead of by position so an unrelated match
    # cannot be mistaken for a coordinate.
    latitude = next((value + hemi for value, hemi in coords if hemi in "NS"), "")
    longitude = next((value + hemi for value, hemi in coords if hemi in "EW"), "")
    # Elevation appears as "elev. <number> m"; allow a sign and a decimal part.
    elev = re.search(r"elev\. (-?\d+(?:\.\d+)?) m", html)
    ele = elev.group(1) + "m" if elev else ""
    return [latitude, longitude, ele]


def getLocation(addr):
    """Download a volcano page and read its position and elevation.

    Args:
        addr: URL of the volcano page.

    Returns:
        A list ``[latitude, longitude, elevation]`` of strings, for example
        ``["0.37S", "91.55W", "1476m"]``. Any field that cannot be found
        (including when the download fails) is returned as ``""``.
    """
    demo = getHTMLText(addr)
    if not demo:
        return ["", "", ""]
    return _parse_location(demo)


def getObjectData(soup):
    """Collect one record per volcano row found in the page's ``<tbody>``.

    For every table row with at least three ``<td>`` cells and a link in the
    first cell, the linked page is fetched through ``getLocation`` to obtain
    the coordinates and elevation.

    Args:
        soup: Parsed document to search (the caller passes a BeautifulSoup
            object).

    Returns:
        A list of ``[name, country, latitude, longitude, elevation, status]``
        rows. Empty when the document has no ``<tbody>``.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    tbody = soup.find('tbody')
    if tbody is None:
        return []

    li = []
    for tr in tbody.children:
        # Children also include whitespace text nodes; only tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        # Separator/header rows have fewer cells than a data row.
        if len(tds) < 3:
            continue
        link = tds[0].a
        if link is None or not link.get('href'):
            continue
        # urljoin copes with relative, root-relative and absolute hrefs.
        addr = urljoin("http://volcano.si.edu/", link['href'])
        ret = getLocation(addr)
        li.append([tds[0].string, tds[1].string, ret[0], ret[1], ret[2], tds[2].string])
    return li

def printUnivList(result, num):
    """Print the first ``num`` rows as a table and prepend the header row.

    Args:
        result: List of rows; each row holds the six column values in the
            order name, country, latitude, longitude, elevation, status.
            The list is modified in place: the header row is inserted at
            index 0 after printing.
        num: Number of data rows to print. Values larger than
            ``len(result)`` print every row.

    Returns:
        The same ``result`` list, now starting with the header row.
    """
    header = ["火山名", "所属国家", "纬度", "经度", "海拔", "新增/持续"]
    widths = (30, 40, 20, 20, 20, 20)

    def format_row(row):
        # Missing cells (None) are shown as blanks instead of crashing.
        cells = ("" if cell is None else str(cell) for cell in row)
        return "\t".join(cell.ljust(width) for cell, width in zip(cells, widths))

    # The header goes through the same formatter as the data rows so the
    # columns line up.
    print(format_row(header))
    for row in result[:max(num, 0)]:
        print(format_row(row))

    # list.insert returns None, so insert first and return the list itself.
    result.insert(0, header)
    return result

def data_write(file_path, datas):
    """Write ``datas`` into a new workbook and save it to ``file_path``.

    Args:
        file_path: Destination path handed to ``Workbook.save``.
        datas: Rows to write; the value at position ``j`` of row ``i`` goes
            into cell (row ``i``, column ``j``) of the sheet.
    """
    workbook = xlwt.Workbook()
    # Create the single sheet the rows are written to.
    sheet = workbook.add_sheet(u'sheet1', cell_overwrite_ok=True)

    for row_idx, row in enumerate(datas):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)

    # Persist the workbook to disk.
    workbook.save(file_path)

if __name__ == "__main__":
    # Run the scrape only when executed as a script, so importing this module
    # for its functions does not trigger network or file I/O.
    url = "http://volcano.si.edu/reports_weekly.cfm"
    demo = getHTMLText(url)
    if demo is None:
        # getHTMLText has already printed the failure; without page text
        # there is nothing to parse.
        raise SystemExit(1)
    soup = BeautifulSoup(demo, "html.parser")
    result = getObjectData(soup)
    # printUnivList inserts the header row into `result` in place, which is
    # why `result` itself is written to the workbook below.
    full_result = printUnivList(result, len(result))
    file_path = '/home/guoming/volcano1.xls'
    data_write(file_path, result)

软件运行后屏幕打印如下信息:

火山名                           所属国家                                    纬度                  经度                   海拔                 新增/持续               
Fernandina                        Ecuador                                     0.37S                   91.55W                  1476m                   New                 
Krakatau                          Indonesia                                   6.102S                  105.423E                155m                    New                 
Kuchinoerabujima                  Ryukyu Islands (Japan)                      30.443N                 130.217E                657m                    New                 
Taal                              Luzon (Philippines)                         14.002N                 120.993E                311m                    New                 
Aira                              Kyushu (Japan)                              31.593N                 130.657E                1117m                   Ongoing             
Dukono                            Halmahera (Indonesia)                       1.693N                  127.894E                1229m                   Ongoing             
Ebeko                             Paramushir Island (Russia)                  50.686N                 156.014E                1103m                   Ongoing             
Karangetang                       Siau Island (Indonesia)                     2.781N                  125.407E                1797m                   Ongoing             
Klyuchevskoy                      Central Kamchatka (Russia)                  56.056N                 160.642E                4754m                   Ongoing             
Popocatepetl                      Mexico                                      19.023N                 98.622W                 5393m                   Ongoing             
Sabancaya                         Peru                                        15.787S                 71.857W                 5960m                   Ongoing             
Sangay                            Ecuador                                     2.005S                  78.341W                 5286m                   Ongoing             
Semisopochnoi                     United States                               51.93N                  179.58E                 1221m                   Ongoing             
Sheveluch                         Central Kamchatka (Russia)                  56.653N                 161.36E                 3283m                   Ongoing             
Shishaldin                        Fox Islands (USA)                           54.756N                 163.97W                 2857m                   Ongoing             
Suwanosejima                      Ryukyu Islands (Japan)                      29.638N                 129.714E                796m                    Ongoing             
White Island                      North Island (New Zealand)                  37.52S                  177.18E                 294m                    Ongoing             
程序获取了 http://volcano.si.edu/reports_weekly.cfm 中的全球火山活动信息,制作成表格,并生成 volcano1.xls 文件。

原文地址:https://www.cnblogs.com/iceberg710815/p/12209643.html