• 微信公众号:美女很有趣。 工作之余,放松一下,关注即送10G+美女照片!

爬虫案例 提取股票信息

开发技术 · 5小时前 · 2次浏览

Ref: https://blog.csdn.net/weixin_50437588/article/details/119481864

import requests
from bs4 import BeautifulSoup
import re

def getHtml(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl indefinitely.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    # The user-agent is assembled from adjacent literals so the long value
    # can span two source lines without breaking the string.
    headers = {
        'user-agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
        ),
    }
    res = requests.get(url, headers=headers, timeout=timeout)
    res.raise_for_status()
    # Use the encoding detected from the body rather than the one declared
    # in the HTTP headers.
    res.encoding = res.apparent_encoding
    return res.text

def getStock(lst, html):
    """Extract stock codes from the stock-listing page.

    The codes are taken from the ``href`` of every ``<a>`` element that is a
    direct child of ``<section class="stockTable">``: the last non-empty path
    segment of the link is the code.

    Args:
        lst: Unused; kept so existing callers keep working.
        html: Markup of the listing page.

    Returns:
        A list of stock-code strings. The list is empty (and an error message
        is printed) when the stock table cannot be found, so callers can
        always iterate the result.
    """
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('section', class_='stockTable')
    if table is None:
        print('获取股票代码错误')
        return []

    sh_stock = []
    # recursive=False restricts the search to direct children of the table;
    # href=True skips anchors that carry no link at all.
    for link in table.find_all('a', href=True, recursive=False):
        segments = [part for part in link['href'].split('/') if part]
        if segments:
            sh_stock.append(segments[-1])
    return sh_stock

def save_stock(lst, path_file):
    """Fetch the detail page of every stock code and append it to a file.

    For each code the page ``https://hq.gucheng.com/<code>/`` is downloaded,
    the stock name is read from the first ``<h1>`` and the ``<dt>``/``<dd>``
    pairs inside ``<div class="stock_top clearfix">`` are collected into a
    dict. Each dict is written as one line (its ``str()`` form) to
    *path_file*.

    Args:
        lst: Stock codes to process. ``None`` or an empty sequence is a no-op.
        path_file: Path of the output text file, opened in append mode.

    Returns:
        None.

    Raises:
        OSError: If *path_file* cannot be opened or written.
    """
    if not lst:
        return

    host = 'https://hq.gucheng.com/'
    total = len(lst)
    count = 0
    failed = 0

    # Open the output once instead of once per record.
    with open(path_file, 'a', encoding='utf-8') as f:
        for each in lst:
            try:
                soup = BeautifulSoup(getHtml(host + each + '/'), 'html.parser')
                stockInfo = soup.find('div', class_='stock_top clearfix')
                info_stock = {
                    '股票名称': soup.find('h1').string.strip(),
                    '股票代码': each,
                }
                # Pair labels with values; an unpaired trailing entry on
                # either side is ignored.
                for key_tag, value_tag in zip(stockInfo.find_all('dt'),
                                              stockInfo.find_all('dd')):
                    info_stock[key_tag.string] = value_tag.string
            except Exception:
                # Best effort: one unreachable or oddly structured page must
                # not abort the whole crawl. Failures are tallied and
                # reported once the loop is done.
                failed += 1
            else:
                f.write(str(info_stock) + '\n')
                count += 1
            # '\r' returns to the start of the line so the progress figure
            # is overwritten in place; flush because no newline is emitted.
            print('\r当前完成度: {:.2f} %'.format(count * 100 / total),
                  end=" ", flush=True)

    if failed:
        print('\n获取失败: {} / {}'.format(failed, total))

def main():
    """Scrape the stock-code listing, then save every stock's details.

    The listing page is downloaded, the stock codes are extracted from it and
    the per-stock details are appended to ``p44_stock_information.txt`` in
    the current working directory.
    """
    output_path = 'p44_stock_information.txt'
    listing_url = r'https://hq.gucheng.com/gpdmylb.html'

    listing_html = getHtml(listing_url)
    # getStock's first argument is a placeholder list it does not read.
    codes = getStock([], listing_html)
    save_stock(codes, output_path)

# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

程序员灯塔
转载请注明原文链接:爬虫案例 提取股票信息
喜欢 (0)