Scraping Same-Day Stock Information


Stock codes come from East Money (东方财富网); the detailed per-stock information is fetched from Baidu Gupiao (gupiao.baidu.com).
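
The stock codes are extracted from the hrefs of the `<a>` tags on the list page with a regular expression that matches "sh" or "sz" followed by six digits. A minimal sketch of that extraction, using hypothetical href values in the East Money style:

```python
import re

# Hypothetical hrefs of the kind found on the East Money stock-list page
hrefs = [
    "http://quote.eastmoney.com/sh600000.html",
    "http://quote.eastmoney.com/sz000001.html",
    "http://quote.eastmoney.com/about.html",   # no stock code here, regex finds nothing
]

codes = []
for href in hrefs:
    codes.extend(re.findall(r"[s][hz]\d{6}", href))

print(codes)  # ['sh600000', 'sz000001']
```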

```python
import requests
from bs4 import BeautifulSoup
import re
import traceback
import time
import numpy as np

# Rotate request headers to reduce the chance of getting IP-banned
hds = [{'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'},
       {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11'},
       {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)'}]

def getHTMLText(url):  # Parameter: page URL
    try:
        r = requests.get(url, timeout=30, headers=hds[np.random.randint(0, len(hds))])
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except:
        return ""

def getStockList(lst, stockURL):  # Parameters: list to hold stock codes, URL of the stock-list site
    html = getHTMLText(stockURL)  # Fetch the page that lists the stock codes
    soup = BeautifulSoup(html, 'html.parser')
    a = soup.find_all('a')  # The codes live in the hrefs of <a> tags
    for i in a:
        try:
            href = i.attrs['href']
            lst.append(re.findall(r"[s][hz]\d{6}", href)[0])  # Extract the code (sh/sz + 6 digits)
        except:
            continue

def getStockInfo(lst, stockURL, fpath):  # Parameters: stock-code list, info-site URL, output file path
    count = 0
    total = 0
    for s in lst:
        # 'total' counts how many codes have been seen; the first 90 entries on this list
        # are problematic and are skipped, so only codes 90-99 are fetched here
        total = total + 1
        if total < 90:
            continue
        if total >= 100:
            break
        # time.sleep(np.random.rand() * 2)
        url = stockURL + s + ".html"
        html = getHTMLText(url)
        try:
            if html == "":
                continue
            soup = BeautifulSoup(html, 'html.parser')
            StockDict = {}
            stockInfo = soup.find('div', attrs={'class': 'bets-content'})
            try:
                name = soup.find_all(attrs={'class': 'bets-name'})[0].text.split()[0]
            except IndexError:
                continue
            StockDict.update({'股票信息': name})  # Store the stock name first
            keyList = stockInfo.find_all('dt')    # Field names for this stock
            valueList = stockInfo.find_all('dd')  # Field values for this stock
            for i in range(len(keyList)):
                key = keyList[i].text
                value = valueList[i].text
                StockDict[key] = value
            with open(fpath, 'a', encoding='utf-8') as f:  # Append the record to the output file
                f.write(str(StockDict) + '\n')
                count = count + 1
                print("\r当前进度:{:.2f}%".format(count * 100 / len(lst)))
        except:
            traceback.print_exc()
            count = count + 1
            print("\r当前进度:{:.2f}%".format(count * 100 / len(lst)))
            continue

if __name__ == '__main__':
    Stock_list_url = 'http://quote.eastmoney.com/stocklist.html'  # Page that lists the stock codes
    Stock_info_url = 'https://gupiao.baidu.com/stock/'            # URL prefix for per-stock details
    file_save = 'D://python项目//Python_//爬虫项目实例//StockInfo.txt'  # Where the stock records are saved
    slist = []  # Holds the stock codes
    getStockList(slist, Stock_list_url)
    getStockInfo(slist, Stock_info_url, file_save)
```
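
Each record is appended to the output file as str(StockDict), one dictionary per line. A minimal sketch for reading those records back into Python dictionaries with ast.literal_eval (the filename here is assumed to match file_save above):

```python
import ast

records = []
with open('StockInfo.txt', encoding='utf-8') as f:   # assumed to be the file written by getStockInfo
    for line in f:
        line = line.strip()
        if line:
            records.append(ast.literal_eval(line))   # str(dict) round-trips via literal_eval

# Print the stock name stored under the '股票信息' key for the first few records
for rec in records[:3]:
    print(rec.get('股票信息'), rec)
```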