python
来源:互联网 发布:淘宝 废铁战士 编辑:程序博客网 时间:2024/06/11 15:06
import datetime
import re
from urllib.parse import urljoin

import pandas
import requests
from pyquery import PyQuery as pq

# Seconds to wait on a request before giving up; without a timeout a stalled
# server would hang the whole crawl indefinitely.
REQUEST_TIMEOUT = 10

# Extracts a YYYY-MM-DD date from the free-form listing-date text.
_LISTING_DATE_RE = re.compile(r'(\d{4}-\d{2}-\d{2})')


def _fetch_html(url):
    """Return the body of *url* as text, bounded by REQUEST_TIMEOUT."""
    return requests.get(url, timeout=REQUEST_TIMEOUT).text


def save_as_csv(houses_info_list):
    """Write the scraped records to a CSV file named after today's date.

    Args:
        houses_info_list: list of dicts, one per listing, as returned by
            ``get_house_info_list``.

    The file is created in the current working directory and is named
    ``<YYYY-MM-DD>上海二手房信息.csv``.
    """
    houses_df = pandas.DataFrame(houses_info_list)
    today = datetime.date.today().strftime('%Y-%m-%d')
    houses_df.to_csv('{}上海二手房信息.csv'.format(today))


def get_house_info_list(url):
    """Scrape every listing linked from the index page at *url*.

    Args:
        url: address of the listing index page.

    Returns:
        A list of dicts, one per listing, each built by ``get_house``.
    """
    index_doc = pq(_fetch_html(url))
    house_list = []
    for house in index_doc.find('.houseList > .list > .info').items():
        href = house.find('.title > a').attr('href')
        if not href:
            # No detail link on this entry; nothing to follow.
            continue
        # urljoin resolves both relative and absolute hrefs against the
        # index URL, avoiding the doubled slash that plain concatenation
        # produced for root-relative links.
        house_list.append(get_house(urljoin(url, href)))
    return house_list


def get_house(url):
    """Scrape a single listing detail page into a flat dict.

    Args:
        url: address of the listing detail page.

    Returns:
        A dict mapping field labels found on the page (plus the fixed keys
        '标题', '价格', '联系人' and '联系方式') to their text values.
    """
    doc = pq(_fetch_html(url))
    info = {}
    info['标题'] = doc.find('#lpname').text()
    info['价格'] = doc.find('div.trl-item.sty1').text()

    # Each '.trl-item1' cell renders as "<value> <label>". Split off the
    # trailing label only, and skip cells that do not have both parts instead
    # of crashing on the unpack.
    for item in doc.find('.trl-item1').items():
        parts = item.text().rsplit(None, 1)
        if len(parts) != 2:
            continue
        value, key = parts
        info[key] = value

    # '.trl-item2' rows carry an explicit label/content pair; all internal
    # whitespace is removed and the trailing "地图" (map) link text dropped.
    for item in doc.find('.trl-item2').items():
        key = ''.join(item.find('.lab').text().split())
        value = ''.join(item.find('.rcont').text().split())
        info[key] = value.replace('地图', '')

    info['联系人'] = doc.find('#agentname').text()
    info['联系方式'] = doc.find('#mobilecode').text()

    for item in doc.find('.qu_bianqu1 > .text-item').items():
        key = item.find('.lab').text()
        value = item.find('.rcont').text()
        if key == '挂牌时间':
            # Keep only the date; fall back to the raw text if the page does
            # not contain one in the expected format.
            match = _LISTING_DATE_RE.search(value)
            if match:
                value = match.group(1)
        info[key] = value

    print(info)
    return info


if __name__ == '__main__':
    houses_info_list = get_house_info_list('http://esf.sh.fang.com/')
    save_as_csv(houses_info_list)
阅读全文
0 0
- Python
- Python
- Python
- python
- Python
- PYTHON
- Python
- Python
- Python
- Python
- Python
- Python
- Python
- Python
- Python
- Python
- python
- Python
- 获取出现次数最多的字符及其次数
- Proxy-Stub模式分析
- HDOJ HDU 1027 Ignatius and the Princess II
- Angularjs中ng-select和ng-options用法【select联动数据】
- Nodejs 调试
- python
- History
- 浅谈Flux架构及Redux实践
- 外部中断和内部中断详解
- 没有tcp server能直接connect建立连接?
- 安卓 打造通用的SDK
- jvm内存架构模型
- android.view.View.getImportantForAccessibility()
- 官方Property Animation翻译(全部)以及批注