python 模拟登录csdn并获取博客列表等操作
来源:互联网 发布:黑客编程新手教学 编辑:程序博客网 时间:2024/06/09 19:31
# Original author's note: login works under Python 3.3.
import http.cookiejar
import re
import urllib.parse
import urllib.request


class CsdnError(Exception):
    """Raised when login is rejected or a page lacks the expected markup."""


class Csdn():
    """Minimal CSDN client: log in, then list or edit blog posts/categories.

    All requests go through a single cookie-aware opener so that the session
    established by :meth:`login` is reused by the later calls.

    Attributes set by this class:
        username, password: credentials given to the constructor.
        opener: the ``urllib`` opener holding the cookie jar.
        loginStatus: ``1`` after a successful :meth:`login`, otherwise ``0``.
        errorInfo: message of the last login failure, ``''`` if none.
    """

    # The login form is fetched with service=.../postlist but submitted to
    # service=.../postedit; both URLs are kept exactly as the script used them.
    LOGIN_FORM_URL = 'https://passport.csdn.net/?service=http://write.blog.csdn.net/postlist'
    LOGIN_POST_URL = 'https://passport.csdn.net/?service=http://write.blog.csdn.net/postedit'
    BLOG_LIST_URL = 'http://write.blog.csdn.net/postlist'
    CATEGORY_URL = 'http://write.blog.csdn.net/category'
    USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'

    # Text the login page shows when the credentials are rejected.
    BAD_CREDENTIALS_MESSAGE = '帐户名或登录密码不正确'

    _LT_RE = re.compile(r'name="lt" value="(.*?)"')
    _EXECUTION_RE = re.compile(r'name="execution" value="(.*?)"')
    _REDIRECT_RE = re.compile(r'var redirect = "(.*?)"')
    # Groups used below: 1=url, 3=title, 5=time, 7=read count, 8=comment count.
    _BLOG_ROW_RE = re.compile(
        r"<tr(.*?)<a href='(.*?)'(.*?)>(.*?)</a>(.*?)\((.*?)\)</span>(.*?)</td><td>([0-9]+)</td><td>([0-9]+)([\s\S]*?)</tr>")
    # Groups used below: 0=category name, 2=category id.
    _CATEGORY_RE = re.compile(
        r"<td class='tdleft'><span>(.*?)</span></td>([\s\S]*?)<a href='#([0-9]+)'")

    def __init__(self, username, password):
        """Store the credentials and build the cookie-aware opener."""
        self.username = username
        self.password = password
        # Defined up front so they can be read before login() has been called.
        self.loginStatus = 0
        self.errorInfo = ''
        self.createCookie()

    def createCookie(self):
        """Create the cookie jar and the opener that uses it."""
        cookie = http.cookiejar.CookieJar()
        cookieProc = urllib.request.HTTPCookieProcessor(cookie)
        self.opener = urllib.request.build_opener(cookieProc)

    def _fetch(self, url, data=None):
        """Request *url* (POST when *data* is given) and return the body text.

        The response is closed before returning; undecodable bytes are
        dropped rather than raising.
        """
        with self.opener.open(url, data=data) as response:
            return response.read().decode('utf-8', 'ignore')

    @staticmethod
    def _quoteName(name):
        """Percent-encode a category name (UTF-8) for use in a query string."""
        return urllib.parse.quote(name, encoding='utf-8', errors='ignore')

    def getKeyBeforeLogin(self):
        """Fetch the login page and return its ``lt``/``execution`` form values.

        Returns:
            dict with the keys ``'lt'`` and ``'execution'``.

        Raises:
            CsdnError: if either field is missing from the page.
        """
        html = self._fetch(self.LOGIN_FORM_URL)
        lt = self._LT_RE.search(html)
        execution = self._EXECUTION_RE.search(html)
        if lt is None or execution is None:
            raise CsdnError('login page has no "lt"/"execution" form field')
        return {'lt': lt.group(1), 'execution': execution.group(1)}

    def login(self):
        """Submit the login form and return the redirect URL from the reply.

        Sets ``loginStatus`` (and ``errorInfo`` on failure).

        Raises:
            CsdnError: if the credentials are rejected or the response does
                not contain a ``var redirect = "..."`` assignment.
        """
        res = self.getKeyBeforeLogin()
        postData = {
            'username': self.username,
            'password': self.password,
            'lt': res['lt'],
            'execution': res['execution'],
            '_eventId': 'submit',
        }
        # These headers stay on the opener and apply to later requests too.
        self.opener.addheaders = [
            ('host', 'passport.csdn.net'),
            ('User-Agent', self.USER_AGENT),
            ('Referer', self.LOGIN_POST_URL),
        ]
        body = urllib.parse.urlencode(postData).encode(encoding='UTF8')
        text = self._fetch(self.LOGIN_POST_URL, data=body)

        if '<span id="error-message">' + self.BAD_CREDENTIALS_MESSAGE in text:
            self.loginStatus = 0
            self.errorInfo = self.BAD_CREDENTIALS_MESSAGE
            raise CsdnError(self.errorInfo)

        redirect = self._REDIRECT_RE.search(text)
        if redirect is None:
            self.loginStatus = 0
            self.errorInfo = 'login response did not contain a redirect URL'
            raise CsdnError(self.errorInfo)

        self.loginStatus = 1
        return redirect.group(1)

    def visitRedirectAfterLogin(self, redirect):
        """Request the redirect URL returned by :meth:`login`; body is discarded."""
        self._fetch(redirect)

    def visitBlogList(self):
        """Fetch the post-management page and return the parsed post list."""
        return self.handleBlogList(self._fetch(self.BLOG_LIST_URL))

    def handleBlogList(self, text):
        """Extract blog posts from the post-management page HTML.

        Returns:
            list of dicts with the keys ``url``, ``name``, ``time``,
            ``readnum`` and ``comment`` (all strings); empty if nothing matches.
        """
        return [
            {'url': row[1], 'name': row[3], 'time': row[5],
             'readnum': row[7], 'comment': row[8]}
            for row in self._BLOG_ROW_RE.findall(text)
        ]

    def visitBlogCategory(self):
        """Fetch the category page and return the parsed category list."""
        return self.handleBlogCategory(self._fetch(self.CATEGORY_URL))

    def handleBlogCategory(self, text):
        """Extract blog categories from the category page HTML.

        Returns:
            list of dicts with the keys ``name`` and ``id`` (both strings);
            empty if nothing matches.
        """
        return [
            {'name': row[0], 'id': row[2]}
            for row in self._CATEGORY_RE.findall(text)
        ]

    def addBlogCategory(self, name):
        """Add a category called *name* and return the category list parsed
        from the response."""
        url = 'http://write.blog.csdn.net/category?t=add&name=%s' % self._quoteName(name)
        return self.handleBlogCategory(self._fetch(url))

    def editBlogCategory(self, id, name):
        """Rename category *id* to *name* and return the category list parsed
        from the response.

        The parameter is called ``id`` (shadowing the builtin) to keep the
        existing keyword interface.
        """
        url = 'http://write.blog.csdn.net/category?t=edit&id=%s&name=%s' % (
            id, self._quoteName(name))
        return self.handleBlogCategory(self._fetch(url))

    def main(self):
        """Log in, then print every blog post and every category.

        ``addBlogCategory`` / ``editBlogCategory`` are not exercised here
        because they send add/edit requests for the logged-in account.
        """
        redirect = self.login()
        # Original author's note: without this request, fetching the blog list
        # afterwards fails; the reason was not known to the author.
        self.visitRedirectAfterLogin(redirect)

        for blog in self.visitBlogList():
            print(blog)
        for category in self.visitBlogCategory():
            print(category)


if __name__ == '__main__':
    csdn = Csdn('csdnusername', 'csdnpassword')
    csdn.main()
0 0
- python 模拟登录csdn并获取博客列表等操作
- Python登录并获取CSDN博客所有文章列表
- 爬虫 登录csdn并获取个人博客文章列表
- python模拟登录csdn并获取首页文章写入MySQL中(二)
- Python3.X登录模拟CSDN,获取文章列表
- python模拟登录csdn
- Python模拟登录CSDN
- python模拟登录csdn
- python 模拟登录CSDN
- 使用Python模拟登录QQ邮箱获取QQ好友列表
- 初试Scrapy(三)上---CSDN自动登录获取博客分类列表
- 初试Scrapy(三)下—CSDN自动登录获取博客分类列表
- VC使用libcurl模拟登录CSDN并自动评论资源以获取积分
- VC使用libcurl模拟登录CSDN并自动评论资源以获取积分
- VC使用libcurl模拟登录CSDN并自动评论资源以获取积分
- VC使用libcurl模拟登录CSDN并自动评论资源以获取积分
- 自动获取CSDN博客文章列表
- Python爬虫之模拟CSDN网站登录
- <<More Effective C++>>读书笔记2: 运算符
- day2 HDU 2141 Can you find it?
- [李景山php]每天laravel-20160921|Redis Database.php
- 基于FPGA的Alpha半透明图像叠加算法实现
- 为什么析构函数常声明为虚函数?
- python 模拟登录csdn并获取博客列表等操作
- 快学scala 第十一章 操作符 读书笔记及习题答案代码
- 深入理解 Android 中的 Matrix
- Android 6.0 通话UI设计模式分析(MVC\MVP\MVVM)
- Post Views Options怎么设置,WP-PostViews的后台设置五步法详解
- java反向工程神器ermaster
- 几款极好的 JavaScript 文件上传插件
- Hadoop:基础环境配置
- Linux线程同步-----互斥量(Mutex)