Python 模拟登录 CSDN 并获取博客列表等操作

来源:互联网 发布:黑客编程新手教学 编辑:程序博客网 时间:2024/06/09 19:31
# Simulated CSDN login plus blog-list / category operations.
# The original author reported a successful login with this flow on Python 3.3.
import http.cookiejar
import re
import urllib.parse
import urllib.request

# Patterns are compiled once at import time instead of on every call.
_LT_RE = re.compile(r'name="lt" value="(.*?)"')
_EXECUTION_RE = re.compile(r'name="execution" value="(.*?)"')
_LOGIN_ERROR_RE = re.compile(r'<span id="error-message">帐户名或登录密码不正确')
_REDIRECT_RE = re.compile(r'var redirect = "(.*?)"')
_BLOG_ROW_RE = re.compile(
    r"<tr(.*?)<a href='(.*?)'(.*?)>(.*?)</a>(.*?)\((.*?)\)</span>(.*?)</td><td>([0-9]+)</td><td>([0-9]+)([\s\S]*?)</tr>"
)
_CATEGORY_ROW_RE = re.compile(
    r"<td class='tdleft'><span>(.*?)</span></td>([\s\S]*?)<a href='#([0-9]+)'"
)


class CsdnError(Exception):
    """Raised when login fails or a page lacks the data this client expects.

    Subclasses ``Exception`` so callers that already catch ``Exception``
    around ``login()`` keep working.
    """


class Csdn():
    """Cookie-backed client for the CSDN passport login and blog admin pages.

    Attributes set by this class:
        username, password: credentials passed to the constructor.
        opener: urllib opener that carries the session cookies.
        loginStatus: 1 after a successful ``login()``, 0 after a failed one
            (unset until ``login()`` has been called).
        errorInfo: human-readable reason for the last failed ``login()``.
    """

    _LOGIN_FORM_URL = 'https://passport.csdn.net/?service=http://write.blog.csdn.net/postlist'
    _LOGIN_POST_URL = 'https://passport.csdn.net/?service=http://write.blog.csdn.net/postedit'
    _BLOG_LIST_URL = 'http://write.blog.csdn.net/postlist'
    _CATEGORY_URL = 'http://write.blog.csdn.net/category'

    def __init__(self, username, password):
        """Store the credentials and build a cookie-aware opener."""
        self.username = username
        self.password = password
        self.createCookie()

    def createCookie(self):
        """Create ``self.opener`` with a fresh, empty cookie jar."""
        jar = http.cookiejar.CookieJar()
        self.opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(jar)
        )

    def _fetch(self, url, data=None):
        """Request *url* through the session opener and return the decoded body.

        The response is always closed. Bytes that are not valid UTF-8 are
        dropped rather than raising, so a stray byte cannot abort a scrape.
        """
        with self.opener.open(url, data=data) as response:
            return response.read().decode('utf-8', 'ignore')

    def getKeyBeforeLogin(self):
        """Fetch the login page and return its one-time form values.

        Returns:
            dict with the keys ``'lt'`` and ``'execution'``.

        Raises:
            CsdnError: if either value is absent from the page.
        """
        html = self._fetch(self._LOGIN_FORM_URL)
        lt = _LT_RE.search(html)
        execution = _EXECUTION_RE.search(html)
        if lt is None or execution is None:
            raise CsdnError('login page does not contain the "lt"/"execution" form values')
        return {'lt': lt.group(1), 'execution': execution.group(1)}

    def login(self):
        """Submit the login form and return the post-login redirect URL.

        Sets ``self.loginStatus`` (1 on success, 0 on failure) and, on
        failure, ``self.errorInfo``.

        Raises:
            CsdnError: if the site reports wrong credentials, or if the
                response carries no ``var redirect = "..."`` assignment.
        """
        tokens = self.getKeyBeforeLogin()
        form = {
            'username': self.username,
            'password': self.password,
            'lt': tokens['lt'],
            'execution': tokens['execution'],
            '_eventId': 'submit',
        }
        # These headers stay on the opener, so later requests made through it
        # are sent with them as well.
        self.opener.addheaders = [
            ('host', 'passport.csdn.net'),
            ('User-Agent',
             'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
            ('Referer', 'https://passport.csdn.net/?service=http://write.blog.csdn.net/postedit'),
        ]
        payload = urllib.parse.urlencode(form).encode(encoding='UTF8')
        text = self._fetch(self._LOGIN_POST_URL, data=payload)

        if _LOGIN_ERROR_RE.search(text):
            self.loginStatus = 0
            self.errorInfo = '帐户名或登录密码不正确'
            raise CsdnError(self.errorInfo)

        redirect = _REDIRECT_RE.search(text)
        if redirect is None:
            # Previously this crashed with AttributeError on ``None.group``.
            self.loginStatus = 0
            self.errorInfo = 'login response does not contain a redirect URL'
            raise CsdnError(self.errorInfo)

        self.loginStatus = 1
        return redirect.group(1)

    def visitRedirectAfterLogin(self, redirect):
        """Request the redirect URL returned by ``login()``; the body is discarded."""
        self._fetch(redirect)

    def visitBlogList(self):
        """Fetch the blog list page and return it parsed by ``handleBlogList``."""
        return self.handleBlogList(self._fetch(self._BLOG_LIST_URL))

    def handleBlogList(self, text):
        """Extract blog posts from the blog list page HTML.

        Returns:
            A list of dicts with the keys ``'url'``, ``'name'``, ``'time'``,
            ``'readnum'`` and ``'comment'`` (all strings); empty when no row
            matches.
        """
        return [
            {'url': row[1], 'name': row[3], 'time': row[5],
             'readnum': row[7], 'comment': row[8]}
            for row in _BLOG_ROW_RE.findall(text)
        ]

    def visitBlogCategory(self):
        """Fetch the category page and return it parsed by ``handleBlogCategory``."""
        return self.handleBlogCategory(self._fetch(self._CATEGORY_URL))

    def handleBlogCategory(self, text):
        """Extract blog categories from the category page HTML.

        Returns:
            A list of dicts with the keys ``'name'`` and ``'id'`` (both
            strings); empty when no row matches.
        """
        return [
            {'name': row[0], 'id': row[2]}
            for row in _CATEGORY_ROW_RE.findall(text)
        ]

    def addBlogCategory(self, name):
        """Add a category called *name* and return the parsed category list."""
        quoted = urllib.parse.quote(name.encode('utf-8', 'ignore'))
        url = 'http://write.blog.csdn.net/category?t=add&name=%s' % quoted
        return self.handleBlogCategory(self._fetch(url))

    def editBlogCategory(self, id, name):
        """Rename category *id* to *name* and return the parsed category list.

        The parameter is named ``id`` (shadowing the builtin) to keep the
        existing keyword interface intact.
        """
        quoted = urllib.parse.quote(name.encode('utf-8', 'ignore'))
        url = 'http://write.blog.csdn.net/category?t=edit&id=%s&name=%s' % (id, quoted)
        return self.handleBlogCategory(self._fetch(url))

    def main(self):
        """Log in, then print every blog post and every category."""
        redirect = self.login()
        # The original author observed that fetching the blog list fails
        # unless the redirect is visited first; the cause was not determined.
        self.visitRedirectAfterLogin(redirect)
        for post in self.visitBlogList():
            print(post)
        for category in self.visitBlogCategory():
            print(category)


if __name__ == '__main__':
    csdn = Csdn('csdnusername', 'csdnpassword')
    csdn.main()

0 0
原创粉丝点击