伟的博客 研发工程师

python:Discuz发帖器的实现-修改版(可以随机选文字)2010-03-17

2011-03-10

#!/usr/bin/python
#encoding=GB2312
#使用前请查找并更改用户名和密码
#http://www.cnepub.com/ 论坛回贴工具,以换得下载书用的书币

import cookielib
import random
import sys
import time
import urllib
import urllib2
from time import sleep
from urllib import urlencode
from xml.sax.saxutils import unescape

from BeautifulSoup import BeautifulSoup  # For processing HTML

def formalize(text):
    """Normalize *text* into paragraphs separated by blank lines.

    The text is split on newlines, each line is stripped of surrounding
    whitespace, and lines that end up empty are dropped.  Every remaining
    line is emitted followed by two newlines.

    Args:
        text: The text to normalize.

    Returns:
        The normalized text; an empty string when *text* contains no
        non-blank lines.
    """
    stripped_lines = (line.strip() for line in text.split(u'\n'))
    # join() builds the result in one pass instead of repeated `+=`.
    return u''.join(line + u'\n\n' for line in stripped_lines if line)

# Log in to the Discuz forum.
# The cookie jar keeps the session cookie returned by the login request so
# that later requests made through `opener` are sent as the logged-in user.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

loginurl = "http://www.cnepub.com/discuz/logging.php?action=login"
loginsubmiturl = "http://www.cnepub.com/discuz/logging.php?action=login&loginsubmit=yes"
posturl = "http://www.cnepub.com/discuz/post.php?action=newthread&fid=5"
postsubmiturl = "http://xxx/post.php?&action=newthread&fid=5&extra=&topicsubmit=yes"
replyposturl = "http://www.cnepub.com/discuz/post.php?action=reply&fid=22&tid=15848"
replypostsubmiturl = "http://www.cnepub.com/discuz/post.php?&action=reply&fid=22&tid=15848&extra=&replysubmit=yes"
# Prefix and suffix of the reply-submit URL; the thread id goes in between.
sreplypostsubmiturl = "http://www.cnepub.com/discuz/post.php?&action=reply&fid=22&tid="
ereplypostsubmiturl = "&extra=&replysubmit=yes"

# Login form fields.  The user name and password are placeholders that must
# be replaced before use; the formhash value is hard-coded.
myvalues = {
    'formhash': '11c8f248',
    'username': '**',
    'password': '**',
    'cookietime': '2592000',
    'loginsubmit': 'true',
}
data = urllib.urlencode(myvalues)
myrequest = urllib2.Request(loginsubmiturl, data)
rq = opener.open(myrequest)

# Disabled snippet (kept as an inert string literal): fetch and print the
# reply page.
'''req = urllib2.Request(replyposturl)
response = opener.open(req)
the_page = response.read()
print the_page'''

# Disabled snippet (kept as an inert string literal): earlier version of the
# form data that picked both message and subject at random.
'''postdata = {
'checkbox':'0',
'formhash':'36c581bc',
'message':random.choice ( ['有钱的捧个钱场', '没钱的捧个人场哈', '谢谢楼主哈哈哈', '好期待好期待~~', '我等不及了!!!!!']),
'subject':random.choice ( ['有钱的捧个钱场', '没钱的捧个人场哈', '谢谢楼主哈哈哈', '好期待好期待~~', '我等不及了!!!!!']),
'tags':'',
'updateswfattach' : '0',
'wysiwyg' : '1'
}'''

# Reply form fields; 'message' is overwritten on every loop iteration.
postdata = {
    'checkbox': '0',
    'formhash': '36c581bc',
    'message': '有钱的捧个钱场,没钱的捧个人场哈',
    'subject': '',
    'tags': '',
    'updateswfattach': '0',
    'wysiwyg': '1',
}

# Submit one reply to each thread id in [2316, 2710), waiting 27 seconds
# between submissions.
for i in range(2316, 2710):
    postdata['message'] = random.choice([
        '东西太多了,一时半会也看不完,哈哈',
        '谢谢楼主哈哈哈谢谢楼主哈哈哈',
        '好期待好期待好期待好期待好期待~~',
        '我等不及了!我等不及了!我等不及了!我等不及了!!!!!',
    ])
    #postdata['subject'] = random.choice (['有钱的捧个钱场,没钱的捧个人场哈', '谢谢楼主哈哈哈谢谢楼主哈哈哈', '好期待好期待好期待好期待好期待~~', '我等不及了!我等不及了!我等不及了!我等不及了!!!!!'])
    myrequest2 = urllib2.Request(
        sreplypostsubmiturl + str(i) + ereplypostsubmiturl,
        urllib.urlencode(postdata),
    )
    data = opener.open(myrequest2)
    #print data.read()
    time.sleep(27)

           首先要清楚discuz论坛发帖的流程,简单地说就是以下流程: 进入登录页 ->登录 -> 进入版面 ->发帖 登录和发帖时要获取到页面的formhash值,否则会失败,如果启用了验证码,还得去分析验证码,这就比较复杂了。这里只用python来描述这一系列过程,涉及到验证码还请大家自行去实现。   #!/usr/bin/env python #coding=utf-8 from urllib import urlencode import cookielib, urllib2,urllib import os,sys import urllib2,cookielib,urllib,httplib,re import getpass import time from Queue import Queue import threading class Discuz: def __init__(self,uid,pwd,debug = False,**param): self.username = uid self.password = pwd self.para = param #self.timelimit = timelimit self.regex = { loginreg:, postreg: } self.opener = None self.request = None self.islogin = False self.donecount = 0 self.__login() self.threadcount = 10 def __login(self): try: loginPage = urllib2.urlopen(self.para[loginurl]).read() formhash = re.search(self.regex[loginreg],loginPage) formhash = formhash.group(1) print start login...... cookiejar = cookielib.CookieJar() self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar)) values = { formhash:formhash, username:self.username, password:self.password, loginsubmit:true } data = urllib.urlencode(values) self.request = urllib2.Request(self.para[loginsubmiturl], data) rq = self.opener.open(self.request) print login success...... self.islogin = True except Exception ,e: print e def Post(self,subject,wysiwyg,content): threads = [] for i in range(self.threadcount): t = threading.Thread( target=self.__postTopic, kwargs={_subject:subject,_wysiwyg:wysiwyg,_body:content} ) threads.append(t) for i in range(self.threadcount): threads[i].start() for i in range(self.threadcount): threads[i].join() print done def __postTopic(self,**para): if not self.islogin: print please login...... 
return while True: try: self.request = urllib2.Request(self.para[posturl]) rq = self.opener.open(self.request) data = rq.read() formhash = re.search(self.regex[postreg],data) formhash =  formhash.group(1) postdata = { addtags:+可用标签, checkbox:0, formhash:formhash, iconid:, message:para[_body], subject:para[_subject], tags:, updateswfattach : 0, wysiwyg : para[_wysiwyg] } self.request = urllib2.Request(self.para[postsubmiturl],urllib.urlencode(postdata)) self.opener.open(self.request) self.donecount+=1 print %d done..... % self.donecount except Exception,e: print e time.sleep(2) if __name__==__main__: name = raw_input(username:) password = getpass.getpass(password:) dz = Discuz(name,password, loginurl=http://xxx/logging.php?action=login, loginsubmiturl=http://xxx/logging.php?action=login&loginsubmit=yes, posturl=http://xxx/post.php?action=newthread&fid=5, postsubmiturl=http://xxx/post.php?&action=newthread&fid=5&extra=&topicsubmit=yes) content=这是帖子内容 dz.Post(这是帖子内容,1,content)  Discuz类中只有一个公开方法  def  Post(self,subject,wysiwyg,content)  三个参数,分别是帖子标题、是否是可视化编辑、帖子内容。  类实例化时会进行登录操作,并同时记录下cookie以备用,Post方法中会启用多个线程执行私有方法__postTopic(self,**para)进行真正的发帖操作。  所以,基本上Discuz生成的formhash就是一个鸡肋  -_-

Comments