Purple exclamation mark.svg Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit mediawiki, is welcome.

Red exclamation mark.svg UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before you can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.

Python:Diffbot.py

From Botwiki
Jump to: navigation, search

This is a simple bot that gets recent changes from a wiki and prints the diff between the current version and the last version by a different user. If the page has just been created or there are no previous users in its history, the bot prints the page content. Of course, it has little utility on its own because there are many more sophisticated bots that print diffs of all changes from a wiki, but it is a good starting point for building more sophisticated automatic bots: simply check the differences between the contents and add page.put(oldcontent, 'BOT reverting changes.') if the edit seems to be vandalism.

NOTE: If you have an old version of pywikipedia, you will need to upgrade it before using this bot. The function getOldVersion() was added recently.

# ---------------------------------------------------------------------------
# IRC configuration
#
# These settings are read by rcbot() when it connects to the
# recent-changes feed.
# ---------------------------------------------------------------------------

# Host name and TCP port of the IRC server to connect to.
server = 'irc.wikimedia.org'
port = 6667

# Base nick. rcbot() truncates it and appends a random number so that the
# resulting nick is at most nine characters long.
nickname = 'rc'

# Channels to join. A leading '#' is added by rcbot() when the name does not
# already start with one of the channel prefixes '&', '#', '!' or '+'.
channels = [
	'en.wikipedia',
	'en.wiktionary',
	'meta.wikimedia',
]
 
import wikipedia
# Channels whose (language code, family) pair cannot be obtained by simply
# splitting the channel name on '.'.
_SPECIAL_CHANNEL_SITES = {
	'#meta.wikimedia': ('meta', 'meta'),
	'#commons.wikimedia': ('commons', 'commons'),
	'#incubator.wikimedia': ('incubator', 'incubator'),
	'#species.wikipedia': ('species', 'species'),
	'#mediawiki.wikipedia': ('mediawiki', 'mediawiki'),
}


def _site_for_channel(channel):
	"""Return the site announced on IRC channel *channel*, or None if it cannot be determined."""
	if channel in _SPECIAL_CHANNEL_SITES:
		code, family = _SPECIAL_CHANNEL_SITES[channel]
		return wikipedia.getSite(code, family)
	try:
		# '#en.wikipedia' -> getSite('en', 'wikipedia')
		return wikipedia.getSite(channel[1:].split('.')[0], channel.split('.')[1])
	except (ValueError, IndexError):
		return None


def _first_version_by_other_user(history, user):
	"""Return the first entry of *history* whose user (index 2) is not *user*, or None."""
	for version in history:
		if version[2] != user:
			return version
	return None


def check(data):
	"""Print the diff introduced by one recent change.

	The current page text is compared with the first revision in the page
	history that was made by a user other than the one who made the announced
	change. When no such revision exists, the page is reported as new and its
	whole content is printed instead.

	Parameters:
		data: dict of fields parsed from the IRC announcement. The keys
			'channel', 'title' and 'user' are required; 'diff' (the revision
			id of the change) is used when present.

	Returns:
		None. All results are written with wikipedia.output() and
		wikipedia.showDiff(). Redirects, missing pages and channels whose
		wiki cannot be determined are reported and skipped.
	"""
	channel = data['channel']
	site = _site_for_channel(channel)
	if site is None:
		# Stop here: without a site no Page can be built, and carrying on
		# would only fail on the unbound name.
		wikipedia.output('Can\'t determinate the wikifamily of ' + channel + '. Skipping...')
		return

	page = wikipedia.Page(site, data['title'])
	try:
		content = page.get()
	except wikipedia.IsRedirectPage:
		wikipedia.output('[[' + page.title() + ']] is a redirect page, skipping...')
		return
	except wikipedia.NoPage:
		wikipedia.output('[[' + page.title() + ']] not exists; maybe an IRC error, skipping...')
		return

	# Look at the five most recent revisions first; only fetch the complete
	# history when all five of them belong to the editing user.
	history = page.getVersionHistory(forceReload = True, revCount = 5)
	oldversion = _first_version_by_other_user(history, data['user'])
	if oldversion is None and len(history) == 5:
		history = page.getVersionHistory(forceReload = True, getAll = True)
		oldversion = _first_version_by_other_user(history, data['user'])

	if oldversion is None:
		wikipedia.output(
			'################################### NEW PAGE ###################################\n'
			'Page: %s\nUser: %s\nContent:\n%s'
			% (page.aslink(), data['user'], content))
		return

	try:
		oldcontent = page.getOldVersion(oldid=oldversion[0])
	except wikipedia.IsRedirectPage:
		wikipedia.output('[[' + page.title() + ']] was a redirect page on its previous version by ' + oldversion[2] + ', skipping...')
		return
	except wikipedia.NoPage:
		wikipedia.output('[[' + page.title() + ']] not exists; maybe an IRC error, skipping...')
		return

	# %-formatting instead of '+' so that a numeric revision id does not
	# raise TypeError. Page-creation announcements carry no 'diff' field, so
	# fall back to a placeholder instead of raising KeyError.
	wikipedia.output(
		'################################### NEW EDIT ###################################\n'
		'Page: %s\nCurrent version: %s (User: %s)\nOld version: %s (User: %s)\nDiff:'
		% (page.aslink(), data.get('diff', 'unknown'), data['user'], oldversion[0], oldversion[2]))
	wikipedia.showDiff(oldcontent, content)
 
import re
# Patterns for the colour-coded recent-changes announcements sent to the IRC
# channels. Every pattern captures 'channel', 'title', 'flags', 'user',
# 'diffsize' and 'comment'; the extra groups depend on the kind of URL.
# The three patterns only differ in the flags and URL part, so the shared
# beginning and end are written once.
_RC_HEAD = r':.*? PRIVMSG (?P<channel>.*) :\x0314\[\[\x0307(?P<title>.*?)\x0314\]\]\x034 '
# Accept both http:// and https:// URLs.
_RC_URL = r'\x0310 \x0302https?:\/\/.*?'
_RC_TAIL = r'\x03 \x035\*\x03 \x0303(?P<user>.*?)\x03 \x035\*\x03 \(\x02?(?P<diffsize>[+-][0-9]*)\x02?\) \x0310(?P<comment>.*)\x03\r\n'

regexps = [
	# Edit to an existing page: index.php URL with diff, oldid and an optional rcid.
	re.compile(_RC_HEAD + r'(?P<flags>.*?)' + _RC_URL + r'\/w\/index\.php\?title=.*?&diff=(?P<diff>[0-9]*)&oldid=(?P<oldid>[0-9]*)(&rcid=(?P<rcid>[0-9]*))?' + _RC_TAIL),
	# Flags ending in 'N' with an index.php URL that only carries an rcid.
	re.compile(_RC_HEAD + r'(?P<flags>.*?N)' + _RC_URL + r'\/w\/index\.php\?title=.*?&rcid=(?P<rcid>[0-9]*)' + _RC_TAIL),
	# Flags ending in 'N' with a plain /wiki/ URL.
	re.compile(_RC_HEAD + r'(?P<flags>.*?N)' + _RC_URL + r'\/wiki/.*?' + _RC_TAIL),
]
 
import socket, random, thread
def _identify(rc):
	"""Register on the IRC server behind socket *rc*, join the configured channels and return the nick used."""
	rand = str(random.randint(120, 9999))
	# Keep the complete nick within nine characters.
	nick = '%s%s' % (nickname[0:9 - len(rand)], rand)
	rc.send('NICK %s\r\n' % nick)
	rc.send('USER %s %s %s :%s\r\n' % (nick, nick, nick, nick))
	for channel in channels:
		if not str(channel)[0] in '&#!+':
			channel = '#%s' % channel
		rc.send('JOIN %s\r\n' % channel)
	return nick


def _quit(rc):
	"""Send QUIT on socket *rc*, ignoring the error raised by an already dead connection."""
	try:
		rc.send('QUIT\r\n')
	except socket.error:
		pass


def _handle_line(rc, line, nick):
	"""Process one complete IRC line (terminated by CR-LF) and return the nick now in use."""
	words = line.split()
	# A message may start with a ':prefix' token; the command follows it.
	if words and words[0].startswith(':'):
		words = words[1:]
	if words and words[0] == 'PING':
		rc.send('PONG ' + ' '.join(words[1:]) + '\r\n')
		return nick
	if line.endswith('433 * %s :Nickname is already in use.\r\n' % nick):
		# Nick collision: register again under a fresh random nick.
		return _identify(rc)
	text = line.decode('utf-8', 'replace')
	for r in regexps:
		m = r.match(text)
		if m:
			# check() fetches pages over the network, so run it in its own
			# thread and keep reading the feed meanwhile.
			thread.start_new_thread(check, (m.groupdict(),))
			break
	return nick


def rcbot():
	"""Read the recent-changes IRC feed and start check() for every announced change.

	Connects to `server`:`port`, registers under a random nick derived from
	`nickname`, joins `channels` and then reads the feed until interrupted.
	The incoming data is buffered and handled line by line, so several
	announcements arriving in one packet, or one announcement split across
	two packets, are all processed. When the server closes the connection the
	socket is closed and a new connection is opened.

	Written for Python 2 (it uses the `thread` module and byte strings).

	Returns:
		None, after a KeyboardInterrupt was received while reading the feed.
	"""
	# Reconnect in a loop rather than by recursion, so that a long-running
	# bot neither grows the call stack nor leaks sockets.
	while True:
		rc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		try:
			rc.connect((str(server), int(port)))
			# Discard whatever the server sends before registration.
			rc.recv(4096)
			nick = _identify(rc)
			pending = ''
			try:
				while True:
					chunk = rc.recv(4096)
					if chunk == '':
						# Connection closed by the server: reconnect.
						_quit(rc)
						break
					pending += chunk
					lines = pending.split('\n')
					# The last element is an incomplete line (or ''); keep
					# it until the rest of it arrives.
					pending = lines.pop()
					for line in lines:
						# The patterns in `regexps` expect the CR-LF terminator.
						nick = _handle_line(rc, line.rstrip('\r') + '\r\n', nick)
			except KeyboardInterrupt:
				_quit(rc)
				return
		finally:
			rc.close()
 
def _main():
	"""Run the bot; wikipedia.stopme() is called on the way out, even when rcbot() raises."""
	try:
		rcbot()
	finally:
		wikipedia.stopme()


# Only start the bot when this file is executed as a script.
if __name__ == '__main__':
	_main()
Personal tools
Share