Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit mediawiki, is welcome.
UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before they can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.
Python:Diffbot.py
This is a simple bot that gets recent changes from a wiki and prints the diff between the current version and the last version by a different user. If the page has just been created, or there are no previous users in its history, the bot prints the page content instead. Of course, it has little utility on its own, because there are many more sophisticated bots that print the diffs of all changes on a wiki, but it is a good starting point for building more sophisticated automatic bots: simply check the differences between the contents and add page.put(oldcontent, 'BOT reverting changes.') if the edit seems to be vandalism.
NOTE: If you have an old version of pywikipedia, you will need to upgrade it before using this bot. The function getOldVersion() was added recently.
# Diffbot: listens to the Wikimedia recent-changes IRC feed and, for every
# edit announced there, prints the diff between the current page text and the
# most recent revision made by a different user (or the whole page text when
# no such revision exists).
#
# The script uses the Python 2 ``thread`` module and the pywikipedia
# ``wikipedia`` module.

import random
import re
import socket
import thread
import time

import wikipedia

######## IRC CONFIGURATION ########
server = 'irc.wikimedia.org'
port = 6667
nickname = 'rc'
channels = ['en.wikipedia', 'en.wiktionary', 'meta.wikimedia']
###################################

# Seconds to wait before reconnecting after the server closes the connection,
# so that a server which keeps dropping us is not hammered in a tight loop.
RECONNECT_DELAY = 5

# Channels whose name does not follow the "#<language>.<family>" convention,
# mapped to the (code, family) pair handed to wikipedia.getSite().
_SPECIAL_SITES = {
    '#meta.wikimedia': ('meta', 'meta'),
    '#commons.wikimedia': ('commons', 'commons'),
    '#incubator.wikimedia': ('incubator', 'incubator'),
    '#species.wikipedia': ('species', 'species'),
    '#mediawiki.wikipedia': ('mediawiki', 'mediawiki'),
}


def _site_for_channel(channel):
    """Return the site object for an IRC channel name, or None if unknown.

    Channels listed in _SPECIAL_SITES are looked up directly; any other
    channel is parsed as "#<code>.<family>".
    """
    if channel in _SPECIAL_SITES:
        code, family = _SPECIAL_SITES[channel]
        return wikipedia.getSite(code, family)
    try:
        return wikipedia.getSite(channel[1:].split('.')[0],
                                 channel.split('.')[1])
    except (ValueError, IndexError):
        return None


def _first_version_by_other_user(history, user):
    """Return the first history entry whose user (index 2) is not *user*.

    Returns None when every entry in *history* was made by *user*.
    """
    for version in history:
        if version[2] != user:
            return version
    return None


def check(data):
    """Print the diff (or the full text) for one recent-changes event.

    *data* is the groupdict() of a match from ``regexps``; the keys read here
    are 'channel', 'title', 'user' and, when present, 'diff'.
    """
    site = _site_for_channel(data['channel'])
    if site is None:
        wikipedia.output('Can\'t determinate the wikifamily of ' + data['channel'] + '. Skipping...')
        # Without a site there is nothing to look up; the original fell
        # through here and crashed on the unbound ``site`` name.
        return

    page = wikipedia.Page(site, data['title'])
    try:
        content = page.get()
    except wikipedia.IsRedirectPage:
        wikipedia.output('[[' + page.title() + ']] is a redirect page, skipping...')
        return
    except wikipedia.NoPage:
        wikipedia.output('[[' + page.title() + ']] not exists; maybe an IRC error, skipping...')
        return

    # Look at the last five revisions first; only fetch the whole history
    # when all five were made by the same user who made this edit.
    history = page.getVersionHistory(forceReload=True, revCount=5)
    oldversion = _first_version_by_other_user(history, data['user'])
    if not oldversion and len(history) == 5:
        history = page.getVersionHistory(forceReload=True, getAll=True)
        oldversion = _first_version_by_other_user(history, data['user'])

    if not oldversion:
        wikipedia.output('################################### NEW PAGE ###################################\nPage: ' + page.aslink() + '\nUser: ' + data['user'] + '\nContent:\n' + content)
        return

    try:
        oldcontent = page.getOldVersion(oldid=oldversion[0])
    except wikipedia.IsRedirectPage:
        wikipedia.output('[[' + page.title() + ']] was a redirect page on its previous version by ' + oldversion[2] + ', skipping...')
        return
    except wikipedia.NoPage:
        wikipedia.output('[[' + page.title() + ']] not exists; maybe an IRC error, skipping...')
        return

    # The two "new page" patterns have no 'diff' group, so the key may be
    # missing (or empty). Fall back to the first history entry, which the
    # search above already treats as the most recent revision.
    current_id = data.get('diff') or history[0][0]
    wikipedia.output(
        '################################### NEW EDIT ###################################\n'
        'Page: %s\nCurrent version: %s (User: %s)\nOld version: %s (User: %s)\nDiff:'
        % (page.aslink(), current_id, data['user'], oldversion[0], oldversion[2]))
    wikipedia.showDiff(oldcontent, content)


# Every recent-changes message starts and ends the same way; only the part
# holding the flags and the URL differs between the three patterns.
_RC_PREFIX = r':.*? PRIVMSG (?P<channel>.*) :\x0314\[\[\x0307(?P<title>.*?)\x0314\]\]\x034 '
_RC_SUFFIX = r'\x03 \x035\*\x03 \x0303(?P<user>.*?)\x03 \x035\*\x03 \(\x02?(?P<diffsize>[+-][0-9]*)\x02?\) \x0310(?P<comment>.*)\x03\r\n'

regexps = [
    # Edit to an existing page: URL carries diff and oldid (rcid optional).
    re.compile(_RC_PREFIX + r'(?P<flags>.*?)\x0310 \x0302http:\/\/.*?\/w\/index\.php\?title=.*?&diff=(?P<diff>[0-9]*)&oldid=(?P<oldid>[0-9]*)(&rcid=(?P<rcid>[0-9]*))?' + _RC_SUFFIX),
    # New page (flags end in "N"): URL carries only rcid.
    re.compile(_RC_PREFIX + r'(?P<flags>.*?N)\x0310 \x0302http:\/\/.*?\/w\/index\.php\?title=.*?&rcid=(?P<rcid>[0-9]*)' + _RC_SUFFIX),
    # New page (flags end in "N"): plain /wiki/ URL.
    re.compile(_RC_PREFIX + r'(?P<flags>.*?N)\x0310 \x0302http:\/\/.*?\/wiki/.*?' + _RC_SUFFIX),
]


def _identify(rc):
    """Register on the IRC server with a randomised nick and join channels.

    Returns the nick that was sent, so the caller can recognise the
    "nickname already in use" reply for it.
    """
    rand = str(random.randint(120, 9999))
    nick = '%s%s' % (nickname[0:9 - len(rand)], rand)
    rc.sendall('NICK %s\r\n' % nick)
    rc.sendall('USER %s %s %s :%s\r\n' % (nick, nick, nick, nick))
    for channel in channels:
        if not str(channel)[0] in '&#!+':
            channel = '#%s' % channel
        rc.sendall('JOIN %s\r\n' % channel)
    return nick


def _quit_quietly(rc):
    """Send QUIT, ignoring socket errors from an already-dead connection."""
    try:
        rc.sendall('QUIT\r\n')
    except socket.error:
        pass


def _handle_line(rc, line, nick):
    """Process one complete IRC line (including its trailing CRLF).

    Answers server PINGs, re-identifies when the nick is taken, and starts a
    ``check`` thread for every recent-changes message. Returns the nick
    currently in use (it changes after a re-identify).
    """
    if line.startswith('PING'):
        tokens = line.split()
        if len(tokens) > 1:
            rc.sendall('PONG ' + tokens[1] + '\r\n')
        return nick
    if line.endswith('433 * %s :Nickname is already in use.\r\n' % nick):
        return _identify(rc)

    # Decode only whole lines so a multi-byte character split across two
    # recv() chunks is never mangled.
    text = line.decode('utf-8', 'replace')
    for pattern in regexps:
        match = pattern.match(text)
        if match:
            thread.start_new_thread(check, (match.groupdict(),))
            break
    return nick


def _listen(rc):
    """Read from a connected socket until it closes or the user interrupts.

    Returns True when the server closed the connection (the caller should
    reconnect) and False when the user pressed Ctrl-C.
    """
    nick = _identify(rc)
    pending = ''
    try:
        while True:
            chunk = rc.recv(4096)
            if not chunk:
                _quit_quietly(rc)
                return True
            # A recv() chunk may hold several messages or end in the middle
            # of one: keep the unfinished tail for the next round.
            pending += chunk
            lines = pending.split('\r\n')
            pending = lines.pop()
            for line in lines:
                nick = _handle_line(rc, line + '\r\n', nick)
    except KeyboardInterrupt:
        _quit_quietly(rc)
        return False


def rcbot():
    """Connect to the recent-changes feed and dispatch events until stopped.

    Reconnects (after RECONNECT_DELAY seconds) whenever the server closes the
    connection; returns when the user interrupts with Ctrl-C. Each socket is
    closed before the next one is opened.
    """
    while True:
        rc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            rc.connect((str(server), int(port)))
            rc.recv(4096)  # wait for the server's first output before registering
            if not _listen(rc):
                return
        finally:
            rc.close()
        time.sleep(RECONNECT_DELAY)


if __name__ == '__main__':
    try:
        rcbot()
    finally:
        wikipedia.stopme()