Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit MediaWiki wikis, is welcome.
UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before they can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.
Python:Mwclient/page.py
"""Page lookup, editing and moving helpers for mwclient (Python 2)."""

import re
import urllib
from HTMLParser import HTMLParser
from htmlentitydefs import name2codepoint

import client
import errors


class Pages(object):
    """Factory for :class:`Page` objects bound to one site object.

    ``mediawiki`` is the project's site object.  This module relies only on
    its ``api``, ``query``, ``namespaces``, ``connection`` and ``basedir``
    members.
    """

    def __init__(self, mediawiki):
        self.mediawiki = mediawiki

    def __getitem__(self, name):
        """Return the Page called ``name`` (``pages['Some title']``)."""
        return Page(self.mediawiki, name)

    def random(self):
        """Return a Page built for the title ``Special:Random``."""
        return self['Special:Random']

    def get_infoless(self, name):
        """Return a Page without querying the site for its metadata.

        The resulting Page has only ``mediawiki`` and ``name`` set;
        attributes such as ``exists`` or ``canEdit`` are not populated.
        """
        return Page(self.mediawiki, name, ())

    def from_prefix(self, prefix, redirects='all'):
        """Yield a Page for every title returned by an ``allpages`` query.

        ``prefix`` may carry a namespace (``'User:Foo'``); it is split into
        namespace id and bare prefix before querying.  ``redirects`` is
        passed through unchanged as ``apfilterredir``.
        """
        ns, pfx = self.split_title(prefix)
        data = self.mediawiki.api(
            'query', list='allpages', aplimit='500',
            apnamespace=str(ns), apprefix=pfx, apfilterredir=redirects)
        for page in data.get('query', {}).get('allpages', ()):
            yield Page(self.mediawiki, page['title'])

    def split_title(self, title):
        """Split ``title`` into ``(namespace_id, title_without_namespace)``.

        The text before the first colon is normalized and compared with the
        names in ``self.mediawiki.namespaces``.  When there is no colon, or
        the prefix is not a known namespace, ``(0, title)`` is returned
        (with a single leading colon removed).
        """
        if title[:1] == ':':
            title = title[1:]
        colon = title.find(':')
        if colon < 0:
            # No namespace separator at all: nothing to look up.
            return 0, title
        # Normalize with the colon still attached, then drop it again.
        ns = self.normalize_title(title[:colon + 1])[:-1]
        for ns_id, ns_name in self.mediawiki.namespaces.iteritems():
            if ns == ns_name:
                return ns_id, self.strip_namespace(title)
        return 0, title

    @staticmethod
    def strip_namespace(title):
        """Return ``title`` without a leading colon and without everything
        up to and including its first remaining colon."""
        if title[:1] == ':':
            title = title[1:]
        # find() returns -1 when there is no colon, so the slice starts at 0
        # and the title is returned unchanged.
        return title[title.find(':') + 1:]

    @staticmethod
    def normalize_title(title):
        """Return ``title`` with a leading colon removed, its first character
        upper-cased, surrounding whitespace stripped and spaces replaced by
        underscores.  An empty title is returned as is."""
        if title[:1] == ':':
            title = title[1:]
        if not title:
            return title
        title = title[0].upper() + title[1:]
        title = title.strip()
        return title.replace(' ', '_')


class Page(object):
    """A single wiki page that can be read, saved and moved."""

    # Extracts the value of the wpEditToken field from a form's HTML.
    edittoken = re.compile(
        r".*?name\=\'wpEditToken\' value\=\"(.*?)\".*", re.S)

    def __init__(self, mediawiki, name, info=None):
        """Create a page object.

        ``name`` is either a title or another object of exactly this type,
        in which case its attributes are copied.  ``info`` is an iterable of
        page-info dicts; when it is ``None`` the site is queried.  Passing an
        empty iterable skips both the query and the metadata attributes.
        """
        if type(name) is type(self):
            self.__dict__.update(name.__dict__)
            return
        self.mediawiki = mediawiki
        self.name = name
        if info is None:
            # Only a missing ``info`` triggers a query; an explicitly empty
            # one (see Pages.get_infoless) must not.
            info = self.mediawiki.query(
                'permissions', titles=name)['pages'].itervalues()
        for i in info:
            if 'normalizedTitle' not in i:
                self.exists = 'revid' in i
                self.name = i['title']
                self.namespace = i['ns']
                self.canEdit = i['canEdit'] == 'true'
                if self.exists:
                    self.touched = client.parse_timestamp(i['touched'])
                    self.revision = i['revid']

    def edit(self, section=None, readonly=False):
        """Fetch the edit form, remember its hidden fields and return the
        page text.

        Raises ``errors.ProtectedPageError`` when the text area is read-only
        (or absent) unless ``readonly`` is true.  ``section`` is accepted but
        not used.
        """
        url = (self.mediawiki.basedir + 'index.php?action=edit&title=' +
               urllib.quote(self.name.encode('utf-8')))
        page = EditPage()
        page.feed(self.mediawiki.connection.get(url).read()
                  .decode('utf-8', 'ignore'))
        page.close()
        if page.readonly and not readonly:
            raise errors.ProtectedPageError(self)
        self._edit_data = page.data
        self.text = u''.join(page.textdata)
        return self.text

    def save(self, text=None, summary=''):
        """Submit the edit form prepared by :meth:`edit`.

        ``text`` defaults to ``self.text`` only when it is ``None``, so an
        empty string can be saved deliberately.  If the response contains an
        edit form again, ``errors.ProtectedPageError`` is raised when its
        text area is read-only and ``errors.EditError`` otherwise.
        """
        if text is None:
            text = self.text
        data = dict(self._edit_data)
        data['wpTextbox1'] = text
        data['wpSummary'] = summary
        data['wpSave'] = 'Save page'
        qs = (self.mediawiki.basedir + 'index.php?action=submit&title=' +
              urllib.quote(self.name.encode('utf-8')))
        postdata = urllib.urlencode(
            [(k, v.encode('utf-8')) for k, v in data.iteritems()])
        page = EditPage()
        page.feed(self.mediawiki.connection.post(
            qs,
            headers={'Content-Type': 'application/x-www-form-urlencoded'},
            data=postdata).read().decode('utf-8', 'ignore'))
        page.close()
        if page.data:
            # Form fields were parsed from the response, i.e. the edit form
            # was shown again instead of the saved page.
            if page.readonly:
                raise errors.ProtectedPageError(self)
            raise errors.EditError(page.title, data)

    def get_latest_revision(self):
        """Return the first revision dict reported for this page, or ``{}``
        when the query yields none."""
        info = self.mediawiki.query(
            'revisions', titles=self.name, rvlimit='1')
        for page in info['pages'].itervalues():
            if page['title'] == self.name:
                if page.get('revisions'):
                    return page['revisions'][0]
        return {}

    def __getattr__(self, name):
        """Build ``revisions`` and ``backlinks`` on first access and cache
        them on the instance; any other missing attribute raises
        ``AttributeError``."""
        if name == 'revisions':
            self.revisions = Revisions(self)
            return self.revisions
        if name == 'backlinks':
            self.backlinks = Backlinks(self)
            return self.backlinks
        raise AttributeError(name)

    def __unicode__(self):
        return self.name

    def move(self, new_title, reason='', move_talk=True):
        """Move the page to ``new_title`` through Special:Movepage.

        Raises ``errors.ProtectedPageError`` when no edit token can be read
        from the move form, or when the response to the submission still
        contains one.
        """
        postdata = {
            'wpNewTitle': new_title.encode('utf-8'),
            'wpOldTitle': self.name.encode('utf-8'),
            'wpReason': reason.encode('utf-8'),
            'wpMove': '1'}
        if move_talk:
            postdata['wpMovetalk'] = '1'

        data = self.mediawiki.connection.get(
            self.mediawiki.basedir +
            'index.php?title=Special:Movepage/%s' %
            urllib.quote(self.name.encode('utf-8'))).read()
        token = self.edittoken.match(data)
        if token is None:
            # Without a token the move cannot be submitted; report it the
            # same way as a submission that bounces back to the form.
            raise errors.ProtectedPageError(self)
        postdata['wpEditToken'] = token.group(1)

        data = self.mediawiki.connection.post(
            self.mediawiki.basedir +
            'index.php?title=Special:Movepage&action=submit',
            headers={'Content-Type': 'application/x-www-form-urlencoded'},
            data=urllib.urlencode(postdata)).read().decode('utf-8', 'ignore')
        if self.edittoken.match(data):
            raise errors.ProtectedPageError(self)


class EditPage(HTMLParser):
    """HTML parser that collects the fields of the element with
    ``id="editform"``, the text area contents and the page ``<title>``."""

    def __init__(self):
        HTMLParser.__init__(self)
        self.in_form = False
        self.in_text = False
        self.data = {}        # name -> value of the collected <input> fields
        self.textdata = []    # chunks of text found inside the <textarea>
        self.readonly = True  # stays True unless an editable textarea is seen
        self.in_title = False
        self.title = u''

    def handle_starttag(self, tag, attrs):
        # ``attrs`` stays a list of (name, value) pairs throughout so the
        # tuple membership tests below keep their meaning.
        if (u'id', u'editform') in attrs:
            self.in_form = True
            self.action = dict(attrs)['action']
        if (tag == 'input' and self.in_form
                and (u'type', u'submit') not in attrs
                and (u'type', u'checkbox') not in attrs):
            fields = dict(attrs)
            if u'name' in fields:
                self.data[fields[u'name']] = fields.get(u'value', u'')
        if self.in_form and tag == 'textarea':
            self.in_text = True
            self.readonly = (u'readonly', u'readonly') in attrs
        self.in_title = (tag == 'title')

    def handle_endtag(self, tag):
        if self.in_form and tag == 'form':
            self.in_form = False
        if self.in_text and tag == 'textarea':
            self.in_text = False
        if self.in_title and tag == 'title':
            self.in_title = False

    def handle_data(self, data):
        if self.in_text:
            self.textdata.append(data)
        if self.in_title:
            self.title += data

    def handle_entityref(self, name):
        """Decode a named entity; unknown names are kept literally."""
        if name in name2codepoint:
            self.handle_data(unichr(name2codepoint[name]))
        else:
            self.handle_data(u'&%s;' % name)

    def handle_charref(self, name):
        """Decode a decimal (``62``) or hexadecimal (``x3E``) character
        reference; anything that cannot be decoded is kept literally."""
        try:
            if name[:1] in (u'x', u'X'):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
            self.handle_data(unichr(codepoint))
        except ValueError:
            # Malformed number, or a code point unichr() cannot represent.
            self.handle_data(u'&#%s;' % name)


class Revisions(list):
    """List of the revision dicts returned for ``page``."""

    def __init__(self, page, limit=500):
        self.page = page
        # Page has no ``query`` method of its own; go through its site object.
        info = page.mediawiki.query(
            'revisions', titles=page.name, rvcomments='1',
            rvlimit=str(limit))
        for i in info['pages'].itervalues():
            if i['title'] == page.name:
                list.__init__(self, i.get('revisions', ()))
                return
        list.__init__(self, ())


class Backlinks(list):
    """List of the ``'*'`` values of the backlink entries returned for
    ``page``."""

    def __init__(self, page):
        self.page = page
        # Page has no ``query`` method of its own; go through its site object.
        info = page.mediawiki.query(
            'backlinks', titles=page.name, bllimit='500')
        for i in info['pages'].itervalues():
            if i['title'] == page.name:
                list.__init__(
                    self, (j['*'] for j in i.get('backlinks', ())))
                return
        list.__init__(self, ())