Purple exclamation mark.svg Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit mediawiki, is welcome.

Red exclamation mark.svg UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before you can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.

Python:Mwclient/page.py

From Botwiki
Jump to: navigation, search
import client, errors
from HTMLParser import HTMLParser
from htmlentitydefs import name2codepoint 
import urllib
import re
 
class Pages(object):
    """Dictionary-style factory for ``Page`` objects belonging to one wiki.

    ``mediawiki`` is the client object. This class itself only uses its
    ``api`` method and its ``namespaces`` mapping (namespace id -> name) and
    otherwise just hands it on to every ``Page`` it creates.
    """

    def __init__(self, mediawiki):
        self.mediawiki = mediawiki

    def __getitem__(self, name):
        """Return the ``Page`` called *name*, e.g. ``pages['Main Page']``."""
        return Page(self.mediawiki, name)

    def random(self):
        """Return the ``Page`` for ``Special:Random``."""
        return self['Special:Random']

    def get_infoless(self, name):
        """Return a ``Page`` for *name*, passing an empty info sequence."""
        return Page(self.mediawiki, name, ())

    def from_prefix(self, prefix, redirects='all'):
        """Yield a ``Page`` for every title that starts with *prefix*.

        *prefix* may carry a namespace (``'Talk:Foo'``); it is split with
        ``split_title``.  *redirects* is passed as ``apfilterredir``.
        A single ``allpages`` request with ``aplimit=500`` is made, so no
        more than what that one request returns is yielded.
        """
        ns, pfx = self.split_title(prefix)
        data = self.mediawiki.api(
            'query', list='allpages', aplimit='500',
            apnamespace=str(ns), apprefix=pfx, apfilterredir=redirects)
        for page in data.get('query', {}).get('allpages', ()):
            yield Page(self.mediawiki, page['title'])

    def split_title(self, title):
        """Split *title* into ``(namespace_id, title_without_namespace)``.

        The text before the first colon is normalized and compared with the
        names in ``self.mediawiki.namespaces``.  When there is no colon, or
        the prefix is not a known namespace name, ``(0, title)`` is returned
        with the title left intact (apart from one leading colon, which is
        always dropped).
        """
        if title[:1] == ':':
            title = title[1:]

        prefix, colon, _ = title.partition(':')
        if not colon:
            # No namespace separator at all: nothing to look up.
            return 0, title

        wanted = self.normalize_title(prefix)
        for ns_id, ns_name in self.mediawiki.namespaces.items():
            if wanted == ns_name:
                return ns_id, self.strip_namespace(title)
        return 0, title

    @staticmethod
    def strip_namespace(title):
        """Return *title* without a leading colon and without ``prefix:``."""
        if title[:1] == ':':
            title = title[1:]
        # split(..., 1)[-1] is the whole string when there is no colon.
        return title.split(':', 1)[-1]

    @staticmethod
    def normalize_title(title):
        """Return *title* in normalized form.

        One leading colon is dropped, surrounding whitespace is stripped,
        the first character is upper-cased and spaces become underscores.
        An empty title is returned unchanged.
        """
        if title[:1] == ':':
            title = title[1:]
        # Strip before capitalizing so leading blanks cannot shield the
        # first real character from being upper-cased.
        title = title.strip()
        if title:
            title = title[0].upper() + title[1:]
        return title.replace(' ', '_')
 
class Page(object):
    """A single wiki page, read and written through ``index.php`` forms.

    Attributes set by ``__init__`` from the page info (when info is
    available): ``name``, ``namespace``, ``exists``, ``canEdit`` and, for
    existing pages, ``touched`` and ``revision``.  ``revisions`` and
    ``backlinks`` are created lazily on first access.
    """

    # Extracts the value of the wpEditToken field from a form's HTML.
    edittoken = re.compile(r".*?name\=\'wpEditToken\' value\=\"(.*?)\".*", re.S)

    def __init__(self, mediawiki, name, info=None):
        """Create a page object.

        mediawiki -- client object (uses ``query``, ``connection``, ``basedir``)
        name      -- page title, or another ``Page`` whose state is copied
        info      -- iterable of page-info dicts; when ``None`` the info is
                     fetched with a ``permissions`` query.  Pass an empty
                     sequence to skip the query altogether.
        """
        if isinstance(name, Page):
            # Copy constructor: take over everything the other page knows.
            self.__dict__.update(name.__dict__)
            return

        self.mediawiki = mediawiki
        self.name = name

        # Only a missing argument triggers the query; an explicitly empty
        # sequence (see Pages.get_infoless) must not hit the network.
        if info is None:
            info = self.mediawiki.query('permissions', titles=name)['pages'].values()

        for i in info:
            # Entries carrying 'normalizedTitle' are skipped.
            if 'normalizedTitle' not in i:
                self.exists = 'revid' in i
                self.name = i['title']
                self.namespace = i['ns']
                self.canEdit = i['canEdit'] == 'true'
                if self.exists:
                    self.touched = client.parse_timestamp(i['touched'])
                    self.revision = i['revid']

    def edit(self, section=None, readonly=False):
        """Load the edit form and return the current wikitext.

        section  -- when given, passed as the ``section`` URL parameter so
                    only that section is loaded
        readonly -- when true, a read-only form does not raise

        The form fields are remembered for a later ``save()``.
        Raises ``errors.ProtectedPageError`` if the form is read-only and
        *readonly* is false.
        """
        url = self.mediawiki.basedir + 'index.php?action=edit&title=' + \
            urllib.quote(self.name.encode('utf-8'))
        if section is not None:
            url += '&section=' + urllib.quote(str(section))

        page = EditPage()
        page.feed(self.mediawiki.connection.get(url).read().decode('utf-8', 'ignore'))
        page.close()

        if page.readonly and not readonly:
            raise errors.ProtectedPageError(self)

        self._edit_data = page.data
        self.text = u''.join(page.textdata)
        return self.text

    def save(self, text=None, summary=''):
        """Submit the form loaded by ``edit()``.

        text    -- new wikitext; ``None`` means "use ``self.text``".  An
                   empty string is a valid value and blanks the page.
        summary -- edit summary

        If the response still contains edit-form fields, raises
        ``errors.ProtectedPageError`` when that form is read-only and
        ``errors.EditError`` otherwise.
        """
        if text is None:
            text = self.text

        data = dict(self._edit_data)
        data['wpTextbox1'] = text
        data['wpSummary'] = summary
        data['wpSave'] = 'Save page'

        qs = self.mediawiki.basedir + 'index.php?action=submit&title=' + \
            urllib.quote(self.name.encode('utf-8'))
        postdata = urllib.urlencode([(k, v.encode('utf-8')) for k, v in data.items()])

        page = EditPage()
        page.feed(self.mediawiki.connection.post(
            qs, headers={'Content-Type': 'application/x-www-form-urlencoded'},
            data=postdata).read().decode('utf-8', 'ignore'))
        page.close()

        # Form fields in the response mean the edit form was shown again.
        if page.data:
            if page.readonly:
                raise errors.ProtectedPageError(self)
            raise errors.EditError(page.title, data)

    def get_latest_revision(self):
        """Return the first revision dict reported for this page, or ``{}``."""
        info = self.mediawiki.query('revisions', titles=self.name, rvlimit='1')
        for page in info['pages'].values():
            if page['title'] == self.name and page.get('revisions'):
                return page['revisions'][0]
        return {}

    def __getattr__(self, name):
        # Only reached when normal lookup fails: build the expensive lists
        # on first use and cache them on the instance.
        if name == 'revisions':
            self.revisions = Revisions(self)
            return self.revisions
        if name == 'backlinks':
            self.backlinks = Backlinks(self)
            return self.backlinks
        raise AttributeError(name)

    def __unicode__(self):
        return self.name

    def move(self, new_title, reason='', move_talk=True):
        """Move this page to *new_title* via ``Special:Movepage``.

        reason    -- text for the ``wpReason`` field
        move_talk -- also send ``wpMovetalk``

        Raises ``errors.ProtectedPageError`` when no edit token can be found
        on the move form, or when the response to the submission contains
        an edit token again.
        """
        postdata = {
            'wpNewTitle': new_title.encode('utf-8'),
            'wpOldTitle': self.name.encode('utf-8'),
            'wpReason': reason.encode('utf-8'),
            'wpMove': '1',
        }
        if move_talk:
            postdata['wpMovetalk'] = '1'

        form = self.mediawiki.connection.get(
            self.mediawiki.basedir + 'index.php?title=Special:Movepage/%s' %
            urllib.quote(self.name.encode('utf-8'))).read()
        found = self.edittoken.match(form)
        if found is None:
            # Without a token the move form was not offered to us.
            raise errors.ProtectedPageError(self)
        postdata['wpEditToken'] = found.group(1)

        response = self.mediawiki.connection.post(
            self.mediawiki.basedir + 'index.php?title=Special:Movepage&action=submit',
            headers={'Content-Type': 'application/x-www-form-urlencoded'},
            data=urllib.urlencode(postdata)).read().decode('utf-8', 'ignore')

        if self.edittoken.match(response):
            raise errors.ProtectedPageError(self)
 
 
 
 
class EditPage(HTMLParser):
    """Scrape a wiki edit form out of an HTML page.

    After ``feed()`` / ``close()`` the instance exposes:

    data     -- name -> value of the form's <input> fields, leaving out
                submit buttons and checkboxes
    textdata -- list of text chunks found inside the form's <textarea>
    readonly -- whether that <textarea> carries a ``readonly`` attribute
                (stays True when no textarea was seen)
    title    -- text of the document's <title>
    action   -- ``action`` attribute of the edit form, None if not seen
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.in_form = False
        self.in_text = False
        self.in_title = False
        self.data = {}
        self.textdata = []
        self.readonly = True
        self.title = u''
        self.action = None

    def handle_starttag(self, tag, attrs):
        # Keep the original pair list untouched; work on a dict copy.
        attr_map = dict(attrs)

        if attr_map.get(u'id') == u'editform':
            self.in_form = True
            self.action = attr_map.get(u'action')

        # Collect every form input except submit buttons and checkboxes.
        if tag == 'input' and self.in_form and \
                attr_map.get(u'type') not in (u'submit', u'checkbox'):
            if u'name' in attr_map:
                self.data[attr_map[u'name']] = attr_map.get(u'value') or u''

        if self.in_form and tag == 'textarea':
            self.in_text = True
            # readonly is a boolean attribute: its presence is what counts,
            # whether written as readonly, readonly="" or readonly="readonly".
            self.readonly = u'readonly' in attr_map

        # Any start tag other than <title> ends title collection.
        self.in_title = (tag == 'title')

    def handle_endtag(self, tag):
        if self.in_form and tag == 'form':
            self.in_form = False
        if self.in_text and tag == 'textarea':
            self.in_text = False
        if self.in_title and tag == 'title':
            self.in_title = False

    def handle_data(self, data):
        if self.in_text:
            self.textdata.append(data)
        if self.in_title:
            self.title += data

    def handle_entityref(self, name):
        """Feed a named entity (``&amp;``) through ``handle_data``.

        Unknown names are passed on literally as ``&name;``.
        """
        if name in name2codepoint:
            self.handle_data(unichr(name2codepoint[name]))
        else:
            self.handle_data(u'&%s;' % name)

    def handle_charref(self, name):
        """Feed a numeric reference (``&#65;`` / ``&#x41;``) through ``handle_data``.

        References that cannot be decoded are passed on literally.
        """
        try:
            # The parser hands over hexadecimal references as 'x41'.
            if name[:1] in (u'x', u'X'):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
            char = unichr(codepoint)
        except (ValueError, OverflowError):
            self.handle_data(u'&#%s;' % name)
        else:
            self.handle_data(char)
 
 
class Revisions(list):
    """List of the revision dicts reported for a page.

    page  -- object with ``name`` and ``mediawiki`` attributes (a ``Page``)
    limit -- passed to the query as ``rvlimit``

    The list is empty when the query reports no entry whose title equals
    ``page.name`` or that entry has no ``revisions``.
    """

    def __init__(self, page, limit=500):
        self.page = page

        # The query method lives on the wiki client, not on the page.
        info = page.mediawiki.query(
            'revisions', titles=page.name,
            rvcomments='1', rvlimit=str(limit))

        found = ()
        for entry in info['pages'].values():
            if entry['title'] == page.name:
                found = entry.get('revisions', ())
                break
        list.__init__(self, found)
 
class Backlinks(list):
    """List of the ``'*'`` values of the backlink entries reported for a page.

    page -- object with ``name`` and ``mediawiki`` attributes (a ``Page``)

    One query with ``bllimit=500`` is made.  The list is empty when the
    query reports no entry whose title equals ``page.name`` or that entry
    has no ``backlinks``.
    """

    def __init__(self, page):
        self.page = page

        # The query method lives on the wiki client, not on the page.
        info = page.mediawiki.query(
            'backlinks', titles=page.name, bllimit='500')

        found = ()
        for entry in info['pages'].values():
            if entry['title'] == page.name:
                found = entry.get('backlinks', ())
                break
        list.__init__(self, [link['*'] for link in found])
Personal tools
Share