#!/usr/bin/python
# -*- coding: utf-8 -*-
# From Botwiki
"""
This bot gets a page or list of pages, and checks if it is categorized.
If the page is not a member of any category and has no templates on it, it will
add the {{uncat}} template. Else, if the page is not a member of any category,
but has a template on it, it will check if that template adds the page to a category.
If the template doesn't add the page to a category, again, it will add the {{uncat}}
template to the page.
"""
#__version__ = '$Id: uncat.py 3998 2007-08-07 20:28:27Z huji $'
import wikipedia
import pagegenerators
import sys, re
##################
# Settings       #
##################
# Wikitext appended to the end of a page that is found to be uncategorized.
uncatTemplate = "{{uncat}}"
# Space-separated list of templates (written as {{name}}, lower case) that
# are treated as already categorizing a page; a page using one of them is
# not marked with uncatTemplate.
categorizingTemplates = "{{bd}} {{dicdef}}"
##################
# Settings end   #
##################
# This is required for the text that is shown when you run this script
# with the parameter -help.
# The key is the literal placeholder '&params;' (the previous value was a
# mis-decoded HTML entity and could never match the placeholder).
# NOTE(review): presumably wikipedia.showHelp() substitutes this placeholder
# in the module docstring - confirm against the framework.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}
class UncatBot:
    """
    Bot that appends the uncategorized-page template to pages that need it.

    A page is marked when it is not a member of any category and none of the
    templates it uses is listed in the module-level categorizingTemplates
    setting.
    """
    # Edit summary message that should be used.
    # NOTE: Put a good description here, and add translations, if possible!
    msg = {
        'en': u'Robot: Marking as uncategorized',
    }

    def __init__(self, generator):
        """
        Constructor. Parameters:
            * generator - The page generator that determines on which pages
                          to work on.
        """
        self.generator = generator

    def run(self):
        """Set the edit summary and treat every page the generator yields."""
        # Set the edit summary message
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        for page in self.generator:
            self.treat(page)

    @staticmethod
    def _categorizing_names(template_list):
        """
        Extract the template names from a settings string.

        Parameters:
            * template_list - string such as "{{bd}} {{dicdef}}".

        Returns a set of the names found after each "{{", stripped of
        surrounding whitespace and lower-cased.
        """
        found = re.findall(r"\{\{\s*([^{}|]+)", template_list)
        return set(name.strip().lower() for name in found)

    @staticmethod
    def _has_categorizing_template(templates, template_list):
        """
        Tell whether any of the given template names is a categorizing one.

        Parameters:
            * templates     - iterable of template name strings used on a page.
            * template_list - settings string listing categorizing templates.

        Names are compared whole and case-insensitively. Plain set membership
        is used instead of building a regular expression from the template
        name, so names containing regex metacharacters (e.g. "C++") cannot
        raise, and a name that is merely a prefix of a listed template
        (e.g. "b" versus "bd") is not treated as a match.
        """
        known = UncatBot._categorizing_names(template_list)
        for name in templates:
            if name.strip().lower() in known:
                return True
        return False

    def treat(self, page):
        """
        Loads the given page, checks if the {{uncat}} template is needed,
        and applies the template.

        Pages that do not exist, are redirects, are locked, cannot be edited,
        already have a category, or use a categorizing template are skipped
        with a message. A failure to save is reported and does not stop the
        run.
        """
        try:
            # Load the page
            text = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping." % page.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
            return
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked; skipping." % page.aslink())
            return
        if not page.canBeEdited():
            wikipedia.output(u"Page %s cannot be edited; skipping." % page.aslink())
            return
        wikipedia.output(u'Start to proccess %s' % page.title())
        if page.categories():
            wikipedia.output(u'>>Page \03{lightyellow}%s\03{default} is already categorized.' % page.title())
            return
        # Check for stopwords: templates that categorize the page themselves.
        # The template list is fetched once and reused.
        if self._has_categorizing_template(page.templates(), categorizingTemplates):
            wikipedia.output(u'>>Stopword found in \03{lightyellow}%s\03{default}; skipping.' % page.title())
            return
        try:
            page.put(text + "\n%s" % uncatTemplate)
        except wikipedia.LockedPage:
            # The page may have become protected since it was loaded.
            wikipedia.output(u"Page %s is locked; skipping." % page.aslink())
        except wikipedia.EditConflict:
            wikipedia.output(u"Skipping %s because of edit conflict." % page.aslink())
        else:
            wikipedia.output(u'>>Marked \03{lightpurple}%s\03{default} as uncategorized.' % page.title())
def main():
    """
    Parse the command line, build the page generator and run the bot.

    Arguments recognised by the generator factory select the pages to work
    on; all remaining arguments are joined into the title of a single page,
    which then takes precedence. Without any page source the help text is
    shown instead.
    """
    # Handles the standard page-selection arguments (-start:XYZ, -ref:Asdf...).
    factory = pagegenerators.GeneratorFactory()
    # Source of the pages to work on; stays None until one is found.
    pages = None
    # Words of a single page title given directly on the command line.
    title_words = []

    for argument in wikipedia.handleArgs():
        produced = factory.handleArg(argument)
        if not produced:
            title_words.append(argument)
        else:
            pages = produced

    if title_words:
        # We will only work on a single page.
        title = ' '.join(title_words)
        single_page = wikipedia.Page(wikipedia.getSite(), title)
        pages = iter([single_page])

    if not pages:
        wikipedia.showHelp()
        return

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    preloaded = pagegenerators.PreloadingGenerator(pages)
    UncatBot(preloaded).run()
# Script entry point: run the bot only when executed directly.
if '__main__' == __name__:
    try:
        main()
    finally:
        # Always call the framework's stopme(), even if main() raised.
        wikipedia.stopme()