Python:Uncat.py

From Botwiki

Jump to: navigation, search
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
This bot gets a page or list of pages, and checks if it is categorized.
If the page is not a member of any category and has no templates on it, it will
add the {{uncat}} template. Else, if the page is not a member of any category,
but has a template on it, it will check if that template adds the page to a category.
If the template doesn't add the page to a category, again, it will add the {{uncat}}
template to the page.
"""
#__version__ = '$Id: uncat.py 3998 2007-08-07 20:28:27Z huji $'
import wikipedia
import pagegenerators
import sys, re
 
##################
#    Settings    #
##################
 
# Wikitext that UncatBot.treat() appends, on a new line, to the end of every
# page it decides is uncategorized.
uncatTemplate = "{{uncat}}"
# Templates whose presence on a page stops the bot from tagging it; per the
# module docstring these are templates that put the page into a category
# themselves. The lowercased name of each template found on a page is
# compared against this string, so write the entries in lowercase.
categorizingTemplates = "{{bd}} {{dicdef}}"
 
##################
#  Settings end  #
##################
 
 
# This is required for the text that is shown when you run this script
# with the parameter -help.
# NOTE(review): presumably the framework's help output replaces each key
# occurring in the module docstring with the mapped value -- confirm. The
# module docstring at the top of this script does not contain a '&params;'
# placeholder.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}
 
class UncatBot:
    """Bot that appends ``uncatTemplate`` to pages that have no category.

    A page is left alone when it already has categories, when its text
    already contains ``uncatTemplate``, or when it uses one of the templates
    listed in ``categorizingTemplates``.
    """

    # Edit summary message that should be used.
    # NOTE: Put a good description here, and add translations, if possible!
    msg = {
        'en': u'Robot: Marking as uncategorized',
    }

    def __init__(self, generator):
        """
        Constructor. Parameters:
            * generator - The page generator that determines which pages
                          to work on.
        """
        self.generator = generator

    def run(self):
        """Set the edit summary, then treat every page of the generator."""
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        for page in self.generator:
            self.treat(page)

    @staticmethod
    def _template_names(spec):
        """
        Return the set of lowercased template names listed in spec.

        Parameters:
            * spec - A string of template calls, e.g. "{{bd}} {{dicdef}}".
                     Anything after a "|" inside a call is ignored.
        """
        names = set()
        for match in re.finditer(r'\{\{([^{}|]+)', spec):
            name = match.group(1).strip().lower()
            if name:
                names.add(name)
        return names

    @staticmethod
    def _uses_categorizing_template(template_names, spec):
        """
        Return True if any of template_names is listed in spec.

        Names are compared as whole, lowercased strings. No regular
        expression is built from the page's template names, so names
        containing regex metacharacters (e.g. "C++") are safe, and a short
        name such as "b" no longer matches a longer entry such as "{{bd}}".

        Parameters:
            * template_names - Iterable of template names used on a page.
            * spec           - String of template calls, see _template_names.
        """
        known = UncatBot._template_names(spec)
        for name in template_names:
            if name.strip().lower() in known:
                return True
        return False

    def treat(self, page):
        """
        Loads the given page, checks if the {{uncat}} template is needed, and
        applies the template.

        The page is skipped (with a message) when it does not exist, is a
        redirect, is locked, cannot be edited, already has categories,
        already contains uncatTemplate, or uses one of the templates listed
        in categorizingTemplates. Otherwise uncatTemplate is appended on a
        new line and the page is saved.
        """
        try:
            # Load the page
            text = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping." % page.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
            return
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked; skipping." % page.aslink())
            return

        if not page.canBeEdited():
            wikipedia.output(u"Page %s cannot be edited; skipping." % page.aslink())
            return

        wikipedia.output(u'Start to proccess %s' % page.title())
        if page.categories():
            wikipedia.output(u'>>Page \03{lightyellow}%s\03{default} is already categorized.' % page.title())
            return

        # Do not tag a page twice.
        # NOTE(review): the module docstring implies page.categories() does
        # not report categories added by templates, so without this guard a
        # page already carrying the template would be tagged again on every
        # run -- confirm against the framework.
        if uncatTemplate in text:
            wikipedia.output(u'>>Page \03{lightyellow}%s\03{default} is already marked as uncategorized; skipping.' % page.title())
            return

        # Check for stopwords: templates that categorize the page themselves.
        if self._uses_categorizing_template(page.templates(), categorizingTemplates):
            wikipedia.output(u'>>Stopword found in \03{lightyellow}%s\03{default}; skipping.' % page.title())
            return

        page.put(text + "\n%s" % uncatTemplate)
        wikipedia.output(u'>>Marked \03{lightpurple}%s\03{default} as uncategorized.' % page.title())
 
def main():
    """
    Build the page generator from the command line and run the bot.

    Every argument is first offered to the shared generator factory, which
    recognizes standard options such as -start:XYZ or -ref:Asdf. Arguments
    the factory does not recognize are collected as the words of a single
    page title; if any were given, that one page replaces whatever generator
    the factory produced. Without any pages to work on, the help text is
    shown instead.
    """
    factory = pagegenerators.GeneratorFactory()
    page_source = None
    title_words = []

    for argument in wikipedia.handleArgs():
        candidate = factory.handleArg(argument)
        if not candidate:
            # Not a standard generator option: treat it as part of a title.
            title_words.append(argument)
            continue
        page_source = candidate

    if len(title_words) > 0:
        # A title on the command line means: work on that single page only.
        single_page = wikipedia.Page(wikipedia.getSite(), ' '.join(title_words))
        page_source = iter([single_page])

    if not page_source:
        wikipedia.showHelp()
        return

    # The preloading generator downloads multiple pages from the wiki
    # simultaneously.
    bot = UncatBot(pagegenerators.PreloadingGenerator(page_source))
    bot.run()
 
# Run the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether main() returns normally or raises.
        # NOTE(review): stopme() presumably tells the framework that this
        # bot has finished -- confirm against the wikipedia module.
        wikipedia.stopme()
Personal tools