Purple exclamation mark.svg Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit mediawiki, is welcome.

Red exclamation mark.svg UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before you can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.

Python:Create redirect accent.py

From Botwiki
Jump to: navigation, search
# -*- coding: utf-8  -*-
"""
Bot to create redirects. This version creates redirects to titles containing accented letters and various special characters.
 
Do NOT use this version with -start:, as it would currently change any page name
containing a negative number (which we do not want to affect). Plus it would be a huge waste of resources.
Instead, feed it a file. Basic example of an SQL query to create said file:
echo "SELECT CONCAT('[[', page_title, ']]')  FROM page  WHERE page_namespace=0   AND page_is_redirect=0   AND page_title REGEXP \"[^a-zA-Z_ 0-9,:!%'`-/+=.\[\]\<\>]\";" | sql frwiki_p > e.txt
Of course, this query is not perfect, but it's a good start.
-- Darkoneko 28/12/2007
 
Command-line arguments:
 
    -file       Work on all pages listed in a text file.
                Argument can also be given as "-file:filename".
 
    -cat        Work on all pages which are in a specific category.
                Argument can also be given as "-cat:categoryname".
 
    -ref        Work on all pages that link to a certain page.
                Argument can also be given as "-ref:referredpagetitle".
 
    -links      Work on all pages that are linked from a certain page.
                Argument can also be given as "-links:linkingpagetitle".
 
    -start      Work on all pages on the home wiki, starting at the named
                page.
 
    -page       Work on a single page.
 
    -namespace  Run over a specific namespace.
                Argument can also be given as "-namespace:100" or
                "-namespace:Image".
 
    -always     Don't prompt to make changes, just do them.
"""
#
# (C) Yrithinnd, 2007
# (C) Filnik, 2007
# (C) Darkoneko, 2007
# (C) Phe, 2007
# Class licensed under terms of the MIT license
#
__version__ = '$Id: capitalize_redirects.py 4251 2007-09-12 10:36:03Z wikipedian $'
 
import time, sys, string, re
import wikipedia, pagegenerators
 
# Edit summaries used when a redirect page is created, keyed by language code.
# The single %s placeholder is filled with the title of the page the redirect
# points to.
msg = {
    'en': u'Robot: Create redirect to [[%s]]',
    'es': u'Bot: Creando redirección a [[%s]]',
    'it': u'Bot: Creo redirect a [[%s]]',
    'fr': u'Robot : Creation redirect vers [[%s]]',
    'pt': u'Bot: Criando redirect para [[%s]]',
    }
 
 
def getTranscriptionTable():
    """Build the one-character-to-one-character transcription table.

    The character at position i of the first literal is mapped to the
    character at position i of the second literal. The result is a dict of
    code point -> code point, the form accepted by the ``translate()``
    method of unicode strings.
    """
    accented=u'×.,;:-ẢĄÀÁÂÃÄÅǍȦǠẠẬẶāąảàáâãäåąǎȧǡạậặăĒĘĖẸỆÈÉÊËĚēęėẹệèéêëěĪỊİÌÍÎÏǏĮīɨịìíîïǐıŌỔȮȰȱƠÒÓÔÕÖØǑƏōổȯỌọỘộơòóôõöøǒőŪŲǙÙÚÛÜǓỤūųǚùúûüǔụŭḂḄßḃḅĊÇČ¢ćċçčḌḊĎÐĐḍḋđďðĠǤǦġǥǧğĹḶŁĽḸḸĺḷłľḹḹṄṆŇṅṇńňṘŘṚṜřṛṝṙŞṠṢṦṤṨṦŚşṡṣṧṥṩŠšṧşŦŤṪṬŧťṫṭţẎỴẏỵÿýŻẒŽżẓžżźḞḟĦħȞȟḢḣḤḥƗįǰḲḳǨǩṀṁṂṃṖṗƯưṾṿẆẇẈẉҲҳẊẋñÑ'
    plain=u'x     AAAAAAAAAAAAAAaaaaaaaaaaaaaaaaaEEEEEEEEEEeeeeeeeeeeIIIIIIIIIiiiiiiiiiOOOOOOOOOOOOOoooooooooooooooooUUUUUUUUUuuuuuuuuuuBBBbbCCCcccccDDDDDdddddGGGggggLLLLLLllllllNNNnnnnRRRRrrrrSSSSSSSSssssssssssTTTTtttttYYyyyyZZZzzzzzFfHhHhHhHhIijKkKkMmMmPpUuVvWWWwXxXxnN'
    # Characters repeated in the first literal simply overwrite their earlier
    # entry; the last occurrence wins.
    return dict((ord(src), ord(plain[idx])) for idx, src in enumerate(accented))
 
 
def getRedirectTitle(table, page):
    """Return the redirect title derived from the page title *page*.

    The title is first passed through ``translate(table)`` for all the
    one-character-to-one-character substitutions, then the substitutions
    that expand one character into several are applied.

    table -- code point -> code point dict, as accepted by ``translate()``
    page  -- the original page title (a unicode string)
    """
    redirect = page.translate(table)
    # One-to-many substitutions. Every literal carries the u prefix: without
    # it, Python 2 treats '\u0153' as a six-character backslash sequence and
    # the ligature/ellipsis replacements never match anything.
    expansions = (
        (u'æ', u'ae'), (u'Æ', u'Ae'),
        (u'\u0153', u'oe'), (u'\u0152', u'Oe'),   # oe ligatures
        (u'\u2026', u'...'),                      # horizontal ellipsis
        # þ is the lowercase thorn and Þ the uppercase one; the case of the
        # replacement follows the case of the source letter.
        (u'þ', u'th'), (u'Þ', u'Th'),
    )
    for source, replacement in expansions:
        redirect = redirect.replace(source, replacement)
    return redirect
 
 
 
def checkRedirect(page):
    """Tell whether *page* must be skipped as a redirect or unreadable page.

    Returns True when fetching the page text raises IsRedirectPage, NoPage
    or any other wikipedia.Error, or when the lowercased text contains
    "#redirect [[" preceded by a character other than ">".
    Returns False otherwise.
    """
    try:
        content = page.get()
    except (wikipedia.IsRedirectPage, wikipedia.NoPage, wikipedia.Error):
        # Redirect, missing page, or any other retrieval problem: skip it.
        return True
    # Look for a redirect marker anywhere in the (case-folded) page text.
    marker = re.search(r'([^>]#redirect\s*?\[\[)', content.lower())
    return marker is not None
 
class CapitalizeBot:
    """Create redirects from transcribed titles to the original pages.

    For every page yielded by the generator, the title is transcribed with
    getRedirectTitle(); when the transcribed title differs from the original
    and no page exists under it yet, a "#REDIRECT [[original]]" page is
    created there.
    """

    def __init__(self, generator, acceptall, msg):
        """Store the bot configuration.

        generator -- iterable yielding the pages to process
        acceptall -- when true, create redirects without asking
        msg       -- dict of edit summaries passed to wikipedia.translate()
        """
        self.generator = generator
        self.acceptall = acceptall
        self.msg = msg

    def run(self):
        """Process every page of the generator, creating redirects as needed."""
        # Built once, before the loop, so it is not regenerated for each page.
        table = getTranscriptionTable()
        stat_created = 0       # number of redirects actually created
        stat_existed = 0       # redirect target title already existed
        stat_total = 0         # pages taken from the generator so far

        for page in self.generator:
            page_t = page.title()

            # Show a status line once every 50 pages.
            if stat_total % 50 == 10:
                wikipedia.output(u'  Status : working on \03{lightblue}%s\03{default}. \n  \03{lightyellow}%s\03{default} pages created, \03{lightyellow}%s\03{default} already existed, \03{lightyellow}%s\03{default} pages parsed' % (page_t, stat_created, stat_existed, stat_total) )

            stat_total += 1

            # Is the page we're working on itself a redirect?
            if checkRedirect(page):
                wikipedia.output(u'\03{lightblue}%s\03{default} is a redirect! Skip...' % page_t)
                continue

            site = wikipedia.getSite()
            redirectpage = wikipedia.Page(site, getRedirectTitle(table, page_t) )

            # Transcription left the title unchanged: nothing to do, no output.
            if page.title() == redirectpage.title():
                continue

            # The redirect we want to create already exists: count and skip.
            if redirectpage.exists():
                stat_existed += 1
                continue

            # Show the title of the page we're working on.
            wikipedia.output(u"\n>>> \03{lightpurple}%s\03{default} <<<" % page_t)
            wikipedia.output(u'Creating \03{lightyellow}%s\03{default}...' % redirectpage.title())

            choice = None
            if not self.acceptall:
                choice = wikipedia.inputChoice(
                        u'Do you want to create the redirect?',
                        ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice in ['a', 'A']:
                    # "All": stop prompting for the remaining pages.
                    self.acceptall = True

            if not (self.acceptall or choice in ['y', 'Y']):
                # The user declined: nothing is created, so nothing is counted.
                continue

            try:
                comment = wikipedia.translate(wikipedia.getSite(), self.msg) % page_t
                redirectpage._putPage(u"#REDIRECT [[%s]]" % page_t, comment, newPage = True)
            except wikipedia.EditConflict:
                wikipedia.output(u"An edit conflict has occurred. Skip!...")
                continue
            except wikipedia.LockedPage:
                wikipedia.output(u"page was protected against creation. Skip!...")
                continue

            # Only reached when the page was saved without raising.
            stat_created += 1
 
def main():
    """Parse the command line, build the page generator and run the bot.

    Recognised arguments: -file, -cat, -page, -ref, -start (each optionally
    followed by ":value", otherwise the value is asked interactively),
    -always and -namespace:<number or name>.  When no page source is given,
    or when an unrecognised argument is present, the help text is shown and
    the script exits.

    NOTE(review): the module help also lists -links, but no branch handles
    it here, so it is treated as an unrecognised argument.
    """
    source = None
    textfilename = None
    categoryname = None
    pageNames = []
    referredPageName = None
    firstPageTitle = None
    acceptall = False
    namespaces = []
    unknown_args = []

    for arg in wikipedia.handleArgs():
        if arg.startswith('-file'):
            if len(arg) == 5:
                textfilename = wikipedia.input(u'Please enter the filename:')
            else:
                textfilename = arg[6:]
            source = 'textfile'
        elif arg.startswith('-cat'):
            if len(arg) == 4:
                categoryname = wikipedia.input(
                               u'Please enter the category name:')
            else:
                categoryname = arg[5:]
            source = 'category'
        elif arg.startswith('-page'):
            if len(arg) == 5:
                pageNames.append(wikipedia.input(
                                 u'Which page do you want to change?'))
            else:
                pageNames.append(arg[6:])
            source = 'singlepage'
        elif arg.startswith('-ref'):
            if len(arg) == 4:
                referredPageName = wikipedia.input(
                                   u'Links to which page should be processed?')
            else:
                referredPageName = arg[5:]
            source = 'ref'
        elif arg.startswith('-start'):
            if len(arg) == 6:
                firstPageTitle = wikipedia.input(
                                 u'Which page do you want to change?')
            else:
                firstPageTitle = arg[7:]
            source = 'allpages'
        elif arg == '-always':
            acceptall = True
        elif arg.startswith('-namespace:'):
            # A namespace may be given by number or by name.
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        else:
            # Previously appended to an undefined list, which raised
            # NameError; collect the argument and report it below instead.
            unknown_args.append(arg)

    if source is None or unknown_args:
        for bad_arg in unknown_args:
            wikipedia.output(u'Unknown argument: %s' % bad_arg)
        wikipedia.stopme()
        # NOTE(review): the help is looked up under the name of the script
        # this one was derived from -- confirm it matches the module name.
        wikipedia.showHelp(u'capitalize_redirects')
        sys.exit()

    if source == 'textfile':
        gen = pagegenerators.TextfilePageGenerator(textfilename)
    elif source == 'category':
        # catlib is not imported at the top of the file; import it where it
        # is needed so that -cat no longer fails with NameError.
        import catlib
        cat = catlib.Category(wikipedia.getSite(), categoryname)
        gen = pagegenerators.CategorizedPageGenerator(cat)
    elif source == 'singlepage':
        pages = [wikipedia.Page(wikipedia.getSite(), pageName)
                 for pageName in pageNames]
        gen = iter(pages)
    elif source == 'allpages':
        # Start in the namespace the given first page belongs to.
        namespace = wikipedia.Page(wikipedia.getSite(),
                                   firstPageTitle).namespace()
        gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace)
    else:  # source == 'ref'
        referredPage = wikipedia.Page(wikipedia.getSite(), referredPageName)
        gen = pagegenerators.ReferringPageGenerator(referredPage)

    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 25)
    bot = CapitalizeBot(preloadingGen, acceptall, msg)
    bot.run()
 
# Script entry point: run main() and always call wikipedia.stopme()
# afterwards, whether main() returns normally, raises, or exits.
if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()
Personal tools
Share