Purple exclamation mark.svg Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit mediawiki, is welcome.

Red exclamation mark.svg UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before you can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.

Python:Delnosource.py

From Botwiki
Jump to: navigation, search
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
 
This script is used on the Italian Wikipedia to remove "no-source" images from the articles
that use them, so that the category holding the old no-source images can then be deleted.
 
This version is commented, so you should have little trouble understanding (more or less) what the
bot does. For questions, bug reports or anything else, ask me here: http://it.wikipedia.org/wiki/Discussioni_utente:Filnik
 
Parameters:
 
-cat        Define the category that contains the images (it must end with %s %s %s). If the default
            is correct you don't need to use this option.
 
-ric        Enable recursive mode (the bot reruns itself after a pause).
 
-auto       Let the bot work without asking you, for every image, whether the article
            that uses it needs changes or not.
"""
#
# Delnosource.py Version 1.0
#
# (C) Filnik it.wiki 2007
#
# Distributed under the terms of the MIT license.
#
 
import wikipedia, catlib, re
import codecs, time, pagegenerators
 
# Run-time switches, both off unless requested on the command line:
#   rock     -- turned on by -ric; the run loop at the bottom of the script
#               then sleeps and starts over instead of stopping
#   dont_ask -- turned on by -auto
rock = dont_ask = False

# First pass over the arguments: only the two on/off switches are read here.
for option in wikipedia.handleArgs():
    rock = rock or option == '-ric'
    dont_ask = dont_ask or option == '-auto'

# The configured family, the site the bot works on and that site's language.
project = wikipedia.default_family # example: 'wikipedia'
site = wikipedia.getSite()
lang = site.lang                   # example: 'en'
 
# Two small dictionary to defing what the bot has to use with the relative project.
# If you project isn't there, you have to add it if you want that the script will work.
# ----------------------------------- Change Below! -------------------------------------- #
 
# Name of the image namespace (trailing colon included), keyed by language
# code. Add the translation for your language.
immagine = dict(
    en='Image:',
    it='Immagine:'
)

# Edit summary describing the removal of an image, keyed by language code.
comm = dict(
    en='Bot: Removing an image tagged with {{no source}}',
    it="Bot: Tolgo immagine taggata come unverified."
)
 
# The category name with %s %s %s at the end (for example 12 april 2007)
def cat_find(seven_days_ago, name_months, year):
    """Return the dated no-source category name for the current site.

    The three arguments are the day, the month name and the year; they are
    substituted, in that order, into the per-language category templates and
    the result is passed through wikipedia.translate() for the current site.
    """
    date_parts = (seven_days_ago, name_months, year)
    templates = {
        'en': 'Images with unknown source as of %s %s %s',
        'it': 'Immagini senza informazioni dal %s %s %s',
    }

    # Fill in every language first, then let translate() pick one.
    cat_name = {}
    for code in templates:
        cat_name[code] = templates[code] % date_parts

    return wikipedia.translate(wikipedia.getSite(), cat_name)
# Edit summary for tagging an old unverified image with the deletion
# template; its translation is what the script hands to wikipedia.setAction().
comment_i_dunno_what_number = dict(
    en="Bot: Adding {{delete}} to the old unverified",
    it="Bot: Aggiungo {{delete}} ai vecchi unverified"
)

# Wikitext that takes the place of the unverified template on the image page.
newtext = dict(
    en='{{db|Image no source from 7 days}}',
    it='{{cancella subito|Unverified da 7 giorni}}'
)
 
# Month number (1-12) -> lower-case month name in the site's language.
# Only the languages listed here get a name_months table at all.
if lang == 'it':
    name_months = dict(zip(range(1, 13), (
        "gennaio", "febbraio", "marzo", "aprile",
        "maggio", "giugno", "luglio", "agosto",
        "settembre", "ottobre", "novembre", "dicembre",
    )))
elif lang == 'en':
    name_months = dict(zip(range(1, 13), (
        "january", "february", "march", "april",
        "may", "june", "july", "august",
        "september", "october", "november", "december",
    )))
 
# Language codes the tables above have been filled in for; the bot checks
# the site language against this list before it starts.
# Add yours here (keep the list in alphabetical order).
project_inserted = [
    'en',
    'it',
]
 
# ----------------------------------- Change Above! --------------------------------------- #
#*******************************************************************************************#
 
# Pick the strings for the current site's language out of the tables above.
commento = wikipedia.translate(wikipedia.getSite(), comm)
image_namespace = wikipedia.translate(wikipedia.getSite(), immagine)
comm_idwn = wikipedia.translate(wikipedia.getSite(), comment_i_dunno_what_number)
ntext = wikipedia.translate(wikipedia.getSite(), newtext)

# Refuse to run on a language that has not been added to the tables, so the
# bot never works with another language's strings instead of yours.
if lang not in project_inserted:
    wikipedia.output(u"Your project isn't inserted in the Bot, you have to open the script and add it!")
    wikipedia.stopme()
    # stopme() does not terminate the interpreter, so the script has to be
    # ended explicitly here; otherwise it would carry on past the warning.
    raise SystemExit(1)
 
# Second pass over the arguments: -cat, optionally followed by the category
# title ("-cat:Title"). A bare "-cat" asks for the title interactively.
# CatTitle is only assigned when the option is present.
for arg in wikipedia.handleArgs():
    if not arg.startswith('-cat'):
        continue
    if len(arg) > 4:
        # Skip the "-cat" prefix and the separator character after it.
        CatTitle = arg[5:]
    else:
        CatTitle = wikipedia.input(u'What category of unverified do you want to load?')
 
# Work out the date seven days before today; the dated category name is
# built from that day, month and year.
import datetime


def compute_seven_days_ago(today):
    """Return the calendar date seven days before *today*.

    today -- a datetime.date

    Returns a tuple (day, month, year) where day and year are strings and
    month is the month number (1-12), i.e. the values the script stores in
    seven_days_ago, month_to_take and year.

    Date arithmetic is delegated to datetime so that month lengths, leap
    years and the December/January rollover are always right.
    """
    target = today - datetime.timedelta(days=7)
    return str(target.day), target.month, str(target.year)


# Current local date; month and day are kept as strings.
rightime = time.localtime(time.time())
month = str(rightime[1])
day = str(rightime[2])

# year becomes the year of the target date, so it is last year's when the
# seven-day step crosses back over 1 January.
seven_days_ago, month_to_take, year = compute_seven_days_ago(
    datetime.date(rightime[0], rightime[1], rightime[2]))
 
# Resolve the category to work on and set the default edit summary.
site = wikipedia.getSite()
cat_translation = cat_find(seven_days_ago, name_months[month_to_take], year)

# CatTitle exists at this point only if -cat was given on the command line;
# in that case it must win over the built-in category name.
try:
    requested_title = CatTitle
except NameError:
    requested_title = None

if not requested_title:
    CatTitle = cat_translation
elif requested_title.count('%s') == 3:
    # A -cat value ending in "%s %s %s" is a template for day, month, year.
    CatTitle = requested_title % (seven_days_ago, name_months[month_to_take], year)
else:
    CatTitle = requested_title
wikipedia.setAction(comm_idwn)

# Loading the category and the articles. The category namespace (number 14)
# is taken from the site so the prefix matches the site's language.
cat = catlib.Category(site, site.namespace(14) + ':' + CatTitle)
 
# Defing the references function to get the pages that link to a certain image.
def references(p, image):
    """Remove an image from every page that uses it and report the outcome.

    p     -- the image's page object; the pages using it are taken from
             pagegenerators.FileLinksGenerator(p)
    image -- the image's title, namespace prefix included

    Returns a list with one entry per page that used the image: the page
    title when removeImage() did not return False, or 'Error: ' followed by
    the title when it did. The list is empty when no page uses the image.
    """
    # removeImage() is given the bare file name with underscores. Split on
    # the first colon only, so a file name that itself contains a colon is
    # not cut short; a title without any prefix is used as it is.
    realimage = image.replace(' ', '_').split(':', 1)[-1]

    gen = pagegenerators.FileLinksGenerator(p)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 200)
    report = []
    for w in preloadingGen:
        realpage = w.title()
        wikipedia.output('Loading %s...' % realpage)
        wikipedia.output(realimage)
        # commento is the edit summary translated for the current site.
        # Only an explicit False counts as failure, as in the comparison
        # this function has always made.
        if w.removeImage(realimage, True, commento) == False:
            report.append('Error: ' + realpage)
        else:
            report.append(realpage)

    # Every page visited adds exactly one entry, so an empty report means
    # that nothing links to the image.
    if not report:
        wikipedia.output(u"The image isn't used by any page.")
        return []
    wikipedia.output(u'Image used on: ' + str(report))
    return report
 
# Ladies and gentlemen, here there is the main class!
class main:
    """Process every image page in the dated no-source category.

    For each page in the module-level category `cat`, whattodo() replaces
    the unverified template with the deletion template, removes the image
    from the pages using it (via references()) and appends the outcome to
    the image page. Problems are appended to the log page User:Filbot/Log.

    pos2 is stored on the instance; whattodo() does not read it.
    """

    # Matches {{unverified...}}, {{Unverified...}} and {{unverdata}}.
    UNVERIFIED_RX = re.compile(
        r'(\{\{[Uu]nverified(.*?)\}\}|\{\{unverdata\}\})', re.UNICODE)

    # Marker references() puts in front of the titles it could not edit.
    ERROR_PREFIX = 'Error: '

    def __init__(self, pos2):
        self.pos2 = pos2

    def _report(self, entry):
        """Append *entry* to the log page User:Filbot/Log."""
        log_page = wikipedia.Page(wikipedia.getSite(), 'User:Filbot/Log')
        try:
            log_text = log_page.get()
        except wikipedia.NoPage:
            # No log yet: start it with this entry instead of failing.
            log_text = ''
        log_page.put(log_text + entry, comment = 'Bot: Reporting Error!')

    def whattodo(self):
        """Walk the category once and handle each image page in it."""
        for pag in cat.articles():
            page = pag.title()
            wikipedia.output('Loading %s...' % page)
            # Load the page from the same site the category was read from.
            p = wikipedia.Page(wikipedia.getSite(), page)
            try:
                text = p.get()
                if self.UNVERIFIED_RX.search(text) is None:
                    # The notes added below contain "Rimossa"/"rimossa", so
                    # their presence means the page was already handled.
                    if 'rimossa' in text.lower():
                        wikipedia.output(u'Image, already parsed')
                    else:
                        wikipedia.output(u"Unverified not found!")
                        self._report("\n*[[:%s]] hasn't the unverified's template, please check!" % page)
                    continue

                new_text_to_add = self.UNVERIFIED_RX.sub(ntext, text)
                ref = references(p, page)
                wikipedia.output(str(ref))
                if ref:
                    for i in ref:
                        # Test the prefix only, so a title that merely
                        # contains "Error: " is not taken for a failure.
                        if i.startswith(self.ERROR_PREFIX):
                            wikipedia.output('Image not deleted from: [[%s]]' % i)
                            txt_t = "\n*'''Non''' rimossa da: [[%s]]" % i[len(self.ERROR_PREFIX):]
                        else:
                            wikipedia.output('Image deleted from: [[%s]]' % i)
                            txt_t = '\n*Rimossa da [[%s]]' % i
                        new_text_to_add = new_text_to_add + txt_t
                else:
                    new_text_to_add = new_text_to_add + "\nQuesta immagine non e' stata rimossa da nessuna pagina."
                p.put(new_text_to_add)
                wikipedia.output(u"Done!")
            except wikipedia.NoPage:
                wikipedia.output(u"Strange! The image doesn't exist!")
                # The note belongs on the log page, not on the missing page.
                self._report("\n*[[:%s]] doesn't exist, please check!" % page)
 
# Run once, or over and over when recursive mode (-ric) is on, pausing
# 90000 seconds between passes. stopme() is called on the way out whether
# the run ended normally or not.
try:
    while True:
        main(0).whattodo()
        if not rock:
            break
        wikipedia.output(u"Sleeping for 90000 seconds before rerun ;)")
        time.sleep(90000)
    wikipedia.output(u'Stop!')
    wikipedia.stopme()
finally:
    wikipedia.stopme()
Personal tools
Share