Python:Delnosource.py

From Botwiki

Jump to: navigation, search
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
 
This is a script used on the Italian Wikipedia to remove "no-source" images from the articles
that use them. In this way the categories that hold the old no-source images can be deleted.
 
This version is commented, so you should be able to understand (more or less) what the
bot does. For questions, bug reports or anything else, ask me here: http://it.wikipedia.org/wiki/Discussioni_utente:Filnik
 
Parameters:
 
-cat        Define the category that contains the images (it must end with %s %s %s). If the default is correct you don't need
            to use this option.
 
-ric        Use it to enable recursive mode.
 
-auto       Use it to let the bot work without asking you, for every image, whether the article
            where it is used needs changes or not.
"""
#
# Delnosource.py Version 1.0
#
# (C) Filnik it.wiki 2007
#
# Distributed under the terms of the MIT license.
#
 
import wikipedia, catlib, re
import codecs, time, pagegenerators
 
# Switch for the "recursive" mode (enabled with -ric): when it is True the
# script runs again after sleeping instead of stopping after one pass.
# (The name "rock" has no special meaning.)
rock = False

# Switch set by -auto. According to the module documentation it should let the
# bot work without asking for confirmation on every image.
# NOTE(review): nothing in the visible source reads this flag.
dont_ask = False

# First pass over the command-line parameters: boolean switches only.
for arg in wikipedia.handleArgs():
    if arg == '-auto':
        dont_ask = True
    elif arg == '-ric':
        rock = True

# Site, language code and family the bot is configured to work on.
site = wikipedia.getSite()
project = wikipedia.default_family  # example: 'wikipedia'
lang = site.lang                    # example: 'en'
 
# Small dictionaries that define what the bot has to use for each project.
# If your project isn't there, you have to add it if you want the script to work.
# ----------------------------------- Change Below! -------------------------------------- #
 
# Name of the image namespace (with the trailing colon), keyed by language
# code. Add the translation for your language here.
immagine = dict(
    en='Image:',
    it='Immagine:',
)

# Edit summary for the edits that remove an image from a page, keyed by
# language code.
comm = dict(
    en='Bot: Removing an image tagged with {{no source}}',
    it="Bot: Tolgo immagine taggata come unverified.",
)
 
# The category name with %s %s %s at the end (for example 12 april 2007)
def cat_find(seven_days_ago, name_months, year):
    """Return the title of the dated "no source" category for the current site.

    Parameters:
        seven_days_ago - day of the month to put in the title
        name_months    - name of the month to put in the title
        year           - year to put in the title

    The three values are inserted, in this order, into the per-language
    title pattern; wikipedia.translate() then picks the entry for the
    site returned by wikipedia.getSite().
    """
    date_parts = (seven_days_ago, name_months, year)
    patterns = (
        ('en', 'Images with unknown source as of %s %s %s'),
        ('it', 'Immagini senza informazioni dal %s %s %s'),
    )

    # Fill in every pattern first, exactly one finished title per language.
    cat_name = {}
    for code, pattern in patterns:
        cat_name[code] = pattern % date_parts

    return wikipedia.translate(wikipedia.getSite(), cat_name)
# Edit summary used when the bot adds the deletion template to an image page,
# keyed by language code.
comment_i_dunno_what_number = dict(
    en="Bot: Adding {{delete}} to the old unverified",
    it="Bot: Aggiungo {{delete}} ai vecchi unverified",
)

# Deletion template that replaces the "unverified" template on the image page,
# keyed by language code.
newtext = dict(
    en='{{db|Image no source from 7 days}}',
    it='{{cancella subito|Unverified da 7 giorni}}',
)
 
# Month number (1-12) -> month name as it is written in the category titles.
# Only defined for the languages listed here; for any other language the name
# name_months is left undefined.
if lang == 'it':
    name_months = dict(zip(range(1, 13), (
        "gennaio", "febbraio", "marzo", "aprile", "maggio", "giugno",
        "luglio", "agosto", "settembre", "ottobre", "novembre", "dicembre",
    )))
elif lang == 'en':
    name_months = dict(zip(range(1, 13), (
        "january", "february", "march", "april", "may", "june",
        "july", "august", "september", "october", "november", "december",
    )))
 
# Language codes for which every table above has been filled in. Add your
# own code (in alphabetical order) once you have added your translations;
# the script checks this list before it starts working.
project_inserted = ['en', 'it']

# ----------------------------------- Change Above! --------------------------------------- #
#*******************************************************************************************#

# Pick, from each per-language table, the entry for the current site:
#   commento        - summary for the edits that remove an image (from comm)
#   image_namespace - name of the image namespace (from immagine)
#   comm_idwn       - summary for adding the deletion template
#   ntext           - deletion template text (from newtext)
commento = wikipedia.translate(wikipedia.getSite(), comm)
image_namespace = wikipedia.translate(wikipedia.getSite(), immagine)
comm_idwn = wikipedia.translate(wikipedia.getSite(), comment_i_dunno_what_number)
ntext = wikipedia.translate(wikipedia.getSite(), newtext)
 
# Refuse to run on a project that has not been added to project_inserted, so
# that the bot never works with the English texts instead of yours.
if lang not in project_inserted:
    wikipedia.output(u"Your project isn't inserted in the Bot, you have to open the script and add it!")
    wikipedia.stopme()
    # stopme() does not end the program by itself (the script also calls it
    # at the end of a normal run), so leave explicitly; otherwise execution
    # would continue with tables that have no entry for this language.
    raise SystemExit(1)
 
# Second pass over the command-line parameters: look for -cat.
# A bare "-cat" asks for the title interactively; otherwise everything after
# the first five characters (e.g. "-cat:") is taken as the title.
# NOTE(review): further down the script assigns CatTitle again from
# cat_find(), so the value chosen here is not the one used to load the
# category - confirm whether that is intended.
for arg in wikipedia.handleArgs():
    if not arg.startswith('-cat'):
        continue
    if len(arg) != 4:
        CatTitle = arg[5:]
    else:
        CatTitle = wikipedia.input(u'What category of unverified do you want to load?')
 
# Deduce the date used in the category name: seven days before today.
def days_in_month(year, month):
    """Return how many days the given month (1-12) of the given year has.

    February follows the Gregorian leap-year rule: years divisible by 4 are
    leap years, except century years that are not divisible by 400.
    """
    if month == 2:
        if year % 4 == 0 and (year % 100 != 0 or year % 400 == 0):
            return 29
        return 28
    if month in (4, 6, 9, 11):
        return 30
    return 31


def date_seven_days_ago(year, month, day):
    """Return the (year, month, day) tuple of the date seven days earlier.

    All three arguments and all three results are integers. When the
    subtraction crosses the start of the month, the result lies in the
    previous month (and in December of the previous year when the given
    month is January).
    """
    day -= 7
    if day < 1:
        month -= 1
        if month < 1:
            month = 12
            year -= 1
        # day is 0 or negative here: count back from the end of that month.
        day += days_in_month(year, month)
    return year, month, day


rightime = time.localtime(time.time())
month = str(rightime[1])    # current month, as a string
day = str(rightime[2])      # current day of the month, as a string

# Values used to build the category title: the year and the day as strings,
# the month as a number (it is the key into name_months).
_target_year, month_to_take, _target_day = date_seven_days_ago(
    rightime[0], rightime[1], rightime[2])
year = str(_target_year)
seven_days_ago = str(_target_day)
 
# Build the title of the dated category and set the default edit summary.
site = wikipedia.getSite()
cat_translation = cat_find(seven_days_ago, name_months[month_to_take], year)
CatTitle = cat_translation
wikipedia.setAction(comm_idwn)

# Load the category. The namespace prefix is taken from the site (namespace
# number 14 is the category namespace) instead of being hard-coded in
# Italian, so the title is also right on the other supported projects.
cat = catlib.Category(site, site.namespace(14) + ":" + CatTitle)
 
# Define the references function, which gets the pages that link to a certain image.
def references(p, image):
    """Remove an image from every page that uses it and report the outcome.

    Parameters:
        p     - page object of the image; it is handed to
                pagegenerators.FileLinksGenerator to list the pages that
                use the image
        image - full title of the image, namespace prefix included

    Returns a list with one entry per page that uses the image: the page
    title when removeImage() did not return False, 'Error: ' + title when
    it did. The list is empty when no page uses the image.
    """
    # removeImage() is given the file name with underscores instead of
    # spaces and without the namespace prefix. Split on the first colon
    # only, so that a colon inside the file name does not truncate it.
    realimage = image.replace(' ', '_').split(':', 1)[-1]

    gen = pagegenerators.FileLinksGenerator(p)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 200)
    report = []
    for w in preloadingGen:
        realpage = w.title()
        wikipedia.output('Loading %s...' % realpage)
        wikipedia.output(realimage)
        # commento is the edit summary configured (per language) at the top
        # of the script for exactly this kind of edit.
        ref = w.removeImage(realimage, True, commento)
        if ref == False:
            report.append('Error: ' + realpage)
        else:
            report.append(realpage)

    if not report:
        wikipedia.output(u"The image isn't used by any page.")
        return []
    wikipedia.output(u'Image used on: ' + str(report))
    return report
 
# Ladies and gentlemen, here there is the main class!
class main:
    """Process every image page of the category loaded in the global `cat`.

    For each image description page the unverified template is replaced
    with the deletion template (`ntext`), the image is removed from the
    pages that use it through references(), and the outcome is appended
    to the description page. Problems are appended to the log page.
    """

    # Title of the wiki page on which problems are reported.
    LOG_PAGE = 'User:Filbot/Log'

    # Matches {{unverified...}} / {{Unverified...}} and {{unverdata}}.
    UNVERIFIED = re.compile(r'(\{\{[Uu]nverified(.*?)\}\}|\{\{unverdata\}\})', re.UNICODE)

    def __init__(self, pos2):
        # pos2 is only stored; whattodo() does not read it.
        self.pos2 = pos2

    def report_error(self, message):
        """Append `message` to the log page (starting from empty text if the
        log page does not exist yet)."""
        log_page = wikipedia.Page(wikipedia.getSite(), self.LOG_PAGE)
        try:
            log_text = log_page.get()
        except wikipedia.NoPage:
            log_text = u''
        log_page.put(log_text + message, comment = 'Bot: Reporting Error!')

    def whattodo(self):
        """Walk through the category and handle one image page at a time."""
        for pag in cat.articles():
            page = pag.title()
            wikipedia.output('Loading %s...' % page)
            # Use the configured site rather than a hard-coded one.
            p = wikipedia.Page(wikipedia.getSite(), page)
            try:
                text = p.get()
                if self.UNVERIFIED.search(text) is None:
                    if 'rimossa' in text.lower():
                        # The page already carries the bot's removal report.
                        wikipedia.output(u'Image, already parsed')
                    else:
                        wikipedia.output(u"Unverified not found!")
                        self.report_error("\n*[[:%s]] hasn't the unverified's template, please check!" % page)
                    continue

                new_text_to_add = self.UNVERIFIED.sub(ntext, text)
                ref = references(p, page)
                print(ref)
                if ref:
                    for i in ref:
                        # references() marks failures with an 'Error: ' prefix.
                        if i.startswith('Error: '):
                            failed = i[len('Error: '):]
                            wikipedia.output('Image not deleted from: [[%s]]' % i)
                            txt_t = "\n*'''Non''' rimossa da: [[%s]]" % failed
                        else:
                            wikipedia.output('Image deleted from: [[%s]]' % i)
                            txt_t = '\n*Rimossa da [[%s]]' % i
                        new_text_to_add = new_text_to_add + txt_t
                else:
                    txt_t = "\nQuesta immagine non e' stata rimossa da nessuna pagina."
                    new_text_to_add = new_text_to_add + txt_t
                p.put(new_text_to_add)
                wikipedia.output(u"Done!")
            except wikipedia.NoPage:
                wikipedia.output(u"Strange! The image doesn't exist!")
                # The report goes to the log page, not to the missing page.
                self.report_error("\n*[[:%s]] doesn't exist, please check!" % page)
                continue
 
# Run the bot once, or over and over again when the recursive mode (-ric)
# is enabled. stopme() is called in any case, also when an error occurs.
try:
    while True:
        run = main(0)
        run.whattodo()
        if not rock:
            break
        wikipedia.output(u"Sleeping for 90000 seconds before rerun ;)")
        time.sleep(90000)
    wikipedia.output(u'Stop!')
    wikipedia.stopme()
finally:
    wikipedia.stopme()
Personal tools