#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This script is used on the Italian Wikipedia to remove "no-source" images from the articles
that use them, so that the categories holding the old no-source images can be deleted.
The code is commented, so you should be able to understand (more or less) what the
bot does. For questions, bug reports or anything else, contact me here: http://it.wikipedia.org/wiki/Discussioni_utente:Filnik
Parameters:
-cat Define the category that contains the images (it must end with %s %s %s). If the default is correct you don't need
to use this option.
-ric Use it to enable recursive mode (the bot runs again after sleeping).
-auto Use it to let the bot work without asking you, for every image, whether the article where
it is used needs to be changed or not.
"""
#
# Delnosource.py Version 1.0
#
# (C) Filnik it.wiki 2007
#
# Distributed under the terms of the MIT license.
#
import codecs
import datetime
import re
import time

import catlib
import pagegenerators
import wikipedia
# Command-line switches (first pass over the arguments).
#   rock     -- True when "-ric" was given: the script reruns itself after a
#               pause instead of stopping after one pass (the name is arbitrary).
#   dont_ask -- True when "-auto" was given.
_switches = list(wikipedia.handleArgs())
rock = '-ric' in _switches
dont_ask = '-auto' in _switches
# Site, language code (example: 'en') and family (example: 'wikipedia') the
# bot is configured to work on.
site = wikipedia.getSite()
lang, project = site.lang, wikipedia.default_family

# The dictionaries below hold the per-language settings used by the bot.
# If your project is missing, add an entry for it to every dictionary,
# otherwise the script will not work for you.
# ----------------------------------- Change Below! -------------------------------------- #

# Name of the image namespace (with trailing colon), per language.
immagine = {
    'en': 'Image:',
    'it': 'Immagine:',
}

# Edit summary used when the bot removes an image from a page.
comm = {
    'en': 'Bot: Removing an image tagged with {{no source}}',
    'it': "Bot: Tolgo immagine taggata come unverified.",
}
# Category title templates; each ends with "%s %s %s" (day, month name, year),
# for example "12 april 2007".
def cat_find(seven_days_ago, name_months, year):
    """Return the dated category title for the current site's language.

    The three arguments are substituted, in order, into the "%s %s %s" tail
    of every per-language template; the resulting dictionary is then handed
    to wikipedia.translate() together with the current site.
    """
    date_parts = (seven_days_ago, name_months, year)
    templates = {
        'en': 'Images with unknown source as of %s %s %s',
        'it': 'Immagini senza informazioni dal %s %s %s',
    }
    cat_name = {}
    for code in templates:
        cat_name[code] = templates[code] % date_parts
    return wikipedia.translate(wikipedia.getSite(), cat_name)
# Edit summary used when the bot adds the deletion template to an image page.
comment_i_dunno_what_number = {
    'en': "Bot: Adding {{delete}} to the old unverified",
    'it': "Bot: Aggiungo {{delete}} ai vecchi unverified",
}

# Deletion template that replaces the "unverified" template on the image page.
newtext = {
    'en': '{{db|Image no source from 7 days}}',
    'it': '{{cancella subito|Unverified da 7 giorni}}',
}

# Month names (January first) as they appear in the dated category titles.
_month_names_by_lang = {
    'it': ["gennaio", "febbraio", "marzo", "aprile", "maggio", "giugno",
           "luglio", "agosto", "settembre", "ottobre", "novembre", "dicembre"],
    'en': ["january", "february", "march", "april", "may", "june",
           "july", "august", "september", "october", "november", "december"],
}
# name_months maps the month number (1-12) to its name in the bot's language.
# It is deliberately left undefined for languages that have no entry above.
if lang in _month_names_by_lang:
    name_months = dict(zip(range(1, 13), _month_names_by_lang[lang]))

# Add your project (in alphabetical order) if you want the bot to start.
project_inserted = ['en', 'it']
# ----------------------------------- Change Above! --------------------------------------- #
#*******************************************************************************************#
# Pick the strings for the current site's language out of the tables above.
commento = wikipedia.translate(wikipedia.getSite(), comm)
image_namespace = wikipedia.translate(wikipedia.getSite(), immagine)
comm_idwn = wikipedia.translate(wikipedia.getSite(), comment_i_dunno_what_number)
ntext = wikipedia.translate(wikipedia.getSite(), newtext)

# Refuse to run on a project that has not been configured, so the bot never
# works with another language's settings instead of yours.
if lang not in project_inserted:
    wikipedia.output(u"Your project isn't inserted in the Bot, you have to open the script and add it!")
    wikipedia.stopme()
    # Actually stop here: without this the script kept running and failed
    # further down because name_months is not defined for this language.
    raise SystemExit(1)
# Command-line arguments (second pass): "-cat" selects the category to load.
# The title is either given inline ("-cat:Title") or, with a bare "-cat",
# asked for interactively.  custom_cat stays None when the option is absent.
custom_cat = None
for arg in wikipedia.handleArgs():
    if arg.startswith('-cat'):
        if len(arg) == 4:
            custom_cat = wikipedia.input(u'What category of unverified do you want to load?')
        else:
            custom_cat = arg[5:]

# Work out the calendar date seven days before today (local time).
# datetime does the month/year roll-over and the leap-year rules; the old
# hand-written tables had wrong month lengths (e.g. January = 30 days) and
# treated every year divisible by 4 as a leap year.
rightime = time.localtime(time.time())
today = datetime.date(rightime[0], rightime[1], rightime[2])
target_date = today - datetime.timedelta(days=7)

month = str(rightime[1])              # today's month number
day = str(rightime[2])                # today's day of the month
year = str(target_date.year)          # year of the target date
month_to_take = target_date.month     # month number of the target date
seven_days_ago = str(target_date.day) # day of the month, no leading zero

# Build the category title.
site = wikipedia.getSite()
month_name = name_months[month_to_take]
if custom_cat is None:
    # Default: the dated category for this language.
    cat_translation = cat_find(seven_days_ago, month_name, year)
elif custom_cat.count('%s') == 3:
    # "-cat" template ending with "%s %s %s": fill in day, month name, year.
    cat_translation = custom_cat % (seven_days_ago, month_name, year)
else:
    # "-cat" with a complete title: use it as given.
    cat_translation = custom_cat
# Previously the "-cat" value was overwritten here and therefore ignored.
CatTitle = cat_translation

# Default edit summary for the pages saved without an explicit comment.
wikipedia.setAction(comm_idwn)

# Load the category whose members are the images to process.
# NOTE(review): the "Categoria:" namespace prefix is Italian-only; confirm
# what should be used before running the bot on another language.
cat = catlib.Category(site, "Categoria:" + CatTitle)
# Remove an image from every page that uses it and report what happened.
def references(p, image):
    """Delink *image* from all the pages that embed it.

    Parameters:
        p     -- page object of the image; passed to
                 pagegenerators.FileLinksGenerator to find the pages using it.
        image -- title of the image including its namespace prefix,
                 e.g. "Immagine:Foo bar.jpg".

    Returns a list with one entry per page that uses the image: the page
    title when removeImage() succeeded, or 'Error: ' + title when it
    returned False.  The list is empty when no page uses the image.
    """
    # removeImage() wants the bare file name with underscores.  Split on the
    # FIRST colon only, so file names that contain a colon are not truncated
    # (and a title without any prefix is passed through unchanged).
    realimage = image.replace(' ', '_').split(':', 1)[-1]

    gen = pagegenerators.FileLinksGenerator(p)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=200)

    report = []
    for w in preloadingGen:
        realpage = w.title()
        wikipedia.output('Loading %s...' % realpage)
        # wikipedia.output instead of a bare print: titles are unicode and a
        # plain print can fail on consoles that cannot encode them.
        wikipedia.output(realimage)
        # Save the edit (second argument True) with the localized summary
        # instead of a hard-coded Italian one.
        if w.removeImage(realimage, True, commento) is False:
            report.append('Error: ' + realpage)
        else:
            report.append(realpage)

    if not report:
        wikipedia.output(u"The image isn't used by any page.")
    else:
        wikipedia.output(u'Image used on: ' + str(report))
    return report
# Matches the "unverified"/"unverdata" template on an image description page.
_UNVERIFIED_RE = re.compile(r'(\{\{[Uu]nverified(.*?)\}\}|\{\{unverdata\}\})', re.UNICODE)

# Prefix that references() puts in front of the pages it could not edit.
_ERROR_PREFIX = 'Error: '


def _report_to_log(message):
    """Append *message* to the bot's on-wiki log page (User:Filbot/Log)."""
    log_page = wikipedia.Page(wikipedia.getSite(), 'User:Filbot/Log')
    try:
        old_text = log_page.get()
    except wikipedia.NoPage:
        # No log yet: start a new one instead of failing.
        old_text = u''
    log_page.put(old_text + message, comment='Bot: Reporting Error!')


# Ladies and gentlemen, here there is the main class!
class main:
    """Process every image of the loaded category (module-level ``cat``).

    For each image: replace its "unverified" template with the deletion
    template, remove the image from the pages using it, and append to the
    image page a list of the pages it was (or was not) removed from.
    """

    def __init__(self, pos2):
        # Stored only; whattodo() does not read it.
        self.pos2 = pos2

    def whattodo(self):
        """Run one pass over all the pages returned by cat.articles()."""
        for pag in cat.articles():
            page = pag.title()
            wikipedia.output('Loading %s...' % page)
            # Use the configured site, like the rest of the script, instead
            # of a hard-coded it.wikipedia.
            p = wikipedia.Page(wikipedia.getSite(), page)
            try:
                text = p.get()
                if _UNVERIFIED_RE.search(text) is None:
                    if 'rimossa' in text.lower():
                        # The removal list was already appended by an earlier run.
                        wikipedia.output(u'Image, already parsed')
                    else:
                        wikipedia.output(u"Unverified not found!")
                        _report_to_log("\n*[[:%s]] hasn't the unverified's template, please check!" % page)
                    continue

                # Swap the unverified template for the deletion template.
                new_text_to_add = _UNVERIFIED_RE.sub(ntext, text)

                ref = references(p, page)
                print(ref)
                if ref:
                    for i in ref:
                        # Only a leading prefix marks a failure; a title that
                        # merely contains "Error: " is a normal page.
                        if i.startswith(_ERROR_PREFIX):
                            wikipedia.output('Image not deleted from: [[%s]]' % i)
                            txt_t = "\n*'''Non''' rimossa da: [[%s]]" % i[len(_ERROR_PREFIX):]
                        else:
                            wikipedia.output('Image deleted from: [[%s]]' % i)
                            txt_t = '\n*Rimossa da [[%s]]' % i
                        new_text_to_add = new_text_to_add + txt_t
                else:
                    txt_t = "\nQuesta immagine non e' stata rimossa da nessuna pagina."
                    new_text_to_add = new_text_to_add + txt_t
                p.put(new_text_to_add)
                wikipedia.output(u"Done!")
            except wikipedia.NoPage:
                wikipedia.output(u"Strange! The image doesn't exist!")
                # Report on the LOG page (the old code wrote the log text onto
                # the missing image page) and start the entry on its own
                # bullet line like the other log message.
                _report_to_log("\n*[[:%s]] doesn't exist, please check!" % page)
# Entry point: do one pass; with "-ric" (rock) keep repeating it forever,
# sleeping between passes.  stopme() is called in any case on the way out.
try:
    while True:
        main(0).whattodo()
        if not rock:
            break
        wikipedia.output(u"Sleeping for 90000 seconds before rerun ;)")
        time.sleep(90000)
    wikipedia.output(u'Stop!')
    wikipedia.stopme()
finally:
    wikipedia.stopme()