Python:Vandal Bazooka/VandalParser.py

From Botwiki

Jump to: navigation, search
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
VandalParser.py - Main file.
Author: Filnik, 2007
About: http://botwiki.sno.cc/wiki/Botwiki:Vandal_Bazooka
License: Distributed under the terms of the MIT license.
Version: 1.2
"""
 
import re, webbrowser
import os, time
import config
 
def parser(raw, channel=None):
    """Parse a raw dump of IRC recent-changes messages into edit records.

    Args:
        raw: Text holding zero or more ``PRIVMSG`` lines of the
            recent-changes feed.
        channel: IRC channel in the form ``'#<lang>.<project>'`` (for
            example ``'#it.wikipedia'``).  Defaults to ``config.channel``.

    Returns:
        A ``(list_loaded, list_info)`` tuple of parallel lists.  Each item
        of ``list_loaded`` is ``[pagename, url, user, signed_size, summary,
        status]`` (``status`` is ``None`` when the message carries no flag);
        each item of ``list_info`` is the matching human-readable
        description.  Identical descriptions are reported only once.  Both
        lists are empty when nothing in ``raw`` matches.
    """
    if channel is None:
        channel = config.channel
    # '#it.wikipedia' -> lang 'it', project 'wikipedia'
    splitting = channel.split('.')
    lang = splitting[0].split('#')[1]
    project = splitting[1]
    site_url = 'http://' + lang + '.' + project + '.org'
    site = re.escape(lang) + r"\." + re.escape(project)

    # Sample message the pattern was written for:
    #:rc!~rc@localhost PRIVMSG #it.wikipedia :�14[[�07Il volo della fenice (film 1965)�14]]�4 N�10 �02http://it.wikipedia.org/w/index.php?title=Il_volo_della_fenice_%28film_1965%29&rcid=9767308� �5*� �03Bart ryker� �5*� (+1455) �10Nuova pagina: {{S|film}} ...�
    # The '�' characters are presumably IRC colour/formatting control bytes.
    # NOTE(review): the sample shows '� ' between the second '*' and '(',
    # which the pattern below does not allow -- verify against the live feed.
    regl = (r"\:rc\!~rc@localhost PRIVMSG \#" + site
            + r" \:�14\[\[�07(.*?)�14\]\]�4 (.*?)�10 �02http\://" + site
            + r"\.org/w/index\.php\?title=(.*?)(&diff=(.*?)&oldid=(.*?)|)&rcid=(.*?)"
            + r"� �5\*� �03(.*?)� �5\*\(([-+])(.*?)\) �10(.*?)�")
    # Compile once; the pattern does not change between matches.
    page = re.compile(regl, re.UNICODE)

    list_loaded = []
    list_info = []
    for xl in page.finditer(raw):
        pagename = xl.group(1)
        status = xl.group(2)  # '' for a plain edit, 'N' for a new page, ...
        urlname = xl.group(3)
        # Group 4 is empty when the URL has no '&diff=...&oldid=...' part.
        if xl.group(4):
            diff = xl.group(5)
            oldid = xl.group(6)
        else:
            diff = ''
            oldid = ''
        rcid = xl.group(7)
        url = (site_url + '/w/index.php?title=' + urlname + '&diff=' + diff
               + '&oldid=' + oldid + '&rcid=' + rcid)
        user = xl.group(8)
        plusminus = xl.group(9)
        plusminusnumber = xl.group(10)
        summary = xl.group(11)
        if '/' in summary:
            # Drop the '/*' and '*/' markers but keep the text between them.
            summary = re.sub(r'(.*?)/\*(.*?)\*/(.*?)', r'\1\2\3', summary)

        header = '\n>>> ' + pagename + ' <<<' + '\nUtente: ' + user \
                 + '\nEdit: ' + '(' + plusminus + plusminusnumber + ')' \
                 + '\nOggetto:' + summary + '\nStatus: '
        if status == '':
            string = header + 'None'
            status = None
        elif status == 'N':
            # A new page has no diff to show: link to the page itself.
            url = site_url + '/wiki/' + urlname
            string = header + 'NUOVA PAGINA'
        else:
            string = header + status + '\n'

        datapars = [pagename, url, user, plusminus + plusminusnumber, summary, status]
        # Deduplicate on the description; list_loaded holds lists, so a
        # string can never be found in it.
        if string not in list_info:
            list_loaded.append(datapars)
            list_info.append(string)

    if list_loaded:
        print("Starting...\n")
        return (list_loaded, list_info)
    print("Our Regex cannot load this edit, skipping...")
    return (list(), list())
 
# Driver loop: repeatedly read the dump file, parse it with parser(), and
# open every edit that is not filtered out in the web browser.

# Number of pages opened since the operator last confirmed (see loadlot).
loadingnummer = 0
# When True, ask for confirmation once 10 pages have been opened.
loadlot = False#True
# Editors whose edits are always skipped (matched case-insensitively).
listadatogliere = ['alnoktabot', 'brownout', 'filnik', 'valepert', 'filbot', 'Nikbot','Kiado',
                   'Jalo', 'Sailko', 'snowdog', 'Claudio Sanna', 'giovannigobbin', 'Paulatz bot',
                   'MM', 'senpai', 'Bella situazione', 'Lord Randal', 'Alexander VIII', 'ChemicalBit',
                   'Squattari', '%Pier%', 'D.N.R.', 'Kal-El']
trusted_users = set(x.lower() for x in listadatogliere)
filename = 'VandalData.txt'

while 1:
    # '>=' rather than '==': one batch can push the counter past 10.
    if loadingnummer >= 10 and loadlot:
        while 1:
            risp = raw_input("Shall i go? > ")
            if risp == 'y':
                loadingnummer = 0
                break
            elif risp == 'n':
                exit()

    # Read the dump; if it cannot be read yet, wait and retry.  Only I/O
    # errors are caught so that Ctrl-C still stops the script.
    try:
        with open(filename, 'r') as rock:
            raw = rock.read()
        diR = str(os.getcwd())
    except (IOError, OSError):
        print("Waiting...")
        time.sleep(1.5)
        continue
    filepath = os.path.join(diR, filename)

    # Remove the dump once read; the reload wait at the bottom of this loop
    # watches for the file to exist again.  Failing to delete it is reported
    # but not fatal.
    try:
        os.remove(filepath)
    except OSError as e:
        print(e)

    (lista, listainfo) = parser(raw)
    if not lista:
        time.sleep(1)
        continue
    print('Caricate %s pagine...' % len(lista))

    for datapars, info in zip(lista, listainfo):
        # datapars = [pagename, url, user, plusminus + plusminusnumber, summary, status]
        pagename, url, user, _size, summary, status = datapars
        # NOTE(review): a 200-byte size threshold used to be evaluated here,
        # but both of its outcomes left the decision unchanged, so it has
        # been removed; reintroduce it only with a real effect.

        if user in pagename:
            print("An user has edited his pages, skipping...")
            continue

        # Skip by default every user name with fewer than two dots
        # (presumably registered accounts as opposed to IP addresses).
        stop = user.count('.') < 2
        if 'collegamenti esterni' in summary.lower():
            print("The summary has 'collegamenti esterni' inside!")
            stop = False
        # New pages are always shown unless the title contains config.cat;
        # talk pages get no special treatment.
        if status is not None and status.lower() == 'n' \
                and config.cat.lower() not in pagename.lower():
            stop = False
        # Trusted editors override every rule above.
        if user.lower() in trusted_users:
            stop = True

        if stop:
            print("%s edited by %s is ok, skipping..." % (pagename, user))
            continue
        if pagename.lower() == 'wikipedia:pagina delle prove':
            print("Pagina delle prove! Skipping... ")
            continue

        loadingnummer += 1
        print(info)
        webbrowser.open(url, 0, False)

    # Batch done: wait until the dump file exists again before reloading.
    while not os.path.exists(filepath):
        print("Please wait, reloading...")
        time.sleep(2)
Personal tools