Purple exclamation mark.svg Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit MediaWiki, is welcome.

Red exclamation mark.svg UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before you can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.

Python:Vandal Bazooka/VandalParser.py

From Botwiki
(Redirected from Python:Parser.py)
Jump to: navigation, search
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
parser.py - Main file.
Author: Filnik, 2007
About: http://botwiki.sno.cc/wiki/Botwiki:Vandal_Bazooka
License: Distributed under the terms of the MIT license.
Version: 1.2
"""
 
import re, webbrowser
import os, time
import config
 
def parser(raw):
    """Extract the edits described in a chunk of IRC recent-changes text.

    The channel name in ``config.channel`` (e.g. ``'#it.wikipedia'``) is split
    into a language code and a project name; both are used to build the
    pattern that recognises a feed line and the URLs returned to the caller.

    raw -- text containing zero or more feed lines.

    Returns a tuple ``(list_loaded, list_info)`` of two parallel lists:
      * list_loaded -- one ``[pagename, url, user, size, summary, status]``
        list per edit; ``size`` is the sign followed by the captured change
        size (e.g. ``'+1455'``) and ``status`` is ``None`` when the feed
        carried no status flag.
      * list_info -- the human-readable description of the same edit.
    Two empty lists are returned when nothing in ``raw`` matches.

    Edits whose description is identical to one already collected are
    skipped, so each description appears only once.
    """
    list_loaded = []
    list_info = []
    # Descriptions collected so far, for the duplicate check.
    seen = set()

    # '#it.wikipedia' -> lang 'it', project 'wikipedia'
    splitting = config.channel.split('.')
    lang = splitting[0].split('#')[1]
    project = splitting[1]
    base_url = 'http://' + lang + '.' + project + '.org'
    # Escaped copies for use inside the pattern.
    lang_re = re.escape(lang)
    project_re = re.escape(project)

    # Shape of a feed line (abridged):
    #   :rc!~rc@localhost PRIVMSG #it.wikipedia :[[Page title]] N http://it.wikipedia.org/w/index.php?title=...&rcid=... * User * (+1455) summary
    # NOTE(review): the '�' characters in the pattern are reproduced exactly as
    # they appear in this copy of the file; they presumably stand for IRC
    # colour-control bytes -- confirm against the live feed.
    regl = (r"\:rc\!~rc@localhost PRIVMSG \#" + lang_re + r"\." + project_re
            + r" \:�14\[\[�07(.*?)�14\]\]�4 (.*?)�10 �02http\://" + lang_re
            + r"\." + project_re
            + r"\.org/w/index\.php\?title=(.*?)(&diff=(.*?)&oldid=(.*?)|)&rcid=(.*?)� �5\*� �03(.*?)� �5\*\(([-+])(.*?)\) �10(.*?)�")
    # Built and compiled once; the pattern does not change between matches.
    page = re.compile(regl, re.UNICODE)

    for xl in page.finditer(raw):
        pagename = xl.group(1)
        status = xl.group(2)  # N, M
        urlname = xl.group(3)
        # Group 4 is the optional '&diff=...&oldid=...' part ('' when absent).
        if '&diff=' in xl.group(4):
            diff = xl.group(5)
            oldid = xl.group(6)
        else:
            diff = ''
            oldid = ''
        rcid = xl.group(7)
        user = xl.group(8)
        size = xl.group(9) + xl.group(10)
        summary = xl.group(11)
        if '/' in summary:
            # Drop the '/*' and '*/' markers around a section name.
            summary = re.sub(r'(.*?)/\*(.*?)\*/(.*?)', r'\1\2\3', summary)

        if status == 'N':
            # New pages have no diff to show: link to the page itself.
            url = base_url + '/wiki/' + urlname
        else:
            url = (base_url + '/w/index.php?title=' + urlname + '&diff=' + diff
                   + '&oldid=' + oldid + '&rcid=' + rcid)

        header = ('\n>>> ' + pagename + ' <<<' + '\nUtente: ' + user
                  + '\nEdit: ' + '(' + size + ')' + '\nOggetto:' + summary
                  + '\nStatus: ')
        if status == '':
            string = header + 'None'
            status = None
        elif status == 'N':
            string = header + 'NUOVA PAGINA'
        else:
            string = header + status + '\n'

        # Compare against the descriptions (not the data records), otherwise
        # the test can never succeed and duplicates slip through.
        if string in seen:
            continue
        seen.add(string)
        list_loaded.append([pagename, url, user, size, summary, status])
        list_info.append(string)

    if list_loaded:
        print("Starting...\n")
        return (list_loaded, list_info)
    print("Our Regex cannot load this edit, skipping...")
    return ([], [])
 
# Main polling loop.
#
# Repeatedly reads 'VandalData.txt' from the current directory, deletes it,
# hands its content to parser() and opens in the web browser every edit that
# the filters below do not discard.  Runs until interrupted.

# Number of pages opened since the last "Shall i go?" confirmation.
loadingnummer = 0
# Set to True to be asked for confirmation once 10 pages have been opened.
loadlot = False
# Users whose edits are never shown (compared case-insensitively).
listadatogliere = ['alnoktabot', 'brownout', 'filnik', 'valepert', 'filbot', 'Nikbot','Kiado',
                   'Jalo', 'Sailko', 'snowdog', 'Claudio Sanna', 'giovannigobbin', 'Paulatz bot',
                   'MM', 'senpai', 'Bella situazione', 'Lord Randal', 'Alexander VIII', 'ChemicalBit',
                   'Squattari', '%Pier%', 'D.N.R.', 'Kal-El']
_trusted_users = set(name.lower() for name in listadatogliere)
filename = 'VandalData.txt'

while 1:
    # '>=' rather than '==': the counter grows by several units per batch and
    # could otherwise step over the threshold without ever triggering.
    if loadlot and loadingnummer >= 10:
        while 1:
            # raw_input is the Python 2 builtin; only reached when loadlot is True.
            risp = raw_input("Shall i go? > ")
            if risp == 'y':
                loadingnummer = 0
                break
            elif risp == 'n':
                raise SystemExit

    try:
        diR = str(os.getcwd())
        # The 'with' block closes the file even when read() fails.
        with open(filename, 'r') as rock:
            raw = rock.read()
    except (IOError, OSError):
        # Data file missing or unreadable: wait and try again.
        print("Waiting...")
        time.sleep(1.5)
        continue

    # Remove the file that was just read; a failure is reported, not fatal.
    try:
        os.remove(os.path.join(diR, filename))
    except OSError as e:
        print(e)

    (lista, listainfo) = parser(raw)
    if not lista:
        time.sleep(1)
        continue

    print('Caricate %s pagine...' % len(lista))
    # lista and listainfo are parallel: one description per data record.
    for entry, info in zip(lista, listainfo):
        # entry = [pagename, url, user, plusminus + plusminusnumber, summary, status]
        pagename, url, user, _size, summary, status = entry
        # NOTE(review): the edit size (_size) is not used by any filter below.

        if user in pagename:
            print("An user has edited his pages, skipping...")
            continue

        # Skip by default when the user name has fewer than two dots
        # (presumably: not an IP address, i.e. a registered user -- confirm).
        stop = user.count('.') < 2

        if 'collegamenti esterni' in summary.lower():
            print("The summary has 'collegamenti esterni' inside!")
            stop = False

        # New pages are always shown, unless the page name contains config.cat.
        if status is not None and status.lower() == 'n':
            if config.cat.lower() not in pagename.lower():
                stop = False

        # Trusted users override every rule above.
        if user.lower() in _trusted_users:
            stop = True

        if stop:
            print("%s edited by %s is ok, skipping..." % (pagename, user))
            continue
        if pagename.lower() == 'wikipedia:pagina delle prove':
            print("Pagina delle prove! Skipping... ")
            continue

        loadingnummer += 1
        print(info)
        webbrowser.open(url, 0, False)

    # Batch finished: wait for a fresh data file before the next pass.
    while not os.path.exists(os.path.join(diR, filename)):
        print("Please wait, reloading...")
        time.sleep(2)
Personal tools
Share