Python:Vandal Bazooka/VandalParser.py
From Botwiki
(Redirected from Python:Parser.py)
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
parser.py - Main file.

Author: Filnik, 2007
About: http://botwiki.sno.cc/wiki/Botwiki:Vandal_Bazooka
License: Distributed under the terms of the MIT license.
Version: 1.2

Polls ``VandalData.txt`` in the current working directory, parses the
recent-changes IRC lines found in it, filters out edits that do not need a
human look and opens the remaining ones in the web browser.

Every ``print`` call below takes exactly one argument so that the file runs
unchanged on both Python 2 and Python 3.
"""
import os
import re
import sys
import time
import webbrowser

import config

# File the feed lines are read from; it is deleted after every read.
DATA_FILE = 'VandalData.txt'

# Page that is never opened, compared case-insensitively.
SANDBOX_PAGE = 'wikipedia:pagina delle prove'

# When enabled, the user is asked for confirmation once BATCH_SIZE pages
# have been opened since the last confirmation.
PAUSE_AFTER_BATCH = False
BATCH_SIZE = 10

# Users whose edits are always skipped (matched case-insensitively).
TRUSTED_USERS = frozenset(name.lower() for name in (
    'alnoktabot', 'brownout', 'filnik', 'valepert', 'filbot', 'Nikbot',
    'Kiado', 'Jalo', 'Sailko', 'snowdog', 'Claudio Sanna', 'giovannigobbin',
    'Paulatz bot', 'MM', 'senpai', 'Bella situazione', 'Lord Randal',
    'Alexander VIII', 'ChemicalBit', 'Squattari', '%Pier%', 'D.N.R.',
    'Kal-El',
))

# Matches the "/* section */" marker inside an edit summary.
_SECTION_MARKER = re.compile(r'/\*(.*?)\*/')

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3


def _compile_feed_regex(lang, project):
    """Return the compiled pattern for one recent-changes line.

    Groups: 1 page name, 2 status flags, 3 URL title, 4 optional
    "&diff=..&oldid=.." part (5 diff, 6 oldid), 7 rcid, 8 user,
    9 sign, 10 size, 11 summary.
    """
    # A feed line looks like this (\x03 is the mIRC colour control byte):
    #   :rc!~rc@localhost PRIVMSG #it.wikipedia :\x0314[[\x0307Page\x0314]]
    #   \x034 N\x0310 \x0302http://it.wikipedia.org/w/index.php?title=Page
    #   &rcid=9767308\x03 \x035*\x03 \x0303User\x03 \x035*\x03 (+1455)
    #   \x0310Summary\x03
    # NOTE(review): the colour bytes are written as \x03 on the assumption
    # that VandalData.txt holds the raw IRC lines -- confirm against the
    # script that writes the file.
    pattern = (
        r":rc!~rc@localhost PRIVMSG #%(lang)s\.%(project)s "
        r":\x0314\[\[\x0307(.*?)\x0314\]\]\x034 (.*?)\x0310 "
        r"\x0302http://%(lang)s\.%(project)s\.org/w/index\.php\?title=(.*?)"
        r"(&diff=(.*?)&oldid=(.*?)|)&rcid=(.*?)\x03 "
        r"\x035\*\x03 \x0303(.*?)\x03 \x035\*\x03 "
        r"\(([-+])(.*?)\) \x0310(.*?)\x03"
    ) % {'lang': re.escape(lang), 'project': re.escape(project)}
    return re.compile(pattern, re.UNICODE)


def parser(raw):
    """Extract the edits described by the feed lines in *raw*.

    The language and project are taken from ``config.channel``
    (for example '#it.wikipedia').

    Returns a ``(list_loaded, list_info)`` tuple of equal-length lists:
    ``list_loaded`` holds ``[pagename, url, user, signed_size, summary,
    status]`` records (``status`` is None when the line carries no flag) and
    ``list_info`` holds the matching human-readable descriptions.  Edits
    whose description was already seen are dropped.  Both lists are empty
    when nothing in *raw* matches.
    """
    channel_parts = config.channel.split('.')
    lang = channel_parts[0].split('#')[1]
    project = channel_parts[1]
    site = 'http://%s.%s.org' % (lang, project)

    list_loaded = []
    list_info = []
    seen = set()
    for match in _compile_feed_regex(lang, project).finditer(raw):
        (pagename, status, urlname, diffgroup, diff, oldid, rcid,
         user, plusminus, plusminusnumber, summary) = match.groups()
        if '&diff=' not in diffgroup:
            # The optional diff part did not take part in the match.
            diff = oldid = ''
        url = (site + '/w/index.php?title=' + urlname + '&diff=' + diff
               + '&oldid=' + oldid + '&rcid=' + rcid)
        if '/' in summary:
            # Keep the section name but drop the /* */ around it.
            summary = _SECTION_MARKER.sub(r'\1', summary)
        size = plusminus + plusminusnumber

        info = ('\n>>> ' + pagename + ' <<<' + '\nUtente: ' + user
                + '\nEdit: ' + '(' + size + ')' + '\nOggetto:' + summary
                + '\nStatus: ')
        if status == '':
            info += 'None'
            status = None
        elif status == 'N':
            # A new page has no diff to show: link to the page itself.
            url = site + '/wiki/' + urlname
            info += 'NUOVA PAGINA'
        else:
            info += status + '\n'

        if info not in seen:
            seen.add(info)
            list_loaded.append([pagename, url, user, size, summary, status])
            list_info.append(info)

    if list_loaded:
        print("Starting...\n")
    else:
        print("Our Regex cannot load this edit, skipping...")
    return (list_loaded, list_info)


def _read_feed(path):
    """Return the content of *path* as the native string type."""
    with open(path, 'rb') as handle:
        data = handle.read()
    if isinstance(data, str):
        # Python 2: byte strings are the native text type, keep them as is.
        return data
    # Python 3: tolerate invalid byte sequences instead of raising.
    return data.decode('utf-8', 'replace')


def _confirm_continue():
    """Block until the user answers 'y'; exit the program on 'n'."""
    while True:
        answer = _read_line("Shall i go? > ")
        if answer == 'y':
            return
        if answer == 'n':
            sys.exit()


def _is_trusted_edit(pagename, user, summary, status):
    """Return True when the edit should be skipped without opening it."""
    # A user name with fewer than two dots is trusted by default
    # (presumably: it is not an IP address, i.e. a registered account).
    trusted = user.count('.') < 2
    if 'collegamenti esterni' in summary.lower():
        print("The summary has 'collegamenti esterni' inside!")
        trusted = False
    # New pages are always reviewed unless their title contains config.cat.
    if status is not None and status.lower() == 'n':
        if config.cat.lower() not in pagename.lower():
            trusted = False
    # The whitelist overrides every check above.
    if user.lower() in TRUSTED_USERS:
        trusted = True
    return trusted


def main():
    """Poll the data file forever and open suspicious edits in the browser."""
    opened = 0
    while True:
        if PAUSE_AFTER_BATCH and opened >= BATCH_SIZE:
            _confirm_continue()
            opened = 0

        data_path = os.path.join(os.getcwd(), DATA_FILE)
        try:
            raw = _read_feed(data_path)
        except EnvironmentError:
            # Usually the file simply has not been written yet.
            print("Waiting...")
            time.sleep(1.5)
            continue
        # Consume the file so that the same lines are not parsed twice.
        try:
            os.remove(data_path)
        except OSError as error:
            print(error)

        lista, listainfo = parser(raw)
        if not lista:
            time.sleep(1)
            continue

        print('Caricate %s pagine...' % len(lista))
        # NOTE(review): the edit size (fourth field) is currently not used
        # by the filter.
        for (pagename, url, user, _size, summary, status), info in zip(
                lista, listainfo):
            if user in pagename:
                print("An user has edited his pages, skipping...")
                continue
            if _is_trusted_edit(pagename, user, summary, status):
                print("%s edited by %s is ok, skipping..." % (pagename, user))
                continue
            if pagename.lower() == SANDBOX_PAGE:
                print("Pagina delle prove! Skipping... ")
                continue
            opened += 1
            print(info)
            webbrowser.open(url, 0, False)

        while not os.path.exists(data_path):
            print("Please wait, reloading...")
            time.sleep(2)


# Deliberately not guarded by ``if __name__ == '__main__'``: the polling loop
# starts whenever this file is executed or imported.
main()
BlogMarks
del.icio.us
digg
Fark
Furl
Newsvine
reddit
Segnalo
Simpy
Slashdot
smarking
Spurl
Wists
