#Adapted from userContributions.py:
# PLAN:
# TO RETURN A LIST OF THE CONTRIBUTIONS OF A USER ON A WIKI,
# FROM THE API, OR WHATEVER,
# EACH ELEMENT IN THE LIST BEING A TUPLE OF
#(0. type='edit'/'pagemove', 1.pageid, 2.revid, 3.ns=namespace,
#4.title=pagename, 5.timestamp , 6.minor=(boolean or string?), 7.comment=string) [DO WE NEED ALL THAT?]
#http://en.wikipedia.org/w/api.php?action=query&list=usercontribs&ucuser=YurikBot
#GIVES
# <item pageid="1887246" revid="75331142" ns="0" title="Louge Town Arc" timestamp="2006-09-12T17:55:46Z" minor="" comment="Robot: Fixing double redirect" />
# IN THE pyWikipedia FRAMEWORK
# THIS FUNCTION SHOULD BE IN wikipedia.py BUT I CANNOT FIND IT
## GENERAL SET-UP
#family = 'wikipedia'
#lang = 'en'
# TEXT WHOSE PRESENCE IN AN EDIT COMMENT MARKS THE EDIT AS A PAGE MOVE; COMPILED INTO moveX BELOW
moveText =' moved to ' # SEE http://en.wikiversity.org/w/index.php?title=Special%3ALog&type=move&user=&page=
### IMPORT
import wikipedia
import re
import urllib
### REGEX SET-UP
itemX = re.compile(ur'<item .*?>', flags=re.U) # AN ITEM IN THE API
pageidX = re.compile(ur'(?<=pageid\=").*?(?=")', flags=re.U)
revidX = re.compile(ur'(?<=revid\=").*?(?=")', flags=re.U)
nsX = re.compile(ur'(?<=ns\=").*?(?=")', flags=re.U)
titleX = re.compile(ur'(?<=title\=").*?(?=")', flags=re.U)
timestampX = re.compile(ur'(?<=timestamp\=").*?(?=")', flags=re.U)
minorX = re.compile(ur'(?<=minor\"").*?(?=")', flags=re.U)
commentX = re.compile(ur'(?<=comment\=").*?(?=")', flags=re.U)
moveX = re.compile( moveText , flags=re.U) #SIGNATURE OF A PAGEMOVE (IN THE COMMENT)
### FUNCTION TO RETURN A LIST OF CONTRIBUTIONS OF user:userName after timestamplowerlimit
def userContributions(siteName, userName, timestamplowerlimit):  # siteName='en.wikiversity'
    """Yield the contributions of userName on siteName newer than a timestamp.

    Requests http://<siteName>/api.php?action=query&list=usercontribs once and
    parses every <item ... /> element of the reply with the module-level
    regexes.  Only that single reply is examined; no continuation request is
    made, so the result is limited to what the API returns in one answer.

    siteName            -- host (plus any path prefix) under which api.php
                           lives, e.g. 'aoc.wikia.com'
    userName            -- raw, un-encoded user name; it is percent-encoded
                           here before being put into the URL
    timestamplowerlimit -- an item is kept only if its timestamp string
                           compares greater than this string, so a prefix
                           such as '2008-01-01' works as well as the full
                           '2008-01-01T12:34:56Z'

    Yields tuples (type, pageid, revid, ns, title, timestamp, minor, comment):
    type is 'pagemove' when the comment matches moveX and 'edit' otherwise;
    title is decoded from UTF-8 into unicode; minor is the attribute value
    when the item carries a minor attribute and None when it does not; every
    other field is the raw attribute string, or '' when the attribute is
    missing from the item.
    """
    # urllib.quote() only handles byte strings reliably, so encode unicode first
    if isinstance(userName, unicode):
        userName = userName.encode('utf-8')
    # format=xml: the regexes need raw XML, not the API's human-readable default
    url = ('http://' + siteName
           + '/api.php?action=query&list=usercontribs&format=xml&ucuser='
           + urllib.quote(userName, ''))

    ## READ THE PAGE, RELEASING THE CONNECTION EVEN IF THE READ FAILS
    apiPage = urllib.urlopen(url)
    try:
        reply = apiPage.read()
    finally:
        apiPage.close()

    ## EXTRACT THE INFORMATION
    for item in itemX.findall(reply):
        # every field is looked up afresh for each item, so nothing can leak
        # over from the previous item when an attribute is absent
        timestamp = _firstMatch(timestampX, item)
        if not timestamp > timestamplowerlimit:
            continue
        comment = _firstMatch(commentX, item)
        if moveX.search(comment) is None:
            kind = 'edit'
        else:
            kind = 'pagemove'
        yield (kind,
               _firstMatch(pageidX, item),
               _firstMatch(revidX, item),
               _firstMatch(nsX, item),
               unicode(_firstMatch(titleX, item), 'utf-8'),
               timestamp,
               _firstMatch(minorX, item, None),
               comment)


def _firstMatch(pattern, text, default=''):
    """Return the first match of pattern in text, or default if there is none."""
    found = pattern.search(text)
    if found is None:
        return default
    return found.group(0)
### MAIN
# The __main__ guard encloses the try/finally so that merely importing this
# module neither prompts for input nor calls wikipedia.stopme().
if __name__ == '__main__':
    try:
        # SET-UP : MY COMMAND LINE DOES NOT SUPPORT UNICODE INPUT
        siteName = 'aoc.wikia.com'
        userName = 'hillgentleman'
        timestamplowerlimit = '2008-01-01T12:34:56Z'
        # an empty answer keeps the default set above
        answer = raw_input('userName?')
        if answer != '':
            userName = answer
        answer = raw_input('timestamplowerlimit?, e.g. "2008-01-01", or "2008-01-01T12:34:56Z" in full ')
        if answer != '':
            timestamplowerlimit = answer
        contributions = userContributions(siteName, userName, timestamplowerlimit)
        for kind, pageid, revid, ns, title, timestamp, minor, comment in contributions:
            print(kind, pageid, revid, ns, title, timestamp, minor, comment) #SOME MAY NOT BE STRINGS - FIX THAT LATER
            #I DON'T LIKE wikipedia.output()
    finally:
        # presumably pyWikipedia's clean-up hook; it runs even if the run above fails
        wikipedia.stopme()