Python:Print.py

From Botwiki

Jump to: navigation, search
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
This is a simple script (worth a look if you are just starting to program in
Python) that saves the HTML code of a web page to a .txt file, or prints it
to the DOS screen.
 
It can be useful if you need to extract parameters or information from a log
or a similar Wikipedia page.
 
You can use the following parameters:
 
-url        The URL to fetch (default: http://en.wikipedia.org/wiki/Main_Page)
 
-print      Print the output to the DOS screen instead of writing it
            to a .txt file (print.txt) (default: save the file)
 
This script is taken from: http://botwiki.sno.cc/
"""
 
#
#
# (C) Filnik, 2007
#
# Distributed under the terms of the MIT license.
#
# Version: 1.0
#
 
import wikipedia, urllib2
import time, config
 
# Default parameters; both can be overridden from the command line.
url = "http://en.wikipedia.org/wiki/Main_Page"
printme = False

# Handle the arguments returned by wikipedia.handleArgs():
#   -url / -url:<value>   which URL to fetch
#   -print                print the page instead of saving it to print.txt
for arg in wikipedia.handleArgs():
    if arg.startswith('-url'):
        if len(arg) == 4:
            # Bare "-url" without a value: ask the user for it.
            url = str(wikipedia.input(u'What url do you need to get?'))
        else:
            # Skip the 4-character option name plus the separator character.
            url = str(arg[5:])
    elif arg == '-print':
        printme = True

# The request fails if the URL has no scheme, so default to http://.
# The scheme is checked at the START of the URL: a plain substring test
# would mangle https:// URLs into "http://https://..." and would skip
# scheme-less URLs that merely contain "http://" somewhere (e.g. in a query).
if not (url.startswith('http://') or url.startswith('https://')):
    url = 'http://' + url
def pageText(url):
    """Return the decoded content of the page at *url*.

    The page is requested with a browser-like User-Agent header and the
    response body is decoded using config.console_encoding.

    If the first attempt raises urllib2.HTTPError, a message is printed,
    the function sleeps for 10 seconds and tries exactly once more; an
    error raised by that second attempt propagates to the caller.
    """
    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7'

    def fetch():
        # One complete request/read/decode cycle, shared by the first
        # attempt and the retry so the two cannot drift apart.
        request = urllib2.Request(url)
        request.add_header("User-Agent", user_agent)
        response = urllib2.urlopen(request)
        try:
            return response.read().decode(config.console_encoding)
        finally:
            # Release the connection even when reading or decoding fails.
            response.close()

    try:
        return fetch()
    except urllib2.HTTPError:
        # The original author notes urllib2 can raise this under heavy load,
        # so wait a little and give the server a second chance.
        wikipedia.output(u"Server error. Pausing for 10 seconds before continuing. " + time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime()))
        time.sleep(10)
        return fetch()
# Main block: the outer finally guarantees wikipedia.stopme() is called
# before the script exits, whether or not fetching or writing succeeded.
try:
    # Fetch first, so a failed download does not truncate an existing
    # print.txt.
    text = pageText(url)
    if printme:
        # pageText() already returns decoded text, so pass it straight on.
        # The original used the format string 'u%s', which printed a stray
        # literal "u" in front of the page (a misplaced unicode prefix).
        wikipedia.output(u'%s' % text)
    else:
        rock = open('print.txt', 'w')
        try:
            rock.write(text.encode(config.console_encoding))
        finally:
            # Close explicitly so the data is flushed and the handle
            # released even if the write fails.
            rock.close()
    wikipedia.output('Done!')
finally:
    wikipedia.stopme()
Personal tools