#!/usr/local/bin/python
'''CGI script structure
*Check form fields
- use cgi.FieldStorage class to parse query
+ takes care of decoding, handles GET and POST
+ "foo=ab+cd%21ef&bar=spam" -->{'foo': 'ab cd!ef', 'bar': 'spam'} # (well, actually, ...)
*Perform action
- this is up to you!
- database interfaces available
*Generate HTTP + HTML output
- print statements are simplest
- template solutions available
'''
'''Structure refinement:
form = cgi.FieldStorage()
if not form:
    display blank form...
elif ...valid form...:
    perform action, display results (or next form)...
else:
    display error message (maybe repeating form)...
'''
'''FieldStorage details
*Behaves like a dictionary:
- .keys(), .has_key() # but not others!
- dictionary-like object ("mapping")
*Items
- values are MiniFieldStorage instances
+ .value gives field value!
- if multiple values: list of MiniFieldStorage instances
+ if type(...) == types.ListType: ...
- may also be FieldStorage instances
+ used for file upload (test .file attribute)
'''
'''Other CGI niceties
*cgi.escape(s)
- translate "<", "&", ">" to "&lt;", "&amp;", "&gt;"
*cgi.parse_qs(string, keep_blank_values=0)
- parse query string to dictionary {"foo": ["bar"], ...}
*cgi.parse([file], ...)
- ditto, takes query string from default locations
*urllib.quote(s), urllib.unquote(s)
- convert between "~" and "%7e" (etc.)
*urllib.urlencode(dict)
- convert dictionary {"foo": "bar", ...} to query string "foo=bar&..." # note asymmetry with parse_qs() above
'''
'''Debugging framework
import cgi
def main():
    print "Content-type: text/html\n" # Do this first
    try:
        import worker # module that does the real work
    except:
        print "<h1>Oops. An error occurred.</h1>"
        cgi.print_exception() # Prints traceback, safely
main()
'''
'''Security notes
*Watch out when passing fields to the shell
- e.g. os.popen("finger %s" % form["user"].value)
- what if the value is "; cat /etc/passwd" ...
*Solutions:
- Quote:
+ user = pipes.quote(form["user"].value)
- Refuse:
+ if not re.match(r"^\w+$", user): ...error...
- Sanitize:
+ user = re.sub(r"\W", "", form["user"].value)
'''
'''Using persistent data
*Store/update data:
1. In plain files (simplest)
+ FAQ wizard uses this
2. In a (g)dbm file (better performance) http://docs.python.org/library/dbm.html?highlight=dbm#module-dbm
+ string keys, string values
3. In a "shelf" (stores objects) http://docs.python.org/library/shelve.html
+ avoids parsing/unparsing the values
4. In a real database (if you must)
+ 3rd party database extensions available
+ not my field of expertise
'''
'''Plain files
key = ...username, or session key, or whatever...
try:
f = open(key, "r")
data = f.read() # read previous data
f.close()
except IOError:
data = "" # no file yet: provide initial data
data = update(data, form) # do whatever must be done
f = open(key, "w")
f.write(data) # write new data
f.close()
# (could delete the file instead if updated data is empty)
'''
'''(G)DBM files
# better performance if there are many records
import gdbm
key = ...username, or session key, or whatever...
db = gdbm.open("DATABASE", "w") # open for reading+writing
if db.has_key(key):
data = db[key] # read previous data
else:
data = "" # provide initial data
data = update(data, form)
db[key] = data # write new data
db.close()
'''
'''Shelves
# a shelf is a (g)dbm file that stores pickled Python objects
import shelve
class UserData: ...
key = ...username, or session key, or whatever...
db = shelve.open("DATABASE", "w") # open for reading+writing
if db.has_key(key):
data = db[key] # an object!
else:
data = UserData(key) # create a new instance
data.update(form)
db[key] = data
db.close()
'''
'''Locking
*(G)DBM files and shelves are not protected against concurrent updates!
*Multiple readers, single writer usually OK
- simplest approach: only lock when writing
*Good filesystem-based locking is hard
- no cross-platform solutions
- unpleasant facts of life:
+ processes sometimes die without unlocking
+ processes sometimes take longer than expected
+ NFS semantics
'''
'''A simple lock solution
import os, time
class Lock:
    def __init__(self, filename):
        self.filename = filename
        self.locked = 0
    def lock(self):
        assert not self.locked
        while 1:
            try:
                os.mkdir(self.filename)
                self.locked = 1
                return # or break
            except os.error, err:
                time.sleep(1)
    def unlock(self):
        assert self.locked
        self.locked = 0
        os.rmdir(self.filename)
    # auto-unlock when lock object is deleted
    def __del__(self):
        if self.locked:
            self.unlock()
# for a big production with timeouts,
# see the Mailman source code (LockFile.py);
# it works on all Unixes and supports NFS;
# but not on Windows,
# and the code is very complex...
'''
'''Sessions
*How to correlate requests from same user?
- Assign session key on first contact
- Incorporate session key in form or in URL
- In form: use hidden input field:
+ <input type="hidden" name="session" value="1f9a2">
- In URL:
+ http://myhost.com/cgi-bin/myprog.py/1f9a2
+ passed in environment (os.environ[...]):
# PATH_INFO=/1f9a2
# PATH_TRANSLATED=<rootdir>/1f9a2
'''
'''Cookies
*How to correlate sessions from the same user?
- Store "cookie" in browser
+ controversial, but useful
- Module: Cookie.py (Tim O'Malley)
+ writes "Set-Cookie" headers
+ parses HTTP_COOKIE environment variable
- Note: using cookies affects our debug framework
+ cookies must be printed as part of HTTP headers
+ cheapest solution:
# move printing of blank line into worker module
# (and into exception handler of debug framework)
'''
'''Cookie example
import os, cgi, Cookie
c = Cookie.Cookie()
try:
c.load(os.environ["HTTP_COOKIE"])
except KeyError:
pass
form = cgi.FieldStorage()
try:
user = form["user"].value
except KeyError:
try:
user = c["user"].value
except KeyError:
user = "nobody"
c["user"] = user
print c
print """
""" % cgi.escape(user)
# debug: show the cookie header we wrote
print "<pre>"
print cgi.escape(str(c))
print "</pre>"
'''
'''File upload example
import cgi
form = cgi.FieldStorage()
if not form:
print """
"""
elif form.has_key("filename"):
item = form["filename"]
if item.file:
data = item.file.read() # read contents of file
print cgi.escape(data) # rather dumb action
'''
'''Generating HTML
*HTMLgen (Robin Friedrich)
http://starship.python.net/crew/friedrich/HTMLgen/html/main.html
>>> print H(1, "Chapter One")
<H1>Chapter One</H1>
>>> print A("http://www.python.org/", "Home page")
<A HREF="http://www.python.org/">Home page</A>
>>> # etc. (tables, forms, the works)
*HTMLcreate (Laurence Tratt)
http://www.spods.dcs.kcl.ac.uk/~laurie/comp/python/htmlcreate/
+ not accessible at this time
'''
'''CGI performance
*What causes slow response?
- One process per CGI invocation
+ process creation (fork+exec)
+ Python interpreter startup time
+ importing library modules (somewhat fixable)
- Connecting to a database!
+ this can be the killer if you use a real database
- Your code?
+ probably not the bottleneck!
*Avoiding fork()
*Python in Apache (mod_pyapache)
- problems: stability; internal design
- advantage: CGI compatible
- may work if CGI scripts are simple and trusted
- doesn't avoid database connection delay
*Use Python as webserver
- slow for static content (use different port)
- advantage: total control; session state is easy
*FastCGI, HTTPDAPI etc.
*ZOPE == Z Object Publishing Environment
- http://www.zope.org
- complete dynamic website management tool
+ written in cross-platform Python; Open Source
- http://host/path/to/object?size=5&type=spam
+ calls path.to.object(size=5, type="spam")
- DTML: templatized HTML (embedded Python code)
- ZODB (Z Object DataBase; stores Python objects)
+ transactions, selective undo, etc.
- etc., etc.
'''
'''Case Study - FAQ wizard
*Tools/faqwiz/faqwiz.py in Python distribution
*http://www.python.org/cgi-bin/faqw.py
(see next slide == 65/75) == http://www.python.org/doc/essays/ppt/sd99east/sld065.htm
'''
'''
TODO: make a simple error checking/ debuggable cgi template.
'''
#cgi debug structure
import cgi
def main():
    """Run the CGI worker and report any failure in the response body.

    Prints the ``Content-type`` header before anything else, then calls
    ``mainWorker()``. If that call raises, an error notice is printed,
    followed by the traceback output of ``cgi.print_exception()``.
    """
    # Single-argument print(...) emits exactly what the equivalent print
    # statement does on Python 2, so the output is unchanged.
    print("Content-type: text/html\n\n")  # Do this first
    try:
        mainWorker()
    except:
        # Catch-all at the top-level boundary: whatever mainWorker() raises
        # is reported in the response rather than propagating out of main().
        # The message used to be a quoted string broken across three lines,
        # which is a syntax error; it is now a single literal.
        # NOTE(review): the markup around the message was lost in the
        # original; <h1> is a reconstruction -- confirm the intended tags.
        print("<h1>Oops. An error occurred.</h1>")
        cgi.print_exception()  # Prints traceback, safely
def mainWorker():
#import worker # module that does the real work
form = cgi.FieldStorage() # parse query
if form.has_key("firstname") and form["firstname"].value != "":
print "