Parse HTML pages, absolutize links and invoke a translation routine on the text.
This module handles all the HTML code and
allows for invoking a custom routine
that translates the plain text contained in the HTML document.
Imported modules
|
|
from ExtDict import ExtDict
import cgi
import fileinput
import htmlentitydefs
import os
import re
from sgmllib import SGMLParser
import string
import sys
import urllib
from urllib import urlencode
|
Functions
|
|
|
|
get_args
|
get_args ()
|
|
has_no_bin_ext
|
has_no_bin_ext ( fn )
|
|
has_prefix
|
has_prefix ( word, prefix )
|
|
has_text_ext
|
has_text_ext ( fn )
|
|
multiurlencode
|
multiurlencode ( form )
version of urlencode that processes multiple values per option
|
|
pl_getitem
|
pl_getitem ( ls, key )
Exceptions
|
|
KeyError, "no attribute %s found" % key
|
|
|
pl_getitemdefault
|
pl_getitemdefault (
ls,
key,
default,
)
|
|
pl_has_key
|
pl_has_key ( ls, key )
For HTML tag attributes we use a list of (key, value) pairs instead of a dictionary,
because some attributes may be given multiple times.
We therefore implement some routines known from dictionaries for these lists.
|
|
pl_setitem
|
pl_setitem (
ls,
key,
value,
)
|
|
sub_isoentity
|
sub_isoentity ( mt )
|
|
write_ct
|
write_ct ( fh, contenttype )
|
Classes
|
|
|
|