#! /usr/bin/env python

# Extract line numbers and marked-up text.
#
# SYNTAX EXAMPLE:
#
#   python test.py where.xml > where.xmllib_c
#   spell where.xmllib_c | sort | uniq > where.misspell
#   vi where.misspell
#   egrep -if where.misspell where.xmllib_c > where.misspell.final
#   cat where.misspell.final
#     where.xml:340: calling for the formation of a stong well-organised party, whose
#     where.xml:369: in waging poliiical struggle under all circumstances and at all
#     where.xml:421: sound a vigorous warning against becoming infatuated with
#     where.xml:516: possibly fulfil our task
#     where.xml:547: settlements and comununities, it is quite feasible for the
#     where.xml:558: collective propagandist and a colluctive agitator, it is also a
#     where.xml:627: most skilful propagandists, but the most capable organisers, the
#
# (The misspellings in the sample output above are intentional: they are
# the kind of lines this pipeline is meant to surface.)

import sys
import xmllib
import string
import fileinput

# ---------------------------------------------------------
# Unfortunately, documentation in:
#   /usr/share/doc/python/html/lib/module-xmllib.html
# does not match defined class functions in:
#   /usr/lib/python1.5/site-packages/xml/parsers/xmllib.py
# ---------------------------------------------------------
#
class Program(xmllib.XMLParser):
    """Print the character data of XML files, one record per input line.

    Each file is fed to the parser one line at a time, so that
    fileinput.lineno() still names the current source line when
    handle_data() fires.  Every non-blank piece of character data is
    written to stdout as ``[filename:]lineno: text``.

    The code deliberately avoids string methods, augmented assignment
    and other post-1.5 syntax, because the file targets the Python 1.5
    xmllib named in the comment above.
    """

    def _emit(self, stream, message):
        # Shared by stderr()/stdout(): write message, newline-terminated.
        # An empty message is written as-is (no newline is added).
        if message and message[-1] != '\n':
            message = message + '\n'
        stream.write(message)

    def stderr(self, message):
        """Write message to sys.stderr, appending a newline if missing."""
        self._emit(sys.stderr, message)

    def stdout(self, message):
        """Write message to sys.stdout, appending a newline if missing."""
        self._emit(sys.stdout, message)

    def __init__(self, options):
        """Create the parser.

        options is a dictionary with two keys:
          'debug'             -- when true, every handler traces to stderr.
          'filename_position' -- 'top' prints each filename once before
                                 that file's output; 'side' prefixes every
                                 output line with ``filename:``; any other
                                 value prints no filename at all.
        """
        # Run the base-class initializer so that whatever state the
        # installed xmllib sets up there exists before parsing starts.
        xmllib.XMLParser.__init__(self)

        self.debug = options['debug']
        self.filename_position = options['filename_position']

        # 12.4 xmllib -- A parser for XML documents
        # file://localhost/usr/share/doc/python/html/lib/module-xmllib.html
        #
        #   reset ()  Reset the instance. Loses all unprocessed
        #             data. This is called implicitly at the
        #             instantiation time.
        #
        # Kept explicit because the documentation and the installed module
        # are known to disagree; a second reset on a fresh parser only
        # re-initializes empty state.
        self.reset()

    def __call__(self, filenames=None):
        """Parse each file and print its character data.

        filenames defaults to sys.argv[1:], which is what the script
        entry point relies on.
        """
        # Values observed on a fresh instance while debugging:
        #   self.attributes         {}
        #   self.elements           {}
        #   self.entitydefs.keys()  ['quot', 'apos', 'gt', 'lt', 'amp']
        if filenames is None:
            filenames = sys.argv[1:]

        for filename in filenames:
            if self.filename_position == 'top':
                self.stdout(filename)
            # NOTE(review): the same parser instance is reused for every
            # file without close()/reset() in between, so state left by
            # one document carries into the next -- confirm this is wanted
            # before changing it.
            for line in fileinput.input(filename):
                # NOTE(review): the trailing newline is dropped before
                # feeding, so markup continued on the next line is joined
                # to it with no whitespace in between -- confirm intended.
                if line[-1] == '\n':
                    line = line[:-1]
                self.feed(line)

    def feed(self, data):
        """Pass data to the base parser, then trace it in debug mode."""
        xmllib.XMLParser.feed(self, data)
        if self.debug:
            self.stderr(' --->>> ' + data)

    # HANDLEs -------------------------------------------------------
    def handle_xml(self, encoding, standalone):
        """Trace the values reported for the XML declaration."""
        if self.debug:
            self.stderr('handle_xml: ' +
                        'encoding=' + str(encoding) + ', ' +
                        'standalone=' + str(standalone))

    def handle_doctype(self, tag, pubid, syslit, data):
        """Report every ``<!-- ... -->`` comment found inside data.

        Documentation has: ``handle_doctype (tag, data)''

        data is the text handed over for the DOCTYPE declaration
        (presumably its internal subset -- confirm against the installed
        xmllib); it may be None, in which case nothing is done.  The
        text between each comment opener and the first terminator after
        it is passed to handle_comment().  An unterminated comment is
        ignored.
        """
        if data is None:
            return

        opener = '<!--'
        closer = '-->'
        pos = 0
        while 1:
            start = string.find(data, opener, pos)
            if start < 0:
                break
            body = start + len(opener)
            # Search from the end of the opener, so that its own dashes
            # can never be taken for part of the terminator ('<!-->').
            end = string.find(data, closer, body)
            if end < 0:
                break
            self.handle_comment(data[body:end])
            pos = end + len(closer)

        # A handle_special(data) hook used to be tried here; it is not
        # called and no such method is defined in this class.

    def handle_starttag(self, tag, method, attrs):
        """Trace a start tag; method is deliberately not invoked."""
        if self.debug:
            self.stderr('handle_starttag: ' + tag)

    def handle_endtag(self, tag, method):
        """Trace an end tag; method is deliberately not invoked."""
        if self.debug:
            self.stderr('handle_endtag: ' + tag)

    def handle_data(self, data):
        """Print character data that is not pure whitespace.

        The line number (and the filename, in 'side' mode) is taken from
        the fileinput module, i.e. from the line currently being fed by
        __call__().  The data itself is printed unstripped.
        """
        if self.debug:
            self.stderr('handle_data: ' + data)

        # CONTENT:
        if len(string.strip(data)) > 0:
            if self.filename_position == 'side':
                prefix = fileinput.filename() + ':'
            else:
                prefix = ''
            self.stdout(prefix + str(fileinput.lineno()) + ':' + ' ' + data)

    def handle_charref(self, name):
        """Trace a character reference."""
        if self.debug:
            self.stderr('handle_charref: ' + name)

    #
    # 771:    def handle_entity(self, name, strval, pubid, syslit, ndata):
    def handle_entityref(self, name):
        """Trace an entity reference."""
        if self.debug:
            self.stderr('handle_entityref: ' + name)

    def handle_comment(self, data):
        """Trace a comment."""
        if self.debug:
            self.stderr('handle_comment: ' + data)

    def handle_cdata(self, data):
        """Trace a CDATA section."""
        if self.debug:
            self.stderr('handle_cdata: ' + data)

    def handle_proc(self, name, data):
        """Trace the data of a processing instruction."""
        if self.debug:
            self.stderr('handle_proc: ' + data)

    # SYNTAX_ERROR -------------------------------------------------------
    def syntax_error(self, message):
        """Ignore a syntax error so that extraction carries on.

        The error is deliberately not raised; in debug mode it is at
        least traced, like every other event.
        """
        if self.debug:
            self.stderr('syntax_error: ' + str(message))

    # UNKNOWNs -------------------------------------------------------
    def unknown_starttag(self, tag, attrs):
        """Trace a start tag for which no handler exists."""
        if self.debug:
            self.stderr('oh no! ...an unknown starttag: ' + tag)

    def unknown_endtag(self, tag):
        """Trace an end tag for which no handler exists."""
        if self.debug:
            self.stderr('oh no! ...an unknown endtag: ' + tag)

    def unknown_charref(self, ref):
        """Trace an unknown character reference."""
        if self.debug:
            self.stderr('oh no! ...an unknown charref: ' + ref)

    def unknown_entityref(self, ref):
        """Trace an unknown entity reference."""
        if self.debug:
            self.stderr('oh no! ...an unknown entityref: ' + ref)


# MAIN:
# Set to 'top' to print each filename once, above that file's output,
# instead of beside every line.
filename_position = 'side'
#
options = {'debug': None,
           'filename_position': filename_position}

if __name__ == '__main__':
    program = Program(options)
    program()


###
#

# NEW definitions (not in documentation):
#      94:    def __fixelements(self):
#     100:    def __fixclass(self, kl):
#     105:    def __fixdict(self, dict):
#     119:    def reset(self):
#     136:    def setnomoretags(self):
#     140:    def setliteral(self, *args):
#     152:    def close(self):
#     160:    def translate_references(self, data, all = 1):
#     201:    def goahead(self, end):
#     382:    def parse_comment(self, i):
#     399:    def parse_doctype(self, res):
#     445:    def parse_cdata(self, i):
#     461:    def parse_proc(self, i):
#     504:    def parse_attributes(self, tag, i, j):
#     539:    def parse_starttag(self, i):
#     615:    def parse_endtag(self, i):
#     642:    def finish_starttag(self, tagname, attrdict, method):
#     649:    def finish_endtag(self, tag):
#     691:    def handle_starttag(self, tag, method, attrs):
#     699:    def handle_charref(self, name):
#     739:    def handle_comment(self, data):
#     771:    def handle_entity(self, name, strval, pubid, syslit, ndata):
#     780:    def flush(self):
#     794:    def handle_comment(self, data):
#     826:    def close(self):
#     830:def test(args = None):
#