#! /usr/bin/env python
# Time-stamp: <2001-07-10 22:02:36 cymbala>
# ~/bin/do_ma_un.py


# SYNTAX EXAMPLE:
#   python bin/do_ma_un.py `find /cdrom/archive/lenin/works/1901/ -name '*htm*'`


#
# WARNING! WARNING! WARNING!
# This script breaks a fundamental rule.  Usually, UNIX text
# files use a linefeed to separate lines.  This script thinks
# either a linefeed or a carriage return is a line separator.
# Usually a carriage return is _NOT_ considered a line separator.
# You have been warned!
#
# This script will (illegally) read logical lines properly from all
# three types of text files:
#   DOS  - CR/LF between logical lines.
#   MAC  - CR between logical lines.
#   UNIX - LF between logical lines.
#
# The code runs unchanged on Python 2 and Python 3.

import sys
import re
import string
import fileinput


# To put this class into production:
# - rename "Program" to something more descriptive.
# - remove "program" lines from end.
# - put this file into a subdirectory that is seen by Python's path.
# - import this file.
# - use class "Something_more_descriptive" like any other class.

class Do_ma_un:
    """Feed the logical lines of DOS, MAC or UNIX text to a callback.

    An instance is called with a dictionary holding:

    'inputs'            sequence of file names and/or http(s) URLs.
    'function_to_call'  callable invoked once per logical line with this
                        same dictionary.

    Before every callback the dictionary is updated with:

    'input'         the file name or URL currently being read.
    'lineno'        1-based logical line number within that input.
    'logical_line'  the line text without its line terminator.
    """

    def __init__(self):
        """Called when class is instantiated."""
        # Inputs starting with "http:" or "https:" (any case) are fetched
        # as Web pages; everything else is handed to fileinput.
        self.re_http = re.compile('^https?:', re.I)
        # Initialised here so lineno() works before any input is read.
        self.linen = 0

    def __call__(self, dictionary):
        """Called when instance is called without an attribute.

        Processes every entry of dictionary['inputs'] in order, storing
        the current one in dictionary['input'].
        """
        for input in dictionary['inputs']:
            dictionary['input'] = input
            self.fileinput_line_handler(dictionary)

    def lineno(self):
        """Return the logical line number within the current input."""
        return self.linen

    def _as_text(self, raw, charset=None):
        """Return raw as a text string, decoding it when it is bytes.

        Native strings are returned untouched.  Bytes are decoded with
        charset (UTF-8 when none is given); if that fails, latin-1 is
        used because it maps every byte to a character and cannot fail.
        """
        if isinstance(raw, str):
            return raw
        try:
            return raw.decode(charset or 'utf-8')
        except (LookupError, UnicodeDecodeError):
            return raw.decode('latin-1')

    def _read_url_lines(self, url):
        """Fetch url and return its physical lines as text strings."""
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2
        page = urlopen(url)
        try:
            # Only the Python 3 header object can report the charset.
            charset = None
            get_charset = getattr(page.headers, 'get_content_charset', None)
            if get_charset is not None:
                charset = get_charset()
            raw_lines = page.readlines()
        finally:
            page.close()
        return [self._as_text(raw, charset) for raw in raw_lines]

    def fileinput_line_handler(self, dictionary):
        """Examine logical lines regardless of file-type (MAC, DOS or UNIX).

        Reads dictionary['input'] (a file name, or an http/https URL),
        resets the logical line counter, and passes every physical line,
        split at carriage returns, to logical_line_handler().
        """
        input = dictionary['input']

        # Read from text file, whether it's a disk file or a Web page.
        if self.re_http.match(input):
            raw_lines = self._read_url_lines(input)
            from_disk = False
        else:
            raw_lines = fileinput.input(input)
            from_disk = True

        self.linen = 0
        try:
            for line in raw_lines:
                #
                # Handle DOS files (assume DOS file ends with CR-LF):
                if line[-2:] == '\r\n':
                    line = line[:-2] + '\n'
                #
                # Handle MAC and UNIX files (in case of UNIX files, lines
                # is simply line converted to a list because the above
                # fileinput has already split by linefeeds).
                lines = line.split('\r')
                #
                # Handle all logical lines, from DOS, MAC and UNIX files:
                self.logical_line_handler(dictionary, lines)
        finally:
            if from_disk:
                # Release fileinput's module-level state even when the
                # callback raised; otherwise the next fileinput.input()
                # call fails with "input() already active".
                fileinput.close()

    def logical_line_handler(self, dictionary, list):
        """Process physical line read by normal fileinput.input() attribute.

        list holds the pieces of one physical line after splitting it at
        carriage returns.  For every piece the logical line counter is
        advanced, a trailing linefeed is removed, dictionary['lineno']
        and dictionary['logical_line'] are set, and
        dictionary['function_to_call'] is called with the dictionary.
        """
        pieces = list
        # Splitting "a<CR>b<CR>" at CR leaves one empty trailing piece that
        # is not a line at all; drop only that one.  Empty pieces elsewhere
        # are genuine blank lines and are counted, exactly like blank lines
        # in DOS and UNIX files.
        if pieces and pieces[-1] == '':
            pieces = pieces[:-1]

        for logical_line in pieces:
            self.linen = self.linen + 1
            #
            # Lines from MAC files will not end in linefeed.
            # Lines from other files will end in linefeed.
            #
            if logical_line[-1:] == '\n':
                logical_line = logical_line[:-1]
            #
            dictionary['lineno'] = self.lineno()
            dictionary['logical_line'] = logical_line
            dictionary['function_to_call'](dictionary)

    def print_first_three(self, dictionary):
        """This attribute is fed a line no. and line to do something with.

        Writes "<input> <logical line>" to standard output for the first
        three logical lines of each input and ignores the rest.
        """
        if dictionary['lineno'] <= 3:
            sys.stdout.write('%s %s\n' % (dictionary['input'],
                                          dictionary['logical_line']))

# ---------------------------------

if __name__ == '__main__':
    Program = Do_ma_un()

    dictionary = {}
    dictionary['inputs'] = sys.argv[1:]
    dictionary['function_to_call'] = Program.print_first_three
    #
    Program(dictionary)

###
# |