#!/usr/bin/python

# m2fchk.py
# male-to-female markup check.
#
# Usage: m2fchk.py [file ...]   (standard input is read when no file is given)
#
# Every input line that contains at least one unmarked male word is
# reported as "filename:lineno: text".  A male word counts when it is
# bounded on both sides by a non-letter (or by the edge of the
# whitespace-delimited token) and is not written as the entity
# reference "&word;".

# Required changes to your documents: put "<!--m2f-->" after a male
# word (AND BEFORE ANY SPACES!) that needs to remain male due to
# context.

# Pertains to:
# http://www.marxists.org/en/archive/spirkin/1983/dm/index.html
# ~/www.marxists.org/en/archive/spirkin/1983/dm/dm1983.tgz (XML)

# The only output from this script should be <!ENTITY lines.

# ------------------------------------------------------------------

import fileinput
import os
import re
import string
import sys

# Set to 1 to print each flagged word together with the kind of match
# instead of the "filename:lineno: text" report.
debug = 0

# Suffix that exempts a token from being reported.
MARKER = '<!--m2f-->'

regexp = {
    'other': {
        'amp': re.compile('&'),
        # string.ascii_letters exists in both Python 2 and 3
        # (string.letters is Python 2 only).
        'alphas': re.compile('[' + string.ascii_letters + ']'),
        'open_document_tag': re.compile('<(book|html)', re.I),
        'open_entity': re.compile('<!ENTITY', re.I),
    },
    # Filled in by compile_words(): male word -> compiled pattern.
    'm2f': {},
}

# Files whose <!ENTITY declarations supply additional male words.
m2f_entities = ['~/www.marxists.org/en/archive/spirkin/1983/dm/dm1983.xml']

# Built-in male word -> female word table.
m2f = {
    "he": "she",
    "He": "She",

    "him": "her",
    "Him": "Her",
    "his": "hers",
    "His": "Hers",

    "himself": "herself",
    "Himself": "Herself",

    "man": "woman",
    "Man": "Woman",
    "husband": "wife",
    "Husband": "Wife",

    "master": "mistress",
    "Master": "Mistress",
    "hero": "heroine",
    "Hero": "Heroine",
    "actor": "actress",
    "Actor": "Actress",
    "king": "queen",
    "King": "Queen",
}


def load_entities(filename, table):
    """Add the <!ENTITY declarations found in *filename* to *table*.

    For every whitespace-delimited token that starts with "<!ENTITY"
    (case-insensitive), the next token becomes a key of *table* and the
    token after that its value.  Reading stops after the first line
    containing "<book" or "<html" (that line is still processed), or at
    end of file.

    A declaration whose name or value is not on the same line is skipped
    with a warning on standard error.  "~" in *filename* is expanded.
    Raises whatever open() raises when the file cannot be opened.
    """
    open_entity = regexp['other']['open_entity']
    open_document_tag = regexp['other']['open_document_tag']
    with open(os.path.expanduser(filename)) as f:
        # Iterating the file ends at EOF, so a file without a document
        # tag can no longer spin forever on empty readline() results.
        for d in f:
            parts = d.split()
            for i, token in enumerate(parts):
                if not open_entity.match(token):
                    continue
                if i + 2 < len(parts):
                    table[parts[i + 1]] = parts[i + 2]
                else:
                    sys.stderr.write(
                        '%s: incomplete <!ENTITY declaration ignored: %s\n'
                        % (filename, d.rstrip()))
            if open_document_tag.search(d):
                break


def compile_words(table):
    """Rebuild regexp['m2f'] with one literal pattern per key of *table*.

    Only the male words (the keys) are compiled; the female words are
    deliberately not searched for.  Keys are escaped so that characters
    such as "." in an entity name match literally.
    """
    regexp['m2f'] = {key: re.compile(re.escape(key)) for key in table}


def printit_fn(description, word, line):
    """Return 1 if *word* must be reported, 0 if it carries the marker.

    A word is exempt when, after stripping trailing whitespace, it ends
    with "<!--m2f-->", e.g. "man,<!--m2f-->".  When the module-level
    ``debug`` flag is set, *description* and the word are printed for
    every reported word.  *line* is accepted but not used.
    """
    word = word.rstrip()
    if word.endswith(MARKER):
        return 0
    if debug:
        print('%s %s' % (description, word))
    return 1


def scan_word(word, pattern, line):
    """Count the reportable occurrences of *pattern* in the token *word*.

    An occurrence is reportable when the character before it (if any)
    and the character after it (if any) are not ASCII letters, it is not
    written as "&...;", and printit_fn() does not exempt the token.
    """
    alphas = regexp['other']['alphas']
    flagged = 0
    # finditer() yields absolute positions, so no offset bookkeeping is
    # needed for the second and later occurrences within one token.
    for found in pattern.finditer(word):
        begin, end = found.span()
        at_start = begin == 0
        at_end = end == len(word)

        # Part of a longer alphabetic word: not a match.
        if not at_start and alphas.match(word[begin - 1]):
            continue
        if not at_end and alphas.match(word[end]):
            continue

        if at_start and at_end:
            description = 'Exact match:'
        elif at_start:
            description = 'Leading match:'
        elif at_end:
            description = 'Trailing match:'
        else:
            # An entity reference such as "&man;" is already marked up.
            if word[begin - 1] == '&' and word[end] == ';':
                continue
            description = 'Internal match:'

        flagged += printit_fn(description, word, line)
    return flagged


def scan_line(line):
    """Return the number of reportable male-word occurrences in *line*.

    The line is split on whitespace and every token is checked against
    every pattern in regexp['m2f'].
    """
    total = 0
    for word in line.split():
        for pattern in regexp['m2f'].values():
            total += scan_word(word, pattern, line)
    return total


def main():
    """Load the extra entities, then check the files named on the command line."""
    for filename in m2f_entities:
        load_entities(filename, m2f)
    compile_words(m2f)

    for line in fileinput.input():
        # scan_line() runs first so that debug output is still produced
        # when the per-line report is suppressed.
        if scan_line(line) > 0 and not debug:
            print('%s:%d: %s' % (fileinput.filename(),
                                 fileinput.filelineno(),
                                 line.rstrip()))


# Patterns for the built-in table are available as soon as the module is
# loaded; main() recompiles after reading the entity files.
compile_words(m2f)

if __name__ == '__main__':
    main()