1    #!/usr/bin/python
       2    
       3    # m2fchk.py
       4    # male-to-female markup check.
       5    
       6    # Required changes to your documents: put "<!--m2f-->" after a male
       7    # word (AND BEFORE ANY SPACES!) that needs to remain male due to
       8    # context.
       9    
      10    # Pertains to:
      11    # http://www.marxists.org/en/archive/spirkin/1983/dm/index.html
      12    #      ~/www.marxists.org/en/archive/spirkin/1983/dm/dm1983.tgz (XML)
      13    
      14    # The only output from this script should be <!ENTITY lines.
      15    
      16    # ------------------------------------------------------------------
      17    
      18    debug = 1
      19    debug = 0
      20    
      21    import fileinput
      22    import os
      23    import re
      24    import string
      25    
      26    regexp = {}
      27    regexp['other'] = {}
      28    regexp['other']['amp'] = re.compile('&')
      29    regexp['other']['alphas'] = re.compile('[' + string.letters + ']')
      30    
      31    regexp['other']['open_document_tag'] = re.compile('<(book|html)', re.I)
      32    regexp['other']['open_entity'] = re.compile('<!ENTITY', re.I)
      33    
      34    m2f_entities = ['~/www.marxists.org/en/archive/spirkin/1983/dm/dm1983.xml']
      35    
      36    
      37    
      38    m2f = {}
      39    m2f["he"] = "she"
      40    m2f["He"] = "She"
      41    
      42    m2f["him"] = "her"
      43    m2f["Him"] = "Her"
      44    m2f["his"] = "hers"
      45    m2f["His"] = "Hers"
      46    
      47    m2f["himself"] = "herself"
      48    m2f["Himself"] = "Herself"
      49    
      50    m2f["man"] = "woman"
      51    m2f["Man"] = "Woman"
      52    m2f["husband"] = "wife"
      53    m2f["Husband"] = "Wife"
      54    
      55    m2f["master"] = "mistress"
      56    m2f["Master"] = "Mistress"
      57    m2f["hero"] = "heroine"
      58    m2f["Hero"] = "Heroine"
      59    m2f["actor"] = "actress"
      60    m2f["Actor"] = "Actress"
      61    m2f["king"] = "queen"
      62    m2f["King"] = "Queen"
      63    
      64    # Get additional entities from external file(s):
      65    for filename in m2f_entities:
      66        f = open(os.path.expanduser(filename))
      67        while 1:
      68            d = f.readline()
      69            parts = string.split(d)
      70            for i in range(len(parts)):
      71                if regexp['other']['open_entity'].match(parts[i]):
      72                    m2f[parts[i+1]] = parts[i+2]
      73                    pass
      74                pass
      75            if regexp['other']['open_document_tag'].search(d): break
      76            pass
      77        f.close()
      78        pass
      79    
      80    regexp['m2f'] = {}
      81    for key in m2f.keys():
      82        regexp['m2f'][key] = re.compile(key)
      83    
      84        # Do not include female words:
      85        # regexp['m2f'][m2f[key]] = re.compile(m2f[key])
      86        pass
      87    
      88    def printit_fn(description, word, line):
      89        # Ignore marked words, e.g.: man,<!--m2f-->
      90        word = string.rstrip(word)
      91        s = '<!--m2f-->'
      92        if not word[-len(s):] == s:
      93            if debug: print description, word
      94            return 1
      95        else:
      96            return 0
      97        pass
      98    
      99    for line in fileinput.input():
     100        printit = 0
     101        words = string.split(line)
     102        for word in words:
     103            for exp in regexp['m2f'].keys():
     104                start = 0
     105                matchobj = regexp['m2f'][exp].search(word[start:])
     106                while matchobj:
     107    
     108                    # If either start or .start() is zero ...
     109                    if start - matchobj.start() == 0:
     110                        if len(matchobj.group()) == len(word):
     111    
     112                            printit = printit + printit_fn(
     113                                'Exact match:', word, line)
     114                            pass
     115                        else:
     116                            # If character after match is not a letter ...
     117                            if not regexp['other']['alphas'].match(
     118                                word[matchobj.end()]):
     119    
     120                                printit = printit + printit_fn(
     121                                    'Leading match:', word, line)
     122                                pass
     123                            pass
     124                        pass
     125    
     126                    # If character before match is not a letter ...
     127                    elif not regexp['other']['alphas'].match(
     128                        word[matchobj.start() - 1 + start]):
     129    
     130                        if len(word) == start + matchobj.end():
     131                            printit = printit + printit_fn(
     132                                'Trailing match:', word, line)
     133                            pass
     134    
     135                        # If character after match is not a letter ...
     136                        elif not regexp['other']['alphas'].match(
     137                            word[start + matchobj.end()]):
     138    
     139                            # If match isn't surrounded by &; ...
     140                            if not ('&' == word[matchobj.start() - 1 + start]
     141                                      and
     142                                      ';' == word[start + matchobj.end()]):
     143    
     144                                printit = printit + printit_fn(
     145                                    'Internal match:', word, line)
     146                                pass
     147                            pass
     148                        pass
     149    
     150                    start = start + matchobj.end()
     151                    matchobj = regexp['m2f'][exp].search(word[start:])
     152                    pass
     153                pass
     154            pass
     155    
     156        if not debug and printit > 0: print string.join([
     157            fileinput.filename(),
     158            ':',
     159            str(fileinput.filelineno()),
     160            ': ',
     161            string.rstrip(line)], '')
     162    
     163        # if fileinput.lineno() == 300: break
     164        # pass
     165    
     166    ###
     167    #