code.vuplus.com Git - vuplus_dvbapp-plugin/blob - fritzcall/src/ldif.py

   1 # -*- coding: utf-8 -*-
   2 """
   3 ldif - generate and parse LDIF data (see RFC 2849)
   4
   5 See http://python-ldap.sourceforge.net for details.
   6
   7 $Id$
   8
   9 Python compatibility note:
  10 Tested with Python 2.0+, but should work with Python 1.5.2+.
  11 """
  12
# Version string of this ldif module.
__version__ = '0.5.5'

# Public names exported by "from ldif import *".
# NOTE(review): 'AttrTypeandValueLDIF' is listed here but no definition with
# that name is visible in this file; if it really is missing, a star-import
# of this module raises AttributeError -- confirm and drop the entry if so.
__all__ = [
  # constants
  'ldif_pattern',
  # functions
  'AttrTypeandValueLDIF','CreateLDIF','ParseLDIF',
  # classes
  'LDIFWriter',
  'LDIFParser',
  'LDIFRecordList',
  'LDIFCopy',
]
  26
  27 import urlparse,urllib,base64,re,types
  28
  29 try:
  30   from cStringIO import StringIO
  31 except ImportError:
  32   from StringIO import StringIO
  33
  34 attrtype_pattern = r'[\w;.]+(;[\w_-]+)*'
  35 attrvalue_pattern = r'(([^,]|\\,)+|".*?")'
  36 rdn_pattern = attrtype_pattern + r'[ ]*=[ ]*' + attrvalue_pattern
  37 dn_pattern   = rdn_pattern + r'([ ]*,[ ]*' + rdn_pattern + r')*[ ]*'
  38 dn_regex   = re.compile('^%s$' % dn_pattern)
  39
  40 ldif_pattern = '^((dn(:|::) %(dn_pattern)s)|(%(attrtype_pattern)s(:|::) .*)$)+' % vars()
  41
  42 MOD_OP_INTEGER = {
  43   'add':0,'delete':1,'replace':2
  44 }
  45
  46 MOD_OP_STR = {
  47   0:'add',1:'delete',2:'replace'
  48 }
  49
  50 CHANGE_TYPES = ['add','delete','modify','modrdn']
  51 valid_changetype_dict = {}
  52 for c in CHANGE_TYPES:
  53   valid_changetype_dict[c]=None
  54
  55
  56 SAFE_STRING_PATTERN = '(^(\000|\n|\r| |:|<)|[\000\n\r\200-\377]+|[ ]+$)'
  57 safe_string_re = re.compile(SAFE_STRING_PATTERN)
  58
  59 def is_dn(s):
  60   """
  61   returns 1 if s is a LDAP DN
  62   """
  63   if s=='':
  64     return 1
  65   rm = dn_regex.match(s)
  66   return rm!=None and rm.group(0)==s
  67
  68
  69 def needs_base64(s):
  70   """
  71   returns 1 if s has to be base-64 encoded because of special chars
  72   """
  73   return not safe_string_re.search(s) is None
  74
  75
  76 def list_dict(l):
  77   """
  78   return a dictionary with all items of l being the keys of the dictionary
  79   """
  80   return dict([(i,None) for i in l])
  81
  82
  83 class LDIFWriter:
  84   """
  85   Write LDIF entry or change records to file object
  86   Copy LDIF input to a file output object containing all data retrieved
  87   via URLs
  88   """
  89
  90   def __init__(self,output_file,base64_attrs=None,cols=76,line_sep='\n'):
  91     """
  92     output_file
  93         file object for output
  94     base64_attrs
  95         list of attribute types to be base64-encoded in any case
  96     cols
  97         Specifies how many columns a line may have before it's
  98         folded into many lines.
  99     line_sep
 100         String used as line separator
 101     """
 102     self._output_file = output_file
 103     self._base64_attrs = list_dict([a.lower() for a in (base64_attrs or [])])
 104     self._cols = cols
 105     self._line_sep = line_sep
 106     self.records_written = 0
 107
 108   def _unfoldLDIFLine(self,line):
 109     """
 110     Write string line as one or more folded lines
 111     """
 112     # Check maximum line length
 113     line_len = len(line)
 114     if line_len<=self._cols:
 115       self._output_file.write(line)
 116       self._output_file.write(self._line_sep)
 117     else:
 118       # Fold line
 119       pos = self._cols
 120       self._output_file.write(line[0:min(line_len,self._cols)])
 121       self._output_file.write(self._line_sep)
 122       while pos<line_len:
 123         self._output_file.write(' ')
 124         self._output_file.write(line[pos:min(line_len,pos+self._cols-1)])
 125         self._output_file.write(self._line_sep)
 126         pos = pos+self._cols-1
 127     return # _unfoldLDIFLine()
 128
 129   def _unparseAttrTypeandValue(self,attr_type,attr_value):
 130     """
 131     Write a single attribute type/value pair
 132
 133     attr_type
 134           attribute type
 135     attr_value
 136           attribute value
 137     """
 138     if self._base64_attrs.has_key(attr_type.lower()) or \
 139        needs_base64(attr_value):
 140       # Encode with base64
 141       self._unfoldLDIFLine(':: '.join([attr_type,base64.encodestring(attr_value).replace('\n','')]))
 142     else:
 143       self._unfoldLDIFLine(': '.join([attr_type,attr_value]))
 144     return # _unparseAttrTypeandValue()
 145
 146   def _unparseEntryRecord(self,entry):
 147     """
 148     entry
 149         dictionary holding an entry
 150     """
 151     attr_types = entry.keys()[:]
 152     attr_types.sort()
 153     for attr_type in attr_types:
 154       for attr_value in entry[attr_type]:
 155         self._unparseAttrTypeandValue(attr_type,attr_value)
 156
 157   def _unparseChangeRecord(self,modlist):
 158     """
 159     modlist
 160         list of additions (2-tuple) or modifications (3-tuple)
 161     """
 162     mod_len = len(modlist[0])
 163     if mod_len==2:
 164       changetype = 'add'
 165     elif mod_len==3:
 166       changetype = 'modify'
 167     else:
 168       raise ValueError,"modlist item of wrong length"
 169     self._unparseAttrTypeandValue('changetype',changetype)
 170     for mod in modlist:
 171       if mod_len==2:
 172         mod_type,mod_vals = mod
 173       elif mod_len==3:
 174         mod_op,mod_type,mod_vals = mod
 175         self._unparseAttrTypeandValue(MOD_OP_STR[mod_op],mod_type)
 176       else:
 177         raise ValueError,"Subsequent modlist item of wrong length"
 178       if mod_vals:
 179         for mod_val in mod_vals:
 180           self._unparseAttrTypeandValue(mod_type,mod_val)
 181       if mod_len==3:
 182         self._output_file.write('-'+self._line_sep)
 183
 184   def unparse(self,dn,record):
 185     """
 186     dn
 187           string-representation of distinguished name
 188     record
 189           Either a dictionary holding the LDAP entry {attrtype:record}
 190           or a list with a modify list like for LDAPObject.modify().
 191     """
 192     if not record:
 193       # Simply ignore empty records
 194       return
 195     # Start with line containing the distinguished name
 196     self._unparseAttrTypeandValue('dn',dn)
 197     # Dispatch to record type specific writers
 198     if isinstance(record,types.DictType):
 199       self._unparseEntryRecord(record)
 200     elif isinstance(record,types.ListType):
 201       self._unparseChangeRecord(record)
 202     else:
 203       raise ValueError, "Argument record must be dictionary or list"
 204     # Write empty line separating the records
 205     self._output_file.write(self._line_sep)
 206     # Count records written
 207     self.records_written = self.records_written+1
 208     return # unparse()
 209
 210
 211 def CreateLDIF(dn,record,base64_attrs=None,cols=76):
 212   """
 213   Create LDIF single formatted record including trailing empty line.
 214   This is a compability function. Use is deprecated!
 215
 216   dn
 217         string-representation of distinguished name
 218   record
 219         Either a dictionary holding the LDAP entry {attrtype:record}
 220         or a list with a modify list like for LDAPObject.modify().
 221   base64_attrs
 222         list of attribute types to be base64-encoded in any case
 223   cols
 224         Specifies how many columns a line may have before it's
 225         folded into many lines.
 226   """
 227   f = StringIO()
 228   ldif_writer = LDIFWriter(f,base64_attrs,cols,'\n')
 229   ldif_writer.unparse(dn,record)
 230   s = f.getvalue()
 231   f.close()
 232   return s
 233
 234
 235 class LDIFParser:
 236   """
 237   Base class for a LDIF parser. Applications should sub-class this
 238   class and override method handle() to implement something meaningful.
 239
 240   Public class attributes:
 241   records_read
 242         Counter for records processed so far
 243   """
 244
 245   def _stripLineSep(self,s):
 246     """
 247     Strip trailing line separators from s, but no other whitespaces
 248     """
 249     if s[-2:]=='\r\n':
 250       return s[:-2]
 251     elif s[-1:]=='\n':
 252       return s[:-1]
 253     else:
 254       return s
 255
 256   def __init__(
 257     self,
 258     input_file,
 259     ignored_attr_types=None,
 260     max_entries=0,
 261     process_url_schemes=None,
 262     line_sep='\n'
 263   ):
 264     """
 265     Parameters:
 266     input_file
 267         File-object to read the LDIF input from
 268     ignored_attr_types
 269         Attributes with these attribute type names will be ignored.
 270     max_entries
 271         If non-zero specifies the maximum number of entries to be
 272         read from f.
 273     process_url_schemes
 274         List containing strings with URLs schemes to process with urllib.
 275         An empty list turns off all URL processing and the attribute
 276         is ignored completely.
 277     line_sep
 278         String used as line separator
 279     """
 280     self._input_file = input_file
 281     self._max_entries = max_entries
 282     self._process_url_schemes = list_dict([s.lower() for s in (process_url_schemes or [])])
 283     self._ignored_attr_types = list_dict([a.lower() for a in (ignored_attr_types or [])])
 284     self._line_sep = line_sep
 285     self.records_read = 0
 286
 287   def handle(self,dn,entry):
 288     """
 289     Process a single content LDIF record. This method should be
 290     implemented by applications using LDIFParser.
 291     """
 292
 293   def _unfoldLDIFLine(self):
 294     """
 295     Unfold several folded lines with trailing space into one line
 296     """
 297     unfolded_lines = [ self._stripLineSep(self._line) ]
 298     self._line = self._input_file.readline()
 299     while self._line and self._line[0]==' ':
 300       unfolded_lines.append(self._stripLineSep(self._line[1:]))
 301       self._line = self._input_file.readline()
 302     return ''.join(unfolded_lines)
 303
 304   def _parseAttrTypeandValue(self):
 305     """
 306     Parse a single attribute type and value pair from one or
 307     more lines of LDIF data
 308     """
 309     # Reading new attribute line
 310     unfolded_line = self._unfoldLDIFLine()
 311     # Ignore comments which can also be folded
 312     while unfolded_line and unfolded_line[0]=='#':
 313       unfolded_line = self._unfoldLDIFLine()
 314     if not unfolded_line or unfolded_line=='\n' or unfolded_line=='\r\n':
 315       return None,None
 316     try:
 317       colon_pos = unfolded_line.index(':')
 318     except ValueError:
 319       # Treat malformed lines without colon as non-existent
 320       return None,None
 321     attr_type = unfolded_line[0:colon_pos]
 322     # if needed attribute value is BASE64 decoded
 323     value_spec = unfolded_line[colon_pos:colon_pos+2]
 324     if value_spec=='::':
 325       # attribute value needs base64-decoding
 326       attr_value = base64.decodestring(unfolded_line[colon_pos+2:])
 327     elif value_spec==':<':
 328       # fetch attribute value from URL
 329       url = unfolded_line[colon_pos+2:].strip()
 330       attr_value = None
 331       if self._process_url_schemes:
 332         u = urlparse.urlparse(url)
 333         if self._process_url_schemes.has_key(u[0]):
 334           attr_value = urllib.urlopen(url).read()
 335     elif value_spec==':\r\n' or value_spec=='\n':
 336       attr_value = ''
 337     else:
 338       attr_value = unfolded_line[colon_pos+2:].lstrip()
 339     return attr_type,attr_value
 340
 341   def parse(self):
 342     """
 343     Continously read and parse LDIF records
 344     """
 345     self._line = self._input_file.readline()
 346
 347     while self._line and \
 348           (not self._max_entries or self.records_read<self._max_entries):
 349
 350       # Reset record
 351       version = None; dn = None; changetype = None; modop = None; entry = {}
 352
 353       attr_type,attr_value = self._parseAttrTypeandValue()
 354
 355       while attr_type!=None and attr_value!=None:
 356         if attr_type=='dn':
 357           # attr type and value pair was DN of LDIF record
 358           if dn!=None:
 359             raise ValueError, 'Two lines starting with dn: in one record.'
 360           if not is_dn(attr_value):
 361             raise ValueError, 'No valid string-representation of distinguished name %s.' % (repr(attr_value))
 362           dn = attr_value
 363         elif attr_type=='version' and dn is None:
 364           version = 1
 365         elif attr_type=='changetype':
 366           # attr type and value pair was DN of LDIF record
 367           if dn is None:
 368             raise ValueError, 'Read changetype: before getting valid dn: line.'
 369           if changetype!=None:
 370             raise ValueError, 'Two lines starting with changetype: in one record.'
 371           if not valid_changetype_dict.has_key(attr_value):
 372             raise ValueError, 'changetype value %s is invalid.' % (repr(attr_value))
 373           changetype = attr_value
 374         elif attr_value!=None and \
 375              not self._ignored_attr_types.has_key(attr_type.lower()):
 376           # Add the attribute to the entry if not ignored attribute
 377           if entry.has_key(attr_type):
 378             entry[attr_type].append(attr_value)
 379           else:
 380             entry[attr_type]=[attr_value]
 381
 382         # Read the next line within an entry
 383         attr_type,attr_value = self._parseAttrTypeandValue()
 384
 385       if entry:
 386         # append entry to result list
 387         self.handle(dn,entry)
 388         self.records_read = self.records_read+1
 389
 390     return # parse()
 391
 392
 393 class LDIFRecordList(LDIFParser):
 394   """
 395   Collect all records of LDIF input into a single list.
 396   of 2-tuples (dn,entry). It can be a memory hog!
 397   """
 398
 399   def __init__(
 400     self,
 401     input_file,
 402     ignored_attr_types=None,max_entries=0,process_url_schemes=None
 403   ):
 404     """
 405     See LDIFParser.__init__()
 406
 407     Additional Parameters:
 408     all_records
 409         List instance for storing parsed records
 410     """
 411     LDIFParser.__init__(self,input_file,ignored_attr_types,max_entries,process_url_schemes)
 412     self.all_records = []
 413
 414   def handle(self,dn,entry):
 415     """
 416     Append single record to dictionary of all records.
 417     """
 418     self.all_records.append((dn,entry))
 419
 420
 421 class LDIFCopy(LDIFParser):
 422   """
 423   Copy LDIF input to LDIF output containing all data retrieved
 424   via URLs
 425   """
 426
 427   def __init__(
 428     self,
 429     input_file,output_file,
 430     ignored_attr_types=None,max_entries=0,process_url_schemes=None,
 431     base64_attrs=None,cols=76,line_sep='\n'
 432   ):
 433     """
 434     See LDIFParser.__init__() and LDIFWriter.__init__()
 435     """
 436     LDIFParser.__init__(self,input_file,ignored_attr_types,max_entries,process_url_schemes)
 437     self._output_ldif = LDIFWriter(output_file,base64_attrs,cols,line_sep)
 438
 439   def handle(self,dn,entry):
 440     """
 441     Write single LDIF record to output file.
 442     """
 443     self._output_ldif.unparse(dn,entry)
 444
 445
 446 def ParseLDIF(f,ignore_attrs=None,maxentries=0):
 447   """
 448   Parse LDIF records read from file.
 449   This is a compability function. Use is deprecated!
 450   """
 451   ldif_parser = LDIFRecordList(
 452     f,ignored_attr_types=ignore_attrs,max_entries=maxentries,process_url_schemes=0
 453   )
 454   ldif_parser.parse()
 455   return ldif_parser.all_records