1 # -*- coding: utf-8 -*-
3 ldif - generate and parse LDIF data (see RFC 2849)
5 See http://python-ldap.sourceforge.net for details.
9 Python compatibility note:
10 Tested with Python 2.0+, but should work with Python 1.5.2+.
19 'AttrTypeandValueLDIF','CreateLDIF','ParseLDIF',
27 import urlparse,urllib,base64,re,types
30 from cStringIO import StringIO
32 from StringIO import StringIO
# Regular-expression fragments, composed bottom-up:
# attribute type -> attribute value -> RDN (type = value) -> DN (RDN[,RDN...]).
attrtype_pattern = r'[\w;.]+(;[\w_-]+)*'
attrvalue_pattern = r'(([^,]|\\,)+|".*?")'
rdn_pattern = r'[ ]*=[ ]*'.join((attrtype_pattern, attrvalue_pattern))
dn_pattern = ''.join((rdn_pattern, r'([ ]*,[ ]*', rdn_pattern, r')*[ ]*'))
# Anchored at both ends so only a complete DN string matches.
dn_regex = re.compile('^' + dn_pattern + '$')

# Pattern for a run of LDIF lines: either a dn line or an attribute line.
ldif_pattern = '^((dn(:|::) %(dn_pattern)s)|(%(attrtype_pattern)s(:|::) .*)$)+' % {
    'dn_pattern': dn_pattern,
    'attrtype_pattern': attrtype_pattern,
}
43 'add':0,'delete':1,'replace':2
47 0:'add',1:'delete',2:'replace'
CHANGE_TYPES = ['add','delete','modify','modrdn']
# Same names as dictionary keys (values unused) for has_key()/in lookups.
valid_changetype_dict = dict([(c, None) for c in CHANGE_TYPES])

# Matches a string that is NOT safe to write verbatim: it starts with
# NUL, LF, CR, space, ':' or '<', contains NUL/LF/CR or a character in the
# range \200-\377, or ends with a space.
SAFE_STRING_PATTERN = '(^(\000|\n|\r| |:|<)|[\000\n\r\200-\377]+|[ ]+$)'
safe_string_re = re.compile(SAFE_STRING_PATTERN)
61 returns 1 if s is a LDAP DN
65 rm = dn_regex.match(s)
66 return rm!=None and rm.group(0)==s
def needs_base64(s):
    """
    Return a true value if s has to be base-64 encoded because of
    special chars, i.e. if safe_string_re finds a match anywhere in s.
    """
    return safe_string_re.search(s) is not None
def list_dict(l):
    """
    Return a dictionary with all items of l being the keys of the
    dictionary. Every value is None; duplicates in l collapse to one key.
    """
    return dict.fromkeys(l)
85 Write LDIF entry or change records to file object
86 Copy LDIF input to a file output object containing all data retrieved
def __init__(self,output_file,base64_attrs=None,cols=76,line_sep='\n'):
    """
    output_file
        file object for output
    base64_attrs
        list of attribute types to be base64-encoded in any case
    cols
        Specifies how many columns a line may have before it's
        folded into many lines.
    line_sep
        String used as line separator
    """
    self._output_file = output_file
    # Lower-cased attribute types stored as dictionary keys so the
    # per-attribute lookup does not depend on the caller's spelling.
    self._base64_attrs = list_dict([a.lower() for a in (base64_attrs or [])])
    # Read by _unfoldLDIFLine() to decide where a line is folded.
    self._cols = cols
    self._line_sep = line_sep
    # Incremented by unparse() for every record written.
    self.records_written = 0
108 def _unfoldLDIFLine(self,line):
110 Write string line as one or more folded lines
112 # Check maximum line length
114 if line_len<=self._cols:
115 self._output_file.write(line)
116 self._output_file.write(self._line_sep)
120 self._output_file.write(line[0:min(line_len,self._cols)])
121 self._output_file.write(self._line_sep)
123 self._output_file.write(' ')
124 self._output_file.write(line[pos:min(line_len,pos+self._cols-1)])
125 self._output_file.write(self._line_sep)
126 pos = pos+self._cols-1
127 return # _unfoldLDIFLine()
def _unparseAttrTypeandValue(self,attr_type,attr_value):
    """
    Write a single attribute type/value pair

    attr_type
        attribute type
    attr_value
        attribute value

    The value is written base64-encoded (separator '::') when the
    attribute type is listed in self._base64_attrs or when
    needs_base64() reports that the value is not a safe string;
    otherwise it is written verbatim (separator ':').
    """
    if attr_type.lower() in self._base64_attrs or \
       needs_base64(attr_value):
        # encodestring() inserts newlines into its output; remove them
        # and leave line folding to _unfoldLDIFLine().
        encoded_value = base64.encodestring(attr_value).replace('\n','')
        self._unfoldLDIFLine(':: '.join([attr_type,encoded_value]))
    else:
        self._unfoldLDIFLine(': '.join([attr_type,attr_value]))
    return # _unparseAttrTypeandValue()
146 def _unparseEntryRecord(self,entry):
149 dictionary holding an entry
151 attr_types = entry.keys()[:]
153 for attr_type in attr_types:
154 for attr_value in entry[attr_type]:
155 self._unparseAttrTypeandValue(attr_type,attr_value)
157 def _unparseChangeRecord(self,modlist):
160 list of additions (2-tuple) or modifications (3-tuple)
162 mod_len = len(modlist[0])
166 changetype = 'modify'
168 raise ValueError,"modlist item of wrong length"
169 self._unparseAttrTypeandValue('changetype',changetype)
172 mod_type,mod_vals = mod
174 mod_op,mod_type,mod_vals = mod
175 self._unparseAttrTypeandValue(MOD_OP_STR[mod_op],mod_type)
177 raise ValueError,"Subsequent modlist item of wrong length"
179 for mod_val in mod_vals:
180 self._unparseAttrTypeandValue(mod_type,mod_val)
182 self._output_file.write('-'+self._line_sep)
def unparse(self,dn,record):
    """
    Write a single record and the empty line that terminates it.

    dn
        string-representation of distinguished name
    record
        Either a dictionary holding the LDAP entry {attrtype:record}
        or a list with a modify list like for LDAPObject.modify().

    Empty records are ignored. Raises ValueError if record is neither
    a dictionary nor a list.
    """
    if not record:
        # Simply ignore empty records
        return
    # Start with line containing the distinguished name
    self._unparseAttrTypeandValue('dn',dn)
    # Dispatch to record type specific writers
    if isinstance(record,dict):
        self._unparseEntryRecord(record)
    elif isinstance(record,list):
        self._unparseChangeRecord(record)
    else:
        raise ValueError("Argument record must be dictionary or list")
    # Write empty line separating the records
    self._output_file.write(self._line_sep)
    # Count records written
    self.records_written = self.records_written+1
    return # unparse()
def CreateLDIF(dn,record,base64_attrs=None,cols=76):
    """
    Create LDIF single formatted record including trailing empty line.
    This is a compatibility function. Use is deprecated!

    dn
        string-representation of distinguished name
    record
        Either a dictionary holding the LDAP entry {attrtype:record}
        or a list with a modify list like for LDAPObject.modify().
    base64_attrs
        list of attribute types to be base64-encoded in any case
    cols
        Specifies how many columns a line may have before it's
        folded into many lines.

    Returns the record as a string.
    """
    f = StringIO()
    try:
        ldif_writer = LDIFWriter(f,base64_attrs,cols,'\n')
        ldif_writer.unparse(dn,record)
        return f.getvalue()
    finally:
        # Release the buffer even if writing the record failed
        f.close()
237 Base class for a LDIF parser. Applications should sub-class this
238 class and override method handle() to implement something meaningful.
240 Public class attributes:
242 Counter for records processed so far
245 def _stripLineSep(self,s):
247 Strip trailing line separators from s, but no other whitespaces
259 ignored_attr_types=None,
261 process_url_schemes=None,
267 File-object to read the LDIF input from
269 Attributes with these attribute type names will be ignored.
271 If non-zero specifies the maximum number of entries to be
274 List containing strings with URLs schemes to process with urllib.
275 An empty list turns off all URL processing and the attribute
276 is ignored completely.
278 String used as line separator
280 self._input_file = input_file
281 self._max_entries = max_entries
282 self._process_url_schemes = list_dict([s.lower() for s in (process_url_schemes or [])])
283 self._ignored_attr_types = list_dict([a.lower() for a in (ignored_attr_types or [])])
284 self._line_sep = line_sep
285 self.records_read = 0
def handle(self,dn,entry):
    """
    Hook called with the dn and the entry dictionary of a single
    content LDIF record. The base implementation does nothing;
    applications using LDIFParser should override it.
    """
    return None
293 def _unfoldLDIFLine(self):
295 Unfold several folded lines with trailing space into one line
297 unfolded_lines = [ self._stripLineSep(self._line) ]
298 self._line = self._input_file.readline()
299 while self._line and self._line[0]==' ':
300 unfolded_lines.append(self._stripLineSep(self._line[1:]))
301 self._line = self._input_file.readline()
302 return ''.join(unfolded_lines)
304 def _parseAttrTypeandValue(self):
306 Parse a single attribute type and value pair from one or
307 more lines of LDIF data
309 # Reading new attribute line
310 unfolded_line = self._unfoldLDIFLine()
311 # Ignore comments which can also be folded
312 while unfolded_line and unfolded_line[0]=='#':
313 unfolded_line = self._unfoldLDIFLine()
314 if not unfolded_line or unfolded_line=='\n' or unfolded_line=='\r\n':
317 colon_pos = unfolded_line.index(':')
319 # Treat malformed lines without colon as non-existent
321 attr_type = unfolded_line[0:colon_pos]
322 # if needed attribute value is BASE64 decoded
323 value_spec = unfolded_line[colon_pos:colon_pos+2]
325 # attribute value needs base64-decoding
326 attr_value = base64.decodestring(unfolded_line[colon_pos+2:])
327 elif value_spec==':<':
328 # fetch attribute value from URL
329 url = unfolded_line[colon_pos+2:].strip()
331 if self._process_url_schemes:
332 u = urlparse.urlparse(url)
333 if self._process_url_schemes.has_key(u[0]):
334 attr_value = urllib.urlopen(url).read()
335 elif value_spec==':\r\n' or value_spec=='\n':
338 attr_value = unfolded_line[colon_pos+2:].lstrip()
339 return attr_type,attr_value
343 Continuously read and parse LDIF records
345 self._line = self._input_file.readline()
347 while self._line and \
348 (not self._max_entries or self.records_read<self._max_entries):
351 version = None; dn = None; changetype = None; modop = None; entry = {}
353 attr_type,attr_value = self._parseAttrTypeandValue()
355 while attr_type!=None and attr_value!=None:
357 # attr type and value pair was DN of LDIF record
359 raise ValueError, 'Two lines starting with dn: in one record.'
360 if not is_dn(attr_value):
361 raise ValueError, 'No valid string-representation of distinguished name %s.' % (repr(attr_value))
363 elif attr_type=='version' and dn is None:
365 elif attr_type=='changetype':
366 # attr type and value pair was changetype of LDIF record
368 raise ValueError, 'Read changetype: before getting valid dn: line.'
370 raise ValueError, 'Two lines starting with changetype: in one record.'
371 if not valid_changetype_dict.has_key(attr_value):
372 raise ValueError, 'changetype value %s is invalid.' % (repr(attr_value))
373 changetype = attr_value
374 elif attr_value!=None and \
375 not self._ignored_attr_types.has_key(attr_type.lower()):
376 # Add the attribute to the entry if not ignored attribute
377 if entry.has_key(attr_type):
378 entry[attr_type].append(attr_value)
380 entry[attr_type]=[attr_value]
382 # Read the next line within an entry
383 attr_type,attr_value = self._parseAttrTypeandValue()
386 # append entry to result list
387 self.handle(dn,entry)
388 self.records_read = self.records_read+1
class LDIFRecordList(LDIFParser):
    """
    Collect all records of LDIF input into a single list
    of 2-tuples (dn,entry). It can be a memory hog!
    """

    def __init__(
        self,
        input_file,
        ignored_attr_types=None,max_entries=0,process_url_schemes=None
    ):
        """
        See LDIFParser.__init__()

        Additional attribute:
        all_records
            List instance for storing parsed records
        """
        LDIFParser.__init__(self,input_file,ignored_attr_types,max_entries,process_url_schemes)
        self.all_records = []

    def handle(self,dn,entry):
        """
        Append single record to the list of all records.
        """
        self.all_records.append((dn,entry))
class LDIFCopy(LDIFParser):
    """
    Copy LDIF input to LDIF output: every record handed to handle()
    is written to the output file through an LDIFWriter.
    """

    def __init__(
        self,
        input_file,output_file,
        ignored_attr_types=None,max_entries=0,process_url_schemes=None,
        base64_attrs=None,cols=76,line_sep='\n'
    ):
        """
        See LDIFParser.__init__() and LDIFWriter.__init__()
        """
        LDIFParser.__init__(self,input_file,ignored_attr_types,max_entries,process_url_schemes)
        # base64_attrs, cols and line_sep only configure the writer
        self._output_ldif = LDIFWriter(output_file,base64_attrs,cols,line_sep)

    def handle(self,dn,entry):
        """
        Write single LDIF record to output file.
        """
        self._output_ldif.unparse(dn,entry)
def ParseLDIF(f,ignore_attrs=None,maxentries=0):
    """
    Parse LDIF records read from file.
    This is a compatibility function. Use is deprecated!

    f
        file object to read the LDIF input from
    ignore_attrs
        attribute types to be ignored
    maxentries
        if non-zero, the maximum number of records to read

    Returns the list of (dn,entry) 2-tuples collected by LDIFRecordList.
    """
    ldif_parser = LDIFRecordList(
        f,ignored_attr_types=ignore_attrs,max_entries=maxentries,process_url_schemes=0
    )
    # Without this call no record is read and all_records stays empty
    ldif_parser.parse()
    return ldif_parser.all_records