2 # -*- coding: UTF-8 -*-
13 from xml.dom.minidom import parse
14 from twisted.web.client import getPage #@UnresolvedImport
15 from twisted.internet import reactor #@UnresolvedImport
def html2unicode(in_html, charset):
    """Replace HTML character references in *in_html* by the characters they denote.

    References are processed in three steps:

    1. hexadecimal references are rewritten to decimal ones
       (e.g. ``&#xE4;`` -> ``&#228;``),
    2. named references (e.g. ``&auml;``) are resolved through
       ``htmlentitydefs.name2codepoint``,
    3. decimal references and the resolved names are converted with ``unichr``
       and substituted into the text.

    Parameters:
        in_html: text that may contain character references.
        charset: not used by this function; kept so existing callers
            (which pass their charset) keep working.

    Returns:
        The text with every resolvable reference replaced. References with an
        unknown name or a code point ``unichr`` rejects are reported through
        ``debug`` and left untouched.
    """
    # Step 1: hex -> decimal. Only real hex digits are accepted, so int(.., 16)
    # cannot fail, and references of any length (not just two digits) are handled.
    in_html = re.sub(r'&#[xX]([0-9a-fA-F]+);',
                     lambda x: '&#' + str(int(x.group(1), 16)) + ';',
                     in_html)

    # Maps the literal reference text (e.g. '&auml;') to its code point:
    # an int for named references, a digit string for numeric ones.
    entitydict = {}

    # Step 2: named references of any length.
    for x in re.finditer(r'&([A-Za-z][A-Za-z0-9]*);', in_html):
        key, name = x.group(0), x.group(1)
        try:
            entitydict[key] = htmlentitydefs.name2codepoint[str(name)]
        except KeyError:
            # Unknown name: report it and leave the reference in the text.
            debug("[Callhtml2utf8] KeyError " + key + "/" + name)

    # Step 3a: decimal references (including those produced by step 1).
    for x in re.finditer(r'&#(\d{1,7});', in_html):
        entitydict[x.group(0)] = x.group(1)

    # Step 3b: substitute every collected reference.
    for key, codepoint in entitydict.items():
        try:
            uml = unichr(int(codepoint))
        except (ValueError, OverflowError) as e:
            debug("[nrzuname] html2utf8: ValueError " + repr(key) + ":" + repr(codepoint) + " (" + str(e) + ")")
            continue
        debug("[nrzuname] html2utf8: replace %s with %s in %s" %(repr(key), repr(uml), repr(in_html[0:20]+'...')))
        in_html = in_html.replace(key, uml)

    return in_html
def normalizePhoneNumber(intNo):
    """Reduce a phone number as written for humans to a plain digit string.

    A leading ``+`` is turned into the international prefix ``00``; the
    separators ``(``, ``)``, space, ``/`` and ``-`` are removed; the first run
    of digits in what remains is returned.

    Parameters:
        intNo: the number as text, e.g. ``"+49 (30) 123-456"``.

    Returns:
        The digit string, e.g. ``"004930123456"``.
        NOTE(review): the value returned when no digit is present is not
        visible in the reviewed extract; ``'0'`` is used here - confirm
        against the original file.
    """
    if intNo.startswith('+'):
        intNo = '00' + intNo[1:]

    for separator in '() /-':
        intNo = intNo.replace(separator, '')

    # Skip any leading non-digit text (e.g. a "Tel:" label) lazily and take
    # the first contiguous block of digits.
    found = re.match(r'.*?([0-9]+)', intNo)
    if found:
        return found.group(1)
    return '0'
def out(number, caller):
    """Format a structured caller record as one readable line and print it.

    Parameters:
        number: the phone number that was looked up (only logged).
        caller: record of the form
            ``"NA: <name>;VN: <first name>;STR: <street>;HNR: <house no>;PLZ: <zip>;ORT: <city>"``
            as produced by ``ReverseLookupAndNotify``.

    The result has the shape ``"<name> <first name>, <street> <house no>, <zip> <city>"``
    with empty parts and their separators omitted. A caller string that does
    not match the record format produces no output.

    NOTE(review): the statement that finally emits the line is not visible in
    the reviewed extract; printing to stdout is reconstructed from this
    function's use as command-line callback - confirm against the original.
    """
    debug("[nrzuname] out: %s: %s" %(number, caller))
    found = re.match("NA: ([^;]*);VN: ([^;]*);STR: ([^;]*);HNR: ([^;]*);PLZ: ([^;]*);ORT: ([^;]*)", caller)
    if not found:
        return

    ( name, vorname, strasse, hnr, plz, ort ) = found.groups()

    if vorname:
        name += ' ' + vorname
    # Separator between the name part and any address part.
    if strasse or hnr or plz or ort:
        name += ', '
    if strasse:
        name += strasse
    if hnr:
        name += ' ' + hnr
    # Separator between street part and zip/city part, only if both exist.
    if (strasse or hnr) and (plz or ort):
        name += ', '
    if plz and ort:
        name += plz + ' ' + ort
    elif plz:
        name += plz
    elif ort:
        name += ort

    print(name)
# Notification callback taking the same (number, caller) arguments as `out`;
# it is the callback used by the one-argument command-line mode of this file.
# NOTE(review): only the signature is visible in this extract - the body is
# missing here, so what is done with `caller` cannot be confirmed. The lint
# markers on the def line suppress an unused-argument warning (presumably for
# `number`).
93 def simpleout(number, caller): #@UnusedVariable # pylint: disable-msg=W0613
# Location of the reverse lookup definition file: inside the plugin tree when
# the Enigma2 helper module is importable, otherwise (e.g. when run stand-alone
# from the command line) a file of the same name in the current directory.
try:
    from Tools.Directories import resolveFilename, SCOPE_PLUGINS
    reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/NcidClient/reverselookup.xml")
except ImportError:
    reverseLookupFileName = "reverselookup.xml"

# Cache shared by all ReverseLookupAndNotify instances:
# country code in "00.." form -> list of <website> nodes for that country.
countries = {}
# Modification time of the definition file when `countries` was last filled;
# 0 means "never read".
reverselookupMtime = 0
class ReverseLookupAndNotify:
    """Look up the caller behind a phone number and report it to a callback.

    Creating an instance starts the lookup: the number is normalised, the
    country is determined, and the websites configured for that country in the
    reverse lookup definition file are queried one after the other (via
    Twisted's ``getPage``) until one yields a name and city. The callback is
    then called as ``notificationCallback(number, caller)`` where *caller* is
    ``"NA: ..;VN: ..;STR: ..;HNR: ..;PLZ: ..;ORT: .."`` encoded in *charset*,
    or ``""`` when nothing was found.

    NOTE(review): this class was reconstructed from an extract in which short
    lines (``if``/``else``/``return``/simple assignments) were missing. The
    places where a missing line carried information that is not implied by the
    surrounding code are marked with NOTE(review) below.
    """

    def __init__(self, number, notificationCallback=out, charset="cp1252", countrycode = "0049"):
        """Start a reverse lookup for *number*.

        Parameters:
            number: phone number; may start with ``+`` or ``00`` plus country code.
            notificationCallback: called with ``(number, caller)`` when done.
            charset: encoding applied to the caller string before notifying.
            countrycode: own country code in ``00..`` form; numbers carrying it
                are converted to national ``0..`` form.
        """
        global reverselookupMtime

        debug("[ReverseLookupAndNotify] reverse Lookup for %s!" %number)
        self.number = number
        self.notificationCallback = notificationCallback
        self.caller = ""            # result record, empty until a lookup succeeds
        self.currentWebsite = None  # <website> node of the request in flight
        self.nextWebsiteNo = 0      # index of the website to try after a failure
        self.websites = []          # <website> nodes of the number's country
        # Deriving the charset from sys.getdefaultencoding() was tried and did
        # not work, so the charset handed in by the caller is used as is.
        self.charset = charset

        # (Re-)read the definition file if it was never read or has changed
        # since; index 8 of the stat tuple is the modification time.
        reverselookupMtimeAct = os.stat(reverseLookupFileName)[8]
        if not countries or reverselookupMtimeAct > reverselookupMtime:
            debug("[ReverseLookupAndNotify] (Re-)Reading %s\n" %reverseLookupFileName)
            reverselookupMtime = reverselookupMtimeAct
            dom = parse(reverseLookupFileName)
            for top in dom.getElementsByTagName("reverselookup"):
                for country in top.getElementsByTagName("country"):
                    code = country.getAttribute("code").replace("+","00")
                    countries[code] = country.getElementsByTagName("website")

        self.countrycode = countrycode

        # Normalise: "+49..." -> "0049...", own country "0049..." -> "0...".
        if self.number.startswith('+'):
            self.number = '00' + self.number[1:]

        if self.number[:len(countrycode)] == countrycode:
            self.number = '0' + self.number[len(countrycode):]

        # NOTE(review): the condition of this early exit is not visible in the
        # extract. It is reconstructed as "number is neither in national (0..)
        # nor international (00..) form", which also covers an empty number -
        # confirm against the original file.
        if not self.number.startswith("0"):
            self.notifyAndReset()
            return

        if self.number[:2] == "00":
            # Foreign number: the country code is "00" plus 1 to 3 digits,
            # e.g. "001" for the USA; take the shortest configured match.
            for prefixLen in (3, 4, 5):
                if self.number[:prefixLen] in countries:
                    self.countrycode = self.number[:prefixLen]
                    break
            else:
                debug("[ReverseLookupAndNotify] Country cannot be reverse handled")
                self.notifyAndReset()
                return

        websites = countries.get(self.countrycode)
        if websites:
            debug("[ReverseLookupAndNotify] Found website for reverse lookup")
            self.websites = websites
            self.nextWebsiteNo = 1
            self.handleWebsite(self.websites[0])
        else:
            debug("[ReverseLookupAndNotify] Country cannot be reverse handled")
            self.notifyAndReset()
            return

    def handleWebsite(self, website):
        """Build the query URL for *website* and request the page.

        The ``$NUMBER``, ``$AREACODE`` and ``$PFXAREACODE`` placeholders of the
        website's ``url`` attribute are filled from the number. The response
        is handled by ``_gotPage``, failures by ``_gotError``.
        """
        debug("[ReverseLookupAndNotify] handleWebsite: " + website.getAttribute("name"))
        if self.number[:2] == "00":
            # Replace only the leading country code by the site's national
            # prefix; the same digits may legitimately occur later on.
            national = self.number
            if national.startswith(self.countrycode):
                national = national[len(self.countrycode):]
            number = website.getAttribute("prefix") + national
        else:
            number = self.number

        url = website.getAttribute("url")
        # Plain substring tests: in a regular expression "$" is the
        # end-of-string anchor, so a pattern like '$AREACODE' never matches.
        hasAreaCode = "$AREACODE" in url
        hasPfxAreaCode = "$PFXAREACODE" in url
        if (hasAreaCode and not website.hasAttribute("areacode")) \
                or (hasPfxAreaCode and not website.hasAttribute("pfxareacode")):
            # Without the length attribute the number cannot be split, so this
            # site cannot be queried; go on with the next one instead of
            # requesting a URL that still contains the placeholder.
            self._gotError("[ReverseLookupAndNotify] handleWebsite: (PFX)ARECODE cannot be handled")
            return

        # Placeholders are substituted with str.replace only; %-formatting
        # would fail on URLs that contain literal "%" characters.
        if hasAreaCode:
            areaCodeLen = int(website.getAttribute("areacode"))
            url = url.replace("$AREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
        elif hasPfxAreaCode:
            areaCodeLen = int(website.getAttribute("pfxareacode"))
            url = url.replace("$PFXAREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
        elif "$NUMBER" in url:
            url = url.replace("$NUMBER", number)
        else:
            debug("[ReverseLookupAndNotify] handleWebsite: cannot handle websites with no $NUMBER in url")
            self.notifyAndReset()
            return

        debug("[ReverseLookupAndNotify] Url to query: " + url)
        url = url.encode("UTF-8", "replace")
        self.currentWebsite = website
        getPage(url,
            agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
            ).addCallback(self._gotPage).addErrback(self._gotError)

    def _cleanName(self, text):
        """Return *text* from a result page as clean, single-spaced text."""
        # NOTE(review): the second replace appears in the extract with a bare
        # blank as search text (most likely an entity-decoded "&nbsp;");
        # restored as "&nbsp;" - confirm against the original file.
        item = text.replace("%20"," ").replace("&nbsp;"," ").replace("</b>","").replace(","," ").replace('\n',' ').replace('\t',' ')
        item = html2unicode(item, self.charset)
        # Collapse runs of whitespace and strip both ends.
        return " ".join(item.split())

    def _extract(self, which, pat, page):
        """Return the cleaned first group of pattern *pat* in *page*, or ''.

        *which* is the field name, used for logging only. An empty pattern, a
        failed match and an empty first group all yield ''.
        """
        if not pat:
            return ''
        pat = ".*?" + pat
        debug("[ReverseLookupAndNotify] _gotPage: look for '''%s''' with '''%s'''" %( which, pat ))
        found = re.match(pat, page, re.S|re.M)
        if not (found and found.group(1)):
            return ''
        debug("[ReverseLookupAndNotify] _gotPage: found for '''%s''': '''%s'''" %( which, found.group(1)))
        return self._cleanName(found.group(1))

    def _gotPage(self, page):
        """Extract the caller from a fetched result page and notify.

        Every ``<entry>`` of the current website is tried in order; an entry
        is accepted when it yields a name and a city (zip code and street are
        optional). If no entry matches, ``_gotError`` moves on to the next
        website.

        Returns True when a caller was found, False otherwise.
        NOTE(review): the return statements are not visible in the extract.
        """
        debug("[ReverseLookupAndNotify] _gotPage")
        found = re.match(r'.*<meta http-equiv="Content-Type" content="(?:application/xhtml\+xml|text/html); charset=([^"]+)" />', page, re.S)
        # Decode first, then replace the non-breaking space: replacing the raw
        # byte 0xA0 before decoding would destroy multi-byte UTF-8 sequences.
        if found:
            debug("[ReverseLookupAndNotify] Charset: " + found.group(1))
            try:
                page = page.decode(found.group(1), "replace")
            except LookupError:
                # The page announces a charset Python does not know.
                debug("[ReverseLookupAndNotify] Default Charset: iso-8859-1")
                page = page.decode("ISO-8859-1", "replace")
        else:
            debug("[ReverseLookupAndNotify] Default Charset: iso-8859-1")
            page = page.decode("ISO-8859-1", "replace")
        page = page.replace(u"\xa0", u" ")

        for entry in self.currentWebsite.getElementsByTagName("entry"):
            # For the sites delivering fuzzy matches, we check against the
            # returned number.
            pat = self.getPattern(entry, "number")
            if pat:
                pat = ".*?" + pat
                debug("[ReverseLookupAndNotify] _gotPage: look for number with '''%s'''" %( pat ))
                found = re.match(pat, page, re.S|re.M)
                if found:
                    if self.number[:2] == '00':
                        # Country codes are 3 to 5 characters long here, so
                        # strip by the detected code rather than a fixed 4.
                        number = '0' + self.number[len(self.countrycode):]
                    else:
                        number = self.number
                    if number != normalizePhoneNumber(found.group(1)):
                        debug("[ReverseLookupAndNotify] _gotPage: got unequal number '''%s''' for '''%s'''" %(found.group(1), self.number))
                        continue

            # Look for <lastname> (and then <firstname>); if the entry has no
            # <lastname> pattern look for <name>; without a name skip the entry.
            firstname = ''
            pat = self.getPattern(entry, "lastname")
            if pat:
                name = self._extract("lastname", pat, page)
                if name:
                    firstname = self._extract("firstname", self.getPattern(entry, "firstname"), page)
            else:
                name = self._extract("name", self.getPattern(entry, "name"), page)
                if not name:
                    debug("[ReverseLookupAndNotify] _gotPage: no name found, skipping")
                    continue
                firstNameFirst = entry.getElementsByTagName('name')[0].getAttribute('swapFirstAndLastName')
                if firstNameFirst == 'true': # that means, the name is of the form "firstname lastname"
                    found = re.match(r'(.*?)\s+(.*)', name)
                    if found:
                        firstname = found.group(1)
                        name = found.group(2)

            if not name:
                continue

            city = self._extract("city", self.getPattern(entry, "city"), page)
            if not city:
                continue
            debug("[ReverseLookupAndNotify] _gotPage: city: " + city)

            zipcode = self._extract("zipcode", self.getPattern(entry, "zipcode"), page)
            if zipcode:
                debug("[ReverseLookupAndNotify] _gotPage: zipcode: " + zipcode)

            street = self._extract("street", self.getPattern(entry, "street"), page)
            streetno = ''
            if street:
                debug("[ReverseLookupAndNotify] _gotPage: street: " + street)
                # Split a trailing house number ("Main Street 12-14"); a
                # leading house number is left as part of the street.
                found = re.match(r"^(.+) ([-\d]+)$", street, re.S)
                if found:
                    street = found.group(1)
                    streetno = found.group(2)

            self.caller = "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" % ( name, firstname, street, streetno, zipcode, city )
            debug("[ReverseLookupAndNotify] _gotPage: Reverse lookup succeeded:\nName: %s" %(self.caller))

            self.notifyAndReset()
            return True

        self._gotError("[ReverseLookupAndNotify] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
        return False

    def _gotError(self, error = ""):
        """Handle a failed website: try the next one, or give up and notify."""
        debug("[ReverseLookupAndNotify] _gotError - Error: %s" %error)
        if self.nextWebsiteNo >= len(self.websites):
            debug("[ReverseLookupAndNotify] _gotError: I give up")
            self.notifyAndReset()
            return

        debug("[ReverseLookupAndNotify] _gotError: try next website")
        self.nextWebsiteNo = self.nextWebsiteNo+1
        self.handleWebsite(self.websites[self.nextWebsiteNo-1])

    def getPattern(self, website, which):
        """Return the text of the first ``<which>`` child of *website*, or ''.

        '' is returned when there is no such element or it is empty. More than
        one element is logged as unexpected and the first one is used.
        """
        pat1 = website.getElementsByTagName(which)
        if len(pat1) == 0:
            return ''
        if len(pat1) > 1:
            debug("[ReverseLookupAndNotify] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
        if not pat1[0].childNodes:
            return ''
        return pat1[0].childNodes[0].data

    def notifyAndReset(self):
        """Hand the result to the notification callback.

        A non-empty caller record is encoded in ``self.charset`` first; if
        that fails it is passed on as it is. When the file runs as a script
        the reactor is stopped afterwards so the process terminates.
        """
        debug("[ReverseLookupAndNotify] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
        if self.caller:
            try:
                debug("2: " + repr(self.caller))
                self.caller = self.caller.encode(self.charset, 'replace')
                debug("3: " + repr(self.caller))
            except (UnicodeError, LookupError):
                # UnicodeError covers both the decode error Python 2 raises
                # for non-ASCII byte strings and encode errors; LookupError is
                # raised for an unknown charset name.
                debug("[ReverseLookupAndNotify] cannot encode?!?!")
            self.notificationCallback(self.number, self.caller)
        else:
            self.notificationCallback(self.number, "")
        if __name__ == '__main__':
            reactor.stop() #@UndefinedVariable # pylint: disable-msg=E1101
# Command-line entry point; runs only when this file is executed as a script.
#   one argument  (number):          look up with `simpleout` as callback
#   two arguments (number, charset): look up with `out` and the given charset
# In both modes the Twisted reactor is started right after the lookup object
# has been created.
# NOTE(review): the original lines numbered 400 and 405 are missing from this
# extract, so each branch may contain one more statement or comment than is
# shown here - check the original file before changing this block.
397 if __name__ == '__main__':
# cwd is computed here but not used in any of the lines visible below.
398 cwd = os.path.dirname(sys.argv[0])
399 if (len(sys.argv) == 2):
401 ReverseLookupAndNotify(sys.argv[1], simpleout)
402 reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101
403 elif (len(sys.argv) == 3):
404 # nrzuname.py <number> <charset>
406 ReverseLookupAndNotify(sys.argv[1], out, sys.argv[2])
407 reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101