2 # -*- coding: UTF-8 -*-
11 from xml.dom.minidom import parse
12 from twisted.web.client import getPage #@UnresolvedImport
13 from twisted.internet import reactor #@UnresolvedImport
16 from . import debug #@UnresolvedImport # pylint: disable-msg=W0613,F0401
17 def setDebug(what): # pylint: disable-msg=W0613
29 def html2unicode(in_html, charset):
30 #===============================================================================
33 # in_html = in_html.decode('iso-8859-1')
34 # debug("[Callhtml2utf8] Converted from latin1")
36 # debug("[Callhtml2utf8] lost in translation from latin1")
39 # in_html = in_html.decode('utf-8')
40 # debug("[Callhtml2utf8] Converted from utf-8")
42 # debug("[Callhtml2utf8] lost in translation from utf-8")
44 #===============================================================================
# first convert some WML codes from hex: e.g. &#xE4; -> &#228;
47 htmlentityhexnumbermask = re.compile('(&#x(..);)')
48 entities = htmlentityhexnumbermask.finditer(in_html)
50 in_html = in_html.replace(x.group(1), '&#' + str(int(x.group(2), 16)) + ';')
52 htmlentitynamemask = re.compile('(&(\D{1,5}?);)')
54 entities = htmlentitynamemask.finditer(in_html)
56 # debug("[Callhtml2utf8] mask: found %s" %repr(x.group(2)))
57 entitydict[x.group(1)] = x.group(2)
58 for key, name in entitydict.items():
60 entitydict[key] = htmlentitydefs.name2codepoint[str(name)]
62 debug("[Callhtml2utf8] KeyError " + key + "/" + name)
64 htmlentitynumbermask = re.compile('(&#(\d{1,5}?);)')
65 entities = htmlentitynumbermask.finditer(in_html)
67 # debug("[Callhtml2utf8] number: found %s" %x.group(1))
68 entitydict[x.group(1)] = x.group(2)
69 for key, codepoint in entitydict.items():
71 uml = unichr(int(codepoint))
72 debug("[nrzuname] html2utf8: replace %s with %s in %s" %(repr(key), repr(uml), repr(in_html[0:20]+'...')))
73 in_html = in_html.replace(key, uml)
75 debug("[nrzuname] html2utf8: ValueError " + repr(key) + ":" + repr(codepoint) + " (" + str(e) + ")")
def normalizePhoneNumber(intNo):
    """Reduce a phone number to a plain string of digits.

    A leading '+' is rewritten to the international prefix '00'; round
    brackets, blanks, slashes and dashes are removed.  The first run of
    digits found in what remains is the result.

    intNo   -- phone number as text, e.g. '+49 (30) 12-34'
    returns -- digits only, e.g. '0049301234'; '0' when intNo holds no digit
    """
    # Only numbers written with a leading '+' need the prefix rewrite; the
    # match must be checked before its group is used.
    found = re.match(r'^\+(.*)', intNo)
    if found:
        intNo = '00' + found.group(1)
    for separator in ('(', ')', ' ', '/', '-'):
        intNo = intNo.replace(separator, '')
    # Skip any leading non-digit text and keep the first digit run.
    found = re.match(r'.*?([0-9]+)', intNo)
    if found:
        return found.group(1)
    return '0'
89 def out(number, caller):
90 debug("[nrzuname] out: %s: %s" %(number, caller))
91 found = re.match("NA: ([^;]*);VN: ([^;]*);STR: ([^;]*);HNR: ([^;]*);PLZ: ([^;]*);ORT: ([^;]*)", caller)
94 ( name, vorname, strasse, hnr, plz, ort ) = (found.group(1),
102 name += ' ' + vorname
103 if strasse or hnr or plz or ort:
109 if (strasse or hnr) and (plz or ort):
112 name += plz + ' ' + ort
120 def simpleout(number, caller): #@UnusedVariable # pylint: disable-msg=W0613
124 from Tools.Directories import resolveFilename, SCOPE_PLUGINS
125 reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/FritzCall/reverselookup.xml")
127 reverseLookupFileName = "reverselookup.xml"
130 reverselookupMtime = 0
132 class ReverseLookupAndNotifier:
133 def __init__(self, number, outputFunction=out, charset="cp1252", countrycode = "0049"):
134 debug("[ReverseLookupAndNotifier] reverse Lookup for %s!" %number)
136 self.outputFunction = outputFunction
138 self.currentWebsite = None
139 self.nextWebsiteNo = 0
140 #===============================================================================
141 # sorry does not work at all
143 # charset = sys.getdefaultencoding()
144 # debug("[ReverseLookupAndNotifier] set charset from system: %s!" %charset)
145 #===============================================================================
146 self.charset = charset
148 global reverselookupMtime
149 reverselookupMtimeAct = os.stat(reverseLookupFileName)[8]
150 if not countries or reverselookupMtimeAct > reverselookupMtime:
151 debug("[ReverseLookupAndNotifier] (Re-)Reading %s\n" %reverseLookupFileName)
152 reverselookupMtime = reverselookupMtimeAct
153 dom = parse(reverseLookupFileName)
154 for top in dom.getElementsByTagName("reverselookup"):
155 for country in top.getElementsByTagName("country"):
156 code = country.getAttribute("code").replace("+","00")
157 countries[code] = country.getElementsByTagName("website")
159 self.countrycode = countrycode
161 if re.match('^\+', self.number):
162 self.number = '00' + self.number[1:]
164 if self.number[:len(countrycode)] == countrycode:
165 self.number = '0' + self.number[len(countrycode):]
168 # self.caller = _("UNKNOWN")
169 self.notifyAndReset()
172 if self.number[:2] == "00":
173 if countries.has_key(self.number[:3]): # e.g. USA
174 self.countrycode = self.number[:3]
175 elif countries.has_key(self.number[:4]):
176 self.countrycode = self.number[:4]
177 elif countries.has_key(self.number[:5]):
178 self.countrycode = self.number[:5]
180 debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
181 # self.caller = _("UNKNOWN")
182 self.notifyAndReset()
185 if countries.has_key(self.countrycode):
186 debug("[ReverseLookupAndNotifier] Found website for reverse lookup")
187 self.websites = countries[self.countrycode]
188 self.nextWebsiteNo = 1
189 self.handleWebsite(self.websites[0])
191 debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
192 # self.caller = _("UNKNOWN")
193 self.notifyAndReset()
def handleWebsite(self, website):
    """Build the query URL for one website entry and start fetching it.

    website -- DOM element; its "name", "prefix" and "url" attributes are
               read, plus "areacode" / "pfxareacode" when the url contains
               the matching placeholder.

    The page is requested with getPage(); self._gotPage is registered as
    callback and self._gotError as errback.  If no usable URL can be built,
    the lookup is finished through self.notifyAndReset() instead.
    """
    debug("[ReverseLookupAndNotifier] handleWebsite: " + website.getAttribute("name"))
    if self.number[:2] == "00":
        # Strip the country code from the front only; str.replace() would
        # also delete any later occurrence of the same digits.
        national = self.number
        if national.startswith(self.countrycode):
            national = national[len(self.countrycode):]
        number = website.getAttribute("prefix") + national
    else:
        number = self.number

    url = website.getAttribute("url")
    # The placeholders are looked up literally: in a regular expression an
    # unescaped '$' is the end-of-string anchor and would never match here.
    usesAreaCode = "$AREACODE" in url
    usesPfxAreaCode = "$PFXAREACODE" in url
    if (usesAreaCode and not website.hasAttribute("areacode")) \
            or (usesPfxAreaCode and not website.hasAttribute("pfxareacode")):
        # Without the length attribute the placeholder cannot be filled in.
        debug("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
        # self.caller = _("UNKNOWN")
        self.notifyAndReset()
        return

    # The "(pfx)areacode" attribute gives the number of leading digits that
    # form the area code; the remainder is substituted for $NUMBER.
    # NOTE: the last two branches go through %-formatting, so a literal '%'
    # in the url attribute has to be written as '%%'.
    if usesAreaCode:
        areaCodeLen = int(website.getAttribute("areacode"))
        url = url.replace("$AREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
    elif usesPfxAreaCode:
        areaCodeLen = int(website.getAttribute("pfxareacode"))
        url = url.replace("$PFXAREACODE","%(pfxareacode)s").replace("$NUMBER", "%(number)s")
        url = url % { 'pfxareacode': number[:areaCodeLen], 'number': number[areaCodeLen:] }
    elif "$NUMBER" in url:
        url = url.replace("$NUMBER","%s") %number
    else:
        debug("[ReverseLookupAndNotifier] handleWebsite: cannot handle websites with no $NUMBER in url")
        # self.caller = _("UNKNOWN")
        self.notifyAndReset()
        return
    debug("[ReverseLookupAndNotifier] Url to query: " + url)
    url = url.encode("UTF-8", "replace")
    self.currentWebsite = website
    getPage(url,
        agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
        ).addCallback(self._gotPage).addErrback(self._gotError)
235 def _gotPage(self, page):
237 item = text.replace("%20"," ").replace(" "," ").replace("</b>","").replace(","," ").replace('\n',' ').replace('\t',' ')
239 item = html2unicode(item, self.charset)
240 #===================================================================
241 # try: # this works under Windows
242 # item = item.encode('iso-8859-1')
243 # except UnicodeEncodeError:
244 # debug("[ReverseLookupAndNotifier] cleanName: encoding problem with iso8859")
245 # try: # this works under Enigma2
246 # item = item.encode('utf-8')
247 # except UnicodeEncodeError:
248 # debug("[ReverseLookupAndNotifier] cleanName: encoding problem with utf-8")
250 # item = item.encode(self.charset)
251 # except UnicodeEncodeError:
252 # # debug("[ReverseLookupAndNotifier] cleanName: " + traceback.format_exc())
253 # debug("[ReverseLookupAndNotifier] cleanName: encoding problem")
254 #===================================================================
256 newitem = item.replace(" ", " ")
257 while newitem != item:
259 newitem = item.replace(" ", " ")
260 return newitem.strip()
262 debug("[ReverseLookupAndNotifier] _gotPage")
263 found = re.match('.*<meta http-equiv="Content-Type" content="(?:application/xhtml\+xml|text/html); charset=([^"]+)" />', page, re.S)
265 debug("[ReverseLookupAndNotifier] Charset: " + found.group(1))
266 page = page.replace("\xa0"," ").decode(found.group(1), "replace")
268 debug("[ReverseLookupAndNotifier] Default Charset: iso-8859-1")
269 page = page.replace("\xa0"," ").decode("ISO-8859-1", "replace")
271 for entry in self.currentWebsite.getElementsByTagName("entry"):
273 # for the sites delivering fuzzy matches, we check against the returned number
275 pat = self.getPattern(entry, "number")
278 debug("[ReverseLookupAndNotifier] _gotPage: look for number with '''%s'''" %( pat ))
279 found = re.match(pat, page, re.S|re.M)
281 if self.number[:2] == '00':
282 number = '0' + self.number[4:]
285 if number != normalizePhoneNumber(found.group(1)):
286 debug("[ReverseLookupAndNotifier] _gotPage: got unequal number '''%s''' for '''%s'''" %(found.group(1), self.number))
289 # look for <firstname> and <lastname> match, if not there look for <name>, if not there break
296 pat = self.getPattern(entry, "lastname")
299 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "lastname", pat ))
300 found = re.match(pat, page, re.S|re.M)
302 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "lastname", found.group(1)))
303 name = cleanName(found.group(1))
305 pat = self.getPattern(entry, "firstname")
308 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "firstname", pat ))
309 found = re.match(pat, page, re.S|re.M)
311 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "firstname", found.group(1)))
312 firstname = cleanName(found.group(1)).strip()
315 pat = ".*?" + self.getPattern(entry, "name")
316 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "name", pat ))
317 found = re.match(pat, page, re.S|re.M)
319 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "name", found.group(1)))
320 item = cleanName(found.group(1))
321 # debug("[ReverseLookupAndNotifier] _gotPage: name: " + item)
323 firstNameFirst = entry.getElementsByTagName('name')[0].getAttribute('swapFirstAndLastName')
324 # debug("[ReverseLookupAndNotifier] _gotPage: swapFirstAndLastName: " + firstNameFirst)
325 if firstNameFirst == 'true': # that means, the name is of the form "firstname lastname"
326 found = re.match('(.*?)\s+(.*)', name)
328 firstname = found.group(1)
329 name = found.group(2)
331 debug("[ReverseLookupAndNotifier] _gotPage: no name found, skipping")
337 pat = ".*?" + self.getPattern(entry, "city")
338 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "city", pat ))
339 found = re.match(pat, page, re.S|re.M)
341 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "city", found.group(1)))
342 item = cleanName(found.group(1))
343 debug("[ReverseLookupAndNotifier] _gotPage: city: " + item)
349 pat = ".*?" + self.getPattern(entry, "zipcode")
350 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "zipcode", pat ))
351 found = re.match(pat, page, re.S|re.M)
352 if found and found.group(1):
353 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "zipcode", found.group(1)))
354 item = cleanName(found.group(1))
355 debug("[ReverseLookupAndNotifier] _gotPage: zipcode: " + item)
356 zipcode = item.strip()
358 pat = ".*?" + self.getPattern(entry, "street")
359 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "street", pat ))
360 found = re.match(pat, page, re.S|re.M)
361 if found and found.group(1):
362 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "street", found.group(1)))
363 item = cleanName(found.group(1))
364 debug("[ReverseLookupAndNotifier] _gotPage: street: " + item)
365 street = item.strip()
367 found = re.match("^(.+) ([-\d]+)$", street, re.S)
369 street = found.group(1)
370 streetno = found.group(2)
371 #===============================================================
373 # found = re.match("^(\d+) (.+)$", street, re.S)
375 # street = found.group(2)
376 # streetno = found.group(1)
377 #===============================================================
379 self.caller = "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" % ( name, firstname, street, streetno, zipcode, city )
380 debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s" %(self.caller))
382 self.notifyAndReset()
385 self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
def _gotError(self, error = ""):
    """Handle a failed lookup on the current website.

    error -- failure description, only written to the debug log.

    Moves on to the next entry of self.websites; when self.nextWebsiteNo
    has reached the end of that list the lookup is finished through
    self.notifyAndReset().
    """
    debug("[ReverseLookupAndNotifier] _gotError - Error: %s" %error)
    if self.nextWebsiteNo >= len(self.websites):
        debug("[ReverseLookupAndNotifier] _gotError: I give up")
        # self.caller = _("UNKNOWN")
        self.notifyAndReset()
        # Stop here: there is no further website to index.
        return

    debug("[ReverseLookupAndNotifier] _gotError: try next website")
    # nextWebsiteNo is the index of the site to try now; advance it before
    # the request is started so a later failure continues with the
    # following site.
    upcoming = self.websites[self.nextWebsiteNo]
    self.nextWebsiteNo = self.nextWebsiteNo+1
    self.handleWebsite(upcoming)
def getPattern(self, website, which):
    """Return the text stored in the first <which> child element.

    website -- DOM element that is searched with getElementsByTagName()
    which   -- tag name to look for, e.g. "name" or "city"
    returns -- the data of the element's first child node, or '' when no
               such element exists or the element is empty

    More than one matching element is reported in the debug log; the first
    one is used.
    """
    candidates = website.getElementsByTagName(which)
    if not candidates:
        return ''
    if len(candidates) > 1:
        debug("[ReverseLookupAndNotifier] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
    children = candidates[0].childNodes
    if not children:
        # An empty element (<tag/>) carries no pattern text.
        return ''
    return children[0].data
def notifyAndReset(self):
    """Pass the lookup result to the output function and finish.

    A non-empty self.caller is encoded to self.charset (unencodable
    characters are replaced) and handed to self.outputFunction together
    with self.number; an empty caller is reported as "".  When the module
    runs as a script, the twisted reactor is stopped afterwards.
    """
    debug("[ReverseLookupAndNotifier] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
    # debug("1: " + repr(self.caller))
    if self.caller:
        try:
            debug("2: " + repr(self.caller))
            self.caller = self.caller.encode(self.charset, 'replace')
            debug("3: " + repr(self.caller))
        except (UnicodeError, LookupError):
            # UnicodeError covers the UnicodeDecodeError handled before;
            # LookupError is raised when self.charset names no known codec.
            # In both cases the caller is passed on unconverted so that the
            # output function is still called.
            debug("[ReverseLookupAndNotifier] cannot encode?!?!")
            # self.caller = unicode(self.caller)
        # debug("4: " + repr(self.caller))
        self.outputFunction(self.number, self.caller)
    else:
        self.outputFunction(self.number, "")
    if __name__ == '__main__':
        reactor.stop() #@UndefinedVariable # pylint: disable-msg=E1101
427 if __name__ == '__main__':
428 cwd = os.path.dirname(sys.argv[0])
429 if (len(sys.argv) == 2):
431 ReverseLookupAndNotifier(sys.argv[1], simpleout)
432 reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101
433 elif (len(sys.argv) == 3):
434 # nrzuname.py Nummer Charset
436 ReverseLookupAndNotifier(sys.argv[1], out, sys.argv[2])
437 reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101