2 # -*- coding: UTF-8 -*-
11 from xml.dom.minidom import parse
12 from twisted.web.client import getPage #@UnresolvedImport
13 from twisted.internet import reactor #@UnresolvedImport
16 from . import debug #@UnresolvedImport # pylint: disable-msg=W0613,F0401
# Debug hook taking a single argument; pylint's unused-argument warning
# (W0613) is suppressed on the definition line.
# NOTE(review): the body is not visible here -- confirm what it does with `what`.
17 def setDebug(what): # pylint: disable-msg=W0613
# Rewrite the HTML character references found in in_html.
#   in_html -- text to process; str.replace() is applied to it repeatedly
#   charset -- codec name handed to .encode() for every replacement character
# Three passes are visible: two-character hex references are rewritten to
# decimal ones, named references are collected and looked up in
# htmlentitydefs.name2codepoint, then decimal references are collected and
# every collected reference is substituted in the text.
# NOTE(review): relies on Python 2 names (unicode, unichr, htmlentitydefs).
# NOTE(review): no return statement is visible here; _gotPage assigns the
# result of this call, so presumably the modified in_html is returned -- confirm.
29 def html2unicode(in_html, charset):
30 #===============================================================================
33 # in_html = in_html.decode('iso-8859-1')
34 # debug("[Callhtml2utf8] Converted from latin1")
36 # debug("[Callhtml2utf8] lost in translation from latin1")
39 # in_html = in_html.decode('utf-8')
40 # debug("[Callhtml2utf8] Converted from utf-8")
42 # debug("[Callhtml2utf8] lost in translation from utf-8")
44 #===============================================================================
46 # first convert some WML codes from hex: e.g. &#xE4; -> &#228;
# The mask matches only references with exactly two characters after '&#x'.
47 htmlentityhexnumbermask = re.compile('(&#x(..);)')
48 entities = htmlentityhexnumbermask.finditer(in_html)
# group(2) is parsed as base 16 and written back as a decimal reference.
50 in_html = in_html.replace(x.group(1), '&#' + str(int(x.group(2), 16)) + ';')
# Named references: one to five non-digit characters between '&' and ';'.
52 htmlentitynamemask = re.compile('(&(\D{1,5}?);)')
54 entities = htmlentitynamemask.finditer(in_html)
56 # debug("[Callhtml2utf8] mask: found %s" %repr(x.group(2)))
# Maps the complete reference text to the bare entity name.
57 entitydict[x.group(1)] = x.group(2)
58 for key, name in entitydict.items():
# Replace the stored name by its code point.
60 entitydict[key] = htmlentitydefs.name2codepoint[str(name)]
# NOTE(review): presumably the handler for names missing from name2codepoint;
# the enclosing try/except lines are not visible here.
62 debug("[Callhtml2utf8] KeyError " + key + "/" + name)
# Decimal references with one to five digits; they go into the same dict.
64 htmlentitynumbermask = re.compile('(&#(\d{1,5}?);)')
65 entities = htmlentitynumbermask.finditer(in_html)
67 # debug("[Callhtml2utf8] number: found %s" %x.group(1))
68 entitydict[x.group(1)] = x.group(2)
69 for key, codepoint in entitydict.items():
71 debug("[nrzuname] html2utf8: replace %s with %s" %(repr(key), str(codepoint)))
# Substitute the reference by the character, encoded with `charset`.
72 in_html = in_html.replace(unicode(key), (unichr(int(codepoint))).encode(charset))
74 debug("[nrzuname] html2utf8: ValueError " + key + "/" + str(codepoint))
def normalizePhoneNumber(intNo):
    """Reduce a phone number as written or displayed to a plain digit string.

    A leading '+' is replaced by the international prefix '00', the
    separators '(', ')', ' ', '/' and '-' are removed, and the first run of
    digits in what remains is returned.

    intNo -- phone number as text, e.g. '+49 (30) 123-456'

    Returns the digit string, or '0' when intNo contains no digit at all.
    """
    # Raw strings keep the regex escapes out of Python's own string escaping.
    found = re.match(r'^\+(.*)', intNo)
    if found:
        intNo = '00' + found.group(1)
    for separator in ('(', ')', ' ', '/', '-'):
        intNo = intNo.replace(separator, '')
    # The lazy prefix skips any leading non-digit text such as a label.
    found = re.match(r'.*?([0-9]+)', intNo)
    if found:
        return found.group(1)
    return '0'
# Output callback (the default outputFunction of ReverseLookupAndNotifier).
#   number -- phone number that was looked up; only logged in the visible lines
#   caller -- record string "NA: ..;VN: ..;STR: ..;HNR: ..;PLZ: ..;ORT: .."
#             as assembled in _gotPage
# The visible lines split the record into its fields and build one display
# string in `name`.
# NOTE(review): the handling of a non-matching record and the statement that
# finally emits `name` are not visible here.
88 def out(number, caller):
89 debug("[nrzuname] out: %s: %s" %(number, caller))
# Six fields, each running up to the next ';': name, first name, street,
# house number, zip code and city (the order used when _gotPage builds it).
90 found = re.match("NA: ([^;]*);VN: ([^;]*);STR: ([^;]*);HNR: ([^;]*);PLZ: ([^;]*);ORT: ([^;]*)", caller)
93 ( name, vorname, strasse, hnr, plz, ort ) = (found.group(1),
# The first name follows the name, separated by a blank.
101 name += ' ' + vorname
# True as soon as any address field is filled.
102 if strasse or hnr or plz or ort:
# True only when there is both a street part and a zip/city part.
108 if (strasse or hnr) and (plz or ort):
111 name += plz + ' ' + ort
# Alternative output callback with the same (number, caller) signature as
# out(); it is the one handed to ReverseLookupAndNotifier when the script is
# started with a single command-line argument.
# NOTE(review): the body is not visible here.
119 def simpleout(number, caller): #@UnusedVariable # pylint: disable-msg=W0613
# Path of the XML file that describes the reverse-lookup websites.
# NOTE(review): as listed, the second assignment always overrides the first;
# presumably the import and the resolveFilename() call sit in a try block with
# the bare file name as fallback -- confirm.
123 from Tools.Directories import resolveFilename, SCOPE_PLUGINS
124 reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/FritzCall/reverselookup.xml")
126 reverseLookupFileName = "reverselookup.xml"
# Modification time of the XML file at the last parse. Starting at 0 makes
# the first comparison in ReverseLookupAndNotifier.__init__ trigger a parse.
129 reverselookupMtime = 0
131 class ReverseLookupAndNotifier:
# Start a reverse lookup for `number`; the outcome is delivered through
# outputFunction(number, caller) by notifyAndReset().
#   number         -- phone number to look up; a leading '+' is turned into '00'
#   outputFunction -- callable taking (number, caller); defaults to out
#   charset        -- codec used to encode the caller string before output
#   countrycode    -- own country code in '00..' form; a number starting with
#                     it is rewritten to national form with a leading '0'
# NOTE(review): several guarding lines (if/else/return) are not visible here,
# so the exact conditions for the early notifyAndReset() calls are unconfirmed.
132 def __init__(self, number, outputFunction=out, charset="cp1252", countrycode = "0049"):
133 debug("[ReverseLookupAndNotifier] reverse Lookup for %s!" %number)
135 self.outputFunction = outputFunction
137 self.currentWebsite = None
# Index into self.websites of the website to try next.
138 self.nextWebsiteNo = 0
139 #===============================================================================
140 # sorry does not work at all
142 # charset = sys.getdefaultencoding()
143 # debug("[ReverseLookupAndNotifier] set charset from system: %s!" %charset)
144 #===============================================================================
145 self.charset = charset
# (Re)load the website definitions when the module-level cache `countries` is
# empty or the XML file is newer than at the last parse.
147 global reverselookupMtime
# Index 8 of the stat tuple is st_mtime.
148 reverselookupMtimeAct = os.stat(reverseLookupFileName)[8]
149 if not countries or reverselookupMtimeAct > reverselookupMtime:
150 debug("[ReverseLookupAndNotifier] (Re-)Reading %s\n" %reverseLookupFileName)
151 reverselookupMtime = reverselookupMtimeAct
152 dom = parse(reverseLookupFileName)
153 for top in dom.getElementsByTagName("reverselookup"):
154 for country in top.getElementsByTagName("country"):
# Country codes are keyed in '00..' form; the value is the list of that
# country's <website> elements.
155 code = country.getAttribute("code").replace("+","00")
156 countries[code] = country.getElementsByTagName("website")
158 self.countrycode = countrycode
# Normalise '+..' to '00..'.
160 if re.match('^\+', self.number):
161 self.number = '00' + self.number[1:]
# A number inside the own country is reduced to national form.
163 if self.number[:len(countrycode)] == countrycode:
164 self.number = '0' + self.number[len(countrycode):]
167 # self.caller = _("UNKNOWN")
168 self.notifyAndReset()
# Foreign number: determine its country code by trying prefixes of three,
# four and five characters ('00' plus one to three digits).
# NOTE(review): dict.has_key() exists in Python 2 only.
171 if self.number[:2] == "00":
172 if countries.has_key(self.number[:3]): # e.g. USA
173 self.countrycode = self.number[:3]
174 elif countries.has_key(self.number[:4]):
175 self.countrycode = self.number[:4]
176 elif countries.has_key(self.number[:5]):
177 self.countrycode = self.number[:5]
179 debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
180 # self.caller = _("UNKNOWN")
181 self.notifyAndReset()
# Query the first website of the country; nextWebsiteNo then points at the
# second one for use by _gotError.
184 if countries.has_key(self.countrycode):
185 debug("[ReverseLookupAndNotifier] Found website for reverse lookup")
186 self.websites = countries[self.countrycode]
187 self.nextWebsiteNo = 1
188 self.handleWebsite(self.websites[0])
190 debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
191 # self.caller = _("UNKNOWN")
192 self.notifyAndReset()
def handleWebsite(self, website):
    """Build the query URL for one <website> element and start fetching it.

    website -- DOM element with the attributes "name", "prefix" and "url",
               optionally "areacode" or "pfxareacode"

    The URL template may contain the placeholders $NUMBER, $AREACODE and
    $PFXAREACODE. The area-code placeholders can only be filled when the
    element carries the matching length attribute; otherwise, or when the
    template has no $NUMBER at all, the lookup is ended through
    notifyAndReset(). On success self.currentWebsite is set and the page
    is requested with _gotPage / _gotError as callbacks.
    """
    debug("[ReverseLookupAndNotifier] handleWebsite: " + website.getAttribute("name"))
    if self.number[:2] == "00":
        # Drop the country code only where it is a prefix; str.replace()
        # would also delete the same digits further inside the number.
        national = self.number
        if national.startswith(self.countrycode):
            national = national[len(self.countrycode):]
        number = website.getAttribute("prefix") + national
    else:
        number = self.number

    url = website.getAttribute("url")
    # Plain substring tests: in a regular expression an unescaped '$' is the
    # end-of-string anchor, so a pattern like '$AREACODE' can never match.
    wantsAreaCode = "$AREACODE" in url
    wantsPfxAreaCode = "$PFXAREACODE" in url

    # Placeholders are substituted directly rather than via '%' formatting,
    # which fails on URLs that contain percent escapes such as '%20'.
    if wantsAreaCode and website.hasAttribute("areacode"):
        areaCodeLen = int(website.getAttribute("areacode"))
        url = url.replace("$AREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
    elif wantsPfxAreaCode and website.hasAttribute("pfxareacode"):
        areaCodeLen = int(website.getAttribute("pfxareacode"))
        url = url.replace("$PFXAREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
    elif wantsAreaCode or wantsPfxAreaCode:
        # Without the length attribute the placeholder would stay in the URL.
        debug("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
        # self.caller = _("UNKNOWN")
        self.notifyAndReset()
        return
    elif "$NUMBER" in url:
        url = url.replace("$NUMBER", number)
    else:
        debug("[ReverseLookupAndNotifier] handleWebsite: cannot handle websites with no $NUMBER in url")
        # self.caller = _("UNKNOWN")
        self.notifyAndReset()
        return

    debug("[ReverseLookupAndNotifier] Url to query: " + url)
    url = url.encode("UTF-8", "replace")
    self.currentWebsite = website
    getPage(url,
        agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
        ).addCallback(self._gotPage).addErrback(self._gotError)
# Callback for the fetched page: decode it, then try the <entry> elements of
# the current website in turn and report the first one that yields a name.
#   page -- page body as delivered by getPage; it is decoded with .decode() below
# NOTE(review): many if/else/continue lines are not visible here, so the exact
# nesting of the field lookups is unconfirmed.
234 def _gotPage(self, page):
# NOTE(review): the lines down to `return newitem.strip()` work on `text` and
# are invoked further down as cleanName(...); presumably they form a nested
# helper whose def line is not visible.
236 item = text.replace("%20"," ").replace(" "," ").replace("</b>","").replace(","," ").replace('\n',' ').replace('\t',' ')
238 item = html2unicode(item, self.charset)
239 #===================================================================
240 # try: # this works under Windows
241 # item = item.encode('iso-8859-1')
242 # except UnicodeEncodeError:
243 # debug("[ReverseLookupAndNotifier] cleanName: encoding problem with iso8859")
244 # try: # this works under Enigma2
245 # item = item.encode('utf-8')
246 # except UnicodeEncodeError:
247 # debug("[ReverseLookupAndNotifier] cleanName: encoding problem with utf-8")
249 # item = item.encode(self.charset)
250 # except UnicodeEncodeError:
251 # # debug("[ReverseLookupAndNotifier] cleanName: " + traceback.format_exc())
252 # debug("[ReverseLookupAndNotifier] cleanName: encoding problem")
253 #===================================================================
# NOTE(review): as listed this replaces a single blank by a single blank, which
# changes nothing; presumably the first literal was two blanks (collapsing
# runs of blanks) before whitespace was squeezed -- verify against the file.
255 newitem = item.replace(" ", " ")
256 while newitem != item:
258 newitem = item.replace(" ", " ")
259 return newitem.strip()
261 debug("[ReverseLookupAndNotifier] _gotPage")
# Take the charset from the page's Content-Type meta tag when there is one;
# the other visible decode call uses ISO-8859-1.
262 found = re.match('.*<meta http-equiv="Content-Type" content="(?:application/xhtml\+xml|text/html); charset=([^"]+)" />', page, re.S)
264 debug("[ReverseLookupAndNotifier] Charset: " + found.group(1))
265 page = page.replace("\xa0"," ").decode(found.group(1), "replace")
267 debug("[ReverseLookupAndNotifier] Default Charset: iso-8859-1")
268 page = page.replace("\xa0"," ").decode("ISO-8859-1", "replace")
# Each <entry> of the website carries its own set of field patterns.
270 for entry in self.currentWebsite.getElementsByTagName("entry"):
272 # for the sites delivering fuzzy matches, we check against the returned number
274 pat = self.getPattern(entry, "number")
277 debug("[ReverseLookupAndNotifier] _gotPage: look for number with '''%s'''" %( pat ))
278 found = re.match(pat, page, re.S|re.M)
# NOTE(review): [4:] assumes a country code of exactly four characters, while
# __init__ also accepts codes of three and five characters.
280 if self.number[:2] == '00':
281 number = '0' + self.number[4:]
284 if number != normalizePhoneNumber(found.group(1)):
285 debug("[ReverseLookupAndNotifier] _gotPage: got unequal number '''%s''' for '''%s'''" %(found.group(1), self.number))
288 # look for <firstname> and <lastname> match, if not there look for <name>, if not there break
295 pat = self.getPattern(entry, "lastname")
298 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "lastname", pat ))
299 found = re.match(pat, page, re.S|re.M)
301 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "lastname", found.group(1)))
302 name = cleanName(found.group(1))
304 pat = self.getPattern(entry, "firstname")
307 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "firstname", pat ))
308 found = re.match(pat, page, re.S|re.M)
310 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "firstname", found.group(1)))
311 firstname = cleanName(found.group(1)).strip()
# Combined <name> pattern; the lazy '.*?' prefix lets re.match() find the
# pattern anywhere in the page.
314 pat = ".*?" + self.getPattern(entry, "name")
315 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "name", pat ))
316 found = re.match(pat, page, re.S|re.M)
318 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "name", found.group(1)))
319 item = cleanName(found.group(1))
320 # debug("[ReverseLookupAndNotifier] _gotPage: name: " + item)
322 firstNameFirst = entry.getElementsByTagName('name')[0].getAttribute('swapFirstAndLastName')
323 # debug("[ReverseLookupAndNotifier] _gotPage: swapFirstAndLastName: " + firstNameFirst)
324 if firstNameFirst == 'true': # that means, the name is of the form "firstname lastname"
# Split at the first run of whitespace: first word -> firstname, rest -> name.
325 found = re.match('(.*?)\s+(.*)', name)
327 firstname = found.group(1)
328 name = found.group(2)
330 debug("[ReverseLookupAndNotifier] _gotPage: no name found, skipping")
336 pat = ".*?" + self.getPattern(entry, "city")
337 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "city", pat ))
338 found = re.match(pat, page, re.S|re.M)
340 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "city", found.group(1)))
341 item = cleanName(found.group(1))
342 debug("[ReverseLookupAndNotifier] _gotPage: city: " + item)
348 pat = ".*?" + self.getPattern(entry, "zipcode")
349 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "zipcode", pat ))
350 found = re.match(pat, page, re.S|re.M)
351 if found and found.group(1):
352 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "zipcode", found.group(1)))
353 item = cleanName(found.group(1))
354 debug("[ReverseLookupAndNotifier] _gotPage: zipcode: " + item)
355 zipcode = item.strip()
357 pat = ".*?" + self.getPattern(entry, "street")
358 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "street", pat ))
359 found = re.match(pat, page, re.S|re.M)
360 if found and found.group(1):
361 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "street", found.group(1)))
362 item = cleanName(found.group(1))
363 debug("[ReverseLookupAndNotifier] _gotPage: street: " + item)
364 street = item.strip()
# A trailing group of digits and dashes after the last blank is split off
# as the house number.
366 found = re.match("^(.+) ([-\d]+)$", street, re.S)
368 street = found.group(1)
369 streetno = found.group(2)
370 #===============================================================
372 # found = re.match("^(\d+) (.+)$", street, re.S)
374 # street = found.group(2)
375 # streetno = found.group(1)
376 #===============================================================
# Record format that out() takes apart again with its regular expression.
378 self.caller = "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" % ( name, firstname, street, streetno, zipcode, city )
379 debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s" %(self.caller))
381 self.notifyAndReset()
# Reported through _gotError, which moves on to the next website if any.
384 self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
# Failure path, used as errback of the page request and called by _gotPage
# when a website yields nothing: log the error, then either give up or query
# the next website of the country.
#   error -- failure description; only used in the log message
# NOTE(review): the return/else separating the two paths is not visible here.
387 def _gotError(self, error = ""):
388 debug("[ReverseLookupAndNotifier] _gotError - Error: %s" %error)
# nextWebsiteNo is the index of the next untried website.
389 if self.nextWebsiteNo >= len(self.websites):
390 debug("[ReverseLookupAndNotifier] _gotError: I give up")
391 # self.caller = _("UNKNOWN")
392 self.notifyAndReset()
395 debug("[ReverseLookupAndNotifier] _gotError: try next website")
# Advance first, then query the website at the previous index.
396 self.nextWebsiteNo = self.nextWebsiteNo+1
397 self.handleWebsite(self.websites[self.nextWebsiteNo-1])
def getPattern(self, website, which):
    """Return the text of the first <which> element found below website.

    website -- DOM element to search (the callers pass an <entry> element)
    which   -- tag name of the pattern wanted, e.g. "name" or "city"

    Returns '' when there is no such element or the element is empty, so
    that callers can test the result for truth or concatenate it.
    """
    pat1 = website.getElementsByTagName(which)
    if len(pat1) == 0:
        return ''
    if len(pat1) > 1:
        debug("[ReverseLookupAndNotifier] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
    children = pat1[0].childNodes
    # An empty element has no child node; indexing it would raise IndexError.
    if len(children) == 0:
        return ''
    return children[0].data
# Deliver the lookup result to the output callback and, when the module runs
# as a script, stop the Twisted reactor.
# Reads self.number, self.caller, self.charset and self.outputFunction.
# NOTE(review): the if/try/else lines framing the encode step are not visible
# here; presumably an empty caller takes the outputFunction(number, "") path.
408 def notifyAndReset(self):
409 debug("[ReverseLookupAndNotifier] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
410 # debug("1: " + repr(self.caller))
413 debug("2: " + repr(self.caller))
# Encode for output; characters the codec cannot represent are replaced.
414 self.caller = self.caller.encode(self.charset, 'replace')
415 debug("3: " + repr(self.caller))
416 except UnicodeDecodeError:
417 debug("[ReverseLookupAndNotifier] cannot encode?!?!")
418 # self.caller = unicode(self.caller)
419 # debug("4: " + repr(self.caller))
420 self.outputFunction(self.number, self.caller)
422 self.outputFunction(self.number, "")
# Ends the reactor.run() started in the script section.
423 if __name__ == '__main__':
424 reactor.stop() #@UndefinedVariable # pylint: disable-msg=E1101
# Script entry point: look up the number given on the command line and run
# the Twisted reactor until notifyAndReset() stops it.
426 if __name__ == '__main__':
427 cwd = os.path.dirname(sys.argv[0])
# One argument: the number; the result goes to simpleout.
428 if (len(sys.argv) == 2):
430 ReverseLookupAndNotifier(sys.argv[1], simpleout)
431 reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101
# Two arguments: the number and the charset for the output; the result goes to out.
432 elif (len(sys.argv) == 3):
433 # usage: nrzuname.py <number> <charset>
435 ReverseLookupAndNotifier(sys.argv[1], out, sys.argv[2])
436 reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101