[FanControl2] too much deleted metainfo inserted again
[vuplus_dvbapp-plugin] / fritzcall / src / nrzuname.py
index 2ab2c07..3aecb1d 100644 (file)
 #!/usr/bin/python
 # -*- coding: UTF-8 -*-
-# $Id$
-# $Author$
-# $Revision$
-# $Date$
+'''
+$Id$
+$Author$
+$Revision$
+$Date$
+'''
 
 import re, sys, os
 from xml.dom.minidom import parse
 from twisted.web.client import getPage #@UnresolvedImport
 from twisted.internet import reactor #@UnresolvedImport
 
-debugSetting = True
-def setDebug(what):
-       global debugSetting
-       debugSetting = what
-
-def debug(str):
-       if debugSetting:
-               print str
+try:
+       from . import debug #@UnresolvedImport # pylint: disable-msg=W0613,F0401
+       def setDebug(what): # pylint: disable-msg=W0613
+               pass
+except ValueError:
+       debugVal = True
+       def setDebug(what):
+               global debugVal
+               debugVal = what
+       def debug(message):
+               if debugVal:
+                       print message
 
 import htmlentitydefs
-def html2unicode(in_html):
-       # sanity checks
-       try:
-               in_html = in_html.decode('iso-8859-1')
-               debug("[Callhtml2utf8] Converted from latin1")
-       except:
-               debug("[Callhtml2utf8] lost in translation from latin1")
-               pass
-       try:
-               in_html = in_html.decode('utf-8')
-               debug("[Callhtml2utf8] Converted from utf-8")
-       except:
-               debug("[Callhtml2utf8] lost in translation from utf-8")
-               pass
+def html2unicode(in_html, charset):
+#===============================================================================
+#      # sanity checks
+#      try:
+#              in_html = in_html.decode('iso-8859-1')
+#              debug("[Callhtml2utf8] Converted from latin1")
+#      except:
+#              debug("[Callhtml2utf8] lost in translation from latin1")
+#              pass
+#      try:
+#              in_html = in_html.decode('utf-8')
+#              debug("[Callhtml2utf8] Converted from utf-8")
+#      except:
+#              debug("[Callhtml2utf8] lost in translation from utf-8")
+#              pass
+#===============================================================================
 
        # first convert some WML codes from hex: e.g. &#xE4 -> &#228
        htmlentityhexnumbermask = re.compile('(&#x(..);)')
        entities = htmlentityhexnumbermask.finditer(in_html)
        for x in entities:
-               in_html = in_html.replace(x.group(1), '&#' + str(int(x.group(2),16)) + ';')
+               in_html = in_html.replace(x.group(1), '&#' + str(int(x.group(2), 16)) + ';')
 
-       entitydict = {}
-       # catch ü and colleagues here
        htmlentitynamemask = re.compile('(&(\D{1,5}?);)')
+       entitydict = {}
        entities = htmlentitynamemask.finditer(in_html)
        for x in entities:
                # debug("[Callhtml2utf8] mask: found %s" %repr(x.group(2)))
-               entitydict[x.group(1)] = htmlentitydefs.name2codepoint[str(x.group(2))]
+               entitydict[x.group(1)] = x.group(2)
+       for key, name in entitydict.items():
+               try:
+                       entitydict[key] = htmlentitydefs.name2codepoint[str(name)]
+               except KeyError:
+                       debug("[Callhtml2utf8] KeyError " + key + "/" + name)
 
-       # this is for &#288 and other numbers
        htmlentitynumbermask = re.compile('(&#(\d{1,5}?);)')
        entities = htmlentitynumbermask.finditer(in_html)
        for x in entities:
                # debug("[Callhtml2utf8] number: found %s" %x.group(1))
                entitydict[x.group(1)] = x.group(2)
-               
-       # no go and replace all occurrences
        for key, codepoint in entitydict.items():
                try:
-                       debug("[Callhtml2utf8] replace %s with %s" %(repr(key), unichr(int(codepoint))))
-                       in_html = in_html.replace(unicode(key), (unichr(int(codepoint))))
-                       # in_html = in_html.replace(unicode(key), (unichr(int(codepoint))).decode('cp1252').encode('utf-8'))
-               except ValueError:
-                       debug("[Callhtml2utf8] ValueError " + key + "/" + str(codepoint))
-                       pass
+                       uml = unichr(int(codepoint))
+                       debug("[nrzuname] html2utf8: replace %s with %s in %s" %(repr(key), repr(uml), repr(in_html[0:20]+'...')))
+                       in_html = in_html.replace(key, uml)
+               except ValueError, e:
+                       debug("[nrzuname] html2utf8: ValueError " + repr(key) + ":" + repr(codepoint) + " (" + str(e) + ")")
        return in_html
 
-def out(number, caller):
-       debug("[out] %s: %s" %(number, caller))
-       if not caller:
-               return
-       name = vorname = strasse = hnr = plz = ort = ""
-       lines = caller.split(', ')
-       found = re.match("(.+?)\s+(.+)", lines[0])
+def normalizePhoneNumber(intNo):
+       found = re.match('^\+(.*)', intNo)
        if found:
-               name = found.group(1)
-               vorname = found.group(2)
-       else:
-               name = lines[0]
-       aktuell = 1
-       found = re.match("^(.+) ([-\d]+)$", lines[1], re.S)
+               intNo = '00' + found.group(1)
+       intNo = intNo.replace('(', '').replace(')', '').replace(' ', '').replace('/', '').replace('-', '')
+       found = re.match('.*?([0-9]+)', intNo)
        if found:
-               strasse = found.group(1)
-               hnr = found.group(2)
-               aktuell = 2
-       else:
-               found = re.match("^(\d+) (.+)$", lines[1], re.S)
-               if found:
-                       strasse = found.group(2)
-                       hnr = found.group(1)
-               else:
-                       strasse = lines[1]
-               aktuell = 2
-       for i in range(aktuell, len(lines)):
-               found = re.match("(\S+)\s+(.+)", lines[i], re.S)
-               if found:
-                       plz = found.group(1)
-                       ort = found.group(2)
-                       break
+               return found.group(1)
        else:
-               ort = lines[aktuell].strip()
-       print "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" %( name,vorname,strasse,hnr,plz,ort )
+               return '0'
 
-def simpleout(number, caller):
+def out(number, caller):
+       debug("[nrzuname] out: %s: %s" %(number, caller))
+       found = re.match("NA: ([^;]*);VN: ([^;]*);STR: ([^;]*);HNR: ([^;]*);PLZ: ([^;]*);ORT: ([^;]*)", caller)
+       if not found:
+               return
+       ( name, vorname, strasse, hnr, plz, ort ) = (found.group(1),
+                                                                                       found.group(2),
+                                                                                       found.group(3),
+                                                                                       found.group(4),
+                                                                                       found.group(5),
+                                                                                       found.group(6)
+                                                                                       )
+       if vorname:
+               name += ' ' + vorname
+       if strasse or hnr or plz or ort:
+               name += ', '
+       if strasse:
+               name += strasse
+       if hnr:
+               name += ' ' + hnr
+       if (strasse or hnr) and (plz or ort):
+               name += ', '
+       if plz and ort:
+               name += plz + ' ' + ort
+       elif plz:
+               name += plz
+       elif ort:
+               name += ort
+
+       print(name)
+
+def simpleout(number, caller): #@UnusedVariable # pylint: disable-msg=W0613
        print caller
 
 try:
@@ -144,6 +158,12 @@ class ReverseLookupAndNotifier:
 
                self.countrycode = countrycode
 
+               if re.match('^\+', self.number):
+                       self.number = '00' + self.number[1:]
+
+               if self.number[:len(countrycode)] == countrycode:
+                       self.number = '0' + self.number[len(countrycode):]
+
                if number[0] != "0":
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
@@ -181,7 +201,7 @@ class ReverseLookupAndNotifier:
                        number = self.number
 
                url = website.getAttribute("url")
-               if re.search('$AREACODE',url) or re.search('$PFXAREACODE',url):
+               if re.search('$AREACODE', url) or re.search('$PFXAREACODE', url):
                        debug("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
@@ -190,15 +210,14 @@ class ReverseLookupAndNotifier:
                # Apparently, there is no attribute called (pfx)areacode anymore
                # So, this below will not work.
                #
-               if re.search('\\$AREACODE',url) and website.hasAttribute("areacode"):
+               if re.search('\\$AREACODE', url) and website.hasAttribute("areacode"):
                        areaCodeLen = int(website.getAttribute("areacode"))
-                       url = url.replace("$AREACODE","%(areacode)s").replace("$NUMBER","%(number)s")
-                       url = url %{ 'areacode':number[:areaCodeLen], 'number':number[areaCodeLen:] }
-               elif re.search('\\$PFXAREACODE',url) and website.hasAttribute("pfxareacode"):
+                       url = url.replace("$AREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
+               elif re.search('\\$PFXAREACODE', url) and website.hasAttribute("pfxareacode"):
                        areaCodeLen = int(website.getAttribute("pfxareacode"))
-                       url = url.replace("$PFXAREACODE","%(pfxareacode)s").replace("$NUMBER","%(number)s")
-                       url = url %{ 'pfxareacode':number[:areaCodeLen], 'number':number[areaCodeLen:] }
-               elif re.search('\\$NUMBER',url): 
+                       url = url.replace("$PFXAREACODE","%(pfxareacode)s").replace("$NUMBER", "%(number)s")
+                       url = url % { 'pfxareacode': number[:areaCodeLen], 'number': number[areaCodeLen:] }
+               elif re.search('\\$NUMBER', url): 
                        url = url.replace("$NUMBER","%s") %number
                else:
                        debug("[ReverseLookupAndNotifier] handleWebsite: cannot handle websites with no $NUMBER in url")
@@ -214,48 +233,157 @@ class ReverseLookupAndNotifier:
 
 
        def _gotPage(self, page):
+               def cleanName(text):
+                       item = text.replace("%20"," ").replace("&nbsp;"," ").replace("</b>","").replace(","," ").replace('\n',' ').replace('\t',' ')
+
+                       item = html2unicode(item, self.charset)
+                       #===================================================================
+                       # try: # this works under Windows
+                       #       item = item.encode('iso-8859-1')
+                       # except UnicodeEncodeError:
+                       #       debug("[ReverseLookupAndNotifier] cleanName: encoding problem with iso8859")
+                       #       try: # this works under Enigma2
+                       #               item = item.encode('utf-8')
+                       #       except UnicodeEncodeError:
+                       #               debug("[ReverseLookupAndNotifier] cleanName: encoding problem with utf-8")
+                       #               try: # fall back
+                       #                       item = item.encode(self.charset)
+                       #               except UnicodeEncodeError:
+                       #                       # debug("[ReverseLookupAndNotifier] cleanName: " + traceback.format_exc())
+                       #                       debug("[ReverseLookupAndNotifier] cleanName: encoding problem")
+                       #===================================================================
+
+                       newitem = item.replace("  ", " ")
+                       while newitem != item:
+                               item = newitem
+                               newitem = item.replace("  ", " ")
+                       return newitem.strip()
+       
                debug("[ReverseLookupAndNotifier] _gotPage")
-               found = re.match('.*content=".*?charset=([^"]+)"',page,re.S)
+               found = re.match('.*<meta http-equiv="Content-Type" content="(?:application/xhtml\+xml|text/html); charset=([^"]+)" />', page, re.S)
                if found:
                        debug("[ReverseLookupAndNotifier] Charset: " + found.group(1))
                        page = page.replace("\xa0"," ").decode(found.group(1), "replace")
                else:
+                       debug("[ReverseLookupAndNotifier] Default Charset: iso-8859-1")
                        page = page.replace("\xa0"," ").decode("ISO-8859-1", "replace")
 
                for entry in self.currentWebsite.getElementsByTagName("entry"):
-                       # debug("[ReverseLookupAndNotifier] _gotPage: try entry")
-                       details = []
-                       for what in ["name", "street", "city", "zipcode"]:
-                               pat = ".*?" + self.getPattern(entry, what)
-                               debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( what, pat ))
+                       #
+                       # for the sites delivering fuzzy matches, we check against the returned number
+                       #
+                       pat = self.getPattern(entry, "number")
+                       if pat:
+                               pat = ".*?" + pat
+                               debug("[ReverseLookupAndNotifier] _gotPage: look for number with '''%s'''" %( pat ))
+                               found = re.match(pat, page, re.S|re.M)
+                               if found:
+                                       if self.number[:2] == '00':
+                                               number = '0' + self.number[4:]
+                                       else:
+                                               number = self.number
+                                       if number != normalizePhoneNumber(found.group(1)):
+                                               debug("[ReverseLookupAndNotifier] _gotPage: got unequal number '''%s''' for '''%s'''" %(found.group(1), self.number))
+                                               continue
+                       
+                       # prefer a <lastname> pattern (and then <firstname>); if the entry has no <lastname> pattern fall back to <name>; if no name is found, skip this entry
+                       name = ''
+                       firstname = ''
+                       street = ''
+                       streetno = ''
+                       city = ''
+                       zipcode = ''
+                       pat = self.getPattern(entry, "lastname")
+                       if pat:
+                               pat = ".*?" + pat
+                               debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "lastname", pat ))
+                               found = re.match(pat, page, re.S|re.M)
+                               if found:
+                                       debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "lastname", found.group(1)))
+                                       name = cleanName(found.group(1))
+
+                                       pat = self.getPattern(entry, "firstname")
+                                       if pat:
+                                               pat = ".*?" + pat
+                                               debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "firstname", pat ))
+                                               found = re.match(pat, page, re.S|re.M)
+                                               if found:
+                                                       debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "firstname", found.group(1)))
+                                               firstname = cleanName(found.group(1)).strip()
+
+                       else:
+                               pat = ".*?" + self.getPattern(entry, "name")
+                               debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "name", pat ))
                                found = re.match(pat, page, re.S|re.M)
                                if found:
-                                       debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( what, repr(found.group(1)) ))
-                                       item = found.group(1).replace("&nbsp;"," ").replace("</b>","").replace(","," ")
-                                       item = html2unicode(item)
-                                       newitem = item.replace("  ", " ")
-                                       while newitem != item:
-                                               item = newitem
-                                               newitem = item.replace("  ", " ")
-                                       debug("[ReverseLookupAndNotifier] _gotPage: add to details: " + repr(item))
-                                       details.append(item.strip())
+                                       debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "name", found.group(1)))
+                                       item = cleanName(found.group(1))
+                                       # debug("[ReverseLookupAndNotifier] _gotPage: name: " + item)
+                                       name = item.strip()
+                                       firstNameFirst = entry.getElementsByTagName('name')[0].getAttribute('swapFirstAndLastName')
+                                       # debug("[ReverseLookupAndNotifier] _gotPage: swapFirstAndLastName: " + firstNameFirst)
+                                       if firstNameFirst == 'true': # that means, the name is of the form "firstname lastname"
+                                               found = re.match('(.*?)\s+(.*)', name)
+                                               if found:
+                                                       firstname = found.group(1)
+                                                       name = found.group(2)
                                else:
-                                       break
+                                       debug("[ReverseLookupAndNotifier] _gotPage: no name found, skipping")
+                                       continue
 
-                       if len(details) != 4:
+                       if not name:
                                continue
-                       else:
-                               name = details[0]
-                               address =  details[1] + ", " + details[3] + " " + details[2]
-                               debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s\nAddress: %s" %(name, address))
-                               self.caller = "%s, %s" %(name, address)
-                               # if self.number != 0 and config.plugins.Call.addcallers.value and self.event == "RING":
-                                       # phonebook.add(self.number, self.caller)
 
-                               self.notifyAndReset()
-                               return True
+                       pat = ".*?" + self.getPattern(entry, "city")
+                       debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "city", pat ))
+                       found = re.match(pat, page, re.S|re.M)
+                       if found:
+                               debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "city", found.group(1)))
+                               item = cleanName(found.group(1))
+                               debug("[ReverseLookupAndNotifier] _gotPage: city: " + item)
+                               city = item.strip()
+
+                       if not city:
+                               continue
+
+                       pat = ".*?" + self.getPattern(entry, "zipcode")
+                       debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "zipcode", pat ))
+                       found = re.match(pat, page, re.S|re.M)
+                       if found and found.group(1):
+                               debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "zipcode", found.group(1)))
+                               item = cleanName(found.group(1))
+                               debug("[ReverseLookupAndNotifier] _gotPage: zipcode: " + item)
+                               zipcode = item.strip()
+
+                       pat = ".*?" + self.getPattern(entry, "street")
+                       debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "street", pat ))
+                       found = re.match(pat, page, re.S|re.M)
+                       if found and found.group(1):
+                               debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "street", found.group(1)))
+                               item = cleanName(found.group(1))
+                               debug("[ReverseLookupAndNotifier] _gotPage: street: " + item)
+                               street = item.strip()
+                               streetno = ''
+                               found = re.match("^(.+) ([-\d]+)$", street, re.S)
+                               if found:
+                                       street = found.group(1)
+                                       streetno = found.group(2)
+                               #===============================================================
+                               # else:
+                               #       found = re.match("^(\d+) (.+)$", street, re.S)
+                               #       if found:
+                               #               street = found.group(2)
+                               #               streetno = found.group(1)
+                               #===============================================================
+
+                       self.caller = "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" % ( name, firstname, street, streetno, zipcode, city )
+                       debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s" %(self.caller))
+
+                       self.notifyAndReset()
+                       return True
                else:
                        self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
+                       return False
                        
        def _gotError(self, error = ""):
                debug("[ReverseLookupAndNotifier] _gotError - Error: %s" %error)
@@ -271,37 +399,39 @@ class ReverseLookupAndNotifier:
 
        def getPattern(self, website, which):
                pat1 = website.getElementsByTagName(which)
-               if len(pat1) > 1:
-                       debug("[ReverseLookupAndNotifier] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
-               return pat1[0].childNodes[0].data
+               if len(pat1) == 0:
+                       return ''
+               else:
+                       if len(pat1) > 1:
+                               debug("[ReverseLookupAndNotifier] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
+                       return pat1[0].childNodes[0].data
 
        def notifyAndReset(self):
                debug("[ReverseLookupAndNotifier] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
                # debug("1: " + repr(self.caller))
                if self.caller:
                        try:
-                               debug("2: " + repr(self.caller))
-                               self.caller = self.caller.encode(self.charset)
-                               debug("3: " + repr(self.caller))
-                       except:
+                               debug("2: " + repr(self.caller))
+                               self.caller = self.caller.encode(self.charset, 'replace')
+                               debug("3: " + repr(self.caller))
+                       except UnicodeDecodeError:
                                debug("[ReverseLookupAndNotifier] cannot encode?!?!")
-                               pass
                        # self.caller = unicode(self.caller)
                        # debug("4: " + repr(self.caller))
                        self.outputFunction(self.number, self.caller)
                else:
                        self.outputFunction(self.number, "")
                if __name__ == '__main__':
-                       reactor.stop() #@UndefinedVariable
+                       reactor.stop() #@UndefinedVariable # pylint: disable-msg=E1101
 
 if __name__ == '__main__':
        cwd = os.path.dirname(sys.argv[0])
        if (len(sys.argv) == 2):
                # nrzuname.py Nummer
-               ReverseLookupAndNotifier(sys.argv[1])
-               reactor.run() #@UndefinedVariable
+               ReverseLookupAndNotifier(sys.argv[1], simpleout)
+               reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101
        elif (len(sys.argv) == 3):
                # nrzuname.py Nummer Charset
                setDebug(False)
-               ReverseLookupAndNotifier(sys.argv[1], simpleout, sys.argv[2])
-               reactor.run() #@UndefinedVariable
+               ReverseLookupAndNotifier(sys.argv[1], out, sys.argv[2])
+               reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101