FIX: Umlauts in phonebook of newer firmwares
[vuplus_dvbapp-plugin] / fritzcall / src / nrzuname.py
1 #!/usr/bin/python
2 # -*- coding: UTF-8 -*-
3 # $Id$
4 # $Author$
5 # $Revision$
6 # $Date$
7
8 import re, sys, os
9 from xml.dom.minidom import parse
10 from twisted.web.client import getPage #@UnresolvedImport
11 from twisted.internet import reactor #@UnresolvedImport
12
# Module-wide switch evaluated by debug(); debug output is on by default.
debugVal = True


def setDebug(what):
    """Switch the module's debug output on or off.

    what -- value stored in the module-level flag ``debugVal``; debug()
            prints only while this value is true
    """
    globals()['debugVal'] = what
17
def debug(str):
    """Print ``str`` to standard output while the debug flag is set.

    str -- the message to print; nothing is printed when the module-level
           flag ``debugVal`` is false
    """
    if not debugVal:
        return
    print(str)
21
22 import htmlentitydefs
def html2unicode(in_html):
    """Replace HTML/WML character references in ``in_html`` by their characters.

    Hexadecimal (``&#xE4;``), decimal (``&#228;``) and named (``&auml;``)
    references are resolved in a single left-to-right pass, so text that is
    produced by one replacement (e.g. ``&amp;auml;`` -> ``&auml;``) is never
    decoded a second time.  References that cannot be resolved (unknown name,
    code point out of range) are left in the text unchanged.

    in_html -- the text to convert; the caller in this module passes text
               that has already been decoded to unicode
    returns -- the text with every resolvable reference replaced
    """
    # '#x' + hex digits, '#' + decimal digits, or an entity name of any length
    entitymask = re.compile(r'&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);')

    def decode(found):
        # Map one matched reference to its character, or keep it verbatim.
        entity = found.group(0)
        reference = found.group(1)
        try:
            if reference[:2] in ('#x', '#X'):
                codepoint = int(reference[2:], 16)
            elif reference[0] == '#':
                codepoint = int(reference[1:])
            else:
                codepoint = htmlentitydefs.name2codepoint[reference]
            char = unichr(codepoint)
        except KeyError:
            debug("[Callhtml2utf8] KeyError " + entity + "/" + reference)
            return entity
        except (ValueError, OverflowError):
            # code point outside the range unichr can represent
            debug("[Callhtml2utf8] ValueError " + entity + "/" + reference)
            return entity
        # repr() keeps the debug output ASCII-only; printing the raw character
        # could raise UnicodeEncodeError on a non-unicode stdout
        debug("[Callhtml2utf8] replace %s with %s" % (repr(entity), repr(char)))
        return char

    try:
        return entitymask.sub(decode, in_html)
    except UnicodeDecodeError:
        # Python 2: a byte string containing non-ASCII bytes cannot be combined
        # with the unicode replacement characters; hand it back undecoded.
        debug("[Callhtml2utf8] lost in translation: input is not unicode")
        return in_html
72
def normalizePhoneNumber(intNo):
    """Reduce a formatted phone number to a plain string of digits.

    A leading ``+`` is rewritten to ``00``, the separators ``( ) / -`` and
    blanks are dropped, and the first run of digits in what remains is
    returned.

    intNo   -- the phone number as text
    returns -- the first run of digits, or '0' when there is none
    """
    international = re.match(r'^\+(.*)', intNo)
    if international:
        intNo = '00' + international.group(1)
    for separator in '() /-':
        intNo = intNo.replace(separator, '')
    digits = re.match(r'.*?([0-9]+)', intNo)
    return digits.group(1) if digits else '0'
83
def out(number, caller):
    """Print a caller string split into its name and address fields.

    ``caller`` is expected in the form ``"name, street, zipcode city"`` (the
    parts separated by ``", "``).  The result is printed as one line
    ``NA: ..;VN: ..;STR: ..;HNR: ..;PLZ: ..;ORT: ..``; fields for which the
    caller string has no part stay empty instead of raising IndexError.

    number -- the phone number, only used for the debug message
    caller -- the caller description; nothing is printed when it is empty
    """
    debug("[out] %s: %s" %(number, caller))
    if not caller:
        return
    name = vorname = strasse = hnr = plz = ort = ""
    lines = caller.split(', ')

    # first part: "<last name> <first name(s)>" or just a name
    found = re.match(r"(.+?)\s+(.+)", lines[0])
    if found:
        name, vorname = found.groups()
    else:
        name = lines[0]

    # second part: "<street> <number>", "<number> <street>" or just a street
    if len(lines) > 1:
        found = re.match(r"^(.+) ([-\d]+)$", lines[1], re.S)
        if found:
            strasse, hnr = found.groups()
        else:
            found = re.match(r"^(\d+) (.+)$", lines[1], re.S)
            if found:
                hnr, strasse = found.groups()
            else:
                strasse = lines[1]

    # remaining parts: the first one of the form "<zipcode> <city>" wins
    for line in lines[2:]:
        found = re.match(r"(\S+)\s+(.+)", line, re.S)
        if found:
            plz, ort = found.groups()
            break
    else:
        # nothing looked like "<zipcode> <city>": take the third part as city
        if len(lines) > 2:
            ort = lines[2].strip()
    print("NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" %( name,vorname,strasse,hnr,plz,ort ))
119
def simpleout(number, caller):
    """Print the caller string as it is.

    number -- accepted so the signature matches out(); not used
    caller -- the text to print
    """
    print(caller)
122
# Location of the reverse lookup definitions.  The file in the current
# directory is used unless the enigma2 plugin directory can be resolved.
reverseLookupFileName = "reverselookup.xml"
try:
    from Tools.Directories import resolveFilename, SCOPE_PLUGINS
    reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/FritzCall/reverselookup.xml")
except ImportError:
    pass

# Cache filled by ReverseLookupAndNotifier: country code (with "+" written as
# "00") -> the <website> elements of that country.
countries = {}
# Modification time of the lookup file when the cache was last filled.
reverselookupMtime = 0
131
class ReverseLookupAndNotifier:
    """Look up name and address for a phone number on reverse lookup websites.

    The websites and the regular expressions used to pick name, street, city
    and zipcode out of their result pages are read from the XML file named by
    ``reverseLookupFileName``.  Creating an instance starts the lookup; the
    websites of the number's country are queried one after the other until
    one of them yields a complete result.  In every case ``outputFunction``
    is finally called with the number and the caller string
    ``"name, street, zipcode city"`` (encoded with ``charset``), or with an
    empty string when nothing was found.
    """

    def __init__(self, number, outputFunction=out, charset="cp1252", countrycode = "0049"):
        """Start the reverse lookup for ``number``.

        number         -- phone number as a string of digits; only numbers
                          starting with "0" are looked up
        outputFunction -- callable taking (number, caller) that receives the
                          result
        charset        -- encoding applied to the caller string before it is
                          passed to outputFunction
        countrycode    -- country code ("00" notation) assumed for numbers
                          that do not start with "00"
        """
        debug("[ReverseLookupAndNotifier] reverse Lookup for %s!" %number)
        self.number = number
        self.outputFunction = outputFunction
        self.caller = ""
        self.currentWebsite = None
        self.nextWebsiteNo = 0
        self.websites = []
        self.charset = charset
        self.countrycode = countrycode

        self._loadCountries()

        # [:1] instead of [0] so that an empty number does not raise
        if self.number[:1] != "0":
            # self.caller = _("UNKNOWN")
            self.notifyAndReset()
            return

        if self.number[:2] == "00":
            # country codes in "00" notation are 3 (e.g. USA) to 5 characters long
            for length in (3, 4, 5):
                if self.number[:length] in countries:
                    self.countrycode = self.number[:length]
                    break
            else:
                debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
                # self.caller = _("UNKNOWN")
                self.notifyAndReset()
                return

        # a country without any <website> element cannot be handled either
        if self.countrycode in countries and countries[self.countrycode]:
            debug("[ReverseLookupAndNotifier] Found website for reverse lookup")
            self.websites = countries[self.countrycode]
            self.nextWebsiteNo = 1
            self.handleWebsite(self.websites[0])
        else:
            debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
            # self.caller = _("UNKNOWN")
            self.notifyAndReset()
            return

    def _loadCountries(self):
        """(Re-)fill the module-level ``countries`` cache from the lookup file.

        The file is parsed when the cache is empty or when the file is newer
        than at the time of the last parse.
        """
        global reverselookupMtime
        reverselookupMtimeAct = os.stat(reverseLookupFileName)[8]    # st_mtime
        if not countries or reverselookupMtimeAct > reverselookupMtime:
            debug("[ReverseLookupAndNotifier] (Re-)Reading %s\n" %reverseLookupFileName)
            dom = parse(reverseLookupFileName)
            reverselookupMtime = reverselookupMtimeAct
            # drop entries of countries that are no longer in the file
            countries.clear()
            for top in dom.getElementsByTagName("reverselookup"):
                for country in top.getElementsByTagName("country"):
                    code = country.getAttribute("code").replace("+","00")
                    countries[code] = country.getElementsByTagName("website")

    def _buildUrl(self, website):
        """Return the query url of ``website`` for self.number, or None.

        None is returned when the url template has no $NUMBER placeholder or
        uses $AREACODE/$PFXAREACODE without the attribute giving its length.
        """
        if self.number[:2] == "00":
            # strip the country code from the front only, not every occurrence
            number = website.getAttribute("prefix") + self.number.replace(self.countrycode, "", 1)
        else:
            number = self.number

        # The placeholders are substituted with str.replace: the url may
        # contain literal '%' characters, which rules out '%' formatting.
        url = website.getAttribute("url")
        if "$AREACODE" in url and website.hasAttribute("areacode"):
            areaCodeLen = int(website.getAttribute("areacode"))
            return url.replace("$AREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
        if "$PFXAREACODE" in url and website.hasAttribute("pfxareacode"):
            areaCodeLen = int(website.getAttribute("pfxareacode"))
            return url.replace("$PFXAREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
        if "$AREACODE" in url or "$PFXAREACODE" in url:
            debug("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
            return None
        if "$NUMBER" in url:
            return url.replace("$NUMBER", number)
        debug("[ReverseLookupAndNotifier] handleWebsite: cannot handle websites with no $NUMBER in url")
        return None

    def handleWebsite(self, website):
        """Request the result page for self.number from ``website``.

        website -- <website> element with the attributes name, url and prefix

        The page is passed to _gotPage, a failure to _gotError.  When no url
        can be built the lookup ends with an empty result.
        """
        debug("[ReverseLookupAndNotifier] handleWebsite: " + website.getAttribute("name"))
        url = self._buildUrl(website)
        if url is None:
            # self.caller = _("UNKNOWN")
            self.notifyAndReset()
            return
        debug("[ReverseLookupAndNotifier] Url to query: " + url)
        url = url.encode("UTF-8", "replace")
        self.currentWebsite = website
        getPage(url,
            agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
            ).addCallback(self._gotPage).addErrback(self._gotError)

    def _findDetail(self, pat, what, page):
        """Return group 1 of ``pat`` matched anywhere in ``page``, or None.

        pat  -- regular expression from the lookup file; '' means "no pattern"
        what -- name of the detail, used for the debug output only
        page -- the decoded result page
        """
        if not pat:
            return None
        pat = ".*?" + pat
        debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( what, pat ))
        found = re.match(pat, page, re.S|re.M)
        if not found:
            return None
        debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( what, repr(found.group(1)) ))
        return found.group(1)

    def _gotPage(self, page):
        """Extract the caller from a result page and report it.

        page    -- the page as delivered by getPage
        returns -- True when a complete result was found and reported;
                   otherwise _gotError is called to try the next website
        """
        def cleanName(text):
            # Remove markup leftovers, resolve entities, collapse blanks.
            item = text.replace("&nbsp;"," ").replace("</b>","").replace(","," ")
            # NOTE(review): .decode() on text that is already unicode relies on
            # Python 2's implicit encoding step - confirm the resulting bytes
            # are what the callers expect before changing this line.
            item = html2unicode(item).decode('iso-8859-1')
            newitem = item.replace("  ", " ")
            while newitem != item:
                item = newitem
                newitem = item.replace("  ", " ")
            return newitem.strip()

        debug("[ReverseLookupAndNotifier] _gotPage")
        charset = "ISO-8859-1"
        found = re.match('.*content=".*?charset=([^"]+)"',page,re.S)
        if found:
            debug("[ReverseLookupAndNotifier] Charset: " + found.group(1))
            charset = found.group(1)
        try:
            page = page.decode(charset, "replace")
        except LookupError:
            # the page announces a charset Python does not know
            page = page.decode("ISO-8859-1", "replace")
        # Replace no-break spaces only after decoding: the byte 0xA0 is also
        # part of multi-byte UTF-8 sequences.
        page = page.replace(u"\xa0", u" ")

        for entry in self.currentWebsite.getElementsByTagName("entry"):
            #
            # for the sites delivering fuzzy matches, we check against the returned number
            #
            pat = self.getPattern(entry, "number")
            if pat:
                pat = ".*?" + pat
                debug("[ReverseLookupAndNotifier] _gotPage: look for number with '''%s'''" %( pat ))
                found = re.match(pat, page, re.S|re.M)
                if found:
                    if self.number[:2] == '00':
                        # country codes differ in length, do not assume 4
                        number = '0' + self.number[len(self.countrycode):]
                    else:
                        number = self.number
                    if number != normalizePhoneNumber(found.group(1)):
                        debug("[ReverseLookupAndNotifier] _gotPage: got unequal number '''%s''' for '''%s'''" %(found.group(1),self.number))
                        continue
            details = []

            # look for <firstname> and <lastname> match, if not there look for <name>, if not there skip the entry
            pat = self.getPattern(entry, "lastname")
            if pat:
                lastname = self._findDetail(pat, "lastname", page)
                if lastname is None:
                    # without a name the entry can never become complete
                    continue
                lastname = cleanName(lastname)
                firstname = self._findDetail(self.getPattern(entry, "firstname"), "firstname", page)
                if firstname is not None:
                    firstname = cleanName(firstname)
                if firstname:
                    details.append(lastname + ' ' + firstname)
                else:
                    details.append(lastname)
            else:
                name = self._findDetail(self.getPattern(entry, "name"), "name", page)
                if name is None:
                    debug("[ReverseLookupAndNotifier] _gotPage: no name found, skipping")
                    continue
                item = cleanName(name)
                debug("[ReverseLookupAndNotifier] _gotPage: add to details: " + repr(item))
                details.append(item)

            for what in ["street", "city", "zipcode"]:
                value = self._findDetail(self.getPattern(entry, what), what, page)
                if value is None:
                    break
                item = cleanName(value)
                debug("[ReverseLookupAndNotifier] _gotPage: add to details: " + repr(item))
                details.append(item.strip())

            # only name + street + city + zipcode together make a result
            if len(details) != 4:
                continue

            name = details[0]
            address =  details[1] + ", " + details[3] + " " + details[2]
            debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s\nAddress: %s" %(name, address))
            self.caller = "%s, %s" %(name, address)
            # if self.number != 0 and config.plugins.Call.addcallers.value and self.event == "RING":
                # phonebook.add(self.number, self.caller)

            self.notifyAndReset()
            return True

        self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))

    def _gotError(self, error = ""):
        """Try the next website, or report an empty result when none is left.

        error -- description of what went wrong, used for the debug output
        """
        debug("[ReverseLookupAndNotifier] _gotError - Error: %s" %error)
        if self.nextWebsiteNo >= len(self.websites):
            debug("[ReverseLookupAndNotifier] _gotError: I give up")
            # self.caller = _("UNKNOWN")
            self.notifyAndReset()
            return
        debug("[ReverseLookupAndNotifier] _gotError: try next website")
        self.nextWebsiteNo += 1
        self.handleWebsite(self.websites[self.nextWebsiteNo - 1])

    def getPattern(self, website, which):
        """Return the text of the first <which> element below ``website``.

        website -- the element to search in
        which   -- tag name of the pattern, e.g. "name" or "street"
        returns -- the pattern text, or '' when there is no such element or
                   the element is empty
        """
        pat1 = website.getElementsByTagName(which)
        if len(pat1) == 0:
            return ''
        if len(pat1) > 1:
            debug("[ReverseLookupAndNotifier] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
        node = pat1[0].firstChild
        if node is None:
            # empty element, e.g. <street/>
            return ''
        return node.data

    def notifyAndReset(self):
        """Pass the result to outputFunction; stop the reactor in script mode.

        self.caller is encoded with self.charset first; when that fails it is
        passed on as it is.
        """
        debug("[ReverseLookupAndNotifier] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
        # debug("1: " + repr(self.caller))
        if self.caller:
            try:
                # debug("2: " + repr(self.caller))
                self.caller = self.caller.encode(self.charset)
                # debug("3: " + repr(self.caller))
            except (UnicodeError, LookupError, TypeError):
                debug("[ReverseLookupAndNotifier] cannot encode?!?!")
            # self.caller = unicode(self.caller)
            # debug("4: " + repr(self.caller))
            self.outputFunction(self.number, self.caller)
        else:
            self.outputFunction(self.number, "")
        if __name__ == '__main__':
            # This method is also reached from __init__, i.e. before
            # reactor.run(); stopping a reactor that is not running raises.
            reactor.callWhenRunning(reactor.stop) #@UndefinedVariable
371
# Command line use:
#   nrzuname.py <number>            look up <number>, print split fields, debug output on
#   nrzuname.py <number> <charset>  look up <number>, print the plain result encoded in <charset>
if __name__ == '__main__':
    cwd = os.path.dirname(sys.argv[0])
    arguments = sys.argv[1:]
    if len(arguments) == 1:
        # nrzuname.py number
        ReverseLookupAndNotifier(arguments[0])
        reactor.run() #@UndefinedVariable
    elif len(arguments) == 2:
        # nrzuname.py number charset
        setDebug(False)
        ReverseLookupAndNotifier(arguments[0], simpleout, arguments[1])
        reactor.run() #@UndefinedVariable