UPD: nrzuname results made more generic
[vuplus_dvbapp-plugin] / fritzcall / src / nrzuname.py
1 #!/usr/bin/python
2 # -*- coding: UTF-8 -*-
3 # $Id$
4 # $Author$
5 # $Revision$
6 # $Date$
7
8 import re, sys, os, traceback
9 from xml.dom.minidom import parse
10 from twisted.web.client import getPage #@UnresolvedImport
11 from twisted.internet import reactor #@UnresolvedImport
12
# Logging shim: when this module is part of a package, the package-level
# debug() is used and setDebug() has nothing to switch.  When the file is run
# stand-alone the relative import fails (ValueError on Python 2, ImportError
# on Python 3) and a minimal print-based debug() is defined instead.
try:
    from . import debug #@UnresolvedImport
    def setDebug(what):
        """Accepted for interface compatibility; the package debug() is not switched here."""
        pass
except (ValueError, ImportError):
    debugVal = True
    def setDebug(what):
        """Switch the stand-alone debug output on (true value) or off (false value)."""
        global debugVal
        debugVal = what
    def debug(str):
        """Print the message if debug output is enabled."""
        if debugVal:
            print(str)
25
26 import htmlentitydefs
def html2unicode(in_html):
    """Replace HTML character references in in_html by the characters they denote.

    Handles hexadecimal references (&#xE4;), decimal references (&#228;) and
    named references (&auml;) in a single pass, so the output of one
    replacement is never interpreted as another reference.

    in_html -- text possibly containing character references
    Returns the text with every resolvable reference replaced.  References
    that cannot be resolved (unknown name, code point out of range) are left
    untouched and reported through debug().
    """
    # The entity table and the "code point -> character" builtin have
    # different names on Python 2 and Python 3.
    try:
        from htmlentitydefs import name2codepoint
    except ImportError:
        from html.entities import name2codepoint
    try:
        toChar = unichr
    except NameError:
        toChar = chr

    def replaceReference(match):
        reference = match.group(1)
        try:
            if reference[:2] in ('#x', '#X'):
                return toChar(int(reference[2:], 16))
            if reference[0] == '#':
                return toChar(int(reference[1:]))
            return toChar(name2codepoint[reference])
        except (KeyError, ValueError, OverflowError):
            debug("[nrzuname] html2unicode: cannot convert " + match.group(0))
            return match.group(0)

    return re.sub(r'&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);',
                  replaceReference, in_html)
74
def normalizePhoneNumber(intNo):
    """Reduce a formatted phone number to a plain digit string.

    A leading '+' becomes '00'; parentheses, blanks, slashes and dashes are
    dropped.  The result is the first run of digits found on the first line
    of what remains, or '0' if there is none.
    """
    if intNo[:1] == '+':
        intNo = '00' + intNo[1:]
    for separator in '() /-':
        intNo = intNo.replace(separator, '')
    # '.' does not match a newline, so only the first line is searched
    firstDigits = re.match(r'.*?([0-9]+)', intNo)
    return firstDigits.group(1) if firstDigits else '0'
85
def out(number, caller):
    """Print a caller record as one human-readable line.

    number -- the phone number; only used for the debug message
    caller -- record of the form
              "NA: ..;VN: ..;STR: ..;HNR: ..;PLZ: ..;ORT: .."
    Prints "name firstname, street houseno, zip city", leaving out empty
    parts together with their separators.  Nothing is printed if caller does
    not have the expected form.
    """
    debug("[nrzuname] out: %s: %s" %(number, caller))
    record = re.match("NA: ([^;]*);VN: ([^;]*);STR: ([^;]*);HNR: ([^;]*);PLZ: ([^;]*);ORT: ([^;]*)", caller)
    if record is None:
        return
    name, vorname, strasse, hnr, plz, ort = record.groups()

    hasStreetPart = bool(strasse or hnr)
    hasTownPart = bool(plz or ort)

    line = name
    if vorname:
        line += ' ' + vorname
    if hasStreetPart or hasTownPart:
        line += ', '
    line += strasse
    if hnr:
        line += ' ' + hnr
    if hasStreetPart and hasTownPart:
        line += ', '
    if plz and ort:
        line += plz + ' ' + ort
    else:
        # at most one of the two is non-empty here
        line += plz or ort

    print(line)
108
def simpleout(number, caller): #@UnusedVariable
    """Print the caller record unchanged; number is accepted but not used."""
    print(caller)
111
# Cache of the parsed lookup definitions, filled by ReverseLookupAndNotifier:
# country code -> <website> elements of that country.
countries = {}
# Modification time of the definition file when it was last parsed.
reverselookupMtime = 0

# Location of the definition file: resolved through Tools.Directories when
# that module can be imported, otherwise a file name relative to the current
# directory.
try:
    from Tools.Directories import resolveFilename, SCOPE_PLUGINS
    reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/FritzCall/reverselookup.xml")
except ImportError:
    reverseLookupFileName = "reverselookup.xml"
120
class ReverseLookupAndNotifier:
    """Resolve a phone number to a caller record via reverse-lookup websites.

    The websites of each country and the regular expressions used to scrape
    them are read from the XML file named by reverseLookupFileName and cached
    in the module-level ``countries`` dict.  Creating an instance starts the
    lookup immediately: the websites of the number's country are fetched one
    after the other with twisted's getPage until one of them yields at least
    a name and a city.  The outcome is reported exactly once through
    outputFunction(number, caller), where caller is "" if nothing was found
    and otherwise a string
    "NA: <name>;VN: <first name>;STR: <street>;HNR: <house no>;PLZ: <zip>;ORT: <city>"
    encoded in ``charset``.
    """

    def __init__(self, number, outputFunction=out, charset="cp1252", countrycode = "0049"):
        """Normalise the number and start the lookup.

        number         -- phone number; a leading '+' is treated like '00'
        outputFunction -- callable(number, caller) receiving the result
        charset        -- encoding of the caller string passed to outputFunction
        countrycode    -- own country code in '00..' form; numbers carrying it
                          are treated as national numbers
        """
        debug("[ReverseLookupAndNotifier] reverse Lookup for %s!" %number)
        self.number = number
        self.outputFunction = outputFunction
        self.caller = ""
        self.currentWebsite = None
        self.nextWebsiteNo = 0
        self.websites = []
        self.charset = charset
        self.countrycode = countrycode

        self._loadCountries()

        if self.number.startswith('+'):
            self.number = '00' + self.number[1:]

        if self.number[:len(countrycode)] == countrycode:
            self.number = '0' + self.number[len(countrycode):]

        # Only numbers with a national ('0') or international ('00') prefix
        # can be looked up.  The normalised number is tested, so '+..'
        # numbers and empty input are handled as well.
        if self.number[:1] != "0":
            self.notifyAndReset()
            return

        if self.number[:2] == "00":
            # Country codes including the leading '00' are 3 (e.g. USA) to 5
            # characters long; the shortest known one wins.
            for length in (3, 4, 5):
                if self.number[:length] in countries:
                    self.countrycode = self.number[:length]
                    break
            else:
                debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
                self.notifyAndReset()
                return

        self.websites = countries.get(self.countrycode) or []
        if not self.websites:
            debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
            self.notifyAndReset()
            return

        debug("[ReverseLookupAndNotifier] Found website for reverse lookup")
        self.nextWebsiteNo = 1
        self.handleWebsite(self.websites[0])

    def _loadCountries(self):
        """(Re-)read the definition file if the cache is empty or out of date."""
        global reverselookupMtime
        reverselookupMtimeAct = os.stat(reverseLookupFileName)[8] # index 8 is st_mtime
        if countries and reverselookupMtimeAct <= reverselookupMtime:
            return

        debug("[ReverseLookupAndNotifier] (Re-)Reading %s\n" %reverseLookupFileName)
        dom = parse(reverseLookupFileName)
        # The cache is only touched after the file was parsed successfully;
        # clearing it drops countries that were removed from the file.
        reverselookupMtime = reverselookupMtimeAct
        countries.clear()
        for top in dom.getElementsByTagName("reverselookup"):
            for country in top.getElementsByTagName("country"):
                code = country.getAttribute("code").replace("+","00")
                countries[code] = country.getElementsByTagName("website")

    def _nationalNumber(self):
        """Return the number in national form ('0' + number without country code)."""
        if self.number[:2] == '00':
            return '0' + self.number[len(self.countrycode):]
        return self.number

    def handleWebsite(self, website):
        """Build the query URL for website and request the page.

        The url attribute is used as a %-format template after its
        $NUMBER / $AREACODE / $PFXAREACODE placeholders have been replaced,
        so a literal percent sign in the url has to be written as '%%'.
        A website whose url cannot be filled in is skipped in favour of the
        next one.
        """
        debug("[ReverseLookupAndNotifier] handleWebsite: " + website.getAttribute("name"))
        if self.number[:2] == "00":
            # strip only the leading country code, not later occurrences of it
            number = website.getAttribute("prefix") + self.number[len(self.countrycode):]
        else:
            number = self.number

        url = website.getAttribute("url")
        if (('$AREACODE' in url and not website.hasAttribute("areacode"))
                or ('$PFXAREACODE' in url and not website.hasAttribute("pfxareacode"))):
            # the length of the area code is unknown, the number cannot be split
            debug("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
            self._gotError("[ReverseLookupAndNotifier] handleWebsite: unusable url at %s" %website.getAttribute("name"))
            return

        if '$AREACODE' in url:
            areaCodeLen = int(website.getAttribute("areacode"))
            url = url.replace("$AREACODE","%(areacode)s").replace("$NUMBER","%(number)s")
            url = url %{ 'areacode':number[:areaCodeLen], 'number':number[areaCodeLen:] }
        elif '$PFXAREACODE' in url:
            areaCodeLen = int(website.getAttribute("pfxareacode"))
            url = url.replace("$PFXAREACODE","%(pfxareacode)s").replace("$NUMBER","%(number)s")
            url = url %{ 'pfxareacode':number[:areaCodeLen], 'number':number[areaCodeLen:] }
        elif '$NUMBER' in url:
            # named placeholder, so $NUMBER may occur more than once
            url = url.replace("$NUMBER","%(number)s") %{ 'number':number }
        else:
            debug("[ReverseLookupAndNotifier] handleWebsite: cannot handle websites with no $NUMBER in url")
            self._gotError("[ReverseLookupAndNotifier] handleWebsite: unusable url at %s" %website.getAttribute("name"))
            return

        debug("[ReverseLookupAndNotifier] Url to query: " + url)
        url = url.encode("UTF-8", "replace")
        self.currentWebsite = website
        getPage(url,
            agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
            ).addCallback(self._gotPage).addErrback(self._gotError)

    def _cleanName(self, text):
        """Strip markup leftovers from a scraped value and collapse blanks."""
        item = text.replace("%20"," ").replace("&nbsp;"," ").replace("</b>","").replace(","," ").replace('\n',' ').replace('\t',' ')
        item = html2unicode(item)
        # _gotPage hands in decoded text; byte strings are only decoded as a
        # safety net (ISO-8859-1 maps every byte, so this cannot fail).
        if not isinstance(item, type(u"")):
            item = item.decode('iso-8859-1')
        return re.sub(r' +', ' ', item).strip()

    def _decodePage(self, page):
        """Decode the fetched page using the charset of its Content-Type meta tag.

        Falls back to ISO-8859-1 if the page declares no charset or one that
        is unknown.  Non-breaking spaces are turned into plain blanks after
        decoding, so multi-byte sequences containing the byte 0xA0 survive.
        """
        charset = "ISO-8859-1"
        found = re.match(r'.*<meta http-equiv="Content-Type" content="(?:application/xhtml\+xml|text/html); charset=([^"]+)" />', page, re.S)
        if found:
            debug("[ReverseLookupAndNotifier] Charset: " + found.group(1))
            charset = found.group(1)
        else:
            debug("[ReverseLookupAndNotifier] Default Charset: iso-8859-1")
        try:
            text = page.decode(charset, "replace")
        except LookupError:
            debug("[ReverseLookupAndNotifier] Unknown charset, using iso-8859-1")
            text = page.decode("ISO-8859-1", "replace")
        return text.replace(u"\xa0", u" ")

    def _matchPattern(self, entry, which, page):
        """Return the text captured by group 1 of entry's <which> pattern.

        Returns None if entry has no such pattern, the pattern has no group
        or it does not match the page; an empty capture is returned as ''.
        """
        pat = self.getPattern(entry, which)
        if not pat:
            return None
        pat = ".*?" + pat
        debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( which, pat ))
        found = re.match(pat, page, re.S|re.M)
        if not found or not found.re.groups:
            return None
        debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( which, found.group(1) ))
        return found.group(1) or ''

    def _extractField(self, entry, which, page):
        """Return the cleaned value of field which, or '' if it was not found."""
        raw = self._matchPattern(entry, which, page)
        if not raw:
            return ''
        return self._cleanName(raw)

    def _gotPage(self, page):
        """Scrape the fetched page with the entries of the current website.

        The first entry yielding a name and a city wins: the caller record is
        built, reported through notifyAndReset() and True is returned.  If no
        entry matches, the next website is tried and False is returned.
        """
        debug("[ReverseLookupAndNotifier] _gotPage")
        page = self._decodePage(page)

        for entry in self.currentWebsite.getElementsByTagName("entry"):
            #
            # for the sites delivering fuzzy matches, we check against the returned number
            #
            returnedNumber = self._matchPattern(entry, "number", page)
            if returnedNumber is not None and self._nationalNumber() != normalizePhoneNumber(returnedNumber):
                debug("[ReverseLookupAndNotifier] _gotPage: got unequal number '''%s''' for '''%s'''" %(returnedNumber,self.number))
                continue

            # an entry has either <lastname> (and optionally <firstname>) or a combined <name>
            firstname = ''
            if self.getPattern(entry, "lastname"):
                name = self._extractField(entry, "lastname", page)
                if name:
                    firstname = self._extractField(entry, "firstname", page)
            else:
                name = self._extractField(entry, "name", page)
                if not name:
                    debug("[ReverseLookupAndNotifier] _gotPage: no name found, skipping")
                    continue
                firstNameFirst = entry.getElementsByTagName('name')[0].getAttribute('swapFirstAndLastName')
                if firstNameFirst == 'true': # that means, the name is of the form "firstname lastname"
                    found = re.match(r'(.*?)\s+(.*)', name)
                    if found:
                        firstname = found.group(1)
                        name = found.group(2)

            if not name:
                continue

            city = self._extractField(entry, "city", page)
            if not city:
                continue

            zipcode = self._extractField(entry, "zipcode", page)

            street = self._extractField(entry, "street", page)
            streetno = ''
            # split a trailing house number off the street
            found = re.match(r"^(.+) ([-\d]+)$", street, re.S)
            if found:
                street = found.group(1)
                streetno = found.group(2)

            self.caller = "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" %( name,firstname,street,streetno,zipcode,city )
            debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s" %(self.caller))

            self.notifyAndReset()
            return True

        self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
        return False

    def _gotError(self, error = ""):
        """Try the next website, or report an empty result if none is left.

        error -- what went wrong (message or failure object); only logged
        """
        debug("[ReverseLookupAndNotifier] _gotError - Error: %s" %error)
        if self.nextWebsiteNo >= len(self.websites):
            debug("[ReverseLookupAndNotifier] _gotError: I give up")
            self.notifyAndReset()
            return

        debug("[ReverseLookupAndNotifier] _gotError: try next website")
        self.nextWebsiteNo = self.nextWebsiteNo+1
        self.handleWebsite(self.websites[self.nextWebsiteNo-1])

    def getPattern(self, website, which):
        """Return the text of the first <which> element below website.

        Returns '' if there is no such element or it is empty.
        """
        pat1 = website.getElementsByTagName(which)
        if len(pat1) == 0:
            return ''
        if len(pat1) > 1:
            debug("[ReverseLookupAndNotifier] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
        if not pat1[0].childNodes:
            return ''
        return pat1[0].childNodes[0].data

    def notifyAndReset(self):
        """Report the result through outputFunction.

        The caller record is encoded in self.charset first; an empty record
        is reported as "".  When the module runs as a script, the reactor is
        stopped afterwards.
        """
        debug("[ReverseLookupAndNotifier] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
        if self.caller:
            try:
                self.caller = self.caller.encode(self.charset, 'replace')
            except (UnicodeError, LookupError):
                debug("[ReverseLookupAndNotifier] cannot encode?!?!")
            self.outputFunction(self.number, self.caller)
        else:
            self.outputFunction(self.number, "")
        if __name__ == '__main__':
            # The lookup may end inside __init__, i.e. before reactor.run()
            # was called; callWhenRunning stops the reactor right away if it
            # is running and as soon as it has started otherwise.
            reactor.callWhenRunning(reactor.stop) #@UndefinedVariable
413
if __name__ == '__main__':
    # Command line use:
    #   nrzuname.py <number>            print the raw caller record, with debug output
    #   nrzuname.py <number> <charset>  print the formatted caller in <charset>, no debug output
    if len(sys.argv) == 2:
        ReverseLookupAndNotifier(sys.argv[1], simpleout)
        reactor.run() #@UndefinedVariable
    elif len(sys.argv) == 3:
        setDebug(False)
        ReverseLookupAndNotifier(sys.argv[1], out, sys.argv[2])
        reactor.run() #@UndefinedVariable
    else:
        # neither form matches: say so instead of silently doing nothing
        sys.stderr.write("usage: %s number [charset]\n" % sys.argv[0])
        sys.exit(2)