UPD: plugin: better size calculation for action screen after reverse lookup
[vuplus_dvbapp-plugin] / fritzcall / src / nrzuname.py
1 #!/usr/bin/python
2 # -*- coding: UTF-8 -*-
3 # $Id$
4 # $Author$
5 # $Revision$
6 # $Date$
7
8 import re, sys, os, traceback
9 from xml.dom.minidom import parse
10 from twisted.web.client import getPage #@UnresolvedImport
11 from twisted.internet import reactor #@UnresolvedImport
12
# Use debug() of the enclosing package when this module is imported as part of it;
# otherwise (e.g. run as a stand-alone script) provide a switchable, print based debug().
try:
        from . import debug #@UnresolvedImport
        def setDebug(what):
                """No-op counterpart of the stand-alone setDebug(); the imported debug() is used as is."""
                pass
except (ValueError, ImportError):
        # Python 2 raises ValueError for a relative import outside of a package;
        # newer Python versions, or a package that offers no `debug`, raise ImportError.
        debugVal = True
        def setDebug(what):
                """Switch the output of the stand-alone debug() on (true value) or off (false value)."""
                global debugVal
                debugVal = what
        def debug(str):
                """Print the message `str` if debug output is switched on."""
                if debugVal:
                        # single argument in parentheses: same output as the print statement
                        print(str)
25
26 import htmlentitydefs
def html2unicode(in_html):
        """Replace the HTML character references in in_html by the characters they stand for.

        Hexadecimal (&#xE4;), decimal (&#228;) and named (&auml;) references of any
        length are handled in one pass, so text produced by a replacement is never
        interpreted a second time.  References that cannot be resolved are reported
        via debug() and stay in the text unchanged.

        in_html: the text to convert
                 NOTE(review): the caller in this file passes unicode - a byte string
                 with non-ASCII data cannot be combined with the replacements and is
                 returned unchanged.
        Returns the converted text.
        """
        entitymask = re.compile(r'&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);')

        def resolve(match):
                # map one reference to its character, or keep it if that is not possible
                reference = match.group(1)
                try:
                        if reference[:2].lower() == '#x':
                                codepoint = int(reference[2:], 16)
                        elif reference[0] == '#':
                                codepoint = int(reference[1:])
                        else:
                                codepoint = htmlentitydefs.name2codepoint[str(reference)]
                        replacement = unichr(codepoint)
                except KeyError:
                        # unknown entity name
                        debug("[Callhtml2utf8] KeyError " + match.group(0) + "/" + reference)
                        return match.group(0)
                except (ValueError, OverflowError):
                        # number outside of the range unichr() can handle
                        debug("[nrzuname] html2utf8: ValueError " + match.group(0) + "/" + reference)
                        return match.group(0)
                debug("[nrzuname] html2utf8: replace %s with %s" %(repr(match.group(0)), str(codepoint)))
                return replacement

        try:
                return entitymask.sub(resolve, in_html)
        except UnicodeDecodeError:
                debug("[nrzuname] html2utf8: cannot convert byte string with non-ASCII characters")
                return in_html
74
def normalizePhoneNumber(intNo):
        """Reduce a formatted phone number to its first uninterrupted run of digits.

        A leading "+" is turned into "00" and the separators "(", ")", " ", "/"
        and "-" are removed before the digits are picked.

        intNo: the phone number as text
        Returns the digits as a string, or '0' if intNo does not contain any.
        """
        plus = re.match(r'^\+(.*)', intNo)
        if plus is not None:
                intNo = '00' + plus.group(1)
        for separator in ('(', ')', ' ', '/', '-'):
                intNo = intNo.replace(separator, '')
        digits = re.match(r'.*?([0-9]+)', intNo)
        if digits is None:
                return '0'
        return digits.group(1)
85
def out(number, caller):
        """Split a caller string into its parts and print them on one line.

        caller is expected as "name[, street], zipcode city" or "name, city", the
        parts being separated by ", ".  Nothing is printed for an empty caller.

        number: the phone number, only used for the debug output
        caller: the text found by the reverse lookup
        """
        debug("[nrzuname] out: %s: %s" %(number, caller))
        if not caller:
                return

        vorname = strasse = hnr = plz = ort = ""
        lines = caller.split(', ')

        # first part: name, optionally followed by the first name
        nameParts = re.match(r"(.+?)\s+(.+)", lines[0])
        if nameParts:
                name, vorname = nameParts.groups()
        else:
                name = lines[0]

        if len(lines) == 2:
                # only two parts, the second must be the city...
                ort = lines[1].strip()
        elif len(lines) > 2:
                # street and city: the house number may follow or precede the street
                street = lines[1]
                trailing = re.match(r"^(.+) ([-\d]+)$", street, re.S)
                if trailing:
                        strasse, hnr = trailing.groups()
                else:
                        leading = re.match(r"^(\d+) (.+)$", street, re.S)
                        if leading:
                                hnr, strasse = leading.groups()
                        else:
                                strasse = street
                # the first remaining part starting with a word that contains a digit is "zipcode city"
                for line in lines[2:]:
                        cityParts = re.match(r"(\S+)\s+(.+)", line, re.S)
                        if cityParts and re.search(r'\d', cityParts.group(1)):
                                plz, ort = cityParts.groups()
                                break

        print("NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" %( name,vorname,strasse,hnr,plz,ort ))
121
def simpleout(number, caller):
        """Print the caller text as it is; number is accepted like in out() but not used."""
        print(caller)
124
# Location of reverselookup.xml: the plain file name unless Tools.Directories
# can be imported and resolves the path below the plugin directory.
reverseLookupFileName = "reverselookup.xml"
try:
        from Tools.Directories import resolveFilename, SCOPE_PLUGINS
        reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/FritzCall/reverselookup.xml")
except ImportError:
        pass

# country code -> list of <website> elements read from reverselookup.xml
countries = {}
# modification time of reverselookup.xml at the time it was read last
reverselookupMtime = 0
133
class ReverseLookupAndNotifier:
        """Reverse lookup of a phone number on the web sites listed in reverselookup.xml.

        Creating an instance starts the lookup.  The result is delivered by calling
        outputFunction(number, caller); caller is "" if nothing was found.
        """

        def __init__(self, number, outputFunction=out, charset="cp1252", countrycode = "0049"):
                """Normalize the number, select the web sites of its country and query the first one.

                number:         number to look up, e.g. "030123456", "+4930123456" or "004930123456"
                outputFunction: callable taking (number, caller), called with the result
                charset:        encoding applied to the caller text before it is handed over
                countrycode:    country code in "00" notation assumed for numbers without one
                """
                debug("[ReverseLookupAndNotifier] reverse Lookup for %s!" %number)
                self.number = number
                self.outputFunction = outputFunction
                self.caller = ""
                self.currentWebsite = None
                self.nextWebsiteNo = 0
                self.websites = []
                self.charset = charset
                self.countrycode = countrycode

                self._readCountries()

                if self.number.startswith('+'):
                        self.number = '00' + self.number[1:]

                if self.number.startswith(countrycode):
                        self.number = '0' + self.number[len(countrycode):]

                # Check the normalized number: the raw argument still starts with "+"
                # for the international notation and may be empty.
                if not self.number.startswith("0"):
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return

                if self.number.startswith("00"):
                        # country codes including the "00" have 3 (e.g. USA), 4 or 5 characters
                        for codeLen in (3, 4, 5):
                                if self.number[:codeLen] in countries:
                                        self.countrycode = self.number[:codeLen]
                                        break
                        else:
                                debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
                                # self.caller = _("UNKNOWN")
                                self.notifyAndReset()
                                return

                self.websites = countries.get(self.countrycode) or []
                if self.websites:
                        debug("[ReverseLookupAndNotifier] Found website for reverse lookup")
                        self.nextWebsiteNo = 1
                        self.handleWebsite(self.websites[0])
                else:
                        # unknown country, or a country without any web site
                        debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()

        def _readCountries(self):
                """(Re-)read reverselookup.xml into `countries` if that is empty or the file is newer."""
                global reverselookupMtime
                reverselookupMtimeAct = os.stat(reverseLookupFileName).st_mtime
                if countries and reverselookupMtimeAct <= reverselookupMtime:
                        return
                debug("[ReverseLookupAndNotifier] (Re-)Reading %s\n" %reverseLookupFileName)
                dom = parse(reverseLookupFileName)
                # forget countries which are no longer in the file
                countries.clear()
                for top in dom.getElementsByTagName("reverselookup"):
                        for country in top.getElementsByTagName("country"):
                                code = country.getAttribute("code").replace("+","00")
                                countries[code] = country.getElementsByTagName("website")
                # remember the time only after the file was read successfully
                reverselookupMtime = reverselookupMtimeAct

        def handleWebsite(self, website):
                """Build the query url of website for the number and start fetching the page.

                The page is passed on to _gotPage(), a failure to _gotError().
                website: a <website> element of reverselookup.xml
                """
                debug("[ReverseLookupAndNotifier] handleWebsite: " + website.getAttribute("name"))
                if self.number[:2] == "00":
                        # cut off the leading country code only; the same digits may show up again later in the number
                        number = website.getAttribute("prefix") + self.number[len(self.countrycode):]
                else:
                        number = self.number

                # The placeholders are replaced directly: %-formatting fails for urls
                # containing "%" or more than one $NUMBER.
                url = website.getAttribute("url")
                if "$AREACODE" in url and website.hasAttribute("areacode"):
                        areaCodeLen = int(website.getAttribute("areacode"))
                        url = url.replace("$AREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
                elif "$PFXAREACODE" in url and website.hasAttribute("pfxareacode"):
                        areaCodeLen = int(website.getAttribute("pfxareacode"))
                        url = url.replace("$PFXAREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
                elif "$AREACODE" in url or "$PFXAREACODE" in url:
                        # The length of the area code is not known for this web site. Querying the
                        # url with the placeholder left in cannot succeed, so go on with the next one.
                        self._gotError("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
                        return
                elif "$NUMBER" in url:
                        url = url.replace("$NUMBER", number)
                else:
                        debug("[ReverseLookupAndNotifier] handleWebsite: cannot handle websites with no $NUMBER in url")
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return
                debug("[ReverseLookupAndNotifier] Url to query: " + url)
                url = url.encode("UTF-8", "replace")
                self.currentWebsite = website
                getPage(url,
                        agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
                        ).addCallback(self._gotPage).addErrback(self._gotError)

        def _cleanName(self, text):
                """Return text without markup remnants, with entities resolved and whitespace collapsed."""
                item = text.replace("&nbsp;"," ").replace("</b>","").replace(","," ").replace('\n',' ').replace('\t',' ')
                try:
                        # text is taken from the already decoded page; decoding it once
                        # more failed for every name with non-ASCII characters
                        item = html2unicode(item)
                except Exception:
                        debug("[ReverseLookupAndNotifier] cleanName: " + traceback.format_exc())
                        return item
                while "  " in item:
                        item = item.replace("  ", " ")
                return item.strip()

        def _decodePage(self, page):
                """Decode the fetched page with the charset it declares, with ISO-8859-1 otherwise."""
                charset = "ISO-8859-1"
                found = re.match('.*content=".*?charset=([^"]+)"',page, re.S)
                if found:
                        charset = found.group(1)
                        debug("[ReverseLookupAndNotifier] Charset: " + charset)
                else:
                        debug("[ReverseLookupAndNotifier] Charset: iso-8859-1")
                try:
                        page = page.decode(charset, "replace")
                except LookupError:
                        debug("[ReverseLookupAndNotifier] unknown charset, using iso-8859-1")
                        page = page.decode("ISO-8859-1", "replace")
                # Replace no-break spaces after decoding: in the undecoded page the byte
                # 0xA0 may be part of a multi-byte UTF-8 character.
                return page.replace(u"\xa0", u" ")

        def _findField(self, entry, which, page):
                """Return what the <which> pattern of entry captures in page, or None.

                None is returned if entry has no such pattern, the pattern does not match
                or its first group did not take part in the match.
                """
                pat = self.getPattern(entry, which)
                if not pat:
                        return None
                pat = ".*?" + pat
                debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( which, pat ))
                try:
                        found = re.match(pat, page, re.S|re.M)
                        value = found.group(1) if found else None
                except (re.error, IndexError):
                        # invalid pattern or pattern without a group
                        debug("[ReverseLookupAndNotifier] _gotPage: unusable pattern for '''%s''': %s" %( which, traceback.format_exc() ))
                        return None
                if value is not None:
                        debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( which, value ))
                return value

        def _findName(self, entry, page):
                """Return the name found with entry's patterns in page, '' if there is none.

                If entry has a <lastname> pattern, the result is "lastname firstname" (or the
                last name alone); otherwise the <name> pattern is used.
                """
                if self.getPattern(entry, "lastname"):
                        lastname = self._findField(entry, "lastname", page)
                        if lastname is None:
                                return ''
                        name = self._cleanName(lastname)
                        firstname = self._findField(entry, "firstname", page)
                        if firstname is not None:
                                firstname = self._cleanName(firstname)
                                if firstname:
                                        name = name + ' ' + firstname
                        return name

                found = self._findField(entry, "name", page)
                if found is None:
                        return ''
                item = self._cleanName(found)
                debug("[ReverseLookupAndNotifier] _gotPage: name: " + item)
                return item

        def _findAddress(self, entry, page):
                """Return "street, zipcode city" as far as found in page; '' if there is no city match."""
                city = self._findField(entry, "city", page)
                if city is None:
                        return ""
                address = self._cleanName(city).strip()
                debug("[ReverseLookupAndNotifier] _gotPage: city: " + address)

                zipcode = self._findField(entry, "zipcode", page)
                if zipcode:
                        item = self._cleanName(zipcode)
                        debug("[ReverseLookupAndNotifier] _gotPage: zipcode: " + item)
                        address = item.strip() + ' ' + address

                street = self._findField(entry, "street", page)
                if street:
                        item = self._cleanName(street)
                        debug("[ReverseLookupAndNotifier] _gotPage: street: " + item)
                        address = item.strip() + ', ' + address
                return address

        def _gotPage(self, page):
                """Evaluate the page fetched from the current web site.

                The first <entry> of the web site that yields a name is reported via
                notifyAndReset() and True is returned.  If no entry does, the next web
                site is tried via _gotError() and False is returned.
                """
                debug("[ReverseLookupAndNotifier] _gotPage")
                page = self._decodePage(page)

                for entry in self.currentWebsite.getElementsByTagName("entry"):
                        #
                        # for the sites delivering fuzzy matches, we check against the returned number
                        #
                        foundNumber = self._findField(entry, "number", page)
                        if foundNumber is not None:
                                if self.number[:2] == '00':
                                        # country codes are not always four characters long
                                        number = '0' + self.number[len(self.countrycode):]
                                else:
                                        number = self.number
                                if number != normalizePhoneNumber(foundNumber):
                                        debug("[ReverseLookupAndNotifier] _gotPage: got unequal number '''%s''' for '''%s'''" %(foundNumber,self.number))
                                        continue

                        name = self._findName(entry, page)
                        if not name:
                                debug("[ReverseLookupAndNotifier] _gotPage: no name found, skipping")
                                continue

                        address = self._findAddress(entry, page)
                        if address:
                                debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s\nAddress: %s" %(name, address))
                                self.caller = "%s, %s" %(name, address)
                        else:
                                debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s" %(name))
                                self.caller = name

                        self.notifyAndReset()
                        return True

                self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
                return False

        def _gotError(self, error = ""):
                """Try the next web site of the country, or report the empty result if none is left.

                error: description of what went wrong, only used for the debug output
                """
                debug("[ReverseLookupAndNotifier] _gotError - Error: %s" %error)
                if self.nextWebsiteNo >= len(self.websites):
                        debug("[ReverseLookupAndNotifier] _gotError: I give up")
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return
                debug("[ReverseLookupAndNotifier] _gotError: try next website")
                self.nextWebsiteNo = self.nextWebsiteNo+1
                self.handleWebsite(self.websites[self.nextWebsiteNo-1])

        def getPattern(self, website, which):
                """Return the text of the first <which> element below website.

                website: the element to search in
                which:   tag name, e.g. "name" or "city"
                Returns '' if there is no such element or the element is empty.
                """
                pat1 = website.getElementsByTagName(which)
                if len(pat1) == 0:
                        return ''
                if len(pat1) > 1:
                        debug("[ReverseLookupAndNotifier] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
                first = pat1[0].firstChild
                if first is None:
                        # element without content
                        return ''
                return first.data

        def notifyAndReset(self):
                """Pass number and caller to the output function; stop the reactor if run as a script.

                The caller text is encoded with self.charset first; if that fails it is
                passed on as it is.
                """
                debug("[ReverseLookupAndNotifier] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
                if self.caller:
                        try:
                                self.caller = self.caller.encode(self.charset, 'replace')
                        except Exception:
                                debug("[ReverseLookupAndNotifier] cannot encode?!?!")
                        self.outputFunction(self.number, self.caller)
                else:
                        self.outputFunction(self.number, "")
                if __name__ == '__main__':
                        # This may be reached from the constructor, i.e. before reactor.run().
                        # Stopping a reactor that is not running raises an error, so let the
                        # reactor stop itself as soon as it is running.
                        reactor.callWhenRunning(reactor.stop) #@UndefinedVariable
401
if __name__ == '__main__':
        # Command line use:
        #   nrzuname.py number          - lookup with debug output, result printed by out()
        #   nrzuname.py number charset  - lookup without debug output, caller printed by
        #                                 simpleout() encoded in charset
        if len(sys.argv) == 2:
                ReverseLookupAndNotifier(sys.argv[1])
                reactor.run() #@UndefinedVariable
        elif len(sys.argv) == 3:
                setDebug(False)
                ReverseLookupAndNotifier(sys.argv[1], simpleout, sys.argv[2])
                reactor.run() #@UndefinedVariable
        else:
                # tell what is expected instead of silently doing nothing
                sys.stderr.write("usage: %s number [charset]\n" %sys.argv[0])
                sys.exit(1)