- FIX: commas in FBF-Fonbuch names
[vuplus_dvbapp-plugin] / fritzcall / src / nrzuname.py
1 #!/usr/bin/python
2 # -*- coding: UTF-8 -*-
3 # $Id$
4 # $Author$
5 # $Revision$
6 # $Date$
7
8 import re, sys, os
9 from xml.dom.minidom import parse
10 from twisted.web.client import getPage #@UnresolvedImport
11 from twisted.internet import reactor #@UnresolvedImport
12
# Use the debug function of the enclosing package when there is one.
# Otherwise fall back to a simple print-based debug function that can be
# switched on and off with setDebug().
try:
        from . import debug
except (ValueError, ImportError):
        # Python 2 raises ValueError for a relative import outside a package;
        # ImportError covers a package without a debug function (and
        # interpreters that report the non-package case as ImportError).
        debugVal = True

        def setDebug(what):
                """Switch the fallback debug output on (true value) or off."""
                global debugVal
                debugVal = what

        def debug(str):
                """Print the message if the fallback debug output is switched on."""
                if debugVal:
                        print(str)
else:
        def setDebug(what):
                """No-op while the package-level debug function is in use."""
                pass
25
26 import htmlentitydefs
def html2unicode(in_html):
        """
        Replace the HTML character references in in_html by their characters.

        Handled are hexadecimal references (&#xE4;), decimal references
        (&#228;) and named entities known to htmlentitydefs (&auml;).
        References that cannot be resolved (unknown name, code point outside
        the range of unichr) are left in the text unchanged.

        The text is scanned only once, so the result of one replacement is
        never interpreted as a reference again (e.g. "&amp;#228;" yields
        "&#228;").

        Returns in_html itself if it contains no reference, otherwise the
        decoded text.
        """
        def decodeEntity(match):
                # group(0) is the complete reference, group(1) the part
                # between '&' and ';'
                entity = match.group(0)
                body = match.group(1)
                try:
                        if body[:2] in ('#x', '#X'):
                                codepoint = int(body[2:], 16)
                        elif body[0] == '#':
                                codepoint = int(body[1:])
                        else:
                                codepoint = htmlentitydefs.name2codepoint[body]
                        replacement = unichr(codepoint)
                except KeyError:
                        debug("[Callhtml2utf8] KeyError " + entity + "/" + body)
                        return entity
                except (ValueError, OverflowError):
                        # code point too large for unichr
                        debug("[nrzuname] html2utf8: ValueError " + entity + "/" + body)
                        return entity
                debug("[nrzuname] html2utf8: replace %s with %s" %(repr(entity), str(codepoint)))
                return replacement

        return re.sub(r'&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);', decodeEntity, in_html)
74
def normalizePhoneNumber(intNo):
        """
        Reduce a phone number as found in a web page to its digits.

        A leading '+' is replaced by '00'; brackets, blanks, slashes and
        dashes are removed. Returned is the first run of digits found in the
        result, or '0' if there is none.
        """
        plusPrefixed = re.match(r'^\+(.*)', intNo)
        if plusPrefixed:
                intNo = '00' + plusPrefixed.group(1)
        for separator in '() /-':
                intNo = intNo.replace(separator, '')
        digits = re.match(r'.*?([0-9]+)', intNo)
        return digits.group(1) if digits else '0'
85
def out(number, caller):
        """
        Print the parts of a caller string as one "NA: ..;VN: ..;..." line.

        caller is expected to consist of parts separated by ", ": the name
        (split at the first whitespace into name and first name), the street
        with an optional house number in front or at the end, and the zip code
        followed by the city. Parts that are missing are printed as empty
        fields; nothing is printed for an empty caller.

        number is only used for the debug output.
        """
        debug("[nrzuname] out: %s: %s" %(number, caller))
        if not caller:
                return
        name = vorname = strasse = hnr = plz = ort = ""
        lines = caller.split(', ')

        found = re.match(r"(.+?)\s+(.+)", lines[0])
        if found:
                name = found.group(1)
                vorname = found.group(2)
        else:
                name = lines[0]

        # the street is optional: a caller string may consist of a name only
        if len(lines) > 1:
                found = re.match(r"^(.+) ([-\d]+)$", lines[1], re.S)
                if found:
                        strasse = found.group(1)
                        hnr = found.group(2)
                else:
                        found = re.match(r"^(\d+) (.+)$", lines[1], re.S)
                        if found:
                                strasse = found.group(2)
                                hnr = found.group(1)
                        else:
                                strasse = lines[1]

        # the first remaining part of the form "<zip> <city>" wins; without such
        # a part the first remaining one (if any) is taken as the city
        remaining = lines[2:]
        for line in remaining:
                found = re.match(r"(\S+)\s+(.+)", line, re.S)
                if found:
                        plz = found.group(1)
                        ort = found.group(2)
                        break
        else:
                if remaining:
                        ort = remaining[0].strip()
        print("NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" %( name,vorname,strasse,hnr,plz,ort ))
121
def simpleout(number, caller):
        """Output function printing just the caller string; number is ignored."""
        print(caller)
124
# Location of the XML file describing the reverse lookup websites: inside the
# plugin directory if the Tools.Directories module can be imported, otherwise
# a file in the current directory.
try:
        from Tools.Directories import resolveFilename, SCOPE_PLUGINS
        reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/FritzCall/reverselookup.xml")
except ImportError:
        reverseLookupFileName = "reverselookup.xml"

# modification time of the XML file at the time it was last read
reverselookupMtime = 0
# cache: country code (in "00.." form) -> <website> elements of that country
countries = {}
133
134 class ReverseLookupAndNotifier:
135         def __init__(self, number, outputFunction=out, charset="cp1252", countrycode = "0049"):
136                 debug("[ReverseLookupAndNotifier] reverse Lookup for %s!" %number)
137                 self.number = number
138                 self.outputFunction = outputFunction
139                 self.caller = ""
140                 self.currentWebsite = None
141                 self.nextWebsiteNo = 0
142 #===============================================================================
143 # sorry does not work at all
144 #               if not charset:
145 #                       charset = sys.getdefaultencoding()
146 #                       debug("[ReverseLookupAndNotifier] set charset from system: %s!" %charset)
147 #===============================================================================
148                 self.charset = charset
149
150                 global reverselookupMtime
151                 reverselookupMtimeAct = os.stat(reverseLookupFileName)[8]
152                 if not countries or reverselookupMtimeAct > reverselookupMtime:
153                         debug("[ReverseLookupAndNotifier] (Re-)Reading %s\n" %reverseLookupFileName)
154                         reverselookupMtime = reverselookupMtimeAct
155                         dom = parse(reverseLookupFileName)
156                         for top in dom.getElementsByTagName("reverselookup"):
157                                 for country in top.getElementsByTagName("country"):
158                                         code = country.getAttribute("code").replace("+","00")
159                                         countries[code] = country.getElementsByTagName("website")
160
161                 self.countrycode = countrycode
162
163                 if number[0] != "0":
164                         # self.caller = _("UNKNOWN")
165                         self.notifyAndReset()
166                         return
167
168                 if self.number[:2] == "00":
169                         if countries.has_key(self.number[:3]):   #      e.g. USA
170                                 self.countrycode = self.number[:3]
171                         elif countries.has_key(self.number[:4]):
172                                 self.countrycode = self.number[:4]
173                         elif countries.has_key(self.number[:5]):
174                                 self.countrycode = self.number[:5]
175                         else:
176                                 debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
177                                 # self.caller = _("UNKNOWN")
178                                 self.notifyAndReset()
179                                 return
180
181                 if countries.has_key(self.countrycode):
182                         debug("[ReverseLookupAndNotifier] Found website for reverse lookup")
183                         self.websites = countries[self.countrycode]
184                         self.nextWebsiteNo = 1
185                         self.handleWebsite(self.websites[0])
186                 else:
187                         debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
188                         # self.caller = _("UNKNOWN")
189                         self.notifyAndReset()
190                         return
191
192         def handleWebsite(self, website):
193                 debug("[ReverseLookupAndNotifier] handleWebsite: " + website.getAttribute("name"))
194                 if self.number[:2] == "00":
195                         number = website.getAttribute("prefix") + self.number.replace(self.countrycode,"")
196                 else:
197                         number = self.number
198
199                 url = website.getAttribute("url")
200                 if re.search('$AREACODE',url) or re.search('$PFXAREACODE',url):
201                         debug("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
202                         # self.caller = _("UNKNOWN")
203                         self.notifyAndReset()
204                         return
205                 #
206                 # Apparently, there is no attribute called (pfx)areacode anymore
207                 # So, this below will not work.
208                 #
209                 if re.search('\\$AREACODE',url) and website.hasAttribute("areacode"):
210                         areaCodeLen = int(website.getAttribute("areacode"))
211                         url = url.replace("$AREACODE","%(areacode)s").replace("$NUMBER","%(number)s")
212                         url = url %{ 'areacode':number[:areaCodeLen], 'number':number[areaCodeLen:] }
213                 elif re.search('\\$PFXAREACODE',url) and website.hasAttribute("pfxareacode"):
214                         areaCodeLen = int(website.getAttribute("pfxareacode"))
215                         url = url.replace("$PFXAREACODE","%(pfxareacode)s").replace("$NUMBER","%(number)s")
216                         url = url %{ 'pfxareacode':number[:areaCodeLen], 'number':number[areaCodeLen:] }
217                 elif re.search('\\$NUMBER',url): 
218                         url = url.replace("$NUMBER","%s") %number
219                 else:
220                         debug("[ReverseLookupAndNotifier] handleWebsite: cannot handle websites with no $NUMBER in url")
221                         # self.caller = _("UNKNOWN")
222                         self.notifyAndReset()
223                         return
224                 debug("[ReverseLookupAndNotifier] Url to query: " + url)
225                 url = url.encode("UTF-8", "replace")
226                 self.currentWebsite = website
227                 getPage(url,
228                         agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
229                         ).addCallback(self._gotPage).addErrback(self._gotError)
230
231
232         def _gotPage(self, page):
233                 def cleanName(text):
234                         try:
235                                 item = text.replace("&nbsp;"," ").replace("</b>","").replace(","," ")
236                                 item = html2unicode(item).decode('iso-8859-1')
237                                 # item = html2unicode(item)
238                                 newitem = item.replace("  ", " ")
239                                 while newitem != item:
240                                         item = newitem
241                                         newitem = item.replace("  ", " ")
242                                 return newitem.strip()
243                         except:
244                                 return text
245         
246                 debug("[ReverseLookupAndNotifier] _gotPage")
247                 found = re.match('.*content=".*?charset=([^"]+)"',page,re.S)
248                 if found:
249                         debug("[ReverseLookupAndNotifier] Charset: " + found.group(1))
250                         page = page.replace("\xa0"," ").decode(found.group(1), "replace")
251                 else:
252                         page = page.replace("\xa0"," ").decode("ISO-8859-1", "replace")
253
254                 for entry in self.currentWebsite.getElementsByTagName("entry"):
255                         #
256                         # for the sites delivering fuzzy matches, we check against the returned number
257                         #
258                         pat = self.getPattern(entry, "number")
259                         if pat:
260                                 pat = ".*?" + pat
261                                 debug("[ReverseLookupAndNotifier] _gotPage: look for number with '''%s'''" %( pat ))
262                                 found = re.match(pat, page, re.S|re.M)
263                                 if found:
264                                         if self.number[:2] == '00':
265                                                 number = '0' + self.number[4:]
266                                         else:
267                                                 number = self.number
268                                         if number != normalizePhoneNumber(found.group(1)):
269                                                 debug("[ReverseLookupAndNotifier] _gotPage: got unequal number '''%s''' for '''%s'''" %(found.group(1),self.number))
270                                                 continue
271                         details = []
272                         
273                         # look for <firstname> and <lastname> match, if not there look for <name>, if not there break
274                         lastname = ''
275                         firstname = ''
276                         pat = self.getPattern(entry, "lastname")
277                         if pat:
278                                 pat = ".*?" + pat 
279                                 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "lastname", pat ))
280                                 found = re.match(pat, page, re.S|re.M)
281                                 if found:
282                                         debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "lastname", found.group(1)))
283                                         lastname = cleanName(found.group(1))
284
285                                         pat = self.getPattern(entry, "firstname")
286                                         if pat:
287                                                 pat = ".*?" + pat
288                                                 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "firstname", pat ))
289                                                 found = re.match(pat, page, re.S|re.M)
290                                                 if found:
291                                                         debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "firstname", found.group(1)))
292                                                 firstname = cleanName(found.group(1))
293
294                                         if firstname:
295                                                 details.append(lastname + ' ' + firstname)
296                                         else:
297                                                 details.append(lastname)
298                         else:
299                                 pat = ".*?" + self.getPattern(entry, "name")
300                                 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "name", pat ))
301                                 found = re.match(pat, page, re.S|re.M)
302                                 if found:
303                                         debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "name", found.group(1)))
304                                         item = cleanName(found.group(1))
305                                         debug("[ReverseLookupAndNotifier] _gotPage: add to details: " + item)
306                                         details.append(item)
307                                 else:
308                                         debug("[ReverseLookupAndNotifier] _gotPage: no name found, skipping")
309                                         continue
310
311                         for what in ["street", "city", "zipcode"]:
312                                 pat = ".*?" + self.getPattern(entry, what)
313                                 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( what, pat ))
314                                 found = re.match(pat, page, re.S|re.M)
315                                 if found:
316                                         debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( what, found.group(1)))
317                                         item = cleanName(found.group(1))
318                                         debug("[ReverseLookupAndNotifier] _gotPage: add to details: " + item)
319                                         details.append(item.strip())
320                                 else:
321                                         break
322
323                         if len(details) != 4:
324                                 continue
325                         else:
326                                 name = details[0]
327                                 address =  details[1] + ", " + details[3] + " " + details[2]
328                                 debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s\nAddress: %s" %(name, address))
329                                 self.caller = "%s, %s" %(name, address)
330                                 # if self.number != 0 and config.plugins.Call.addcallers.value and self.event == "RING":
331                                         # phonebook.add(self.number, self.caller)
332
333                                 self.notifyAndReset()
334                                 return True
335                 else:
336                         self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
337                         
338         def _gotError(self, error = ""):
339                 debug("[ReverseLookupAndNotifier] _gotError - Error: %s" %error)
340                 if self.nextWebsiteNo >= len(self.websites):
341                         debug("[ReverseLookupAndNotifier] _gotError: I give up")
342                         # self.caller = _("UNKNOWN")
343                         self.notifyAndReset()
344                         return
345                 else:
346                         debug("[ReverseLookupAndNotifier] _gotError: try next website")
347                         self.nextWebsiteNo = self.nextWebsiteNo+1
348                         self.handleWebsite(self.websites[self.nextWebsiteNo-1])
349
350         def getPattern(self, website, which):
351                 pat1 = website.getElementsByTagName(which)
352                 if len(pat1) == 0:
353                         return ''
354                 else:
355                         if len(pat1) > 1:
356                                 debug("[ReverseLookupAndNotifier] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
357                         return pat1[0].childNodes[0].data
358
359         def notifyAndReset(self):
360                 debug("[ReverseLookupAndNotifier] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
361                 # debug("1: " + repr(self.caller))
362                 if self.caller:
363                         try:
364                                 # debug("2: " + repr(self.caller))
365                                 self.caller = self.caller.encode(self.charset)
366                                 # debug("3: " + repr(self.caller))
367                         except:
368                                 debug("[ReverseLookupAndNotifier] cannot encode?!?!")
369                                 pass
370                         # self.caller = unicode(self.caller)
371                         # debug("4: " + repr(self.caller))
372                         self.outputFunction(self.number, self.caller)
373                 else:
374                         self.outputFunction(self.number, "")
375                 if __name__ == '__main__':
376                         reactor.stop() #@UndefinedVariable
377
if __name__ == '__main__':
        # Usage: nrzuname.py <number>            - debug output, result printed by out
        #        nrzuname.py <number> <charset>  - no debug output, result printed by simpleout
        cwd = os.path.dirname(sys.argv[0])
        arguments = sys.argv[1:]
        if len(arguments) in (1, 2):
                if len(arguments) == 2:
                        setDebug(False)
                        ReverseLookupAndNotifier(arguments[0], simpleout, arguments[1])
                else:
                        ReverseLookupAndNotifier(arguments[0])
                reactor.run() #@UndefinedVariable