2 # -*- coding: UTF-8 -*-
8 import re, sys, os, traceback
9 from xml.dom.minidom import parse
10 from twisted.web.client import getPage #@UnresolvedImport
11 from twisted.internet import reactor #@UnresolvedImport
14 from . import debug #@UnresolvedImport
def html2unicode(in_html):
    """Replace HTML/WML character references in *in_html* by their characters.

    Hexadecimal (``&#xE4;``), decimal (``&#228;``) and named (``&auml;``)
    references are resolved in a single pass, so text produced by one
    replacement is never decoded a second time.  Hexadecimal references may
    have any number of digits and entity names may have any length and may
    contain digits (``&eacute;``, ``&frac12;``); the earlier masks accepted
    only two hex digits and at most five non-digit name characters.

    References that cannot be resolved (unknown name, code point out of
    range) are left untouched and reported through ``debug``.  Successful
    replacements are not logged.

    :param in_html: text that may contain character references
    :return: the text with every resolvable reference replaced
    """
    # Local imports/aliases keep the function usable on Python 2 and 3
    # without touching the module's import block.
    try:
        from html.entities import name2codepoint  # Python 3
    except ImportError:
        from htmlentitydefs import name2codepoint  # Python 2
    try:
        to_char = unichr  # Python 2: chr() only covers 0..255
    except NameError:
        to_char = chr

    def resolve(match):
        """Return the character for one matched reference, or the reference itself."""
        reference = match.group(0)  # e.g. "&#xE4;"
        body = match.group(1)       # e.g. "#xE4", "#228" or "auml"
        try:
            if body[:2] in ('#x', '#X'):
                codepoint = int(body[2:], 16)
            elif body[0] == '#':
                codepoint = int(body[1:])
            else:
                codepoint = name2codepoint[body]
            return to_char(codepoint)
        except KeyError:
            debug("[Callhtml2utf8] KeyError " + reference + "/" + body)
        except (ValueError, OverflowError):
            # code point outside the range the interpreter can represent
            debug("[nrzuname] html2utf8: ValueError " + reference + "/" + body)
        return reference

    return re.sub(r'&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);', resolve, in_html)
def normalizePhoneNumber(intNo):
    """Reduce a phone number written for humans to a bare digit string.

    A leading ``+`` is turned into the international prefix ``00``; round
    brackets, blanks, slashes and hyphens are removed; the first run of
    digits in what remains is returned.  The digits are searched for in the
    whole text, so a number that follows a line break (possible when the
    text was captured from a page with ``re.S``) is still found.

    :param intNo: phone number text, e.g. ``"+49 (30) 1234-5"``
    :return: the digit string, or ``'0'`` when the text contains no digit
        (NOTE(review): the fallback value is not visible in the extracted
        source; ``'0'`` is assumed, confirm against the original file)
    """
    if intNo.startswith('+'):
        intNo = '00' + intNo[1:]
    for separator in '() /-':
        intNo = intNo.replace(separator, '')
    found = re.search(r'[0-9]+', intNo)
    if found:
        return found.group(0)
    return '0'
# Default output function of ReverseLookupAndNotifier: turns the caller record into one
# readable line of the form "name firstname, street houseno, zipcode city".
#   number - the phone number that was looked up (only logged here)
#   caller - record in the form "NA: ..;VN: ..;STR: ..;HNR: ..;PLZ: ..;ORT: .." as built by
#            ReverseLookupAndNotifier._gotPage (name, first name, street, house number,
#            zip code, city)
# NOTE(review): this extract lacks several short lines of the function (the rest of the
# tuple below and whatever is finally done with `name`); presumably there is also a guard
# for `found` being None - confirm against the complete file.
86 def out(number, caller):
87 debug("[nrzuname] out: %s: %s" %(number, caller))
# split the record into its six fields; fields may be empty
88 found = re.match("NA: ([^;]*);VN: ([^;]*);STR: ([^;]*);HNR: ([^;]*);PLZ: ([^;]*);ORT: ([^;]*)", caller)
91 ( name,vorname,strasse,hnr,plz,ort ) = (found.group(1),
# append the non-empty parts to the last name, inserting ", " between name, street part
# and city part only where both neighbouring parts exist
98 if vorname: name += ' ' + vorname
99 if strasse or hnr or plz or ort: name += ', '
100 if strasse: name += strasse
101 if hnr: name += ' ' + hnr
102 if (strasse or hnr) and (plz or ort): name += ', '
103 if plz and ort: name += plz + ' ' + ort
104 elif plz: name += plz
105 elif ort: name += ort
# Alternative output function with the same (number, caller) signature as out(); it is the
# one passed to ReverseLookupAndNotifier when the script is started with a single argument.
# `number` is intentionally unused (hence the @UnusedVariable marker).
# NOTE(review): the function body is not part of this extract.
109 def simpleout(number, caller): #@UnusedVariable
# Location of reverselookup.xml: resolved inside the plugin directory through
# Tools.Directories, otherwise the bare file name relative to the working directory.
# NOTE(review): the lines joining the two assignments are missing from this extract;
# presumably a try/except around the import selects between them - confirm.
113 from Tools.Directories import resolveFilename, SCOPE_PLUGINS
114 reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/FritzCall/reverselookup.xml")
116 reverseLookupFileName = "reverselookup.xml"
# modification time of the reverselookup.xml that was parsed last; ReverseLookupAndNotifier
# re-reads the file when its current mtime is newer, so 0 forces the first read
119 reverselookupMtime = 0
# Looks up the caller behind a phone number on the web sites configured per country in
# reverselookup.xml and reports the result through an output function.
# The lookup starts in the constructor; pages are fetched asynchronously and handled in
# _gotPage/_gotError, so the result arrives later via outputFunction(number, caller).
# NOTE(review): this extract lacks many short lines (else:, return, if found:, ...); the
# comments below describe only what the visible lines establish.
121 class ReverseLookupAndNotifier:
#   number         - phone number to look up; a leading "+" is accepted
#   outputFunction - callable(number, caller) receiving the result
#   charset        - encoding applied to the caller string before it is handed out
#   countrycode    - own country code in "00.." form; stripped from matching numbers
122 def __init__(self, number, outputFunction=out, charset="cp1252", countrycode = "0049"):
123 debug("[ReverseLookupAndNotifier] reverse Lookup for %s!" %number)
125 self.outputFunction = outputFunction
127 self.currentWebsite = None
# index of the website to try next (into self.websites)
128 self.nextWebsiteNo = 0
129 #===============================================================================
130 # sorry does not work at all
132 # charset = sys.getdefaultencoding()
133 # debug("[ReverseLookupAndNotifier] set charset from system: %s!" %charset)
134 #===============================================================================
135 self.charset = charset
# (re-)parse reverselookup.xml when nothing is cached yet or the file changed on disk;
# index 8 of the os.stat result is the modification time
137 global reverselookupMtime
138 reverselookupMtimeAct = os.stat(reverseLookupFileName)[8]
139 if not countries or reverselookupMtimeAct > reverselookupMtime:
140 debug("[ReverseLookupAndNotifier] (Re-)Reading %s\n" %reverseLookupFileName)
141 reverselookupMtime = reverselookupMtimeAct
142 dom = parse(reverseLookupFileName)
143 for top in dom.getElementsByTagName("reverselookup"):
144 for country in top.getElementsByTagName("country"):
# country codes are keyed in "00.." form, mapping to the list of <website> elements
145 code = country.getAttribute("code").replace("+","00")
146 countries[code] = country.getElementsByTagName("website")
148 self.countrycode = countrycode
# normalise the number: "+" -> "00", own country code -> national "0" prefix
150 if re.match('^\+', self.number):
151 self.number = '00' + self.number[1:]
153 if self.number[:len(countrycode)] == countrycode:
154 self.number = '0' + self.number[len(countrycode):]
# NOTE(review): the condition guarding this early notification is not in this extract
157 # self.caller = _("UNKNOWN")
158 self.notifyAndReset()
# foreign number: find the country by trying 3-, 4- and 5-character prefixes
# NOTE(review): dict.has_key() exists only in Python 2
161 if self.number[:2] == "00":
162 if countries.has_key(self.number[:3]): # e.g. USA
163 self.countrycode = self.number[:3]
164 elif countries.has_key(self.number[:4]):
165 self.countrycode = self.number[:4]
166 elif countries.has_key(self.number[:5]):
167 self.countrycode = self.number[:5]
169 debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
170 # self.caller = _("UNKNOWN")
171 self.notifyAndReset()
# start with the first website configured for the country
174 if countries.has_key(self.countrycode):
175 debug("[ReverseLookupAndNotifier] Found website for reverse lookup")
176 self.websites = countries[self.countrycode]
177 self.nextWebsiteNo = 1
178 self.handleWebsite(self.websites[0])
180 debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
181 # self.caller = _("UNKNOWN")
182 self.notifyAndReset()
# Build the query URL for one <website> element and request the page.
185 def handleWebsite(self, website):
186 debug("[ReverseLookupAndNotifier] handleWebsite: " + website.getAttribute("name"))
# foreign numbers: replace the country code by the prefix the website expects
187 if self.number[:2] == "00":
188 number = website.getAttribute("prefix") + self.number.replace(self.countrycode,"")
192 url = website.getAttribute("url")
# NOTE(review): "$" is not escaped in these two patterns, so it acts as the end-of-string
# anchor and neither search can ever match; only the escaped searches below take effect
193 if re.search('$AREACODE',url) or re.search('$PFXAREACODE',url):
194 debug("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
195 # self.caller = _("UNKNOWN")
196 self.notifyAndReset()
199 # Apparently, there is no attribute called (pfx)areacode anymore
200 # So, this below will not work.
# substitute the placeholders; the areacode/pfxareacode attribute gives the number of
# leading digits that form the area code
202 if re.search('\\$AREACODE',url) and website.hasAttribute("areacode"):
203 areaCodeLen = int(website.getAttribute("areacode"))
204 url = url.replace("$AREACODE","%(areacode)s").replace("$NUMBER","%(number)s")
205 url = url %{ 'areacode':number[:areaCodeLen], 'number':number[areaCodeLen:] }
206 elif re.search('\\$PFXAREACODE',url) and website.hasAttribute("pfxareacode"):
207 areaCodeLen = int(website.getAttribute("pfxareacode"))
208 url = url.replace("$PFXAREACODE","%(pfxareacode)s").replace("$NUMBER","%(number)s")
209 url = url %{ 'pfxareacode':number[:areaCodeLen], 'number':number[areaCodeLen:] }
210 elif re.search('\\$NUMBER',url):
211 url = url.replace("$NUMBER","%s") %number
213 debug("[ReverseLookupAndNotifier] handleWebsite: cannot handle websites with no $NUMBER in url")
214 # self.caller = _("UNKNOWN")
215 self.notifyAndReset()
217 debug("[ReverseLookupAndNotifier] Url to query: " + url)
218 url = url.encode("UTF-8", "replace")
# remembered so that _gotPage knows which website's patterns to apply
219 self.currentWebsite = website
# NOTE(review): the opening line of this call is missing from the extract; presumably
# getPage(url, ...) from twisted.web.client - confirm. Success goes to _gotPage, failure
# to _gotError.
221 agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
222 ).addCallback(self._gotPage).addErrback(self._gotError)
# Callback for a fetched page: extract name and address with the website's patterns.
225 def _gotPage(self, page):
# The lines up to "return newitem.strip()" belong to a nested helper cleanName(text)
# whose def line is missing from this extract: it strips markup residue, resolves
# character references and collapses whitespace in one extracted field.
227 item = text.replace("%20"," ").replace(" "," ").replace("</b>","").replace(","," ").replace('\n',' ').replace('\t',' ')
229 item = html2unicode(item)
# NOTE(review): str.decode() exists only in Python 2; the three attempts try latin-1,
# UTF-8 and finally the configured charset
230 try: # this works under Windows
231 item = item.decode('iso-8859-1')
233 try: # this works under Enigma2
234 item = item.decode('utf-8')
237 item = item.decode(self.charset)
239 # debug("[ReverseLookupAndNotifier] cleanName: " + traceback.format_exc())
240 debug("[ReverseLookupAndNotifier] cleanName: encoding problem")
# NOTE(review): as shown, both replace() arguments are a single blank, which makes this
# loop a no-op; presumably the first argument was two blanks before whitespace was
# collapsed in this extract - confirm against the complete file
242 newitem = item.replace(" ", " ")
243 while newitem != item:
245 newitem = item.replace(" ", " ")
246 return newitem.strip()
# decode the page with the charset announced in its <meta> tag, else ISO-8859-1
248 debug("[ReverseLookupAndNotifier] _gotPage")
249 found = re.match('.*<meta http-equiv="Content-Type" content="(?:application/xhtml\+xml|text/html); charset=([^"]+)" />',page, re.S)
251 debug("[ReverseLookupAndNotifier] Charset: " + found.group(1))
252 page = page.replace("\xa0"," ").decode(found.group(1), "replace")
254 debug("[ReverseLookupAndNotifier] Default Charset: iso-8859-1")
255 page = page.replace("\xa0"," ").decode("ISO-8859-1", "replace")
# each <entry> of the website holds one set of regular expressions to try
257 for entry in self.currentWebsite.getElementsByTagName("entry"):
259 # for the sites delivering fuzzy matches, we check against the returned number
261 pat = self.getPattern(entry, "number")
264 debug("[ReverseLookupAndNotifier] _gotPage: look for number with '''%s'''" %( pat ))
265 found = re.match(pat, page, re.S|re.M)
# NOTE(review): [4:] assumes a 4-character international prefix such as "0049", although
# __init__ accepts country codes of 3 to 5 characters
267 if self.number[:2] == '00':
268 number = '0' + self.number[4:]
271 if number != normalizePhoneNumber(found.group(1)):
272 debug("[ReverseLookupAndNotifier] _gotPage: got unequal number '''%s''' for '''%s'''" %(found.group(1),self.number))
275 # look for <firstname> and <lastname> match, if not there look for <name>, if not there break
282 pat = self.getPattern(entry, "lastname")
285 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "lastname", pat ))
286 found = re.match(pat, page, re.S|re.M)
288 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "lastname", found.group(1)))
289 name = cleanName(found.group(1))
291 pat = self.getPattern(entry, "firstname")
294 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "firstname", pat ))
295 found = re.match(pat, page, re.S|re.M)
297 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "firstname", found.group(1)))
298 firstname = cleanName(found.group(1)).strip()
# single <name> pattern; ".*?" lets re.match find it anywhere in the page
301 pat = ".*?" + self.getPattern(entry, "name")
302 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "name", pat ))
303 found = re.match(pat, page, re.S|re.M)
305 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "name", found.group(1)))
306 item = cleanName(found.group(1))
307 # debug("[ReverseLookupAndNotifier] _gotPage: name: " + item)
309 firstNameFirst = entry.getElementsByTagName('name')[0].getAttribute('swapFirstAndLastName')
310 # debug("[ReverseLookupAndNotifier] _gotPage: swapFirstAndLastName: " + firstNameFirst)
# split at the first whitespace: first word is the first name, the rest the last name
311 if firstNameFirst == 'true': # that means, the name is of the form "firstname lastname"
312 found = re.match('(.*?)\s+(.*)', name)
314 firstname = found.group(1)
315 name = found.group(2)
317 debug("[ReverseLookupAndNotifier] _gotPage: no name found, skipping")
323 pat = ".*?" + self.getPattern(entry, "city")
324 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "city", pat ))
325 found = re.match(pat, page, re.S|re.M)
327 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "city", found.group(1)))
328 item = cleanName(found.group(1))
329 debug("[ReverseLookupAndNotifier] _gotPage: city: " + item)
335 pat = ".*?" + self.getPattern(entry, "zipcode")
336 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "zipcode", pat ))
337 found = re.match(pat, page, re.S|re.M)
338 if found and found.group(1):
339 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "zipcode", found.group(1)))
340 item = cleanName(found.group(1))
341 debug("[ReverseLookupAndNotifier] _gotPage: zipcode: " + item)
342 zipcode = item.strip()
344 pat = ".*?" + self.getPattern(entry, "street")
345 debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( "street", pat ))
346 found = re.match(pat, page, re.S|re.M)
347 if found and found.group(1):
348 debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( "street", found.group(1)))
349 item = cleanName(found.group(1))
350 debug("[ReverseLookupAndNotifier] _gotPage: street: " + item)
351 street = item.strip()
# a trailing group of digits/hyphens is split off as the house number
353 found = re.match("^(.+) ([-\d]+)$", street, re.S)
355 street = found.group(1)
356 streetno= found.group(2)
357 #===============================================================
359 # found = re.match("^(\d+) (.+)$", street, re.S)
361 # street = found.group(2)
362 # streetno = found.group(1)
363 #===============================================================
# assemble the record in the format that out() parses
365 self.caller = "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" %( name,firstname,street,streetno,zipcode,city )
366 debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s" %(self.caller))
368 self.notifyAndReset()
# no entry produced a result: treat it like a failed request so the next website is tried
371 self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
# Errback of the page request, also called by _gotPage when nothing was found:
# try the next configured website, or give up and notify when none is left.
374 def _gotError(self, error = ""):
375 debug("[ReverseLookupAndNotifier] _gotError - Error: %s" %error)
376 if self.nextWebsiteNo >= len(self.websites):
377 debug("[ReverseLookupAndNotifier] _gotError: I give up")
378 # self.caller = _("UNKNOWN")
379 self.notifyAndReset()
382 debug("[ReverseLookupAndNotifier] _gotError: try next website")
383 self.nextWebsiteNo = self.nextWebsiteNo+1
384 self.handleWebsite(self.websites[self.nextWebsiteNo-1])
# Return the text of the first <which> child element of the given DOM element, i.e. the
# regular expression configured for that field.
# NOTE(review): the lines handling "no such element" are missing from this extract.
386 def getPattern(self, website, which):
387 pat1 = website.getElementsByTagName(which)
392 debug("[ReverseLookupAndNotifier] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
393 return pat1[0].childNodes[0].data
# Hand the result (self.caller, encoded to self.charset, or "" ) to the output function;
# when the module runs as a script, also stop the reactor so the process terminates.
395 def notifyAndReset(self):
396 debug("[ReverseLookupAndNotifier] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
397 # debug("1: " + repr(self.caller))
400 # debug("2: " + repr(self.caller))
# characters the target charset cannot represent are replaced rather than raising
401 self.caller = self.caller.encode(self.charset, 'replace')
402 # debug("3: " + repr(self.caller))
404 debug("[ReverseLookupAndNotifier] cannot encode?!?!")
406 # self.caller = unicode(self.caller)
407 # debug("4: " + repr(self.caller))
408 self.outputFunction(self.number, self.caller)
# NOTE(review): presumably the branch for an empty caller - the else line is not in this extract
410 self.outputFunction(self.number, "")
411 if __name__ == '__main__':
412 reactor.stop() #@UndefinedVariable
# Command-line entry point. With one argument (the number) the result is reported through
# simpleout; with two arguments (number and charset) through out, using the given charset.
# The lookup is asynchronous, so the reactor is run until notifyAndReset stops it.
414 if __name__ == '__main__':
# directory of the script; not used in the lines visible in this extract
415 cwd = os.path.dirname(sys.argv[0])
416 if (len(sys.argv) == 2):
418 ReverseLookupAndNotifier(sys.argv[1], simpleout)
419 reactor.run() #@UndefinedVariable
420 elif (len(sys.argv) == 3):
421 # nrzuname.py number charset
423 ReverseLookupAndNotifier(sys.argv[1], out, sys.argv[2])
424 reactor.run() #@UndefinedVariable