Python 正则表达式提取代理 IP
import re

# Example input so the snippet runs on its own; replace with the proxy
# string you actually want to parse ("[user:pass@]a.b.c.d[:port]").
proxy = 'user:pass@123.123.123.123:1234'

# Capture groups of the pattern:
#   3 = user name, 4 = password (both optional, present only with "user:pass@")
#   5 = dotted-quad address
#   8 = port (optional, 1-5 digits)
# A raw string keeps the backslash escapes (\d, \.) intact for the regex engine.
m = re.match(
    r'^((([^:]+):([^@]+))@)?((\d{1,3}\.){3}\d{1,3})(:(\d{1,5}))?$',
    proxy,
)

# re.match returns None when the string does not have the expected shape,
# so guard before touching the match object.
if m is not None:
    # group() with no argument is the whole matched text.
    print(m.group())
import re

# Sample proxy strings exercised by the demo loop at the bottom.
samples = [
    'user:pass@123.123.123.123:1234',
    '123.123.123.123:1234',
    '123.123.123:123',
    '321.123.123.123',
    '123.123.123.123:123123',
]

# Shape: [user:pass@]a.b.c.d[:port]
# Capture groups: 3 = user, 4 = password, 5 = dotted-quad address, 8 = port.
# The pattern only checks the shape (digit counts); numeric ranges are
# verified afterwards by isValidIp / isValidPort.
_PROXY_RE = re.compile(
    r'^((([^:]+):([^@]+))@)?((\d{1,3}\.){3}\d{1,3})(:(\d{1,5}))?$'
)


def isValidUser(name):
    """Return True if *name* begins with at least one word character.

    Only the start of the string is checked (re.match anchors at the
    beginning but not at the end), so trailing non-word characters are
    accepted. An empty string is rejected.
    """
    return re.match(r'\w+', name) is not None


def isValidPass(passwd):
    """Return True if *passwd* passes the same check as a user name."""
    return isValidUser(passwd)


def isValidIp(ip):
    """Return True if *ip* is four dot-separated integers, each in 0..255."""
    if ip.count('.') != 3:
        return False
    for octet in ip.split('.'):
        try:
            if not 0 <= int(octet) <= 255:
                return False
        except ValueError:
            # Empty or non-numeric component.
            return False
    return True


def isValidPort(port):
    """Return True if *port* parses as an integer in 1..65535."""
    try:
        return 0 < int(port) < 2 ** 16
    except ValueError:
        return False


def isValidProxy(proxy):
    """Return True if *proxy* is a well-formed ``[user:pass@]ip[:port]`` string.

    The credentials and the port are optional. When one of them is absent,
    a placeholder that is known to be valid is substituted so that the
    per-field validators below do not reject the proxy for a missing part.
    """
    m = _PROXY_RE.match(proxy)
    if m is None:
        return False

    user = m.group(3) or 'user'
    passwd = m.group(4) or 'pass'
    ip = m.group(5)
    port = m.group(8) or '1234'

    return (
        isValidUser(user)
        and isValidPass(passwd)
        and isValidIp(ip)
        and isValidPort(port)
    )


for n in samples:
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(isValidProxy(n))
参考:https://stackoverflow.com/questions/18546053/how-to-perfectly-match-a-proxy-with-regex/