You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
391 lines
14 KiB
391 lines
14 KiB
import csv, re, sys
|
|
from six.moves.urllib.parse import unquote_plus
|
|
'''
Parse a User Agent string (which must be un-encoded, so de-quote it first if
necessary) into the following dict:

{
    "browser":
    "browserversion":
    "os":
    "osvariant":
    "osversion":
}

This is tested against a huge list of browsers from a standard browscap file
in main, so use that.
'''
|
|
|
|
def parse_stuffone(stuff, n, dict):
    """Extract OS (and a few browser) details from one section of a user agent.

    ``stuff`` is the user agent already split on parentheses, ``n`` is the
    index of the section to inspect, and ``dict`` is the result dictionary
    (keys ``browser``, ``browserversion``, ``os``, ``osvariant``,
    ``osversion``).  The dictionary is updated in place; nothing is returned.

    The section is split on ``'; '`` and every token is tested against a
    fixed list of known markers.  The first rule that matches a token wins
    and the remaining rules are skipped for that token.
    """
    # NOTE: the third parameter keeps its historical name ``dict`` (which
    # shadows the builtin) because it is part of the existing signature.
    for ss in stuff[n].split('; '):
        # Bare platform names are the weakest hint: only use them when
        # nothing more specific has been recorded yet.
        if ss in ('Windows', 'Macintosh', 'Linux', 'Android'):
            if dict['os'] == "unknown":
                dict['os'] = ss
            continue

        if ss in ('iPhone', 'iPad', 'iPod'):
            dict['os'] = 'Apple iOS'
            dict['osvariant'] = ss
            continue

        if 'Symbian' in ss:
            dict['os'] = 'Symbian'
            continue

        if ss == "BlackBerry":
            dict['os'] = "BlackBerry"
            dict['osvariant'] = "BlackBerry"
            continue

        if ss == "J2ME/MIDP":
            dict['osvariant'] = "J2ME"
            continue

        # ChromeOS tokens look like "CrOS x86_64 14541.0.0"; the
        # architecture is a whole word, so it must be matched with \w+.
        match = re.match(r'CrOS (\w+) ([0-9\.]+)', ss)
        if match is not None:
            dict['os'] = "Linux"
            dict['osvariant'] = "ChromeOS"
            dict['osversion'] = match.group(2)
            continue

        match = re.match(r'Windows NT ([0-9\.]+)', ss)
        if match is not None:
            dict['os'] = "Windows"
            dict['osvariant'] = "Windows NT"
            dict['osversion'] = match.group(1)
            continue

        match = re.match(r'Linux (.*)', ss)
        if match is not None:
            dict['os'] = 'Linux'
            dict['osvariant'] = ss
            continue

        match = re.match(r'FreeBSD (.*)', ss)
        if match is not None:
            dict['os'] = 'FreeBSD'
            dict['osvariant'] = ss
            continue

        match = re.match(r'Windows\/([0-9\.]+)', ss)
        if match is not None:
            dict['os'] = 'Windows'
            dict['osversion'] = match.group(1)
            dict['osvariant'] = dict['os'] + " " + dict['osversion']
            continue

        # iOS version tokens use underscores ("5_0"); normalise to dots.
        match = re.match(
            r'(CPU iPhone|iPhone|iPhone OS|CPU OS|CPU iPhone OS) ([0-9\_\.]+)',
            ss)
        if match is not None:
            dict['osversion'] = match.group(2).replace('_', '.')
            continue

        match = re.match(r'(.* Mac OS .*?)(Version)?\s([0-9\.\_]+)', ss)
        if match is not None:
            dict['os'] = "Macintosh"
            dict['osvariant'] = match.group(1)
            dict['osversion'] = match.group(3).replace('_', '.')
            continue

        match = re.match(r'Android (.*)', ss)
        if match is not None:
            dict['os'] = "Google Android"
            dict['osvariant'] = "Android"
            dict['osversion'] = match.group(1)
            continue

        match = re.match(r'Series60/([0-9\.]+)', ss)
        if match is not None:
            dict['osvariant'] = "Series60"
            dict['osversion'] = match.group(1)
            continue

        match = re.match(r"Opera Mini/([0-9]+\.[0-9]+)", ss)
        if match is not None:
            dict['browser'] = "Opera Mini"
            dict['browserversion'] = match.group(1)
            continue

        match = re.match(r"Konqueror/([0-9\.]+)", ss)
        if match is not None:
            dict['browser'] = "Konqueror"
            dict['browserversion'] = match.group(1)

    # If it's Linux, then we have to do more work to find the variants:
    # distributions advertise themselves as "Name/version" anywhere in the
    # user agent, so every section is scanned (the last hit wins).
    if dict['os'] == "Linux":
        for section in stuff:
            match = re.search(
                r"(CentOS|Debian|Fedora|Gentoo|Mint|PCLinuxOS|SUSE|Ubuntu)/([0-9a-z\.]+)",
                section)
            if match is not None:
                dict['osvariant'] = match.group(1)
                dict['osversion'] = match.group(2)
|
|
|
|
def parse_useragent(useragent):
    """Parse an un-encoded User-Agent string into browser and OS details.

    Returns a dict with the keys ``browser``, ``browserversion``, ``os``,
    ``osvariant`` and ``osversion``.  Every value that cannot be determined
    is left as the string ``"unknown"``.

    The well-known browsers and mail clients are tested first, in a fixed
    order (the order matters, e.g. Chrome must be tested before Safari); each
    of those branches returns as soon as it has filled in what it can.  User
    agents that match none of them fall through to a series of generic
    checks whose results are layered on top of each other.
    """
    info = {
        "browser": "unknown",
        "browserversion": "unknown",
        "os": "unknown",
        "osvariant": "unknown",
        "osversion": "unknown",
    }

    # Split on both kinds of parenthesis.  If the user agent has no
    # parenthesised section at all there is only one element and the
    # OS-level parsing is skipped.
    stuff = useragent.replace(')', '(').split('(')
    has_section = len(stuff) > 1
    if has_section:
        parse_stuffone(stuff, 1, info)

    # Microsoft Internet Explorer
    # (find() > 0: a marker at the very start of the string is not a hit)
    if useragent.find('MSIE') > 0:
        info['browser'] = "Internet Explorer"
        # Guard against user agents that mention MSIE without any
        # parenthesised section; there is simply nothing more to read then.
        tokens = stuff[1].split('; ') if has_section else []
        for token in tokens:
            match = re.match(r"MSIE ([0-9]+\.[0-9]+)", token)
            if match is not None:
                info['browserversion'] = match.group(1)
                continue
            match = re.match(r"(Windows .*) ([0-9\.]+)", token)
            if match is not None:
                info['os'] = "Windows"
                info['osvariant'] = match.group(1)
                info['osversion'] = match.group(2)
                continue
            if token == "Windows CE":
                info['os'] = "Windows Mobile"
                info['osvariant'] = "Windows CE"
                continue
        return info

    # Microsoft Office (Outlook clients)
    if 'Microsoft Office' in useragent:
        info['browser'] = "Microsoft Outlook"
        match = re.match(r"Microsoft Office/([0-9.]+)", stuff[0])
        if match is not None:
            info['browserversion'] = match.group(1)
        return info

    # MacOutlook
    if 'MacOutlook' in useragent:
        info['browser'] = "Mac Outlook"
        match = re.match(r"MacOutlook/([0-9.]+)", stuff[0])
        if match is not None:
            info['browserversion'] = match.group(1)
        return info

    # Microsoft Entourage
    if 'Entourage' in useragent:
        info['browser'] = "Microsoft Entourage"
        # "Entourage" may appear somewhere other than the very start, in
        # which case there is no version to read.
        match = re.match(r"Entourage/([0-9\.]+)", stuff[0])
        if match is not None:
            info['browserversion'] = match.group(1)
        if has_section:
            match = re.match(r"([^0-9]+) ([0-9\.\_]+)", stuff[1])
            if match is not None:
                info['os'] = "Macintosh"
                info['osvariant'] = match.group(1)
                info['osversion'] = match.group(2).replace('_', '.')
        return info

    # Google Chrome
    if useragent.find('Chrome/') > 0:
        info['browser'] = "Google Chrome"
        for section in stuff:
            match = re.match(r'.*Chrome/([0-9]+\.[0-9]+)', section)
            if match is not None:
                info['browserversion'] = match.group(1)
        return info

    # Opera Browser
    if 'Opera/' in useragent:
        info['browser'] = "Opera"
        match = re.match(r"Opera/([0-9\.]+)", stuff[0])
        if match is not None:
            info['browserversion'] = match.group(1)
        # A "Version/x.y" token, when present, overrides the number after
        # "Opera/".  It sits after the leading "Opera/" token, so it has to
        # be searched for rather than matched at the start of the string.
        match = re.search(r"Version/([0-9\.]+)", useragent)
        if match is not None:
            info['browserversion'] = match.group(1)
        for index in range(1, len(stuff) - 1):
            parse_stuffone(stuff, index, info)
        return info

    # Symbian BrowserNG
    if "BrowserNG/" in useragent:
        info['browser'] = "BrowserNG"
        match = re.search(r"BrowserNG/([0-9\.]+)", useragent)
        if match is not None:
            info['browserversion'] = match.group(1)
        return info

    # Mozilla Firefox 3.x
    if useragent.find('Firefox') > 0 or useragent.find('Iceweasel') > 0:
        info['browser'] = "Mozilla Firefox"
        for section in stuff:
            if "Gentoo" in section:
                info['osvariant'] = "Gentoo"
            match = re.match(r'.*Firefox/([0-9\.]+)', section)
            if match is not None:
                info['browserversion'] = match.group(1)
            # Those sneaky Iceweasel folks think they can hide...
            match = re.match(r'.*Iceweasel/([0-9\.]+)', section)
            if match is not None:
                info['browser'] = "Iceweasel"
                info['browserversion'] = match.group(1)
        return info

    # Apple Safari - note order is important here
    if useragent.find('Safari') > 0:
        info['browser'] = "Apple Safari"
        for section in stuff:
            match = re.match(r'.*Version/([0-9\.]+) Safari/', section)
            if match is not None:
                info['browserversion'] = match.group(1)
            match = re.match(r'.*Version/([0-9\.]+) Mobile Safari/', section)
            if match is not None:
                info['browser'] = "Mobile Safari"
                info['browserversion'] = match.group(1)
            match = re.match(r'.*Version/([0-9\.]+) Mobile/', section)
            if match is not None:
                info['browser'] = "Mobile Safari"
                info['browserversion'] = match.group(1)
        return info

    # Blackberry Browser
    # NOTE(review): the nesting of this branch was reconstructed by analogy
    # with the other browser branches (fields set only on a version match,
    # return whenever the marker is present) - confirm against upstream.
    if "BlackBerry" in stuff[0]:
        match = re.search(r"BlackBerry[0-9]+/([0-9]+\.[0-9]+)", useragent)
        if match is not None:
            info['os'] = "BlackBerry"
            info['osvariant'] = "BlackBerry"
            info['osversion'] = match.group(1)
            info['browserversion'] = match.group(1)
            info['browser'] = "BlackBerry"
        return info

    # Other stuff we can find out - pass through each one.  These checks do
    # not return early, so a later check overrides an earlier one when both
    # set the same field.
    match = re.search(r"Darwin/([0-9\.\_]+)", useragent)
    if match is not None:
        info['os'] = "Macintosh"
        info['osvariant'] = "Mac OS X"
        info['osversion'] = match.group(1).replace('_', '.')

    match = re.search(r"Mac OS X/([0-9\.\_]+)", useragent)
    if match is not None:
        info['os'] = "Macintosh"
        info['osvariant'] = "Mac OS X"
        info['osversion'] = match.group(1).replace('_', '.')

    match = re.search(r"SfBForMac/([0-9\.\_]+)", useragent)
    if match is not None:
        info['os'] = "Macintosh"
        info['browser'] = "Skype for Business For Mac"
        info['browserversion'] = match.group(1).replace('_', '.')

    match = re.search(r"OC/([0-9\.\_]+)", useragent)
    if match is not None:
        info['browser'] = "Office Communicator"
        info['browserversion'] = match.group(1).replace('_', '.')

    match = re.search(r"MC/([0-9\.\_]+)", useragent)
    if match is not None:
        info['os'] = "Macintosh"
        info['browser'] = "Microsoft Lync"
        info['browserversion'] = match.group(1).replace('_', '.')

    match = re.search(r"Mac OSX ([0-9\.\_]+)", useragent)
    if match is not None:
        info['os'] = "Macintosh"
        info['osvariant'] = "Mac OSX"
        info['osversion'] = match.group(1).replace('_', '.')

    # Clients whose version is simply "everything after the slash".
    for pattern, label in (
            (r"AddressBookSourceSync\/(.*)(\)|$)", "Offline address book sync"),
            (r"Notes\/(.*)(\)|$)", "Notes"),
            (r"Mail\/(.*)(\)|$)", "Mail"),
            (r"CalendarAgent\/(.*)(\)|$)", "Calendar"),
    ):
        match = re.search(pattern, useragent)
        if match is not None:
            info['browser'] = label
            info['browserversion'] = match.group(1)

    match = re.search(r"OneNote\/(.*?)(\)|$|\s)", useragent)
    if match is not None:
        info['browser'] = "OneNote"
        info['browserversion'] = match.group(1)

    match = re.search(r"ASProxy(.*?)(\d.*?)(/|$)", useragent)
    if match is not None:
        info['browser'] = "ASProxy"
        info['browserversion'] = match.group(2)

    match = re.search(r"MSEXCHMON(.*?)(\d.*?)(\)|$)", useragent)
    if match is not None:
        info['browser'] = "MSEXCHMON"
        info['browserversion'] = match.group(2)

    if "OwaProxy" in useragent:
        info['browser'] = 'OwaProxy'

    if "ExchangeSharingSync" in useragent:
        info['browser'] = 'ExchangeSharingSync'

    if "Apple" in useragent:
        info['os'] = 'Macintosh'

    if "Microsoft Windows XP" in useragent:
        info['os'] = "Windows"
        info['osvariant'] = "Windows NT"
        info['osversion'] = "5.2"

    if "ExchangeServicesClient" in useragent:
        info['browser'] = "ExchangeServicesClient"
        match = re.search(r"ExchangeServicesClient\/(.*)(\)|$)", useragent)
        # The name can appear without a "/version" suffix; leave the version
        # as "unknown" in that case instead of failing.
        if match is not None:
            version = match.group(1)
            # The greedy capture runs to the end of the string, so drop a
            # closing parenthesis that belongs to the enclosing section.
            if version.endswith(')'):
                version = version[:-1]
            info['browserversion'] = version

    # We have done all we can, so let's return the package
    return info
|
|
|
|
#
# Main routine - basically it's the standard python recipe for handling
# Splunk lookups
#

# Maps the "Windows NT x.y" kernel version reported in a user agent to the
# more familiar product names.
windows_mapping = {
    '5.0': 'Windows 2000',
    '5.1': 'Windows XP',
    '5.2': 'Windows XP/Server 2003',
    '6.0': 'Windows Vista/Server 2008',
    '6.1': 'Windows 7/Server 2008R2',
    '6.2': 'Windows 8/Server 2012',
    '6.3': 'Windows 8.1/Server 2012 R2',
    '10.0': 'Windows 10/Server 2016/Server 2019'
}


def build_output_row(header, row, cs_user_agent, fields):
    """Build one output CSV row in ``header`` column order.

    ``row`` is the input row, ``cs_user_agent`` the raw (still encoded)
    user agent value and ``fields`` the parsed result dictionary.  The
    ``cs_user_agent`` column is echoed back verbatim, columns named after a
    key of ``fields`` receive the parsed value, and any other column keeps
    its input value (or an empty string if the input row is too short).
    """
    out = []
    for position, column in enumerate(header):
        if column == "cs_user_agent":
            out.append(cs_user_agent)
        elif column in fields:
            out.append(fields[column])
        elif position < len(row):
            # Not one of ours: pass the caller's value through untouched.
            out.append(row[position])
        else:
            out.append('')
    return out


def main():
    """Read a CSV lookup request on stdin and write the filled-in CSV to stdout.

    The first row is treated as the header and copied through unchanged.
    For every following row the ``cs_user_agent`` column is URL-decoded and
    parsed, and the parsed values are written into the matching columns.
    """
    reader = csv.reader(sys.stdin)
    writer = csv.writer(sys.stdout)

    header = next(reader, None)
    if header is None:
        # Empty input: nothing to do.
        return
    writer.writerow(header)

    # Locate the user agent column (the last one wins if it is repeated).
    ua_index = None
    for position, column in enumerate(header):
        if column == "cs_user_agent":
            ua_index = position

    for row in reader:
        if ua_index is None:
            # Without a cs_user_agent column there is nothing to parse, so
            # the row is echoed back as it came in.
            writer.writerow(row)
            continue

        # We only care about the cs_user_agent field - everything else is
        # filled in.  Short or blank rows are treated as an empty user agent.
        cs_user_agent = row[ua_index] if ua_index < len(row) else ''
        fields = parse_useragent(unquote_plus(cs_user_agent))

        # We have a mapping for the WindowsNT stuff to the more normal names
        if (fields['osvariant'] == 'Windows NT'
                and fields['osversion'] in windows_mapping):
            fields['osvariant'] = windows_mapping[fields['osversion']]

        # Now write it out
        writer.writerow(build_output_row(header, row, cs_user_agent, fields))


if __name__ == '__main__':
    main()