You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

391 lines
14 KiB

import csv, re, sys
from six.moves.urllib.parse import unquote_plus
'''
Parse a User Agent string (which must be un-encoded, so de-quote if necessary first)
into the following dict
{
"browser":
"browserversion":
"os":
"osvariant":
"osversion":
}
This is tested against a huge list of browsers from a standard browsecap file in main
so use that
'''
def parse_stuffone(stuff, n, dict):
sep = stuff[n].split('; ')
for ss in sep:
if (ss == 'Windows' or ss == 'Macintosh' or ss == 'Linux'
or ss == 'Android'):
if (dict['os'] == "unknown"):
dict['os'] = ss
continue
if (ss == 'iPhone' or ss == 'iPad' or ss == 'iPod'):
dict['os'] = 'Apple iOS'
dict['osvariant'] = ss
continue
if (ss.find('Symbian') >= 0):
dict['os'] = 'Symbian'
continue
if (ss == "BlackBerry"):
dict['os'] = "BlackBerry"
dict['osvariant'] = "BlackBerry"
continue
if (ss == "J2ME/MIDP"):
dict['osvariant'] = "J2ME"
continue
match = re.match('CrOS (\w) ([0-9\.]+)', ss)
if (match is not None):
dict['os'] = "Linux"
dict['osvariant'] = "ChromeOS"
dict['osversion'] = match.group(2)
continue
match = re.match('Windows NT ([0-9\.]+)', ss)
if (match is not None):
dict['os'] = "Windows"
dict['osvariant'] = "Windows NT"
dict['osversion'] = match.group(1)
continue
match = re.match('Linux (.*)', ss)
if (match is not None):
dict['os'] = 'Linux'
dict['osvariant'] = ss
continue
match = re.match('FreeBSD (.*)', ss)
if (match is not None):
dict['os'] = 'FreeBSD'
dict['osvariant'] = ss
continue
match = re.match('Windows\/([0-9\.]+)',ss)
if (match is not None):
dict['os'] = 'Windows'
dict['osversion'] = match.group(1)
dict['osvariant'] = dict['os'] + " "+dict['osversion']
continue
match = re.match('(CPU iPhone|iPhone|iPhone OS|CPU OS|CPU iPhone OS) ([0-9\_\.]+)', ss)
if (match is not None):
dict['osversion'] = match.group(2).replace('_', '.')
continue
match = re.match('(.* Mac OS .*?)(Version)?\s([0-9\.\_]+)', ss)
if (match is not None):
dict['os'] = "Macintosh"
dict['osvariant'] = match.group(1)
dict['osversion'] = match.group(3).replace('_', '.')
continue
match = re.match('Android (.*)', ss)
if (match is not None):
dict['os'] = "Google Android"
dict['osvariant'] = "Android"
dict['osversion'] = match.group(1)
continue
match = re.match('Series60/([0-9\.]+)', ss)
if (match is not None):
dict['osvariant'] = "Series60"
dict['osversion'] = match.group(1)
continue
match = re.match("Opera Mini/([0-9]+\.[0-9]+)", ss)
if (match is not None):
dict['browser'] = "Opera Mini"
dict['browserversion'] = match.group(1)
continue
match = re.match("Konqueror/([0-9\.]+)", ss)
if (match is not None):
dict['browser'] = "Konqueror"
dict['browserversion'] = match.group(1)
# If it's Linux, then we have to do more work to find the variants
if (dict['os'] == "Linux"):
for ss in stuff:
match = re.search(
"(CentOS|Debian|Fedora|Gentoo|Mint|PCLinuxOS|SUSE|Ubuntu)/([0-9a-z\.]+)",
ss)
if (match is not None):
dict['osvariant'] = match.group(1)
dict['osversion'] = match.group(2)
def parse_useragent(useragent):
dict = {
"browser": "unknown",
"browserversion": "unknown",
"os": "unknown",
"osvariant": "unknown",
"osversion": "unknown"
}
# If the useragent is not well-formed, then we can't do anything with it
stuff = useragent.replace(')', '(').split('(')
if (len(stuff) > 1):
parse_stuffone(stuff, 1, dict)
# Microsoft Internet Explorer
if (useragent.find('MSIE') > 0):
dict['browser'] = "Internet Explorer"
sep = stuff[1].split('; ')
for ss in sep:
match = re.match("MSIE ([0-9]+\.[0-9]+)", ss)
if (match is not None):
dict['browserversion'] = match.group(1)
continue
match = re.match("(Windows .*) ([0-9\.]+)", ss)
if (match is not None):
dict['os'] = "Windows"
dict['osvariant'] = match.group(1)
dict['osversion'] = match.group(2)
continue
if (ss == "Windows CE"):
dict['os'] = "Windows Mobile"
dict['osvariant'] = "Windows CE"
continue
return dict
# Microsoft Office (Outlook clients)
if (useragent.find('Microsoft Office') >= 0):
dict['browser'] = "Microsoft Outlook"
match = re.match("Microsoft Office/([0-9.]+)", stuff[0])
if (match is not None):
dict['browserversion'] = match.group(1)
return dict
# MacOutlook
if (useragent.find('MacOutlook') >= 0):
dict['browser'] = "Mac Outlook"
match = re.match("MacOutlook/([0-9.]+)", stuff[0])
if (match is not None):
dict['browserversion'] = match.group(1)
return dict
# Microsoft Entourage
if (useragent.find('Entourage') >= 0):
dict['browser'] = "Microsoft Entourage"
match = re.match("Entourage/([0-9\.]+)", stuff[0])
dict['browserversion'] = match.group(1)
match = re.match("([^0-9]+) ([0-9\.\_]+)", stuff[1])
if (match is not None):
dict['os'] = "Macintosh"
dict['osvariant'] = match.group(1)
dict['osversion'] = match.group(2).replace('_', '.')
return dict
# Google Chrome
if (useragent.find('Chrome/') > 0):
dict['browser'] = "Google Chrome"
for s in stuff:
match = re.match('.*Chrome/([0-9]+\.[0-9]+)', s)
if (match is not None):
dict['browserversion'] = match.group(1)
return dict
# Opera Browser
if (useragent.find('Opera/') >= 0):
dict['browser'] = "Opera"
match = re.match("Opera/([0-9\.]+)", stuff[0])
if (match is not None):
dict['browserversion'] = match.group(1)
match = re.match("Version/([0-9\.]+)", useragent)
if (match is not None):
dict['browserversion'] = match.group(1)
for x in range(1, len(stuff) - 1):
parse_stuffone(stuff, x, dict)
return dict
# Symbian BrowserNG
if (useragent.find("BrowserNG/") >= 0):
dict['browser'] = "BrowserNG"
match = re.search("BrowserNG/([0-9\.]+)", useragent)
if (match is not None):
dict['browserversion'] = match.group(1)
return dict
# Mozilla Firefox 3.x
if (useragent.find('Firefox') > 0 or useragent.find('Iceweasel') > 0):
dict['browser'] = "Mozilla Firefox"
for s in stuff:
if (s.find("Gentoo") >= 0):
dict['osvariant'] = "Gentoo"
match = re.match('.*Firefox/([0-9\.]+)', s)
if (match is not None):
dict['browserversion'] = match.group(1)
# Those sneaky Iceweasel folks think they can hide...
match = re.match('.*Iceweasel/([0-9\.]+)', s)
if (match is not None):
dict['browser'] = "Iceweasel"
dict['browserversion'] = match.group(1)
return dict
# Apple Safari - note order is important here
if (useragent.find('Safari') > 0):
dict['browser'] = "Apple Safari"
for s in stuff:
match = re.match('.*Version/([0-9\.]+) Safari/', s)
if (match is not None):
dict['browserversion'] = match.group(1)
match = re.match('.*Version/([0-9\.]+) Mobile Safari/', s)
if (match is not None):
dict['browser'] = "Mobile Safari"
dict['browserversion'] = match.group(1)
match = re.match('.*Version/([0-9\.]+) Mobile/', s)
if (match is not None):
dict['browser'] = "Mobile Safari"
dict['browserversion'] = match.group(1)
return dict
# Blackberry Browser
if (stuff[0].find("BlackBerry") >= 0):
match = re.search("BlackBerry[0-9]+/([0-9]+\.[0-9]+)", useragent)
if (match is not None):
dict['os'] = "BlackBerry"
dict['osvariant'] = "BlackBerry"
dict['osversion'] = match.group(1)
dict['browserversion'] = match.group(1)
dict['browser'] = "BlackBerry"
return dict
# Other stuff we can find out - pass through each one
match = re.search("Darwin/([0-9\.\_]+)", useragent)
if (match is not None):
dict['os'] = "Macintosh"
dict['osvariant'] = "Mac OS X"
dict['osversion'] = match.group(1).replace('_', '.')
match = re.search("Mac OS X/([0-9\.\_]+)", useragent)
if (match is not None):
dict['os'] = "Macintosh"
dict['osvariant'] = "Mac OS X"
dict['osversion'] = match.group(1).replace('_', '.')
match = re.search("SfBForMac/([0-9\.\_]+)", useragent)
if (match is not None):
dict['os'] = "Macintosh"
dict['browser'] = "Skype for Business For Mac"
dict['browserversion'] = match.group(1).replace('_', '.')
match = re.search("OC/([0-9\.\_]+)", useragent)
if (match is not None):
dict['browser'] = "Office Communicator"
dict['browserversion'] = match.group(1).replace('_', '.')
match = re.search("MC/([0-9\.\_]+)", useragent)
if (match is not None):
dict['os'] = "Macintosh"
dict['browser'] = "Microsoft Lync"
dict['browserversion'] = match.group(1).replace('_', '.')
match = re.search("Mac OSX ([0-9\.\_]+)", useragent)
if (match is not None):
dict['os'] = "Macintosh"
dict['osvariant'] = "Mac OSX"
dict['osversion'] = match.group(1).replace('_', '.')
match = re.search("AddressBookSourceSync\/(.*)(\)|$)", useragent)
if (match is not None):
dict['browser'] = "Offline address book sync"
dict['browserversion'] = match.group(1)
match = re.search("Notes\/(.*)(\)|$)",useragent)
if (match is not None):
dict['browser'] = "Notes"
dict['browserversion'] = match.group(1)
match = re.search("Mail\/(.*)(\)|$)",useragent)
if (match is not None):
dict['browser'] = "Mail"
dict['browserversion'] = match.group(1)
match = re.search("CalendarAgent\/(.*)(\)|$)",useragent)
if (match is not None):
dict['browser'] = "Calendar"
dict['browserversion'] = match.group(1)
match = re.search("OneNote\/(.*?)(\)|$|\s)",useragent)
if (match is not None):
dict['browser'] = "OneNote"
dict['browserversion'] = match.group(1)
match = re.search("ASProxy(.*?)(\d.*?)(/|$)",useragent)
if (match is not None):
dict['browser'] = "ASProxy"
dict['browserversion'] = match.group(2)
match = re.search("MSEXCHMON(.*?)(\d.*?)(\)|$)",useragent)
if (match is not None):
dict['browser'] = "MSEXCHMON"
dict['browserversion'] = match.group(2)
if (useragent.find("OwaProxy") >= 0):
dict['browser'] = 'OwaProxy'
if (useragent.find("ExchangeSharingSync") >= 0):
dict['browser'] = 'ExchangeSharingSync'
if(useragent.find("Apple") >= 0):
dict['os'] = 'Macintosh'
if (useragent.find("Microsoft Windows XP") >= 0):
dict['os'] = "Windows"
dict['osvariant'] = "Windows NT"
dict['osversion'] = "5.2"
if (useragent.find("ExchangeServicesClient") >= 0):
dict['browser'] = "ExchangeServicesClient"
match = re.search("ExchangeServicesClient\/(.*)(\)|$)", useragent)
dict['browserversion'] = match.group(1)
if (dict['browserversion'][-1] == ')'):
dict['browserversion'] = dict['browserversion'][:-1]
#We have done all we can, so let's return the package
return dict
#
# Main routine - basically it's the standard python recipe for handling
# Splunk lookups
#
windows_mapping = {
'5.0': 'Windows 2000',
'5.1': 'Windows XP',
'5.2': 'Windows XP/Server 2003',
'6.0': 'Windows Vista/Server 2008',
'6.1': 'Windows 7/Server 2008R2',
'6.2': 'Windows 8/Server 2012',
'6.3': 'Windows 8.1/Server 2012 R2',
'10.0': 'Windows 10/Server 2016/Server 2019'
}
if __name__ == '__main__':
r = csv.reader(sys.stdin)
w = csv.writer(sys.stdout)
have_header = False
header = []
idx = -1
for row in r:
if (have_header == False):
header = row
have_header = True
z = 0
for h in row:
if (h == "cs_user_agent"):
idx = z
z = z + 1
w.writerow(row)
continue
# We only care about the cs_user_agent field - everything else is filled in
cs_user_agent = row[idx]
useragent = unquote_plus(cs_user_agent)
dict = parse_useragent(useragent)
# We have a mapping for the WindowsNT stuff to the more normal names
if dict['osvariant'] == 'Windows NT' and dict['osversion'] in windows_mapping:
dict['osvariant'] = windows_mapping[dict['osversion']]
# Now write it out
orow = []
for xx in header:
if (xx == "cs_user_agent"):
orow.append(cs_user_agent)
else:
orow.append(dict[xx])
w.writerow(orow)