# User:Habst/Scripts/makencaatable.py

#!/usr/bin/env python3

import bs4
import sys
import requests

# Download the page named by the first command-line argument and parse it.
page = requests.get(sys.argv[1])
s = bs4.BeautifulSoup(page.text, 'html5lib')

# The results are read from the table whose id is "events".
evt = s.find('table', attrs={'id': 'events'})

# Map each header label (whitespace-stripped) of the table's first row to its
# column index, so cells can later be looked up by column name.
header_row = evt.find('tr')
tk = {
    header.text.strip(): column
    for column, header in enumerate(header_row.find_all('th'))
}

# Manual overrides mapping a team name as it appears on the results page to
# the full team name.  team_transform consults this table before doing any
# prefix matching.
cheats = {
    'Army West Point': 'Army Black Knights',
}

# Candidate full team names, one per line of fullteams.txt.  Blank lines,
# lines whose first character is '#', and names containing 'Lady' are skipped.
# The file is read inside a `with` block so the handle is closed right away
# instead of being left open for the rest of the run.
with open('fullteams.txt') as teams_file:
    teams = [
        ine for ine in teams_file.read().split('\n')
        if ine.strip() != '' and ine[0] != '#' and 'Lady' not in ine
    ]
def team_transform(team):
    """Expand an abbreviated team name into a full name from ``teams``.

    An entry in ``cheats`` wins outright.  Otherwise 'St.' is expanded to
    'State', a few spellings of the name are generated (SE, N., Miss, U.),
    and every entry of ``teams`` that starts with one of those spellings
    followed by a space is collected.  The shortest such entry is returned;
    if nothing matches, the name with 'St.' expanded is returned.
    """
    try:
        return cheats[team]
    except KeyError:
        pass

    name = team.replace('St.', 'State')

    # Directional abbreviations relax the 'State' agreement check below.
    is_directional = False
    if 'SE' in name:
        is_directional = True
        variants = [name.replace('SE', full) for full in ('Southeast', 'Southeastern')]
    elif 'N.' in name:
        is_directional = True
        variants = [name.replace('N.', full) for full in ('North', 'N')]
    elif 'Miss ' in name or 'Miss.' in name:
        variants = [
            name,
            name.replace('Miss.', 'Miss'),
            name.replace('Miss', 'Mississippi'),
            name.replace('Miss.', 'Mississippi'),
        ]
    elif 'U.' in name:
        variants = [name] + [name.replace('U.', full) for full in ('U', 'University')]
    else:
        variants = [name]

    def is_match(candidate, variant):
        # The candidate must begin with the variant as a whole-word prefix.
        if not candidate.startswith(variant + ' '):
            return False
        # 'A&M' must appear in both or in neither.
        if ('A&M' in candidate) != ('A&M' in variant):
            return False
        # Likewise 'State', unless a directional abbreviation was expanded.
        return is_directional or ('State' in candidate) == ('State' in variant)

    matched = [
        candidate
        for candidate in teams
        for variant in variants
        if is_match(candidate, variant)
    ]
    if not matched:
        return name
    # min() returns the first of the shortest matches, in list order.
    return min(matched, key=len)

# Athlete names that athdisambig links as "Name (athlete)|Name" instead of
# plain "Name".  Names must match exactly; names listed in a2dis take
# precedence over this list.
athdis = [
    'Tim Duckworth',
    'Harrison Williams',
    'William Dougherty',
    'Michael Shanahan',
    'Adam Kelly',
    'Anders Eriksson',
    'Vincent Kiprop',
    'Matthew Baxter',
    'Robert Brandt',
    'Jonathan Green',
    'Michael Crozier',
    'Matt Welch',
    'Sean Collins',
    'Craig Hunter',
    'Scott Marshall',
    'Nick Meyer',
    'Jesse Newman',
    'Adrian Williams',
    'Tony White',
    'Sean Richards',
    'Charles Brown',
    'Jonathan Wells',
    'Kenneth Fisher',
    'Jason Smith',
    'Peter Simon',
    'Rachel Wilson',
    'Charlotte Taylor',
    'Erin Clark',
    'Christine Frederick',
    'Margaret Allen',
    'Maddie Gardner',
    'Kate Hall',
    'Lauren Evans',
    'Andrew Gardner',
    'John Rice',
    'John Burt',
    'Elijah Hall',
    'Isaiah Harris',
    'Robert Ford',
    'Jacob Smith',
    'Rodney Rowe',
    'Christian Edwards',
    'Jordan Scott',
    'John Warren',
    'Jeremiah Green',
    'Zachary Johnson',
    'Ty Anderson',
    'Vernon Turner',
    'Greg Thompson',
    'Brian Williams',
    'David Lucas',
    'George Evans',
    'Nicolai Ceban',
    'Carlos Davis',
    'Ashley Taylor',
    'Olivia Baker',
    'Chloe Abbott',
    
]

# Athlete name -> specific qualifier.  athdisambig links these names as
# "Name (qualifier)|Name" and checks this table before the generic
# "(athlete)" list.
a2dis = {
    'William Petersson': 'javelin thrower',
    'Ashley Bryant': 'hammer thrower',
    'Michael Norman': 'sprinter',
}

def athdisambig(ath):
    """Return the wiki link target for athlete name *ath*.

    Names in ``a2dis`` become 'Name (qualifier)|Name' using their own
    qualifier, names in ``athdis`` become 'Name (athlete)|Name', and any
    other name is returned unchanged.  ``a2dis`` is checked first.
    """
    if ath in a2dis:
        qualifier = a2dis[ath]
    elif ath in athdis:
        qualifier = 'athlete'
    else:
        return ath
    return '{0} ({1})|{0}'.format(ath, qualifier)

def _fix_mc(name):
    """Restore the capital after every 'Mc' in *name* ('Mcdonald' -> 'McDonald').

    The text following each 'Mc' is title-cased again.  Every occurrence is
    processed, so nothing after a second 'Mc' is dropped.  A name without
    'Mc' is returned unchanged.
    """
    head, *rest = name.split('Mc')
    return 'Mc'.join([head] + [part.title() for part in rest])


def _is_number(mark):
    """Return True if *mark* is made only of digits once its dots are removed."""
    return mark.replace('.', '').isdigit()


def _tfcalc(mark):
    """Wrap a numeric *mark* in {{T&Fcalc}}; return anything else unchanged."""
    return '{{T&Fcalc|' + mark + '}}' if _is_number(mark) else mark


# (marker looked for in the row's last cell, abbreviation written to Notes).
# Note that 'MR' on the page is emitted as CR and 'CR' as NCAAR.
_NOTE_ABBRS = (('PB', 'PB'), ('MR', 'CR'), ('FR', 'FR'), ('CR', 'NCAAR'))

# Templates shown instead of the bare number for the first three places.
_MEDALS = {1: '{{Gold1}}', 2: '{{Silver2}}', 3: '{{Bronze3}}'}

# Decathlon column labels in output order, and the subset of them whose
# marks are wrapped in {{T&Fcalc}}.
_DECATHLON_EVENTS = ('100m', 'LJ', 'SP', 'HJ', '400m', '110mH', 'DT', 'PV', 'JT', '1500m')
_DECATHLON_TFCALC = ('LJ', 'SP', 'HJ', 'DT', 'PV', 'JT')

# Wikitext row layouts shared by the output modes.
_ATHLETE_ROW = '| {place} ||align=left| [[{athlete}]] || {team}'
_RESULT_ROW = _ATHLETE_ROW + " || '''{result}''' || {notes}"
_WIND_ROW = _ATHLETE_ROW + " || '''{result}''' || {wind} || {notes}"
_RELAY_ROW = "| {place} || {team} || '''{result}''' || {notes}"

# Output mode, taken from the second command-line argument: 'decathlon',
# 'throw', 'distance', 'fieldwind', 'jump', 'relay' or 'sprint'.  Any other
# value prints only the '|-' row separators.
mode = sys.argv[2]

# Emit one wikitext table row per <tr> that carries a class attribute.
for tr in evt.find_all('tr', class_=True):
    tds = tr.find_all('td')

    if mode == 'relay':
        # NOTE(review): the team is read two cells to the right of the
        # 'Team' header index -- presumably body rows carry extra cells
        # compared with the header row; confirm against the page.
        teamtext = tds[tk['Team'] + 2].text.strip()
    else:
        ath_td = tds[tk['Athlete']]
        bold = ath_td.find('b')
        # Use the bolded text inside the cell when there is one.
        words = (bold.text if bold else ath_td.text).split()
        # The first word is kept as written; the rest is title-cased.  The
        # 'Mc' fix runs BEFORE disambiguation so that the lookup sees the
        # final spelling and the '(qualifier)|Name' suffix is not mangled.
        athlete = athdisambig(_fix_mc(words[0] + ' ' + ' '.join(words[1:]).title()))
        if 'Affiliation' in tk:
            teamtext = tds[tk['Affiliation']].text.strip()
        else:
            # No Affiliation column: take the text of the last link in the
            # athlete cell and drop its final word.
            teamtext = ' '.join(ath_td.find_all('a')[-1].text.split()[:-1])

    short_team = teamtext.replace(';', '')
    team = '{{{{College cell|{longteam}|{team}}}}}'.format(
        longteam=team_transform(short_team.strip()) + ' track and field',
        team=short_team,
    )

    # The place column is labelled either 'Place' or 'Pl'.
    rank = tds[tk['Place' if 'Place' in tk else 'Pl']].text.strip()
    if rank.isdigit():
        # Zero-padded sort key; medal template for places 1-3.
        rank = '{{sort|' + rank.zfill(4) + '|' + _MEDALS.get(int(rank), rank) + '}}'
    else:
        # Non-numeric places get the sort key 9999 so they sort last.
        rank = '{{sort|9999|' + rank + '}}'

    # Record/best markers are searched for in the text of the last cell.
    flags = tds[-1].text
    notes = ' '.join(
        '{{AthAbbr|' + abbr + '}}' for marker, abbr in _NOTE_ABBRS if marker in flags
    )

    if mode == 'decathlon':
        print(_ATHLETE_ROW.format(place=rank, athlete=athlete, team=team)
              + '\n! ' + tds[tk['Points']].text.split()[0])
        cells = []
        for mev in _DECATHLON_EVENTS:
            center = tds[tk[mev]].find('center')
            if center:
                # The <center> element's first child holds the mark and its
                # fifth child the points for that event.
                points = center.contents[4]
                mark = center.contents[0].split()[0]
                if mev in _DECATHLON_TFCALC:
                    mark = _tfcalc(mark)
            else:
                points = ''
                mark = ''
            cells.append('{{{{sort|{}|{}<br/>{}}}}}'.format(
                points.zfill(4),
                ("'''" + points + "'''") if points != '' else '',
                mark.strip(),
            ))
        print('| ' + ' || '.join(cells))
    elif mode == 'throw':
        mark = _tfcalc(tds[tk['Best Mark']].text.split()[0].replace('m', ''))
        print(_RESULT_ROW.format(
            place=rank, athlete=athlete, team=team, result=mark, notes=notes))
    elif mode in ('distance', 'sprint'):
        # Both modes print the same columns; no wind column is emitted.
        print(_RESULT_ROW.format(
            place=rank, athlete=athlete, team=team,
            result=tds[tk['Time']].text.strip(), notes=notes))
    elif mode == 'fieldwind':
        mark = _tfcalc(tds[tk['Best Mark']].text.split()[0].replace('m', ''))
        # The wind reading is taken two cells after the 'Best Mark' column.
        print(_WIND_ROW.format(
            place=rank, athlete=athlete, team=team, result=mark,
            wind=tds[tk['Best Mark'] + 2].text.strip(), notes=notes))
    elif mode == 'jump':
        mark = tds[tk['Best Jump']].text.split()[0].replace('m', '')
        # Non-numeric results are rendered as an abbreviation in this mode.
        mark = _tfcalc(mark) if _is_number(mark) else '{{AthAbbr|' + mark + '}}'
        print(_RESULT_ROW.format(
            place=rank, athlete=athlete, team=team, result=mark, notes=notes))
    elif mode == 'relay':
        print(_RELAY_ROW.format(
            place=rank, team=team,
            result=tds[tk['Time']].text.strip(), notes=notes))

    # Row separator, printed after every table row whatever the mode.
    print('|-')