Initial import
This commit is contained in:
commit
07a5c10be2
151
csv-util.py
Executable file
151
csv-util.py
Executable file
|
@ -0,0 +1,151 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
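# Script to massage a CSV export (e.g. from Gradescope): drop bookkeeping
# columns, convert decimal separators, optionally compute an ECTS-weighted
# average, then write CSV/Excel output or run a shell command for each row.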
|
||||||
|
#
|
||||||
|
# Minimally tested. Seems to work. Use at your own risk.
|
||||||
|
#
|
||||||
|
# By James Eagan <james.eagan@telecom-paris.fr>
|
||||||
|
# https://james.eagan.fr
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
warnings.simplefilter(action='ignore', category=FutureWarning)
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Columns of the exported CSV that readCSV() drops while loading.
skipCols = ['SID', 'Email', 'Submission ID', 'Submission Time',
            'Lateness (H:M:S)', 'View Count', 'Submission Count',
            ]


def readCSV(fileName, sep=',', decimal_char='.', skipRows=0, skipFooter=0):
    """Load a CSV file into a DataFrame, leaving out the columns in ``skipCols``.

    Args:
        fileName: path of the CSV file to read.
        sep: column separator used in the file.
        decimal_char: character used as the decimal mark in the file.
        skipRows: number of leading *data* rows to drop (the header line is
            always kept).
        skipFooter: number of trailing rows to drop.

    Returns:
        A ``pandas.DataFrame`` holding the remaining rows and columns.
    """
    # An integer ``skiprows`` would also swallow the header line, so the
    # column names would no longer match ``usecols``.  Skipping the explicit
    # line numbers 1..skipRows keeps line 0 (the header) in place.
    dataRowsToSkip = list(range(1, skipRows + 1))

    # A callable ``usecols`` filters on the header names directly, so there is
    # no need to read the file a first time just to discover the columns.
    return pd.read_csv(
        fileName,
        usecols=lambda col: col not in skipCols,
        sep=sep,
        decimal=decimal_char,
        skiprows=dataRowsToSkip,
        skipfooter=skipFooter,
        engine='python',  # skipfooter is only supported by the python engine
    )
|
||||||
|
|
||||||
|
def massageHeaders(df, args):
    """Optionally split the ECTS column and append a weighted-average row.

    Args:
        df: DataFrame to process.  When the ECTS column is split, the new
            columns are written into this frame in place.
        args: namespace providing the booleans ``split_ects`` and
            ``calc_avg``.

    Returns:
        The processed DataFrame.  When the average is computed, this is a new
        frame with one extra "Overall" row at the end.
    """
    if args.split_ects and 'ECTS' in df.columns:
        # "earned/attempted" with decimal commas -> two float columns.
        df[['ECTS', 'ECTS attempted']] = (
            df['ECTS'].str.replace(',', '.', regex=False)
            .str.split('/', expand=True)
            .astype('float64')
        )

    if args.calc_avg and 'ECTS attempted' in df.columns and 'Note finale' in df.columns:
        # NOTE(review): product() skips NaN, so a row with a missing grade
        # contributes its attempted ECTS unchanged -- confirm this is intended.
        df['Weighted Grade'] = df[["ECTS attempted", "Note finale"]].product(axis=1)
        attemptedECTS = df['ECTS attempted'].sum()
        weightedAvg = df["Weighted Grade"].sum() / attemptedECTS
        earnedECTS = df['ECTS'].sum()

        # Averages below 10 (or NaN) get no mention.
        if weightedAvg >= 16:
            mention = "très bien"
        elif weightedAvg >= 14:
            mention = "bien"
        elif weightedAvg >= 12:
            mention = "assez bien"
        elif weightedAvg >= 10:
            mention = "passable"
        else:
            mention = ""

        summaryRow = pd.DataFrame([{
            "Occurrence d'UE": "Overall",
            'Note finale': round(weightedAvg, 2),
            'ECTS': earnedECTS,
            'Note finale transposée': mention,
        }])
        # DataFrame.append() was removed in pandas 2.0; concat is the
        # supported way to add the row and works on older versions too.
        df = pd.concat([df, summaryRow], ignore_index=True)

    return df
|
||||||
|
|
||||||
|
def writeExcel(df, fileName):
    """Save ``df`` as an Excel workbook at ``fileName``."""
    workbook = pd.ExcelWriter(fileName)
    # Leaving the context closes the writer.
    with workbook:
        df.to_excel(workbook)
|
||||||
|
|
||||||
|
def run(df, commandString, args):
    """Run (or print) a shell command once for every row of ``df``.

    Each ``$N`` in ``commandString`` is replaced by the string value of the
    row's N-th column, counting columns from 1.

    Args:
        df: DataFrame whose rows drive the commands.
        commandString: command template containing ``$N`` placeholders.
        args: namespace providing ``dry_run`` (print the commands instead of
            executing them) and ``delay`` (seconds to sleep after each
            executed command).

    Raises:
        subprocess.CalledProcessError: if a command exits with a non-zero
            status; the remaining rows are not processed.
        IndexError: if a placeholder refers to a column that does not exist.
    """
    placeholder = re.compile(r"\$(\d+)")  # raw string: "\$" and "\d" are regex escapes

    for _, row in df.iterrows():
        def columnValue(match, row=row):
            # Placeholders are 1-based; .iloc is 0-based and always positional,
            # whatever the column labels are.
            return str(row.iloc[int(match.group(1)) - 1])

        replacedCommand = placeholder.sub(columnValue, commandString)

        if args.dry_run:
            print(replacedCommand)
            continue

        # SECURITY: cell values are pasted into a shell command line verbatim.
        # Only use this with trusted CSV data, or quote the placeholders
        # appropriately in the command template.
        result = subprocess.run(replacedCommand, capture_output=True, shell=True, text=True)
        if result.stdout:
            print(result.stdout)
        if result.stderr:
            print(result.stderr, file=sys.stderr)
        result.check_returncode()

        if args.delay:
            time.sleep(args.delay)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
def parse_args(argv=None):
    """Parse the command line and normalise the separator options.

    Args:
        argv: list of argument strings to parse; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        The argparse namespace.  Besides the declared options it always
        carries ``indecimal_separator`` (decimal mark expected in the input),
        and ``sep`` is filled in when it was not given explicitly.
    """
    import argparse  # local import so the function does not depend on the caller's imports

    parser = argparse.ArgumentParser(description="Massage Gradescope data")
    parser.add_argument("csv", help="csv file as exported from Gradescope")
    parser.add_argument("-o", "--out", help="file to write output")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-,", "--commas", help="use commas for decimal separator", action="store_true")
    group.add_argument("-.", "--dots", help="use dots for decimal separator", action="store_true")
    group.add_argument("-,.", "--commas2dots", help="convert decimal separator from , to .", action="store_true")
    group.add_argument("-.,", "--dots2commas", help="convert decimal separator from . to ,", action="store_true")
    group.add_argument("-d", "--decimal-separator", help="decimal separator for real numbers", default=",")
    parser.add_argument("-s", "--sep", help="csv column separator (default: ',' when decimal separator is '.' and ';' for ',')")
    parser.add_argument("--insep", help="input column separator", default=",")
    # FIXME: These two aren't really general csv options and should be refactored elsewhere.
    parser.add_argument("--split-ects", help="split ECTS column into two on / separator", action="store_true")
    parser.add_argument("--calc-avg", help="calculate weighted average from 'Note finale' and 'ECTS'", action="store_true")
    # NOTE(review): takes a value and is not read anywhere in the visible code.
    parser.add_argument("--calc-mentions", help="calculate mentions from avg")
    parser.add_argument("--run", help="command to run for each row", default=None)
    parser.add_argument("--dry-run", help="do not run anything (when used with --run)", action="store_true")
    parser.add_argument("--delay", help="delay in s to add between calls (when used with --run)", default=0, type=float)
    parser.add_argument("--head", help="limit to first N content lines", metavar="N", action="store", default=0, type=int)
    parser.add_argument("--tail", help="limit to last N content lines", metavar="N", action="store", default=0, type=int)

    args = parser.parse_args(argv)

    # The input decimal mark must always be defined: previously it was only
    # set by the four preset flags, so a plain invocation (or -d) failed later
    # with AttributeError.  Without a preset the input is assumed to use dots.
    args.indecimal_separator = "."

    # (flag, input decimal mark, output decimal mark); the flags are mutually
    # exclusive, so at most one entry applies.
    # NOTE(review): --commas currently behaves exactly like --dots2commas;
    # confirm whether it should instead expect commas in the input as well.
    presets = (
        ("commas2dots", ",", "."),
        ("dots2commas", ".", ","),
        ("dots", ".", "."),
        ("commas", ".", ","),
    )
    for flag, inDecimal, outDecimal in presets:
        if getattr(args, flag):
            args.indecimal_separator = inDecimal
            args.decimal_separator = outDecimal
            break

    if not args.sep:
        # A comma decimal mark would clash with a comma column separator.
        args.sep = ';' if args.decimal_separator == ',' else ','

    return args
|
||||||
|
|
||||||
|
def writeOutput(df, args):
    """Send ``df`` to exactly one destination, chosen from ``args``.

    ``args.out`` wins and writes an Excel file; otherwise ``args.run``
    executes the per-row command; otherwise the frame is printed as CSV on
    standard output using ``args.sep`` and ``args.decimal_separator``.
    """
    if args.out:
        writeExcel(df, args.out)
        return

    if args.run:
        run(df, args.run, args)
        return

    df.to_csv(sys.stdout, sep=args.sep, decimal=args.decimal_separator)
|
||||||
|
|
||||||
|
def nowDoIt():
    """Script entry point: parse options, load the CSV, process, and output.

    ``--head N`` / ``--tail N`` with a positive N keep the first / last N
    rows.  A negative N is forwarded to the reader instead: ``--head -N``
    drops the last N rows and ``--tail -N`` drops the first N rows.
    """
    args = parse_args()
    head = args.head or 0
    tail = args.tail or 0

    skipFooter = -head if head < 0 else 0
    skipRows = -tail if tail < 0 else 0
    # Fall back to dots when the parser did not record an input decimal mark.
    inDecimal = getattr(args, 'indecimal_separator', '.')
    df = readCSV(args.csv, args.insep, inDecimal, skipRows, skipFooter)

    if head > 0:
        df = df.head(head)
    if tail > 0:
        # tail() copes with N larger than the frame; the previous
        # iloc[len(df) - N:] wrapped around to a negative start and returned
        # only part of the rows in that case.
        df = df.tail(tail)

    df = massageHeaders(df, args)
    writeOutput(df, args)
|
||||||
|
|
||||||
|
nowDoIt()
|
||||||
|
|
Loading…
Reference in a new issue