#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Massage a CSV file exported from Gradescope.

The script reads the CSV, drops the bookkeeping columns listed in
``skipCols``, optionally splits an ``ECTS`` column and appends a weighted
average row, and then either writes an Excel file (``--out``), runs a shell
command once per row (``--run``), or prints CSV to standard output.
"""

# Minimally tested. Seems to work. Use at your own risk.
#
# By James Eagan
# https://james.eagan.fr
#
# Provenance: added as csv-util.py by commit 07a5c10 ("Initial import",
# 2024-10-08).

import warnings
# Installed before pandas is imported so FutureWarnings stay silent.
warnings.simplefilter(action='ignore', category=FutureWarning)

import argparse
import re
import subprocess
import sys
import time

import pandas as pd

# Columns that are never carried over from the input file.
skipCols = ['SID', 'Email', 'Submission ID', 'Submission Time',
            'Lateness (H:M:S)', 'View Count', 'Submission Count',
            ]

# Matches the 1-based column placeholders ($1, $2, ...) of a --run command.
_PLACEHOLDER = re.compile(r"\$(\d+)")

# Lower bound of each mention, highest first.
_MENTIONS = ((16, "très bien"), (14, "bien"), (12, "assez bien"), (10, "passable"))


def readCSV(fileName, sep=',', decimal_char='.', skipRows=0, skipFooter=0):
    """Load ``fileName`` into a DataFrame without the ``skipCols`` columns.

    Args:
        fileName: path of the CSV file to read.
        sep: column separator of the input file.
        decimal_char: decimal mark used by numbers in the input file.
        skipRows: number of leading *data* rows to drop; the header line is
            always kept.
        skipFooter: number of trailing lines to drop.

    Returns:
        The parsed DataFrame, restricted to the columns not in ``skipCols``.
    """
    cols = list(pd.read_csv(fileName, nrows=1, sep=sep, decimal=decimal_char))
    keepCols = [col for col in cols if col not in skipCols]

    # An integer ``skiprows`` would also swallow the header line, after which
    # ``usecols`` could no longer find the column names.  Skipping the line
    # numbers 1..skipRows drops data rows only (line 0 is the header).
    dataRowsToSkip = list(range(1, skipRows + 1)) or None

    # skipfooter is only supported by the python engine.
    return pd.read_csv(fileName, usecols=keepCols, sep=sep, decimal=decimal_char,
                       skiprows=dataRowsToSkip, skipfooter=skipFooter,
                       engine='python')


def _mentionFor(average):
    """Return the mention earned by ``average`` ("" below 10 or for NaN)."""
    for threshold, label in _MENTIONS:
        if average >= threshold:
            return label
    return ""


def massageHeaders(df, args):
    """Apply the optional ECTS transformations selected in ``args``.

    With ``args.split_ects`` an ``ECTS`` column holding ``earned/attempted``
    strings is split into numeric ``ECTS`` and ``ECTS attempted`` columns
    (decimal commas are accepted).  With ``args.calc_avg`` a
    ``Weighted Grade`` column is added and an "Overall" row holding the
    weighted average, the earned ECTS total and the mention is appended.

    Args:
        df: DataFrame to transform; new columns are added to it in place.
        args: namespace providing ``split_ects`` and ``calc_avg``.

    Returns:
        The transformed DataFrame (a new object when the summary row is added).
    """
    if args.split_ects and 'ECTS' in df.columns:
        df[['ECTS', 'ECTS attempted']] = df['ECTS'].str.replace(',', '.', regex=False) \
                                                   .str.split('/', expand=True) \
                                                   .astype('float64')

    if args.calc_avg and 'ECTS attempted' in df.columns and 'Note finale' in df.columns:
        df['Weighted Grade'] = df[["ECTS attempted", "Note finale"]].product(axis=1)
        attemptedECTS = df['ECTS attempted'].sum()
        weightedAvg = df["Weighted Grade"].sum() / attemptedECTS
        earnedECTS = df['ECTS'].sum()

        # DataFrame.append no longer exists in pandas 2.x; concat is the
        # supported way to add the summary row.
        summary = pd.DataFrame([{"Occurrence d'UE": "Overall",
                                 'Note finale': round(weightedAvg, 2),
                                 'ECTS': earnedECTS,
                                 'Note finale transposée': _mentionFor(weightedAvg),
                                 }])
        df = pd.concat([df, summary], ignore_index=True)

    return df


def writeExcel(df, fileName):
    """Write ``df`` to the Excel workbook ``fileName``."""
    with pd.ExcelWriter(fileName) as writer:
        df.to_excel(writer)


def run(df, commandString, args):
    """Run ``commandString`` through the shell once for every row of ``df``.

    ``$1``, ``$2``, ... in ``commandString`` are replaced by the row's first,
    second, ... column value.  With ``args.dry_run`` the resulting commands
    are printed instead of executed.

    Args:
        df: rows to iterate over.
        commandString: shell command template.
        args: namespace providing ``dry_run`` and ``delay`` (seconds to wait
            after each executed command).

    Raises:
        subprocess.CalledProcessError: when a command exits with a non-zero
            status; remaining rows are not processed.
        IndexError: when a placeholder refers to a column that does not exist.
    """
    for _, row in df.iterrows():
        def columnValue(match, row=row):
            # Placeholders are 1-based, pandas positions are 0-based.  iloc
            # makes the lookup positional regardless of the column labels.
            return str(row.iloc[int(match.group(1)) - 1])

        replacedCommand = _PLACEHOLDER.sub(columnValue, commandString)
        if args.dry_run:
            print(replacedCommand)
            continue

        # NOTE(review): cell values are pasted into the shell command
        # unquoted, so a crafted CSV cell can inject arbitrary shell code.
        # Only use --run on trusted data (or quote with shlex.quote).
        result = subprocess.run(replacedCommand, capture_output=True, shell=True, text=True)
        if result.stdout:
            print(result.stdout)
        if result.stderr:
            print(result.stderr, file=sys.stderr)
        result.check_returncode()
        if args.delay:
            time.sleep(args.delay)


def parse_args(argv=None):
    """Parse the command line and resolve the separator settings.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The argparse namespace, extended with ``indecimal_separator`` and
        with ``decimal_separator`` / ``sep`` filled in.
    """
    parser = argparse.ArgumentParser(description="Massage Gradescope data")
    parser.add_argument("csv", help="csv file as exported from Gradescope")
    parser.add_argument("-o", "--out", help="file to write output")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-,", "--commas", help="use commas for decimal separator", action="store_true")
    group.add_argument("-.", "--dots", help="use dots for decimal separator", action="store_true")
    group.add_argument("-,.", "--commas2dots", help="convert decimal separator from , to .", action="store_true")
    group.add_argument("-.,", "--dots2commas", help="convert decimal separator from . to ,", action="store_true")
    group.add_argument("-d", "--decimal-separator", help="decimal separator for real numbers", default=",")
    parser.add_argument("-s", "--sep", help="csv column separator (default: ',' when decimal separator is '.' and ';' for ',')")
    parser.add_argument("--insep", help="input column separator", default=",")
    # FIXME: These two aren't really general csv options and should be refactored elsewhere.
    parser.add_argument("--split-ects", help="split ECTS column into two on / separator", action="store_true")
    parser.add_argument("--calc-avg", help="calculate weighted average from 'Note finale' and 'ECTS'", action="store_true")
    # Parsed but not used anywhere in this script.
    parser.add_argument("--calc-mentions", help="calculate mentions from avg")
    parser.add_argument("--run", help="command to run for each row", default=None)
    parser.add_argument("--dry-run", help="do not run anything (when used with --run)", action="store_true")
    parser.add_argument("--delay", help="delay in s to add between calls (when used with --run)", default=0, type=float)
    parser.add_argument("--head", help="limit to first N content lines", metavar="N", action="store", default=False, type=int)
    parser.add_argument("--tail", help="limit to last N content lines", metavar="N", action="store", default=False, type=int)

    args = parser.parse_args(argv)

    # (input decimal mark, output decimal mark) for each flag.
    # NOTE(review): --commas behaves exactly like --dots2commas (reads '.',
    # writes ','); confirm whether it was meant to read ',' as well.
    if args.commas2dots:
        inDecimal, outDecimal = ',', '.'
    elif args.dots2commas:
        inDecimal, outDecimal = '.', ','
    elif args.dots:
        inDecimal, outDecimal = '.', '.'
    elif args.commas:
        inDecimal, outDecimal = '.', ','
    else:
        # No flag given: the input is read with '.', which cannot clash with
        # the default ',' input column separator; the output follows -d.
        inDecimal, outDecimal = '.', args.decimal_separator
    args.indecimal_separator = inDecimal
    args.decimal_separator = outDecimal

    if not args.sep:
        args.sep = ';' if args.decimal_separator == ',' else ','

    return args


def writeOutput(df, args):
    """Send ``df`` to the first destination selected in ``args``.

    ``--out`` writes an Excel file, otherwise ``--run`` executes a command
    per row, otherwise CSV is printed to standard output.
    """
    if args.out:
        writeExcel(df, args.out)
    elif args.run:
        run(df, args.run, args)
    else:
        df.to_csv(sys.stdout, sep=args.sep, decimal=args.decimal_separator)


def nowDoIt(argv=None):
    """Entry point: parse ``argv``, read the CSV, transform it and emit it.

    Positive ``--head`` / ``--tail`` keep the first / last N rows; negative
    values drop the last / first N rows while the file is being read.
    """
    args = parse_args(argv)
    skipFooter = -args.head if args.head and args.head < 0 else 0
    skipRows = -args.tail if args.tail and args.tail < 0 else 0
    df = readCSV(args.csv, args.insep, args.indecimal_separator, skipRows, skipFooter)

    if args.head and args.head > 0:
        df = df.head(args.head)
    if args.tail and args.tail > 0:
        # tail() keeps every row when N exceeds the row count, whereas
        # iloc[len(df) - N:] wraps around to a negative start and drops rows.
        df = df.tail(args.tail)

    df = massageHeaders(df, args)
    writeOutput(df, args)


if __name__ == '__main__':
    nowDoIt()