csv-util/csv-util.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

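# Script to massage CSV grade exports (e.g. from Gradescope): drop unwanted
# columns, convert decimal separators, compute an ECTS-weighted average,
# write CSV or Excel output, or run a shell command once per row.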
#
# Minimally tested.  Seems to work.  Use at your own risk.
#
# By James Eagan <james.eagan@telecom-paris.fr>
# https://james.eagan.fr

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import re
import subprocess
import sys
import time

# Names of the input columns that readCSV leaves out when loading a file.
skipCols = [
    'SID',
    'Email',
    'Submission ID',
    'Submission Time',
    'Lateness (H:M:S)',
    'View Count',
    'Submission Count',
]

def readCSV(fileName, sep=',', decimal_char='.', skipRows=0, skipFooter=0):
    """Load a CSV file into a DataFrame, leaving out the columns in skipCols.

    Parameters:
        fileName: path of the CSV file to read.
        sep: column separator used in the file.
        decimal_char: decimal separator used by numbers in the file.
        skipRows: number of data rows to drop right after the header line.
        skipFooter: number of lines to drop from the end of the file.

    Returns:
        The loaded pandas DataFrame.
    """
    # Read only the header line to learn which columns exist, so that usecols
    # can be limited to the columns that are not listed in skipCols.
    header = pd.read_csv(fileName, nrows=0, sep=sep, decimal=decimal_char)
    keepCols = [col for col in header.columns if col not in skipCols]

    # An integer skiprows would also swallow the header (line 0), after which
    # the column names no longer match usecols.  Skipping lines 1..skipRows
    # keeps the header and drops only the leading data rows.
    # skipfooter requires the python parsing engine.
    df = pd.read_csv(fileName, usecols=keepCols, sep=sep, decimal=decimal_char,
                     skiprows=range(1, skipRows + 1), skipfooter=skipFooter,
                     engine='python')
    return df

def massageHeaders(df, args):
    """Optionally split the ECTS column and append a weighted-average row.

    Parameters:
        df: DataFrame to process.  New columns are added to it in place.
        args: object with boolean attributes ``split_ects`` and ``calc_avg``.

    When ``args.split_ects`` is set and an 'ECTS' column exists, its
    "earned/attempted" strings (comma or dot decimals) are split into float
    columns 'ECTS' and 'ECTS attempted'.

    When ``args.calc_avg`` is set and both 'ECTS attempted' and 'Note finale'
    exist, a 'Weighted Grade' column is added and an "Overall" row holding the
    ECTS-weighted average, the earned ECTS total and the mention is appended.

    Returns:
        The resulting DataFrame (a new object when the summary row is added).
    """
    if args.split_ects and 'ECTS' in df.columns:
        # The comma is a literal decimal separator, not a regex.
        df[['ECTS', 'ECTS attempted']] = df['ECTS'].str.replace(',', '.', regex=False) \
                                            .str.split('/', expand=True) \
                                            .astype('float64')

    if args.calc_avg and 'ECTS attempted' in df.columns and 'Note finale' in df.columns:
        df['Weighted Grade'] = df[["ECTS attempted", "Note finale"]].product(axis=1)
        attemptedECTS = df['ECTS attempted'].sum()
        weightedAvg = df["Weighted Grade"].sum() / attemptedECTS
        earnedECTS = df['ECTS'].sum()

        # Averages below 10 (and NaN) get no mention.
        if weightedAvg >= 16:
            mention = "très bien"
        elif weightedAvg >= 14:
            mention = "bien"
        elif weightedAvg >= 12:
            mention = "assez bien"
        elif weightedAvg >= 10:
            mention = "passable"
        else:
            mention = ""

        # DataFrame.append was removed in pandas 2.0; concat is the
        # replacement and behaves the same for a single extra row.
        summary = pd.DataFrame([{"Occurrence d'UE": "Overall",
                                 'Note finale': round(weightedAvg, 2),
                                 'ECTS': earnedECTS,
                                 'Note finale transposée': mention,
                                 }])
        df = pd.concat([df, summary], ignore_index=True)

    return df

def writeExcel(df, fileName):
    """Save df as an Excel workbook at the path fileName."""
    # Handing the path straight to to_excel lets pandas open, write and
    # close the workbook itself.
    df.to_excel(fileName)

def run(df, commandString, args):
    """Run (or, in dry-run mode, print) commandString once per row of df.

    Every ``$N`` in commandString is replaced by the string form of the value
    in the row's N-th column, counting from 1.

    Parameters:
        df: DataFrame whose rows supply the substituted values.
        commandString: shell command template containing ``$N`` placeholders.
        args: object with ``dry_run`` (print the command instead of running
            it) and ``delay`` (seconds to sleep after each row, if non-zero).

    Raises:
        subprocess.CalledProcessError: if a command exits with a non-zero
            status; remaining rows are then not processed.
    """
    # Raw string: "\$" and "\d" are not valid escapes in a normal literal.
    placeholder = re.compile(r"\$(\d+)")

    for _, row in df.iterrows():
        def columnValue(match, row=row):
            # Placeholders are 1-based while .iloc is 0-based.  .iloc is
            # always positional, whatever the column labels are.
            return str(row.iloc[int(match.group(1)) - 1])

        # NOTE(security): cell values are pasted verbatim into a string that
        # is executed by the shell, so shell metacharacters in the data are
        # interpreted.  Only use this with trusted CSV content.
        replacedCommand = placeholder.sub(columnValue, commandString)
        if args.dry_run:
            print(replacedCommand)
        else:
            result = subprocess.run(replacedCommand, capture_output=True, shell=True, text=True)
            if result.stdout:
                print(result.stdout)
            if result.stderr:
                print(result.stderr, file=sys.stderr)
            result.check_returncode()
        if args.delay:
            time.sleep(args.delay)


def parse_args(argv=None):
    """Parse the command line and resolve the separator settings.

    Parameters:
        argv: list of argument strings; None makes argparse read sys.argv.

    Returns:
        The argparse namespace, with ``indecimal_separator`` (decimal
        separator used when reading), ``decimal_separator`` (used when
        writing) and ``sep`` (output column separator) always set.
    """
    import argparse  # only needed for command-line use

    parser = argparse.ArgumentParser(description="Massage Gradescope data")
    parser.add_argument("csv", help="csv file as exported from Gradescope")
    parser.add_argument("-o", "--out", help="file to write output")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-,", "--commas", help="use commas for decimal separator", action="store_true")
    group.add_argument("-.", "--dots", help="use dots for decimal separator", action="store_true")
    group.add_argument("-,.", "--commas2dots", help="convert decimal separator from , to .", action="store_true")
    group.add_argument("-.,", "--dots2commas", help="convert decimal separator from . to ,", action="store_true")
    group.add_argument("-d", "--decimal-separator", help="decimal separator for real numbers", default=",")
    parser.add_argument("-s", "--sep", help="csv column separator (default: ',' when decimal separator is '.' and ';' for ',')")
    parser.add_argument("--insep", help="input column separator", default=",")
    # FIXME: These two aren't really general csv options and should be refactored elsewhere.
    parser.add_argument("--split-ects", help="split ECTS column into two on / separator", action="store_true")
    parser.add_argument("--calc-avg", help="calculate weighted average from 'Note finale' and 'ECTS'", action="store_true")
    parser.add_argument("--calc-mentions", help="calculate mentions from avg")
    parser.add_argument("--run", help="command to run for each row", default=None)
    parser.add_argument("--dry-run", help="do not run anything (when used with --run)", action="store_true")
    parser.add_argument("--delay", help="delay in s to add between calls (when used with --run)", default=0, type=float)
    parser.add_argument("--head", help="limit to first N content lines", metavar="N", action="store", default=False, type=int)
    parser.add_argument("--tail", help="limit to last N content lines", metavar="N", action="store", default=False, type=int)

    args = parser.parse_args(argv)

    # Shortcut flag -> (input decimal separator, output decimal separator).
    # The flags are mutually exclusive, so at most one entry applies.
    shortcuts = {
        'commas2dots': (',', '.'),
        'dots2commas': ('.', ','),
        'dots': ('.', '.'),
        'commas': (',', ','),
    }
    for flag, (inDecimal, outDecimal) in shortcuts.items():
        if getattr(args, flag):
            args.indecimal_separator = inDecimal
            args.decimal_separator = outDecimal
            break
    else:
        # No shortcut given: -d (or its default) only controls the output.
        # The input side falls back to '.', the same default readCSV uses,
        # so the attribute is always defined.
        args.indecimal_separator = '.'

    if not args.sep:
        args.sep = ';' if args.decimal_separator == ',' else ','

    return args


def writeOutput(df, args):
    """Send df to the destination selected by args.

    --out writes an Excel file, otherwise --run executes a command per row,
    otherwise the frame is written as CSV to standard output.
    """
    if args.out:
        writeExcel(df, args.out)
    elif args.run:
        run(df, args.run, args)
    else:
        df.to_csv(sys.stdout, sep=args.sep, decimal=args.decimal_separator)


def nowDoIt():
    """Command-line entry point: parse arguments, read, transform, output."""
    args = parse_args()
    # Negative counts mean "all but": a negative --head drops that many lines
    # from the end of the file, a negative --tail drops them from the start.
    skipFooter = args.head * -1 if args.head and args.head < 0 else 0
    skipRows = args.tail * -1 if args.tail and args.tail < 0 else 0
    df = readCSV(args.csv, args.insep, args.indecimal_separator, skipRows, skipFooter)

    if args.head and args.head > 0:
        df = df.head(args.head)
    if args.tail and args.tail > 0:
        # tail() clamps to the frame length; slicing from len(df) - N would
        # produce a negative start and drop rows when N exceeds len(df).
        df = df.tail(args.tail)

    df = massageHeaders(df, args)
    writeOutput(df, args)


if __name__ == '__main__':
    nowDoIt()
Initial import 2024-10-08 18:23:25 +02:00			`#!/usr/bin/env python3`
			`# -- coding: utf-8 --`

			`# Script to ...`
			`#`
			`# Minimally tested. Seems to work. Use at your own risk.`
			`#`
			`# By James Eagan <james.eagan@telecom-paris.fr>`
			`# https://james.eagan.fr`

			`import warnings`
			`warnings.simplefilter(action='ignore', category=FutureWarning)`

			`import pandas as pd`
			`import re`
			`import subprocess`
			`import sys`
			`import time`

			`skipCols = ['SID', 'Email', 'Submission ID', 'Submission Time',`
			`'Lateness (H:M:S)', 'View Count', 'Submission Count',`
			`]`

			`def readCSV(fileName, sep=',', decimal_char='.', skipRows=0, skipFooter=0):`
			`cols = list(pd.read_csv(fileName, nrows=1, sep=sep, decimal=decimal_char))`
			`keepCols = [col for col in cols if col not in skipCols]`

			`df = pd.read_csv(fileName, usecols=keepCols, sep=sep, decimal=decimal_char, skiprows=skipRows, skipfooter=skipFooter, engine='python')`
			`# df = df.sort_values(by='Last Name')`
			`return df`

			`def massageHeaders(df, args):`
			`if args.split_ects and 'ECTS' in df.columns:`
			`df[['ECTS', 'ECTS attempted']] = df['ECTS'].str.replace(',', '.') \`
			`.str.split('/', expand=True) \`
			`.astype('float64')`

			`if args.calc_avg and 'ECTS attempted' in df.columns and 'Note finale' in df.columns:`
			`df['Weighted Grade'] = df[["ECTS attempted", "Note finale"]].product(axis=1)`
			`attemptedECTS = df['ECTS attempted'].sum()`
			`weightedAvg = df["Weighted Grade"].sum() / attemptedECTS`
			`earnedECTS = df['ECTS'].sum()`
			`mention = "passable" if 10 <= weightedAvg < 12 else \`
			`"assez bien" if 12 <= weightedAvg < 14 else \`
			`"bien" if 14 <= weightedAvg < 16 else \`
			`"très bien" if weightedAvg >= 16 else ""`
			`df = df.append({"Occurrence d'UE": "Overall",`
			`'Note finale': round(weightedAvg, 2),`
			`'ECTS': earnedECTS,`
			`'Note finale transposée': mention`
			`}, ignore_index=True)`

			`return df`

			`def writeExcel(df, fileName):`
			`with pd.ExcelWriter(fileName) as writer:`
			`df.to_excel(writer)`
			`# with open(basename + "-out.csv", 'w') as writer:`
			`# writer.write(df.to_csv())`

			`def run(df, commandString, args):`
			`for idx, row in df.iterrows():`
			`columnValue = lambda match: str( # coerce numbers to strings for commandString`
			`row[int( # coerce matches into ints so pandas treats as col number and not name`
			`match.group(1))`
			`- 1 # use 1-based indexing for columns (pandas uses 0-based, so we subtract)`
			`])`
			`replacedCommand = re.sub("\$(\d+)", columnValue, commandString)`
			`if args.dry_run:`
			`print(replacedCommand)`
			`else:`
			`result = subprocess.run(replacedCommand, capture_output=True, shell=True, text=True)`
			`if result.stdout:`
			`print(result.stdout)`
			`if result.stderr:`
			`print(result.stderr, file=sys.stderr)`
			`result.check_returncode()`
			`if args.delay:`
			`time.sleep(args.delay)`


			`if __name__ == '__main__':`
			`import argparse`

			`def parse_args():`
			`parser = argparse.ArgumentParser(description="Massage Gradescope data")`
			`parser.add_argument("csv", help="csv file as exported from Gradescope")`
			`parser.add_argument("-o", "--out", help="file to write output")`
			`group = parser.add_mutually_exclusive_group()`
			`group.add_argument("-,", "--commas", help="use commas for decimal separator", action="store_true")`
			`group.add_argument("-.", "--dots", help="use dots for decimal separator", action="store_true")`
			`group.add_argument("-,.", "--commas2dots", help="convert decimal separator from , to .", action="store_true")`
			`group.add_argument("-.,", "--dots2commas", help="convert decimal separator from . to ,", action="store_true")`
			`group.add_argument("-d", "--decimal-separator", help="decimal separator for real numbers", default=",")`
			`parser.add_argument("-s", "--sep", help="csv column separator (default: ',' when decimal separator is '.' and ';' for ',')")`
			`parser.add_argument("--insep", help="input column separator", default=",")`
			`# FIXME: These two aren't really general csv options and should be refactored elsewhere.`
			`parser.add_argument("--split-ects", help="split ECTS column into two on / separator", action="store_true")`
			`parser.add_argument("--calc-avg", help="calculate weighted average from 'Note finale' and 'ECTS'", action="store_true")`
			`parser.add_argument("--calc-mentions", help="calculate mentions from avg")`
			`parser.add_argument("--run", help="command to run for each row", default=None)`
			`parser.add_argument("--dry-run", help="do not run anything (when used with --run)", action="store_true")`
			`parser.add_argument("--delay", help="delay in s to add between calls (when used with --run)", default=0, type=float)`
			`parser.add_argument("--head", help="limit to first N content lines", metavar="N", action="store", default=False, type=int)`
			`parser.add_argument("--tail", help="limit to last N content lines", metavar="N", action="store", default=False, type=int)`

			`args = parser.parse_args()`

			`# FIXME : broken logic`
			`if args.commas2dots:`
			`args.indecimal_separator = ','`
			`args.decimal_separator = '.'`
			`elif args.dots2commas:`
			`args.indecimal_separator = '.'`
			`args.decimal_separator = ","`
			`elif args.dots:`
			`args.indecimal_separator = "."`
			`args.decimal_separator = "."`
			`elif args.commas:`
			`args.indecimal_separator = "."`
			`args.decimal_separator = ","`

			`if not args.sep:`
			`args.sep = ';' if args.decimal_separator == ',' else ','`

			`return args`

			`def writeOutput(df, args):`
			`if args.out:`
			`writeExcel(df, args.out)`
			`elif args.run:`
			`run(df, args.run, args)`
			`else :`
			`df.to_csv(sys.stdout, sep=args.sep, decimal=args.decimal_separator)`

			`def nowDoIt():`
			`args = parse_args()`
			`skipFooter = args.head * -1 if args.head and args.head < 0 else 0`
			`skipRows = args.tail * -1 if args.tail and args.tail < 0 else 0`
			`df = readCSV(args.csv, args.insep, args.indecimal_separator, skipRows, skipFooter)`

			`if args.head and args.head > 0:`
			`df = df.iloc[:args.head]`
			`if args.tail and args.tail > 0:`
			`df = df.iloc[len(df) - args.tail:]`

			`df = massageHeaders(df, args)`
			`writeOutput(df, args)`

			`nowDoIt()`