Tuesday 13 October 2015

scramble values in csv file

I need to scramble (mix up) the values in certain columns of a CSV file, for security reasons. The Python script takes the positions of the columns to scramble as arguments; you can specify more than one column. It keeps the first 8 characters of each value and shuffles the rest. Change the `keep` value in the script if you want to keep more or fewer characters.
SHELL PART
The shell script does the process control here, so that the job is never started again while a previous run is still active.

#!/bin/sh
# Wrapper around xdrscramble.py: makes sure only one instance runs at a time,
# then scrambles every matching file found in /folder.

# used for process control
probe=SMTH
pidfile="/tmp/.transferfile$probe.pid"

# If a previous run left a PID file and that process is still alive, bail out.
if [ -e "$pidfile" ]
then
  PID=$(cat "$pidfile")
  # An empty PID would turn this into a bare `ps`, which always prints several
  # lines and would block every future run; require a value and query that
  # one process explicitly with -p.
  if [ -n "$PID" ] && ps -p "$PID" > /dev/null 2>&1
  then
    exit 0
  fi
fi

echo $$ > "$pidfile"
# Remove the PID file on normal exit so a recycled PID cannot block later runs.
trap 'rm -f "$pidfile"' EXIT

# Glob instead of parsing `ls` output, so file names containing spaces or
# backslashes reach the Python script intact.
for path in /folder/filename*
do
  # When nothing matches, the pattern is left unexpanded; skip it.
  [ -e "$path" ] || continue
  /usr/bin/python /pathto/xdrscramble.py -i "$path" -o /newfolder/ -c 7 9 16
done







 PYTHON PART


import csv
import os
import sys
import getopt
import random
from random import shuffle
# Number of leading characters of each selected column value that are kept
# as-is; only the characters after this prefix are handed to mixup().
keep = 8


def _open_csv(path, mode):
    """Open *path* the way the csv module expects on this interpreter.

    Python 3's csv module wants text files opened with newline='', while
    Python 2's wants binary mode.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, mode + 'b')


def main(argv):
    """Scramble the selected columns of a CSV file and move it to a folder.

    Command line: -i <inputfile> -o <outputdir> -c column1 column2 ...

    ``-c`` is a plain flag and must come last: the positional arguments that
    follow it are the 1-based numbers of the columns to scramble.  For every
    row, each selected value keeps its first ``keep`` characters and the
    remaining characters are shuffled with ``mixup``.  Values equal to "_"
    are left untouched, and rows too short to contain a selected column are
    copied through unchanged for that column.

    The result is written to ``tmp_<name>`` next to the input file, moved to
    ``<outputdir>/<name>``, and the input file is then removed.

    Args:
        argv: command-line arguments without the program name.

    Raises:
        SystemExit: status 2 on bad or missing options, status None for -h.
        ValueError: if a column argument is not a positive integer.
    """
    import shutil  # local import: only needed for the final move

    usage = 'xdrscramble.py -i <inputfile> -o <outputdir> -c column1 column2'
    inputfile = ''
    outputfile = ''
    mixthem = []
    try:
        opts, args = getopt.getopt(
            argv, "hi:o:c", ["ifile=", "ofile=", "columns"])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-c", "--columns"):
            mixthem = args

    # Refuse to run half-configured: silently copying a file without
    # scrambling anything would defeat the purpose of the tool.
    if not inputfile or not outputfile or not mixthem:
        print(usage)
        sys.exit(2)

    # Convert once, up front, so a bad column fails before any file is made.
    indexes = [int(column) - 1 for column in mixthem]
    if min(indexes) < 0:
        raise ValueError("column numbers are 1-based and must be positive")

    inputdir = os.path.dirname(inputfile)
    inputfilename = os.path.basename(inputfile)
    tmp_path = os.path.join(inputdir, "tmp_" + inputfilename)
    destination = os.path.join(outputfile, inputfilename)
    print(tmp_path)

    # The with-block guarantees both files are flushed and closed before the
    # temporary file is moved.
    with _open_csv(inputfile, 'r') as f, _open_csv(tmp_path, 'w') as fo:
        writer = csv.writer(fo)
        for row in csv.reader(f):
            for index in indexes:
                if index < len(row) and row[index] != "_":
                    value = row[index]
                    # Keep the prefix and append the shuffled remainder.
                    row[index] = value[:keep] + mixup(value[keep:])
            writer.writerow(row)

    # shutil.move avoids building a shell command out of file names.
    shutil.move(tmp_path, destination)
    # If the output folder is the input folder, the move already replaced the
    # input file; removing it now would delete the result.
    if os.path.abspath(destination) != os.path.abspath(inputfile):
        os.remove(inputfile)







def mixup(rowx):
    """Return a new string holding the characters of *rowx* in random order."""
    # random.shuffle works in place, so it needs a mutable copy of the text.
    chars = [ch for ch in rowx]
    random.shuffle(chars)
    scrambled = ''.join(chars)
    return scrambled



if __name__ == "__main__":
    # Run as a script: hand everything after the program name to main().
    cli_arguments = sys.argv[1:]
    main(cli_arguments)