Skip to content

Commit 8b65a7c

Browse files
committed
csv splitter
1 parent 681685c commit 8b65a7c

File tree

2 files changed

+268
-0
lines changed

2 files changed

+268
-0
lines changed

12_csv_split.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
### WIP
2+
3+
import sys
4+
import os
5+
import getopt
6+
import csv
7+
8+
"""
9+
Splits a CSV file into multiple pieces based on command line arguments.
10+
11+
Arguments:
12+
`-h`: help file of usage of the script
13+
`-i`: input file name
14+
`-o`: base name for the output file(s); the `.csv` extension is added automatically
15+
`-r`: row limit to split
16+
`-c`: index of the column to split on (one output file per unique value)
17+
18+
Default settings:
19+
`output_path` is the current directory
20+
`keep_headers` is on (headers will be kept)
21+
`delimiter` is ,
22+
23+
Example usage:
24+
# split by every 10000 rows
25+
>> python 12_csv_split.py -i input.csv -o rownumber -r 10000
26+
# split by unique items in column 0
27+
>> python 12_csv_split.py -i input.csv -o userid -c 0
28+
# access help
29+
>> python 12_csv_split.py -h
30+
31+
"""
32+
33+
def main(argv):
    """Entry point: parse the command line in *argv* and split the CSV.

    Args:
        argv: command line tokens without the program name.
    """
    parse_file(grab_command_line_arguments(argv))
37+
38+
39+
def grab_command_line_arguments(argv):
    """Parse and validate the command line options of the CSV splitter.

    Args:
        argv: list of command line tokens without the program name
            (i.e. ``sys.argv[1:]``).

    Returns:
        dict with the keys ``input_file``, ``output_file``, ``rowlimit`` and
        ``columnindex``.  Values are the raw option strings.  All four keys
        are always present; exactly one of ``rowlimit`` / ``columnindex`` is
        non-empty and the other one is ``''``.

    Raises:
        SystemExit: with status 0 after printing the usage line for
            ``-h`` / ``--help``; with status 2 when the options are unknown,
            missing, contradictory or not numeric where a number is needed.
    """
    usage = 'csvsplit.py -i <inputfile> -r <row limit> -c <column index> -o <outputfile>'
    long_options = ["help", "ifile=", "ofile=", "rowlimit=", "columnindex="]

    try:
        opts, _ = getopt.getopt(argv, "hi:o:r:c:", long_options)
    except getopt.GetoptError as err:
        # Unknown option or missing option value: report it instead of
        # dying with a traceback.
        print("{}. Use `-h` for help.".format(err))
        sys.exit(2)

    # end if no arguments provided
    if not opts:
        print("No options provided. Try again. Use `-h` for help.")
        sys.exit(2)

    inputfile = outputfile = rowlimit = columnindex = ''
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-r", "--rowlimit"):
            rowlimit = arg
        elif opt in ("-c", "--columnindex"):
            columnindex = arg

    # Echo what was understood so the user can spot typos.
    print("\nArguments:")
    if inputfile:
        print("Input file is '{}'".format(inputfile))
    if outputfile:
        print("Output file is '{}'".format(outputfile))
    if rowlimit:
        print("Rowlimit is '{}'".format(rowlimit))
    if columnindex:
        print("Columnindex is '{}'".format(columnindex))

    # Collect every problem before exiting so the user sees all of them
    # in a single run.
    problems = []
    if not inputfile:
        problems.append("Please enter an input file.")
    if not outputfile:
        problems.append("Please enter an output file.")
    if rowlimit and columnindex:
        problems.append("Please use either a rowlimit or a columnindex, not both.")
    elif not (rowlimit or columnindex):
        problems.append("Please enter either a rowlimit or a columnindex.")
    elif rowlimit:
        limit = _parse_int(rowlimit)
        if limit is None or limit < 1:
            problems.append("Rowlimit must be a positive integer.")
    elif _parse_int(columnindex) is None:
        problems.append("Columnindex must be an integer.")

    if problems:
        for problem in problems:
            print(problem)
        sys.exit(2)

    # Every key is always present so consumers can index the dict directly.
    return {
        "input_file": inputfile,
        "output_file": outputfile,
        "rowlimit": rowlimit,
        "columnindex": columnindex,
    }


def _parse_int(text):
    """Return ``int(text)``, or None when *text* is not an integer literal."""
    try:
        return int(text)
    except ValueError:
        return None
98+
99+
100+
def parse_file(argument_dict):
    """Split the input CSV file into several output CSV files.

    Two modes are supported; ``rowlimit`` wins when both are supplied:

    * ``rowlimit``: consecutive pieces of at most that many data rows,
      written to ``<output_file>_<piece number>.csv`` (numbered from 1).
    * ``columnindex``: one file per distinct value found in that column,
      written to ``<output_file>_<value>.csv``.

    The first line of the input is treated as the header and is repeated at
    the top of every output file.  Output goes to the current directory
    unless ``output_file`` is an absolute path.

    Args:
        argument_dict: mapping with ``input_file`` and ``output_file`` plus
            a non-empty ``rowlimit`` or ``columnindex`` (strings or ints).
            Missing or empty ``rowlimit`` / ``columnindex`` entries are
            treated as "not requested".

    Raises:
        ValueError: if ``rowlimit`` is smaller than 1, or a numeric option
            cannot be converted with ``int``.
        IOError / OSError: if the input cannot be read or an output file
            cannot be written.
    """
    output_path = '.'
    keep_headers = True
    delimiter = ','

    rowlimit = argument_dict.get("rowlimit")
    columnindex = argument_dict.get("columnindex")
    if not (rowlimit or columnindex):
        print("Nothing to do: please provide a rowlimit or a columnindex (see -h).")
        return

    output_base = os.path.join(output_path, argument_dict["output_file"])

    with _open_csv(argument_dict["input_file"], 'r') as source:
        reader = csv.reader(source, delimiter=delimiter)
        # next(reader, None) keeps an empty input from raising StopIteration.
        headers = next(reader, None) if keep_headers else None
        if rowlimit:
            _split_by_row_limit(reader, headers, int(rowlimit), output_base, delimiter)
        else:
            _split_by_column(reader, headers, int(columnindex), output_base, delimiter)


def _open_csv(path, mode):
    """Open *path* the way the csv module expects on the running Python."""
    if sys.version_info[0] < 3:
        # Python 2: csv needs binary files to control line endings itself.
        return open(path, mode + 'b')
    # Python 3: text mode with newline translation disabled.
    return open(path, mode, newline='')


def _start_piece(path, headers, delimiter):
    """Create the output file *path*, write the header, return (file, writer)."""
    out_file = _open_csv(path, 'w')
    writer = csv.writer(out_file, delimiter=delimiter)
    if headers is not None:
        writer.writerow(headers)
    return out_file, writer


def _split_by_row_limit(reader, headers, rowlimit, output_base, delimiter):
    """Write the rows of *reader* into numbered pieces of *rowlimit* rows."""
    if rowlimit < 1:
        raise ValueError("rowlimit must be a positive integer")

    out_file = None
    writer = None
    piece = 0
    rows_in_piece = 0
    try:
        for row in reader:
            if out_file is None or rows_in_piece >= rowlimit:
                # Current piece is full (or none started yet): roll over.
                if out_file is not None:
                    out_file.close()
                piece += 1
                out_file, writer = _start_piece(
                    "{}_{}.csv".format(output_base, piece), headers, delimiter)
                rows_in_piece = 0
            writer.writerow(row)
            rows_in_piece += 1
        if out_file is None and headers is not None:
            # Header-only input still yields one (header-only) piece.
            out_file, writer = _start_piece(
                "{}_{}.csv".format(output_base, 1), headers, delimiter)
    finally:
        if out_file is not None:
            out_file.close()


def _split_by_column(reader, headers, columnindex, output_base, delimiter):
    """Write each row of *reader* to the file named after row[columnindex]."""
    started = set()
    skipped = 0
    for row in reader:
        try:
            value = row[columnindex]
        except IndexError:
            # Blank or short line: there is no value to classify it by.
            skipped += 1
            continue

        # Cell values come from the data file; keep them from being
        # interpreted as directory components of the output path.
        name = value
        for separator in (os.sep, os.altsep):
            if separator:
                name = name.replace(separator, '_')

        # Files are keyed by the sanitized name so two values that map to
        # the same file never truncate each other.
        is_new = name not in started
        started.add(name)

        path = "{}_{}.csv".format(output_base, name)
        # One short-lived handle per row keeps the number of open files
        # bounded no matter how many distinct values the column holds.
        with _open_csv(path, 'w' if is_new else 'a') as out_file:
            writer = csv.writer(out_file, delimiter=delimiter)
            if is_new and headers is not None:
                writer.writerow(headers)
            writer.writerow(row)

    print("{} unique items in column {}".format(len(started), columnindex))
    if skipped:
        print("{} rows skipped because they have no column {}".format(skipped, columnindex))
164+
165+
166+
if __name__ == "__main__":
    # Executed as a script: hand everything after the program name to main().
    cli_arguments = sys.argv[1:]
    main(cli_arguments)

12_sample_csv.csv

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
First Name,Last Name,Email Address,Phone Number,Company,Date Hired
2+
Abigail,Branch,[email protected],(412) 540-6276,Sem Eget PC,07/02/2013
3+
Roanna,Lambert,[email protected],(747) 536-6748,Eget Laoreet Foundation,11/23/2013
4+
Amanda,England,[email protected],(669) 164-6411,Magna Nec Quam Limited,08/11/2012
5+
Hilel,Chapman,[email protected],(683) 531-0279,Sed Molestie PC,06/25/2012
6+
Basia,Bowers,[email protected],(135) 986-6437,Tincidunt Nunc Ac Associates,05/11/2013
7+
Dylan,Dunlap,[email protected],(877) 604-4603,Eu Ultrices Institute,07/02/2012
8+
Regan,Cardenas,[email protected],(693) 378-7235,Neque Morbi Corporation,10/30/2012
9+
Sade,Green,[email protected],(816) 255-5508,Eleifend Ltd,09/03/2012
10+
Marshall,Richardson,[email protected],(460) 132-4621,Purus Maecenas Libero LLC,12/21/2012
11+
Regina,Brown,[email protected],(185) 963-9365,Vulputate Consulting,06/16/2013
12+
Irma,Rivers,[email protected],(701) 393-3679,Nec Leo Morbi Incorporated,05/07/2013
13+
Rudyard,Cline,[email protected],(971) 228-3147,Risus Quis Consulting,04/25/2013
14+
Justina,Richmond,[email protected],(755) 103-3125,Ullamcorper Associates,02/12/2013
15+
Reece,Blackburn,[email protected],(239) 528-2742,Suspendisse Associates,04/03/2014
16+
Lillith,Holden,[email protected],(305) 797-1579,Dapibus Id Blandit LLP,09/11/2013
17+
Taylor,Vinson,[email protected],(355) 993-1099,Egestas Institute,05/16/2012
18+
Colton,Barker,[email protected],(705) 978-5992,Ornare Consulting,04/24/2013
19+
Vladimir,Walls,[email protected],(311) 406-4856,Faucibus Ut Nulla LLP,08/12/2012
20+
Freya,Rowland,[email protected],(284) 850-7506,Turpis PC,05/31/2013
21+
Cullen,Phelps,[email protected],(425) 280-1763,Rhoncus Id Mollis Consulting,09/10/2013
22+
Boris,Lopez,[email protected],(769) 701-0055,Nunc Sed Orci Industries,07/26/2013
23+
Alvin,Meyer,[email protected],(783) 312-0821,Dignissim Pharetra Ltd,03/02/2013
24+
Nicole,Boyle,[email protected],(675) 678-1160,Dictum Eleifend Nunc LLC,05/05/2012
25+
Flynn,Petersen,[email protected],(787) 543-7411,Penatibus Et Associates,03/11/2013
26+
Troy,Herman,[email protected],(932) 900-7922,Dolor Donec Associates,11/16/2012
27+
Constance,Shields,[email protected],(221) 761-2368,Vel Quam Company,02/14/2014
28+
Ocean,Green,[email protected],(481) 832-0298,Nunc Associates,03/03/2013
29+
Steven,Lopez,[email protected],(294) 415-0435,Ipsum Company,07/25/2013
30+
Adara,Lee,[email protected],(760) 291-7826,Eu Ultrices PC,10/05/2013
31+
Noble,Hancock,[email protected],(333) 272-8234,Vitae Risus Duis LLC,09/13/2012
32+
Kendall,Wilcox,[email protected],(173) 982-4381,Ultrices Industries,01/26/2013
33+
Sebastian,Barton,[email protected],(951) 817-9217,In Mi Pede Corporation,05/11/2014
34+
Gavin,Clark,[email protected],(671) 714-8378,Vestibulum Neque Limited,06/06/2012
35+
Charles,Woods,[email protected],(559) 935-9739,Amet Ante Company,09/02/2013
36+
Elvis,Roberts,[email protected],(184) 182-5324,Facilisis Vitae Inc.,01/07/2014
37+
Caldwell,Carey,[email protected],(125) 243-9354,Egestas Lacinia Sed Inc.,10/24/2012
38+
Jesse,Leblanc,[email protected],(726) 216-8000,Lectus Ltd,11/22/2013
39+
Hu,Adkins,[email protected],(370) 317-7556,Aliquam Vulputate Company,10/19/2013
40+
Hamilton,Tyler,[email protected],(234) 744-3868,Nunc Sed LLC,10/19/2012
41+
Cade,Osborn,[email protected],(501) 753-9793,Consectetuer Industries,08/14/2013
42+
Ashely,Kent,[email protected],(789) 869-6558,Imperdiet Ornare Corporation,02/04/2013
43+
Veda,Cameron,[email protected],(522) 127-0654,Egestas Incorporated,12/29/2012
44+
Burke,Ferrell,[email protected],(975) 891-3694,Purus Accumsan Institute,07/26/2013
45+
Fuller,Lamb,[email protected],(523) 614-5785,Pede Cum Sociis Limited,12/02/2013
46+
Natalie,Taylor,[email protected],(117) 594-2685,A Facilisis Non LLP,12/06/2013
47+
Astra,Morton,[email protected],(390) 867-2558,Non Ante Bibendum Foundation,05/07/2012
48+
David,Espinoza,[email protected],(287) 945-5239,Lobortis Nisi Nibh Industries,05/11/2014
49+
Sybil,Todd,[email protected],(611) 848-4765,Massa Mauris Vestibulum Incorporated,01/19/2013
50+
Lee,Barron,[email protected],(765) 654-9167,In Ornare Inc.,01/01/2013
51+
Zachery,Reed,[email protected],(667) 465-1222,Ac Corp.,10/07/2012
52+
Marshall,Brady,[email protected],(391) 336-5310,Ac Sem Ut Incorporated,07/12/2012
53+
Selma,Floyd,[email protected],(398) 920-1076,Non Foundation,07/21/2012
54+
Ivy,Garrison,[email protected],(428) 321-5542,Semper Erat Foundation,12/19/2013
55+
Wyatt,Gibbs,[email protected],(973) 141-9840,Pellentesque Corp.,11/21/2013
56+
Vaughan,Moss,[email protected],(597) 730-0228,Tempor Institute,10/27/2013
57+
Elijah,Mcgowan,[email protected],(127) 171-1859,Tempor Bibendum Donec LLC,08/26/2012
58+
Miranda,Ingram,[email protected],(864) 873-7359,Feugiat Non Lobortis Institute,08/20/2012
59+
Anastasia,Lawrence,[email protected],(106) 260-8688,Sit Amet Consulting,05/31/2012
60+
Samson,Patton,[email protected],(302) 330-4251,Hendrerit Associates,12/27/2013
61+
Erasmus,Sexton,[email protected],(972) 793-9187,Feugiat Industries,10/15/2013
62+
Emery,Gardner,[email protected],(848) 534-1656,Nunc Sit Amet Industries,08/24/2012
63+
Nomlanga,Hensley,[email protected],(644) 169-6243,Consectetuer Company,08/29/2012
64+
Jason,Craft,[email protected],(691) 770-9143,Blandit LLC,03/23/2013
65+
Kathleen,Haley,[email protected],(891) 454-8400,Lorem Company,07/02/2012
66+
Aline,Flynn,[email protected],(563) 400-6803,Et Netus LLP,01/28/2013
67+
Ursa,Dickson,[email protected],(371) 615-7750,Nullam Company,12/22/2012
68+
Wesley,Lopez,[email protected],(287) 777-3724,Lobortis Ultrices Vivamus Corp.,06/17/2013
69+
Victoria,Mcleod,[email protected],(583) 108-1294,Justo Faucibus Lectus Corporation,10/17/2012
70+
Shana,Roach,[email protected],(921) 385-2342,Quis Turpis Vitae Incorporated,05/26/2014
71+
Maxine,Ruiz,[email protected],(520) 801-0808,Luctus Foundation,12/05/2013
72+
Harriet,Bishop,[email protected],(758) 716-9401,Dictum Phasellus In Inc.,09/08/2013
73+
Serina,Williams,[email protected],(270) 288-0136,At Egestas A Corporation,03/17/2014
74+
Rhea,Copeland,[email protected],(775) 493-9118,Ipsum Incorporated,05/22/2013
75+
Evan,Holcomb,[email protected],(695) 656-8621,Sem Institute,02/16/2013
76+
Basil,Mccall,[email protected],(144) 989-4125,Feugiat Tellus Lorem Institute,02/25/2013
77+
Florence,Riley,[email protected],(663) 529-4829,Enim Sit PC,01/14/2014
78+
Heather,Peck,[email protected],(850) 444-0917,Curabitur Limited,01/16/2014
79+
Dara,Robinson,[email protected],(106) 576-1355,Urna Incorporated,12/15/2012
80+
Kylan,Maxwell,[email protected],(973) 206-2558,Aliquam Eros Turpis Company,08/21/2012
81+
Petra,Blake,[email protected],(901) 207-9872,Ac Metus Institute,06/17/2013
82+
Fiona,Goff,[email protected],(265) 255-7749,Odio Phasellus Corp.,12/03/2012
83+
Kameko,Diaz,[email protected],(731) 354-4848,Montes Nascetur Corporation,08/16/2013
84+
Craig,Valentine,[email protected],(437) 229-8198,Etiam Gravida Molestie Consulting,05/06/2014
85+
Samson,Cunningham,[email protected],(335) 666-7758,Nec Ante Associates,07/02/2013
86+
Yoko,Rogers,[email protected],(893) 405-6889,Fermentum Vel Mauris Corp.,03/29/2014
87+
Walter,Burnett,[email protected],(336) 411-9222,Suscipit Est Institute,06/26/2012
88+
Gisela,Nash,[email protected],(917) 249-0166,Non Magna LLP,11/23/2012
89+
Wanda,Pierce,[email protected],(480) 872-3389,Cum Sociis Natoque Limited,11/02/2013
90+
Jane,Dixon,[email protected],(112) 139-8563,Id Ante Dictum LLC,03/14/2014
91+
Octavius,Shannon,[email protected],(541) 652-3295,Libero Est Institute,05/28/2014
92+
Rigel,Hunt,[email protected],(792) 358-7505,Enim PC,09/05/2013
93+
Rachel,Gray,[email protected],(165) 973-1366,Suscipit Nonummy Fusce LLC,05/08/2013
94+
Madeline,Bradley,[email protected],(436) 223-3135,Posuere PC,01/24/2014
95+
Emma,Conner,[email protected],(304) 429-2622,Nulla Incorporated,11/05/2013
96+
Halee,Mclean,[email protected],(669) 364-0148,Ligula Consulting,03/05/2014
97+
Conan,Williams,[email protected],(999) 649-4433,Velit Eu Limited,05/15/2014
98+
Martena,Fowler,[email protected],(405) 661-1762,Blandit Nam Institute,02/27/2013
99+
Robin,Buckley,[email protected],(376) 771-9862,Sed Corp.,10/30/2012
100+
Isadora,Adams,[email protected],(138) 774-6058,Blandit Viverra Donec Institute,08/07/2012
101+
Bernard,Price,[email protected],(368) 882-6146,Egestas Blandit LLP,11/03/2013

0 commit comments

Comments
 (0)