Skip to content

Commit c6a8d56

Browse files
committed
Create WordFrequency.java
1 parent 134dfcc commit c6a8d56

File tree

1 file changed

+362
-0
lines changed

1 file changed

+362
-0
lines changed

WordFrequency.java

Lines changed: 362 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,362 @@
1+
import java.util.*;
2+
import java.io.*;
3+
import java.lang.*;
4+
5+
6+
/**
 * Command-line utility that counts how many times each word occurs in a text
 * file and prints the counts in one of three orders.
 *
 * <p>Usage: {@code java WordFrequency <MODE> [--threshold=NUM] <TEXTFILE>}
 * where {@code MODE} is {@code --by-freq}, {@code --by-word} or
 * {@code --by-orig}. Only words whose count is strictly greater than the
 * threshold (default 0) are printed.
 *
 * <p>A word is a maximal run of ASCII letters and apostrophes; words are
 * compared case-insensitively by lower-casing them before counting.
 */
public class WordFrequency
{
    private static final String MODE_BY_FREQ = "--by-freq";
    private static final String MODE_BY_WORD = "--by-word";
    private static final String MODE_BY_ORIG = "--by-orig";

    /** Word counts keyed by lower-cased word, kept in order of first appearance. */
    private final Map<String, Integer> map = new LinkedHashMap<String, Integer>();

    /** Only words whose count is strictly greater than this value are printed. */
    private int threshold = 0;

    /**
     * Program entry point. Any invalid invocation (wrong number of arguments,
     * unknown mode, malformed threshold option, unreadable file) prints the
     * usage text instead of a report; the method never throws for bad input.
     *
     * @param args {@code <MODE> [--threshold=NUM] <TEXTFILE>}
     */
    public static void main(String[] args)
    {
        // Valid invocations have exactly 2 or 3 arguments; anything else
        // (including more than 3, which used to be silently ignored) is an error.
        if (args.length < 2 || args.length > 3 || !isKnownMode(args[0]))
        {
            printUsage();
            return;
        }

        WordFrequency hw = new WordFrequency();
        if (args.length == 3)
        {
            try
            {
                hw.setThreshold(parseThreshold(args[1]));
            }
            catch (IllegalArgumentException e)
            {
                // Also covers NumberFormatException for a non-numeric NUM.
                printUsage();
                return;
            }
        }

        // The text file is always the last argument.
        File readIn = new File(args[args.length - 1]);
        try
        {
            hw.storeInfo(readIn);
        }
        catch (IOException e)
        {
            // A missing or unreadable file is reported with the usage text.
            printUsage();
            return;
        }
        hw.report(args[0]);
    }

    /** Returns whether {@code mode} is one of the three supported mode flags. */
    private static boolean isKnownMode(String mode)
    {
        return MODE_BY_FREQ.equals(mode)
            || MODE_BY_WORD.equals(mode)
            || MODE_BY_ORIG.equals(mode);
    }

    /**
     * Parses a {@code --threshold=NUM} option.
     *
     * @param option the raw command-line argument
     * @return the integer value of NUM
     * @throws IllegalArgumentException if the option is not of the form
     *         {@code --threshold=NUM} or NUM is not a valid integer
     */
    private static int parseThreshold(String option)
    {
        String[] parts = option.split("=");
        if (parts.length != 2 || !parts[0].equals("--threshold"))
        {
            throw new IllegalArgumentException("Expected --threshold=NUM but got: " + option);
        }
        // NumberFormatException is a subclass of IllegalArgumentException.
        return Integer.parseInt(parts[1]);
    }

    /** Prints the error line to standard error and the usage text to standard output. */
    private static void printUsage()
    {
        System.err.println("Error: Mode and filename expected");
        System.out.println("Usage: java WordFrequency <MODE> [--threshold=NUM] <TEXTFILE>");
        System.out.println("Utility to count the occurences of each word in a text file.");
        System.out.println("MODE is one of:");
        System.out.println(" --by-freq sort by frequency count, from highest to lowest");
        System.out.println(" --by-word sort by words, alphabetically");
        System.out.println(" --by-orig show words in order of first appearance");
    }

    /** Prints the report selected by an already validated mode flag. */
    private void report(String mode)
    {
        if (MODE_BY_FREQ.equals(mode))
        {
            sortByFreq();
        }
        else if (MODE_BY_WORD.equals(mode))
        {
            sortByWord();
        }
        else
        {
            sortByOrig();
        }
    }

    /**
     * Reads the given file and adds the occurrences of each word to the counts
     * held by this object. Calling it again accumulates on top of earlier counts.
     *
     * <p>Cite: Book. Title: Introduction to JAVA programming comprehensive
     * version, Tenth Edition. Author: Y. Daniel Liang. Page: 815. The
     * word-counting loop below is adapted from that listing.
     *
     * @param readIn the text file to read
     * @throws IOException if the file cannot be opened or a read error occurs
     */
    public void storeInfo(File readIn) throws IOException
    {
        Scanner in = new Scanner(readIn);
        try
        {
            // Everything that is not an ASCII letter or an apostrophe separates words.
            in.useDelimiter("[^A-Za-z']+");
            while (in.hasNext())
            {
                // Locale.ROOT keeps the lower-casing independent of the default
                // locale (e.g. Turkish would map 'I' to a dotless i).
                String word = in.next().toLowerCase(Locale.ROOT);
                if (word.length() > 0)
                {
                    Integer seen = this.map.get(word);
                    this.map.put(word, seen == null ? 1 : seen + 1);
                }
            }
            // Scanner swallows read errors and simply stops; surface them.
            IOException readFailure = in.ioException();
            if (readFailure != null)
            {
                throw readFailure;
            }
        }
        finally
        {
            // Closing the scanner also closes the underlying file.
            in.close();
        }
    }

    /** Prints the words above the threshold in order of first appearance. */
    public void sortByOrig()
    {
        printEntries(this.map.entrySet());
    }

    /** Prints the words above the threshold in ascending alphabetical order. */
    public void sortByWord()
    {
        Map<String, Integer> treeMap = new TreeMap<String, Integer>(this.map);
        printEntries(treeMap.entrySet());
    }

    /**
     * Prints the words above the threshold from the highest count to the
     * lowest; words with equal counts are listed alphabetically.
     *
     * <p>Cite: URL:
     * http://stackoverflow.com/questions/109383/how-to-sort-a-mapkey-value-on-the-values-in-java
     * Date: July 10th. Description: the algorithm used on that web page was
     * referenced in order to sort the entries based on the value instead of
     * the key.
     */
    public void sortByFreq()
    {
        List<Map.Entry<String, Integer>> entries =
            new ArrayList<Map.Entry<String, Integer>>(this.map.entrySet());
        Collections.sort(entries, new FreqComparator());
        printEntries(entries);
    }

    /** Prints every entry whose count is strictly greater than the threshold. */
    private void printEntries(Iterable<Map.Entry<String, Integer>> entries)
    {
        for (Map.Entry<String, Integer> entry : entries)
        {
            if (entry.getValue() > this.threshold)
            {
                System.out.printf("%18s: %d%n", entry.getKey(), entry.getValue());
            }
        }
    }

    private void setThreshold(int holder)
    {
        this.threshold = holder;
    }

    /** Orders entries by descending count, breaking ties by ascending word. */
    private static final class FreqComparator implements Comparator<Map.Entry<String, Integer>>
    {
        @Override
        public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b)
        {
            int byCount = b.getValue().compareTo(a.getValue());
            if (byCount != 0)
            {
                return byCount;
            }
            return a.getKey().compareTo(b.getKey());
        }
    }
}

0 commit comments

Comments
 (0)