Gridarta Editor
CSVProcessor.java
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * - Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * - Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the distribution.
14  *
15  * - Neither the name of Oracle nor the names of its
16  * contributors may be used to endorse or promote products derived
17  * from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * This source code is provided to illustrate the usage of a given feature
34  * or technique and has been deliberately simplified. Additional steps
35  * required for a production-quality application, such as security checks,
36  * input validation, and proper error handling, might not be present in
37  * this sample code.
38  */
39 
40 import java.io.BufferedReader;
41 import java.io.IOException;
42 import java.nio.file.Files;
43 import java.nio.file.Paths;
44 import java.util.*;
45 import java.util.function.*;
46 import java.util.regex.Pattern;
47 import java.util.stream.Collector;
48 import java.util.stream.Collectors;
49 
50 import static java.lang.Double.parseDouble;
51 import static java.util.stream.Collectors.*;
52 
/**
 * Command-line sample that processes one column of a comma-separated file
 * using streams and collectors.
 *
 * <p>The first line of the file is treated as the header; the column to work
 * on is selected by its (trimmed) header name. Supported commands:
 * <ul>
 *   <li>{@code sort COLUMN_NAME ASC|DESC FILE}</li>
 *   <li>{@code search COLUMN_NAME REGEX FILE}</li>
 *   <li>{@code groupby COLUMN_NAME FILE}</li>
 *   <li>{@code stat COLUMN_NAME FILE}</li>
 * </ul>
 *
 * <p>Cells are obtained by a plain split on {@code ','}; quoted fields that
 * contain commas are not supported.
 */
public class CSVProcessor {

    // Read-ahead limit passed to BufferedReader.mark(int) so the "stat"
    // command can reset the reader and make several passes over the data.
    private static final int READ_AHEAD_LIMIT = 100_000_000;

    /**
     * Program entry point. Prints the usage text and exits with status 1 when
     * the arguments are invalid or the file cannot be read.
     *
     * @param args command, column name, command-specific argument (if any),
     *             and the CSV file path as the last element
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            printUsageAndExit();
            return; //Should not be reached.
        }
        String fileName = args[args.length - 1];
        // Files.newBufferedReader already returns a BufferedReader, so no
        // additional wrapping is needed.
        try (BufferedReader br = Files.newBufferedReader(Paths.get(fileName))) {
            //Assume that the first line contains column names.
            String headerLine = br.readLine();
            if (headerLine == null) {
                printUsageAndExit("File is empty: " + fileName);
                return; //Should not be reached.
            }
            List<String> header = Arrays.stream(headerLine.split(","))
                    .map(String::trim).collect(toList());
            //Calculate an index of the column in question.
            int column = getColumnNumber(header, args[1]);
            switch (args[0]) {
                case "sort":
                    verifyArgumentNumber(args, 4);
                    //Define the sort order.
                    boolean isAsc;
                    switch (args[2].toUpperCase()) {
                        case "ASC":
                            isAsc = true;
                            break;
                        case "DESC":
                            isAsc = false;
                            break;
                        default:
                            printUsageAndExit("Illegal argument: " + args[2]);
                            return; //Should not be reached.
                    }
                    /*
                     * Create a comparator that compares lines by comparing
                     * values in the specified column, ignoring case.
                     */
                    Comparator<String> cmp
                            = Comparator.comparing(str -> getCell(str, column),
                                    String.CASE_INSENSITIVE_ORDER);
                    /*
                     * sorted(...) is used to sort records.
                     * forEach(...) is used to output sorted records.
                     */
                    br.lines().sorted(isAsc ? cmp : cmp.reversed())
                            .forEach(System.out::println);
                    break;
                case "search":
                    verifyArgumentNumber(args, 4);
                    /*
                     * Records are filtered by a regex.
                     * forEach(...) is used to output filtered records.
                     */
                    Predicate<String> pattern
                            = Pattern.compile(args[2]).asPredicate();
                    br.lines().filter(str -> pattern.test(getCell(str, column)))
                            .forEach(System.out::println);
                    break;
                case "groupby":
                    verifyArgumentNumber(args, 3);
                    /*
                     * Group lines by values in the column with collect(...), and
                     * print with forEach(...) for every distinct value within
                     * the column.
                     */
                    br.lines().collect(
                            Collectors.groupingBy(str -> getCell(str, column),
                                    toCollection(TreeSet::new)))
                            .forEach((str, set) -> {
                                System.out.println(str + ":");
                                set.forEach(System.out::println);
                            });
                    break;
                case "stat":
                    verifyArgumentNumber(args, 3);

                    /*
                     * BufferedReader will be read several times.
                     * Mark this point to return here after each pass.
                     * The mark sits right after the header line because the
                     * header has already been consumed.
                     */
                    br.mark(READ_AHEAD_LIMIT);

                    /*
                     * Statistics can be collected by a custom collector in one
                     * pass. One pass is preferable.
                     */
                    System.out.println(
                            br.lines().collect(new Statistics(column)));

                    /*
                     * Alternatively, statistics can be collected
                     * by a built-in API in several passes.
                     * This method demonstrates how separate operations can be
                     * implemented using a built-in API.
                     */
                    br.reset();
                    statInSeveralPasses(br, column);
                    break;
                default:
                    printUsageAndExit("Illegal argument: " + args[0]);
            }
        } catch (IOException e) {
            printUsageAndExit(e.toString());
        }
    }

    /**
     * Prints max/min records, average and sum of the column, re-reading the
     * data from the reader's mark for each pass.
     *
     * @param br     reader that was marked at the first data line
     * @param column zero-based index of the numeric column
     * @throws IOException if resetting the reader fails
     */
    private static void statInSeveralPasses(BufferedReader br, int column)
            throws IOException {
        System.out.println("#-----Statistics in several passes-------#");
        //Create a comparator to compare records by the column.
        Comparator<String> comparator
                = Comparator.comparing(
                        (String str) -> parseDouble(getCell(str, column)));
        //Find max record by using Collectors.maxBy(...).
        //orElse(null) keeps the output aligned with the one-pass collector
        //when there are no data lines, instead of throwing from Optional.get().
        System.out.println(
                "Max: " + br.lines().collect(maxBy(comparator)).orElse(null));
        br.reset();
        //Find min record by using Collectors.minBy(...)
        System.out.println(
                "Min: " + br.lines().collect(minBy(comparator)).orElse(null));
        br.reset();
        //Compute the average value and sum with
        //Collectors.summarizingDouble(...)
        DoubleSummaryStatistics doubleSummaryStatistics
                = br.lines().collect(summarizingDouble(
                        str -> parseDouble(getCell(str, column))));
        System.out.println("Average: " + doubleSummaryStatistics.getAverage());
        System.out.println("Sum: " + doubleSummaryStatistics.getSum());
    }

    /**
     * Prints usage and exits unless exactly {@code n} arguments were given.
     *
     * @param args command-line arguments
     * @param n    required number of arguments
     */
    private static void verifyArgumentNumber(String[] args, int n) {
        if (args.length != n) {
            printUsageAndExit("Expected " + n + " arguments but was "
                    + args.length);
        }
    }

    /**
     * Looks up a column by header name; prints usage and exits when the name
     * is not present in the header.
     *
     * @param header trimmed header cells
     * @param name   column name to find
     * @return zero-based index of the column
     */
    private static int getColumnNumber(List<String> header, String name) {
        int column = header.indexOf(name);
        if (column == -1) {
            printUsageAndExit("There is no column with name " + name);
        }
        return column;
    }

    /**
     * Returns the trimmed cell of a record at the given column.
     *
     * @param record one line of the CSV file
     * @param column zero-based column index
     * @return trimmed cell text
     */
    private static String getCell(String record, int column) {
        // A negative limit keeps trailing empty cells, so a record such as
        // "a,b," still has three cells instead of failing on the last column.
        return record.split(",", -1)[column].trim();
    }

    /**
     * Prints the usage text to standard output, the given messages to
     * standard error, and terminates the JVM with exit status 1.
     *
     * @param str optional error messages
     */
    private static void printUsageAndExit(String... str) {
        System.out.println("Usages:");

        System.out.println("CSVProcessor sort COLUMN_NAME ASC|DESC FILE");
        System.out.println("Sort lines by column COLUMN_NAME in CSV FILE\n");

        System.out.println("CSVProcessor search COLUMN_NAME REGEX FILE");
        System.out.println("Search for REGEX in column COLUMN_NAME in CSV FILE\n");

        System.out.println("CSVProcessor groupby COLUMN_NAME FILE");
        System.out.println("Split lines into different groups according to column "
                + "COLUMN_NAME value\n");

        System.out.println("CSVProcessor stat COLUMN_NAME FILE");
        System.out.println("Compute max/min/average/sum statistics by column "
                + "COLUMN_NAME\n");

        Arrays.asList(str).forEach(System.err::println);
        System.exit(1);
    }

    /**
     * Custom {@link Collector} that gathers the max record, min record, sum
     * and average of a numeric column. The same object serves as collector,
     * accumulation container and result.
     */
    private static class Statistics
            implements Collector<String, Statistics, Statistics> {

        /*
         * No synchronization is used here: each container is created by
         * supplier() and containers are merged through combiner().
         */
        private String maxRecord;
        private String minRecord;

        private double sum;
        private int lineCount;
        private final BinaryOperator<String> maxOperator;
        private final BinaryOperator<String> minOperator;
        private final int column;

        /**
         * @param column zero-based index of the numeric column
         */
        public Statistics(int column) {
            this.column = column;
            Comparator<String> cmp = Comparator.comparing(
                    (String str) -> parseDouble(getCell(str, column)));
            maxOperator = BinaryOperator.maxBy(cmp);
            minOperator = BinaryOperator.minBy(cmp);
        }

        /**
         * Folds one record into the statistics.
         *
         * @param line record to process
         * @return this object
         */
        public Statistics accept(String line) {
            maxRecord = pick(maxOperator, maxRecord, line);
            minRecord = pick(minOperator, minRecord, line);

            sum += parseDouble(getCell(line, column));
            lineCount++;
            return this;
        }

        /**
         * Merges another Statistics into this one.
         *
         * @param stat statistics to merge in
         * @return this object
         */
        public Statistics combine(Statistics stat) {
            // Either side may have seen no records yet; pick(...) skips the
            // comparator in that case because it cannot handle null records.
            maxRecord = pick(maxOperator, maxRecord, stat.getMaxRecord());
            minRecord = pick(minOperator, minRecord, stat.getMinRecord());
            sum += stat.getSum();
            lineCount += stat.getLineCount();
            return this;
        }

        /*
         * Applies the operator when both records are present; otherwise
         * returns whichever one is non-null (or null when both are absent).
         */
        private static String pick(BinaryOperator<String> operator,
                String current, String candidate) {
            if (current == null) {
                return candidate;
            }
            if (candidate == null) {
                return current;
            }
            return operator.apply(current, candidate);
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("#------Statistics------#\n");
            sb.append("Max: ").append(getMaxRecord()).append("\n");
            sb.append("Min: ").append(getMinRecord()).append("\n");
            sb.append("Sum = ").append(getSum()).append("\n");
            sb.append("Average = ").append(average()).append("\n");
            sb.append("#------Statistics------#\n");
            return sb.toString();
        }

        @Override
        public Supplier<Statistics> supplier() {
            return () -> new Statistics(column);
        }

        @Override
        public BiConsumer<Statistics, String> accumulator() {
            return Statistics::accept;
        }

        @Override
        public BinaryOperator<Statistics> combiner() {
            return Statistics::combine;
        }

        @Override
        public Function<Statistics, Statistics> finisher() {
            return stat -> stat;
        }

        @Override
        public Set<Characteristics> characteristics() {
            return EnumSet.of(Characteristics.IDENTITY_FINISH);
        }

        private String getMaxRecord() {
            return maxRecord;
        }

        private String getMinRecord() {
            return minRecord;
        }

        private double getSum() {
            return sum;
        }

        // Yields NaN when no records were processed (0.0 / 0).
        private double average() {
            return sum / lineCount;
        }

        private int getLineCount() {
            return lineCount;
        }

    }

}
CSVProcessor.Statistics.combine
Statistics combine(Statistics stat)
Definition: CSVProcessor.java:300
CSVProcessor.Statistics.getMinRecord
String getMinRecord()
Definition: CSVProcessor.java:350
CSVProcessor.Statistics.column
final int column
Definition: CSVProcessor.java:272
CSVProcessor.getCell
static String getCell(String record, int column)
Definition: CSVProcessor.java:225
CSVProcessor.Statistics.getMaxRecord
String getMaxRecord()
Definition: CSVProcessor.java:346
CSVProcessor.Statistics.toString
String toString()
Definition: CSVProcessor.java:309
CSVProcessor.Statistics.accept
Statistics accept(String line)
Definition: CSVProcessor.java:285
CSVProcessor.Statistics.lineCount
int lineCount
Definition: CSVProcessor.java:269
CSVProcessor.statInSeveralPasses
static void statInSeveralPasses(BufferedReader br, int column)
Definition: CSVProcessor.java:186
CSVProcessor.Statistics.maxRecord
String maxRecord
Definition: CSVProcessor.java:265
CSVProcessor.Statistics.Statistics
Statistics(int column)
Definition: CSVProcessor.java:274
CSVProcessor.Statistics.getLineCount
int getLineCount()
Definition: CSVProcessor.java:362
CSVProcessor.READ_AHEAD_LIMIT
static final int READ_AHEAD_LIMIT
Definition: CSVProcessor.java:76
CSVProcessor.Statistics.average
double average()
Definition: CSVProcessor.java:358
CSVProcessor.Statistics.sum
double sum
Definition: CSVProcessor.java:268
CSVProcessor.Statistics.minOperator
final BinaryOperator< String > minOperator
Definition: CSVProcessor.java:271
CSVProcessor.main
static void main(String[] args)
Definition: CSVProcessor.java:84
CSVProcessor.Statistics.minRecord
String minRecord
Definition: CSVProcessor.java:266
CSVProcessor.Statistics.accumulator
BiConsumer< Statistics, String > accumulator()
Definition: CSVProcessor.java:326
CSVProcessor.getColumnNumber
static int getColumnNumber(List< String > header, String name)
Definition: CSVProcessor.java:217
CSVProcessor.Statistics.getSum
double getSum()
Definition: CSVProcessor.java:354
CSVProcessor.Statistics.supplier
Supplier< Statistics > supplier()
Definition: CSVProcessor.java:321
CSVProcessor.Statistics.maxOperator
final BinaryOperator< String > maxOperator
Definition: CSVProcessor.java:270
CSVProcessor.printUsageAndExit
static void printUsageAndExit(String... str)
Definition: CSVProcessor.java:229
name
name
Definition: ArchetypeTypeSetParserTest-ignoreDefaultAttribute1-result.txt:2
CSVProcessor.Statistics.characteristics
Set< Characteristics > characteristics()
Definition: CSVProcessor.java:342
CSVProcessor.verifyArgumentNumber
static void verifyArgumentNumber(String[] args, int n)
Definition: CSVProcessor.java:210
record
This document describes some hints and requirements for general development on the CrossfireEditor If you plan to make changes to the editor code or setup please read the following and keep it in derived from a basic editor application called Gridder by Pasi Keränen so please communicate with best through the cf devel mailing before considering any fundamental changes About code DO NOT USE TABS No matter what Java development platform you are please configure insert indent Tabs are displayed totally different in every editor and there are millions of different editors out there The insertion of tabs in the source code is messing up the syntax formatting in a way that is UNREPAIRABLE Apart from please keep code indentation accurate This is not just good it helps to keep code readable and in that way dramatically decreases the chance for overlooked bugs Everyone is welcomed to correct indentation errors wherever they are spotted Before you start to do this please double check that your editor is really configured to insert spaces Line feeds may be checked in either in windows or in unix linux style All reasonable text and java editors can deal with both linefeed formats Converting line feeds is but in this case please make sure that only linefeed characters are changed and nothing else is affected Due to the platform independent nature of the editor has the potential to run on almost any given operating system the build process differs greatly between systems as well as java environments In the several people have attempted to add build scripts along with structural changes to optimize the setup on one particular system environment which has led to conflict Please do *not* attempt to change the structure or any directories for the mere purpose of improving a build process or performance in a java environment Build scripts may be placed in the root it would be especially fine if it is just one or two files but the latter is not required Please excuse me for placing 
such restriction I and many users of the editor greatly appreciate build scripts We just had some real troubles over this issue in the past and I don t want to have them repeated the editor has relatively high performance requirements I ve spent a lot of extra work to keep everything as fast and memory efficient as possible when you add new data fields or calculations in the archetype please make sure they are as efficient as possible and worth both the time and space they consume Now don t be afraid too much No development would be possible without adding calculations and data at all Just bear in mind unlike for many other open source performance does make a difference for the CrossfireEditor The for as many systems as possible In case you are unexperienced with java and note that the graphics look different on every and with every font They also have different sizes proportions and behave different A seemingly trivial and effectless change can wreck havoc for the same GUI run on another system please don t be totally afraid of just keep it in mind Nobody is gonna eat you alive when your code causes a GUI bug The best way to deal with it is to test on different systems Another good thing is to design all GUIs with care and avoid fixed sizes like wherever you can You might notice that the basic structure of the code obeys the model view controller scheme Too bad it is halfway messed up and but please try to stick with it as good as you can Don t make it worse For the record
Definition: Developer_README.txt:89
CSVProcessor
Definition: CSVProcessor.java:73
CSVProcessor.Statistics.combiner
BinaryOperator< Statistics > combiner()
Definition: CSVProcessor.java:331
CSVProcessor.Statistics.finisher
Function< Statistics, Statistics > finisher()
Definition: CSVProcessor.java:337
CSVProcessor.Statistics
Definition: CSVProcessor.java:254