1 /**
2 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3 */
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.File;
7 import java.io.FileNotFoundException;
8 import java.io.IOException;
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.Iterator;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Set;
15
16 import net.sourceforge.pmd.util.FileFinder;
17
18 public class CPD {
19
20 private Map<String, SourceCode> source = new HashMap<String, SourceCode>();
21 private CPDListener listener = new CPDNullListener();
22 private Tokens tokens = new Tokens();
23 private int minimumTileSize;
24 private MatchAlgorithm matchAlgorithm;
25 private Language language;
26 private boolean skipDuplicates;
27 public static boolean debugEnable = false;
28 private String encoding = System.getProperty("file.encoding");
29
30
31 public CPD(int minimumTileSize, Language language) {
32 this.minimumTileSize = minimumTileSize;
33 this.language = language;
34 }
35
36 public void skipDuplicates() {
37 this.skipDuplicates = true;
38 }
39
40 public void setCpdListener(CPDListener cpdListener) {
41 this.listener = cpdListener;
42 }
43
44 public void setEncoding(String encoding) {
45 this.encoding = encoding;
46 }
47
48 public void go() {
49 TokenEntry.clearImages();
50 matchAlgorithm = new MatchAlgorithm(source, tokens, minimumTileSize, listener);
51 matchAlgorithm.findMatches();
52 }
53
54 public Iterator<Match> getMatches() {
55 return matchAlgorithm.matches();
56 }
57
58 public void add(File file) throws IOException {
59 add(1, file);
60 }
61
62 public void addAllInDirectory(String dir) throws IOException {
63 addDirectory(dir, false);
64 }
65
66 public void addRecursively(String dir) throws IOException {
67 addDirectory(dir, true);
68 }
69
70 public void add(List<File> files) throws IOException {
71 for (File f: files) {
72 add(files.size(), f);
73 }
74 }
75
76 private void addDirectory(String dir, boolean recurse) throws IOException {
77 if (!(new File(dir)).exists()) {
78 throw new FileNotFoundException("Couldn't find directory " + dir);
79 }
80 FileFinder finder = new FileFinder();
81
82 add(finder.findFilesFrom(dir, language.getFileFilter(), recurse));
83 }
84
85 private Set<String> current = new HashSet<String>();
86
87 private void add(int fileCount, File file) throws IOException {
88
89 if (skipDuplicates) {
90
91 String signature = file.getName() + '_' + file.length();
92 if (current.contains(signature)) {
93 System.err.println("Skipping " + file.getAbsolutePath() + " since it appears to be a duplicate file and --skip-duplicate-files is set");
94 return;
95 }
96 current.add(signature);
97 }
98
99 if (!file.getCanonicalPath().equals(new File(file.getAbsolutePath()).getCanonicalPath())) {
100 System.err.println("Skipping " + file + " since it appears to be a symlink");
101 return;
102 }
103
104 listener.addedFile(fileCount, file);
105 SourceCode sourceCode = new SourceCode(new SourceCode.FileCodeLoader(file, encoding));
106 language.getTokenizer().tokenize(sourceCode, tokens);
107 source.put(sourceCode.getFileName(), sourceCode);
108 }
109
110 public static Renderer getRendererFromString(String name, String encoding) {
111 if (name.equalsIgnoreCase("text") || name.equals("")) {
112 return new SimpleRenderer();
113 } else if ("xml".equals(name)) {
114 return new XMLRenderer(encoding);
115 } else if ("csv".equals(name)) {
116 return new CSVRenderer();
117 } else if ("vs".equals(name)) {
118 return new VSRenderer();
119 }
120 try {
121 return (Renderer) Class.forName(name).newInstance();
122 } catch (Exception e) {
123 System.out.println("Can't find class '" + name + "', defaulting to SimpleRenderer.");
124 }
125 return new SimpleRenderer();
126 }
127
128 private static boolean findBooleanSwitch(String[] args, String name) {
129 for (int i = 0; i < args.length; i++) {
130 if (args[i].equals(name)) {
131 return true;
132 }
133 }
134 return false;
135 }
136
137 private static String findRequiredStringValue(String[] args, String name) {
138 for (int i = 0; i < args.length; i++) {
139 if (args[i].equals(name)) {
140 return args[i + 1];
141 }
142 }
143 System.out.println("No " + name + " value passed in");
144 usage();
145 throw new RuntimeException();
146 }
147
148 private static String findOptionalStringValue(String[] args, String name, String defaultValue) {
149 for (int i = 0; i < args.length; i++) {
150 if (args[i].equals(name)) {
151 return args[i + 1];
152 }
153 }
154 return defaultValue;
155 }
156
157 public static void main(String[] args) {
158 if (args.length == 0) {
159 usage();
160 }
161
162 try {
163 boolean skipDuplicateFiles = findBooleanSwitch(args, "--skip-duplicate-files");
164 String languageString = findOptionalStringValue(args, "--language", "java");
165 String formatString = findOptionalStringValue(args, "--format", "text");
166 String encodingString = findOptionalStringValue(args, "--encoding", System.getProperty("file.encoding"));
167 int minimumTokens = Integer.parseInt(findRequiredStringValue(args, "--minimum-tokens"));
168 LanguageFactory f = new LanguageFactory();
169 Language language = f.createLanguage(languageString);
170 Renderer renderer = CPD.getRendererFromString(formatString, encodingString);
171 CPD cpd = new CPD(minimumTokens, language);
172 cpd.setEncoding(encodingString);
173 if (skipDuplicateFiles) {
174 cpd.skipDuplicates();
175 }
176
177 boolean missingFiles = true;
178 for (int position = 0; position < args.length; position++) {
179 if (args[position].equals("--files")) {
180 cpd.addRecursively(args[position + 1]);
181 if ( missingFiles ) {
182 missingFiles = false;
183 }
184 }
185 }
186
187 if ( missingFiles ) {
188 System.out.println("No " + "--files" + " value passed in");
189 usage();
190 throw new RuntimeException();
191 }
192
193 cpd.go();
194 System.out.println(renderer.render(cpd.getMatches()));
195 } catch (Exception e) {
196 e.printStackTrace();
197 }
198 }
199
200 private static void usage() {
201 System.out.println("Usage:");
202 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens xxx --files xxx [--language xxx] [--encoding xxx] [--format (xml|text|csv|vs)] [--skip-duplicate-files] ");
203 System.out.println("i.e: ");
204 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files c:\\jdk14\\src\\java ");
205 System.out.println("or: ");
206 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files /path/to/c/code --language c ");
207 System.out.println("or: ");
208 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --encoding UTF-16LE --files /path/to/java/code --format xml");
209 }
210
211 }