/** * <p> * An author or editor may be and institution not a person. In that case the * key generator builds very long keys, e.g.: for “The Attributed * Graph Grammar System (AGG)” -> * “TheAttributedGraphGrammarSystemAGG”. * </p> * * <p> * An institution name should be inside <code>{}</code> brackets. If the * institution name also includes its abbreviation this abbreviation should * be also in <code>{}</code> brackets. For the previous example the value * should look like: * <code>{The Attributed Graph Grammar System ({AGG})}</code>. * </p> * * <p> * If an institution includes its abbreviation, i.e. "...({XYZ})", first * such abbreviation should be used as the key value part of such author. * </p> * * <p> * If an institution does not include its abbreviation the key should be * generated form its name in the following way: * </p> * * <p> * The institution value can contain: institution name, part of the * institution, address, etc. Those information should be separated by * comma. Name of the institution and possible part of the institution * should be on the beginning, while address and secondary information * should be on the end. * </p> * * Each part is examined separately: * <ol> * <li>We remove all tokens of a part which are one of the defined ignore * words (the, press), which end with a dot (ltd., co., ...) 
and which first * character is lowercase (of, on, di, ...).</li> * <li>We detect a type of the part: university, technology institute, * department, school, rest * <ul> * <li>University: <code>"Uni[NameOfTheUniversity]"</code></li> * <li>Department: will be an abbreviation of all words beginning with the * uppercase letter except of words: <code>d[ei]part.*</code>, school, * faculty</li> * <li>School: same as department</li> * <li>Rest: If there are less than 3 tokens in such part than the result * will be by concatenating those tokens, otherwise the result will be build * from the first letters of words starting with and uppercase letter.</li> * </ul> * </ol> * * Parts are concatenated together in the following way: * <ul> * <li>If there is a university part use it otherwise use the rest part.</li> * <li>If there is a school part append it.</li> * <li>If there is a department part and it is not same as school part * append it.</li> * </ul> * * Rest part is only the first part which do not match any other type. All * other parts (address, ...) are ignored. * * @param content the institution to generate a Bibtex key for * @return <ul> * <li>the institutation key</li> * <li>"" in the case of a failure</li> * <li>null if content is null</li> * </ul> */ private static String generateInstitutionKey(String content) { if (content == null) return null; content = unifyDiacritics(content); List<String> ignore = Arrays.asList(new String[]{ "press", "the" }); content = content.replaceAll("^\\{", "").replaceAll("\\}$", ""); Pattern regex = Pattern.compile(".*\\(\\{([A-Z]+)\\}\\).*"); Matcher matcher = regex.matcher(content); if (matcher.matches()) return matcher.group(1); content = removeDiacritics(content); String[] parts = content.split(","); // Key parts String university = null; String department = null; String school = null; String rest = null; for(int index=0; index<parts.length; index++) { List<String> part = new ArrayList<String>(); // Cleanup: remove unnecessary words. 
for(String k : parts[index].replaceAll("\\{[A-Z]+\\}", "").split("[ \\-_]")) { if ( !k.equals("") // remove empty && !ignore.contains(k.toLowerCase()) // remove ignored words && k.charAt(k.length()-1) != '.' // remove ltd., co., ... && (k.charAt(0)+"").matches("[A-Z]") // remove of, di, ... || k.length()>=3 && k.toLowerCase().substring(0, 2).equals("uni")) { part.add(k); } } boolean isUniversity = false; // university boolean isTechnology = false; // technology institute boolean isDepartment = false; // departments boolean isSchool = false; // schools // Deciding about a part type... for (String k : part) { if (k.length()>=5 && k.toLowerCase().substring(0, 5).equals("univ")) isUniversity = true; if (k.length()>=6 && k.toLowerCase().substring(0, 6).equals("techn")) isTechnology = true; if (k.toLowerCase().equals("school")) isSchool = true; if (k.length()>=7 && k.toLowerCase().substring(0, 7).matches("d[ei]part") || k.length()>=4 && k.toLowerCase().substring(0, 4).equals("lab")) isDepartment = true; } if (isTechnology) isUniversity = false; // technology institute isn't university :-) // University part looks like: Uni[NameOfTheUniversity] // // If university is detected than the previous part is suggested // as department if (isUniversity) { university = "Uni"; for (String k : part) { if (k.length()>=5 && !k.toLowerCase().substring(0, 5).equals("univ")) university += k; } if (index > 0 && department == null) department = parts[index-1]; // School is an abbreviation of all the words beginning with a // capital letter excluding: department, school and faculty words. 
// // Explicitly defined department part is build the same way as // school } else if (isSchool || isDepartment) { if (isSchool) school = ""; if (isDepartment) department = ""; for (String k : part) { if (k.length()>=7 && !k.toLowerCase().substring(0, 7).matches("d[ei]part") && !k.toLowerCase().equals("school") && !k.toLowerCase().equals("faculty") && !k.replaceAll("[^A-Z]", "").equals("")) { if (isSchool) school += k.replaceAll("[^A-Z]", ""); if (isDepartment) department += k.replaceAll("[^A-Z]", ""); } } // A part not matching university, department nor school. } else if (rest == null) { rest = ""; // Less than 3 parts -> concatenate those if (part.size() < 3) { for (String k : part) rest += k; // More than 3 parts -> use 1st letter abbreviation } else { for (String k : part) { k = k.replaceAll("[^A-Z]", ""); if (!k.equals("")) rest += k; } } } } // Putting parts together. String result = (university==null ? rest : university) + (school == null ? "" : school) + ((department == null || (school != null && department.equals(school))) ? "" : department); return result; }
/**
 * Generates the SQL required to populate the entry_types table with jabref
 * data.
 *
 * <p>
 * For every entry type in <code>BibtexEntryType.ALL_TYPES</code> one
 * statement is passed to <code>SQLUtil.processQuery</code>: an INSERT when
 * the lower-cased type name is not among the labels read from the
 * database, an UPDATE otherwise. Existing labels are only looked up when
 * <code>out</code> is a <code>Connection</code>; for any other output every
 * type is inserted.
 * </p>
 *
 * @param out
 *            The output (PrintStream or Connection) object to which the DML
 *            should be written.
 * @throws SQLException
 *             if reading the existing labels or closing the statement
 *             fails, or if SQLUtil reports one.
 */
private void populateEntryTypesTable(Object out) throws SQLException {
    ArrayList<String> fieldRequirement = new ArrayList<String>();
    ArrayList<String> existentTypes = new ArrayList<String>();

    if (out instanceof Connection) {
        Statement statement = (Statement) SQLUtil.processQueryWithResults(out,
                "SELECT label FROM entry_types");
        try {
            ResultSet rs = statement.getResultSet();
            while (rs.next()) {
                existentTypes.add(rs.getString(1));
            }
        } finally {
            // Closing the statement also closes its current ResultSet.
            statement.close();
        }
    }

    for (BibtexEntryType val : BibtexEntryType.ALL_TYPES.values()) {
        // Start every type with all fields marked "gen"; setFieldRequirement
        // then derives the final per-field values from the type's field lists.
        fieldRequirement.clear();
        for (int i = 0; i < SQLUtil.getAllFields().size(); i++) {
            fieldRequirement.add(i, "gen");
        }
        List<String> reqFields = Arrays
                .asList(val.getRequiredFields() != null ? val
                        .getRequiredFields() : new String[0]);
        List<String> optFields = Arrays
                .asList(val.getOptionalFields() != null ? val
                        .getOptionalFields() : new String[0]);
        List<String> utiFields = Arrays
                .asList(val.getUtilityFields() != null ? val
                        .getUtilityFields() : new String[0]);
        fieldRequirement = SQLUtil.setFieldRequirement(
                SQLUtil.getAllFields(), reqFields, optFields, utiFields,
                fieldRequirement);

        // NOTE(review): label and values are concatenated unescaped; the
        // statement may also go to a PrintStream, so it is kept as text.
        String label = val.getName().toLowerCase();
        StringBuilder query = new StringBuilder();
        if (!existentTypes.contains(label)) {
            query.append("INSERT INTO entry_types (label, ").append(fieldStr)
                    .append(") VALUES (");
            query.append("'").append(label).append("'");
            for (int i = 0; i < fieldRequirement.size(); i++) {
                query.append(", '").append(fieldRequirement.get(i)).append("'");
            }
            query.append(");");
        } else {
            // Column names come from fieldStr, in the same order as the
            // entries of fieldRequirement.
            String[] update = fieldStr.split(",");
            query.append("UPDATE entry_types SET \n");
            for (int i = 0; i < fieldRequirement.size(); i++) {
                if (i > 0) {
                    query.append(",");
                }
                query.append(update[i]).append("='")
                        .append(fieldRequirement.get(i)).append("'");
            }
            query.append(" WHERE label='").append(label).append("'");
        }
        SQLUtil.processQuery(out, query.toString());
    }
}
Clone fragments detected by clone detection tool
File path: /jabref-2.10/src/java/net/sf/jabref/labelPattern/LabelPatternUtil.java File path: /jabref-2.10/src/java/net/sf/jabref/sql/exporter/DBExporter.java
Method name: String generateInstitutionKey(String) Method name: void populateEntryTypesTable(Object)
Number of AST nodes: 2 Number of AST nodes: 2
1
/**
1
/**
2
	 * <p>
2
	 * Generates the SQL required to populate the entry_types table with jabref
3
	 * An author or editor may be and institution not a person. In that case the
3
	 * data.
4
	 * key generator builds very long keys, e.g.: for “The Attributed
4
	 * 
5
	 * Graph Grammar System (AGG)” ->
5
	 * @param out
6
	 * “TheAttributedGraphGrammarSystemAGG”.
6
	 *            The output (PrintSream or Connection) object to which the DML
7
	 * </p>
7
	 *            should be written.
8
	 * 
8
	 */
9
	 * <p>
9
10
	 * An institution name should be inside <code>{}</code> brackets. If the
10
	private void populateEntryTypesTable(Object out) throws SQLException {
11
	 * institution name also includes its abbreviation this abbreviation should
11
		String query = "";
12
	 * be also in <code>{}</code> brackets. For the previous example the value
12
		ArrayList<String> fieldRequirement = new ArrayList<String>();
13
	 * should look like:
13
14
	 * <code>{The Attributed Graph Grammar System ({AGG})}</code>.
14
		ArrayList<String> existentTypes = new ArrayList<String>();
15
	 * </p>
15
		if (out instanceof Connection) {
16
	 * 
16
			ResultSet rs = ((Statement) SQLUtil.processQueryWithResults(out,
17
	 * <p>
17
					"SELECT label FROM entry_types")).getResultSet();
18
	 * If an institution includes its abbreviation, i.e. "...({XYZ})", first
18
			while (rs.next()) {
19
	 * such abbreviation should be used as the key value part of such author.
19
				existentTypes.add(rs.getString(1));
20
	 * </p>
20
			}
21
	 * 
21
		}
22
	 * <p>
22
		for (BibtexEntryType val : BibtexEntryType.ALL_TYPES.values()) {
23
	 * If an institution does not include its abbreviation the key should be
23
			fieldRequirement.clear();
24
	 * generated form its name in the following way:
24
			for (int i = 0; i < SQLUtil.getAllFields().size(); i++) {
25
	 * </p>
25
				fieldRequirement.add(i, "gen");
26
	 * 
26
			}
27
	 * <p>
27
			List<String> reqFields = Arrays
28
	 * The institution value can contain: institution name, part of the
28
					.asList(val.getRequiredFields() != null ? val
29
	 * institution, address, etc. Those information should be separated by
29
							.getRequiredFields() : new String[0]);
30
	 * comma. Name of the institution and possible part of the institution
30
			List<String> optFields = Arrays
31
	 * should be on the beginning, while address and secondary information
31
					.asList(val.getOptionalFields() != null ? val
32
	 * should be on the end.
32
							.getOptionalFields() : new String[0]);
33
	 * </p>
33
			List<String> utiFields = Arrays
34
	 * 
34
					.asList(val.getUtilityFields() != null ? val
35
	 * Each part is examined separately:
35
							.getUtilityFields() : new String[0]);
36
	 * <ol>
36
			fieldRequirement = SQLUtil.setFieldRequirement(
37
	 * <li>We remove all tokens of a part which are one of the defined ignore
37
					SQLUtil.getAllFields(), reqFields, optFields, utiFields,
38
	 * words (the, press), which end with a dot (ltd., co., ...) and which first
38
					fieldRequirement);
39
	 * character is lowercase (of, on, di, ...).</li>
39
			if (!existentTypes.contains(val.getName().toLowerCase())) {
40
	 * <li>We detect a type of the part: university, technology institute,
40
				String insert = "INSERT INTO entry_types (label, " + fieldStr
41
	 * department, school, rest
41
						+ ") VALUES (";
42
	 * <ul>
42
				query = insert + "'" + val.getName().toLowerCase() + "'";
43
	 * <li>University: <code>"Uni[NameOfTheUniversity]"</code></li>
43
				for (int i = 0; i < fieldRequirement.size(); i++) {
44
	 * <li>Department: will be an abbreviation of all words beginning with the
44
					query = query + ", '" + fieldRequirement.get(i) + "'";
45
	 * uppercase letter except of words: <code>d[ei]part.*</code>, school,
45
				}
46
	 * faculty</li>
46
				query = query + ");";
47
	 * <li>School: same as department</li>
47
			} else {
48
	 * <li>Rest: If there are less than 3 tokens in such part than the result
48
				String[] update = fieldStr.split(",");
49
	 * will be by concatenating those tokens, otherwise the result will be build
49
				query = "UPDATE entry_types SET \n";
50
	 * from the first letters of words starting with and uppercase letter.</li>
50
				for (int i = 0; i < fieldRequirement.size(); i++) {
51
	 * </ul>
51
					query += update[i] + "='" + fieldRequirement.get(i) + "',";
52
	 * </ol>
52
				}
53
	 * 
53
				query = query.substring(0, query.lastIndexOf(","));
54
	 * Parts are concatenated together in the following way:
54
				query += " WHERE label='" + val.getName().toLowerCase() + "'";
55
	 * <ul>
55
			}
56
	 * <li>If there is a university part use it otherwise use the rest part.</li>
56
			SQLUtil.processQuery(out, query);
57
	 * <li>If there is a school part append it.</li>
57
		}
58
	 * <li>If there is a department part and it is not same as school part
58
	}
59
	 * append it.</li>
60
	 * </ul>
61
	 * 
62
	 * Rest part is only the first part which do not match any other type. All
63
	 * other parts (address, ...) are ignored.
64
	 * 
65
	 * @param content the institution to generate a Bibtex key for
66
	 * @return <ul>
67
	 *         <li>the institutation key</li>
68
	 *         <li>"" in the case of a failure</li>
69
	 *         <li>null if content is null</li>
70
	 *         </ul>
71
	 */
72
    private static String generateInstitutionKey(String content) {
73
        if (content == null) return null;
74
        content = unifyDiacritics(content);
75
        List<String> ignore = Arrays.asList(new String[]{ "press", "the" });
76
        content = content.replaceAll("^\\{", "").replaceAll("\\}$", "");
77
        Pattern regex = Pattern.compile(".*\\(\\{([A-Z]+)\\}\\).*");
78
        Matcher matcher = regex.matcher(content);
79
        if (matcher.matches())
80
            return matcher.group(1);
81
82
        content = removeDiacritics(content);
83
        String[] parts = content.split(",");
84
85
        // Key parts
86
        String university = null;
87
        String department = null;
88
        String school     = null;
89
        String rest       = null;
90
91
        for(int index=0; index<parts.length; index++) {
92
            List<String> part = new ArrayList<String>();
93
94
            // Cleanup: remove unnecessary words.
95
            for(String k : parts[index].replaceAll("\\{[A-Z]+\\}", "").split("[ \\-_]")) {
96
                if ( !k.equals("") // remove empty
97
                        && !ignore.contains(k.toLowerCase()) // remove ignored words
98
                        && k.charAt(k.length()-1) != '.' // remove ltd., co., ...
99
                        && (k.charAt(0)+"").matches("[A-Z]") // remove of, di, ...
100
                        || k.length()>=3 && k.toLowerCase().substring(0, 2).equals("uni")) {
101
                    part.add(k);
102
                }
103
            }
104
105
            boolean isUniversity = false; // university
106
            boolean isTechnology = false; // technology institute
107
            boolean isDepartment = false; // departments
108
            boolean isSchool     = false; // schools
109
110
            // Deciding about a part type...
111
            for (String k : part) {
112
                if (k.length()>=5 && k.toLowerCase().substring(0, 5).equals("univ"))
113
                    isUniversity = true;
114
                if (k.length()>=6 && k.toLowerCase().substring(0, 6).equals("techn"))
115
                    isTechnology = true;
116
                if (k.toLowerCase().equals("school"))
117
                    isSchool = true;
118
                if (k.length()>=7 && k.toLowerCase().substring(0, 7).matches("d[ei]part")
119
                        || k.length()>=4 && k.toLowerCase().substring(0, 4).equals("lab"))
120
                    isDepartment = true;
121
            }
122
            if (isTechnology) isUniversity = false; // technology institute isn't university :-)
123
124
            // University part looks like: Uni[NameOfTheUniversity]
125
            //
126
            // If university is detected than the previous part is suggested
127
            // as department
128
            if (isUniversity) {
129
                university = "Uni";
130
                for (String k : part) {
131
                    if (k.length()>=5 && !k.toLowerCase().substring(0, 5).equals("univ"))
132
                        university += k;
133
                }
134
                if (index > 0 && department == null)
135
                    department = parts[index-1];
136
137
            // School is an abbreviation of all the words beginning with a
138
            // capital letter excluding: department, school and faculty words.
139
            //
140
            // Explicitly defined department part is build the same way as
141
            // school
142
            } else if (isSchool || isDepartment) {
143
                if (isSchool)
144
                    school = "";
145
                if (isDepartment)
146
                    department = "";
147
148
                for (String k : part) {
149
                    if (k.length()>=7 && !k.toLowerCase().substring(0, 7).matches("d[ei]part")
150
                            && !k.toLowerCase().equals("school")
151
                            && !k.toLowerCase().equals("faculty")
152
                            && !k.replaceAll("[^A-Z]", "").equals("")) {
153
                        if (isSchool)
154
                            school += k.replaceAll("[^A-Z]", "");
155
                        if (isDepartment)
156
                            department += k.replaceAll("[^A-Z]", "");
157
                    }
158
                }
159
            // A part not matching university, department nor school.
160
            } else if (rest == null) {
161
                rest = "";
162
                // Less than 3 parts -> concatenate those
163
                if (part.size() < 3) {
164
                    for (String k : part)
165
                        rest += k;
166
                // More than 3 parts -> use 1st letter abbreviation
167
                } else {
168
                    for (String k : part) {
169
                        k = k.replaceAll("[^A-Z]", "");
170
                        if (!k.equals(""))
171
                            rest += k;
172
                    }
173
                }
174
            }
175
        }
176
177
        // Putting parts together.
178
        String result = (university==null ? rest : university)
179
                + (school == null ? "" : school)
180
                + ((department == null
181
                    || (school != null && department.equals(school))) ?
182
                            "" : department);
183
        return result;
184
    }
  1. {Non-refactorable}
    Mapping Summary
    Number of mapped statements: 1
    Number of unmapped statements in the first code fragment: 1
    Number of unmapped statements in the second code fragment: 1
    Time elapsed for statement mapping (ms): 0.0
    Similarity Score: 0.500
    Clone type: Type 2
    Mapped Statements
    ID Statement ID Statement
    57
    for (String k : part)
    57
    for (String k : part)
    19
    for (int i = 0; i < fieldRequirement.size(); i++)
    Differences
    Expression1 | Expression2 | Difference
    java.util.List | java.util.ArrayList | SUBCLASS_TYPE_MISMATCH
    java.util.List | java.util.ArrayList | SUBCLASS_TYPE_MISMATCH
    19
    for (int i = 0; i < fieldRequirement.size(); i++)
                                                                                                                  
    20
    query = query + ", '" + fieldRequirement.get(i) + "'";
    Precondition Violations
    Unmatched statement query=query + ", '" + fieldRequirement.get(i)+ "'"; cannot be moved before or after the extracted code, because it has dependencies to/from statements that will be extracted
    20
    query = query + ", '" + fieldRequirement.get(i) + "'";
    58
    rest += k;
    58
    rest += k;
    Precondition Violations
    Unmatched statement rest+=k; cannot be moved before or after the extracted code, because it has dependencies to/from statements that will be extracted
                            
    Precondition Violations (3)
    Row Violation
    1. Unmatched statement query=query + ", '" + fieldRequirement.get(i)+ "'"; cannot be moved before or after the extracted code, because it has dependencies to/from statements that will be extracted
    2. Unmatched statement rest+=k; cannot be moved before or after the extracted code, because it has dependencies to/from statements that will be extracted
    3. Clone fragment #1 returns variable k with type java.lang.String, while Clone fragment #2 returns variable i with type int