1 | /** | | 1 | /** |
2 | * Parse the entries in the source, and return a List of BibtexEntry | | 2 | * Parse the entries in the source, and return a List of BibtexEntry |
3 | * objects. | | 3 | * objects. |
4 | */ | | 4 | */ |
5 | public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status) throws IOException { | | 5 | public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status) throws IOException { |
6 | ArrayList<BibtexEntry> bibitems = new ArrayList<BibtexEntry>(); | | 6 | |
7 | StringBuffer sb = new StringBuffer(); | | 7 | ArrayList<BibtexEntry> bibItems = new ArrayList<BibtexEntry>(); |
8 | BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream)); | | 8 | BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream)); |
9 | String str; | | 9 | String line; |
10 | while ((str = in.readLine()) != null){ | | 10 | HashMap<String, String> hm = new HashMap<String, String>(); |
11 | sb.append(str); | | 11 | HashMap<String, StringBuffer> lines = new HashMap<String, StringBuffer>(); |
12 | sb.append("\n"); | | 12 | StringBuffer previousLine = null; |
13 | } | | 13 | while ((line = in.readLine()) != null){ |
14 | String[] entries = sb.toString().replaceAll("\u2013", "-").replaceAll("\u2014", "--").replaceAll("\u2015", "--").split("ER -.*\\n"); | | 14 | if (line.length() == 0) continue; // ignore empty lines, e.g. at file |
15 | | | 15 | // end |
16 | for (int i = 0; i < entries.length; i++){ | | 16 | // entry delimiter -> item complete |
17 | | | 17 | if (line.equals("------")){ |
18 | if (entries[i].trim().length() == 0) | | 18 | String[] type = new String[2]; |
19 | continue; | | 19 | String[] pages = new String[2]; |
20 | | | 20 | String country = null; |
21 | String type = "", author = "", editor = "", startPage = "", endPage = "", | | 21 | String address = null; |
22 | comment = ""; | | 22 | String titleST = null; |
23 | HashMap<String, String> hm = new HashMap<String, String>(); | | 23 | String titleTI = null; |
24 | | | 24 | Vector<String> comments = new Vector<String>(); |
25 | | | 25 | // add item |
26 | String[] fields = entries[i].split("\n"); | | 26 | for (Map.Entry<String, StringBuffer> entry : lines.entrySet()){ |
27 | | | 27 | if (entry.getKey().equals("AU")) hm.put("author", entry.getValue() |
28 | for (int j = 0; j < fields.length; j++){ | | 28 | .toString()); |
29 | StringBuffer current = new StringBuffer(fields[j]); | | 29 | else if (entry.getKey().equals("TI")) titleTI = entry.getValue() |
30 | boolean done = false; | | 30 | .toString(); |
31 | while (!done && (j < fields.length-1)) { | | 31 | else if (entry.getKey().equals("ST")) titleST = entry.getValue() |
32 | if ((fields[j+1].length() >= 6) && !fields[j+1].substring(2, 6).equals(" - ")) { | | 32 | .toString(); |
33 | if ((current.length() > 0) | | 33 | else if (entry.getKey().equals("YP")) hm.put("year", entry |
34 | && !Character.isWhitespace(current.charAt(current.length()-1)) | | 34 | .getValue().toString()); |
35 | && !Character.isWhitespace(fields[j+1].charAt(0))) | | 35 | else if (entry.getKey().equals("VL")) hm.put("volume", entry |
36 | current.append(' '); | | 36 | .getValue().toString()); |
37 | current.append(fields[j+1]); | | 37 | else if (entry.getKey().equals("NB")) hm.put("number", entry |
38 | j++; | | 38 | .getValue().toString()); |
39 | } else | | 39 | else if (entry.getKey().equals("PS")) pages[0] = entry.getValue() |
40 | done = true; | | 40 | .toString(); |
41 | } | | 41 | else if (entry.getKey().equals("PE")) pages[1] = entry.getValue() |
42 | String entry = current.toString(); | | 42 | .toString(); |
43 | if (entry.length() < 6) continue; | | 43 | else if (entry.getKey().equals("KW")) hm.put("keywords", entry |
44 | else{ | | 44 | .getValue().toString()); |
45 | String lab = entry.substring(0, 2); | | 45 | //else if (entry.getKey().equals("RM")) |
46 | String val = entry.substring(6).trim(); | | 46 | // hm.put("",entry.getValue().toString()); |
47 | if (lab.equals("TY")){ | | 47 | //else if (entry.getKey().equals("RU")) |
48 | if (val.equals("BOOK")) type = "book"; | | 48 | // hm.put("",entry.getValue().toString()); |
49 | else if (val.equals("JOUR") || val.equals("MGZN")) type = "article"; | | 49 | else if (entry.getKey().equals("RT")) type[0] = entry.getValue() |
50 | else if (val.equals("THES")) type = "phdthesis"; | | 50 | .toString(); |
51 | else if (val.equals("UNPB")) type = "unpublished"; | | 51 | else if (entry.getKey().equals("SB")) comments.add("Subject: " |
52 | else if (val.equals("RPRT")) type = "techreport"; | | 52 | + entry.getValue().toString()); |
53 | else if (val.equals("CONF")) type = "inproceedings"; | | 53 | else if (entry.getKey().equals("SA")) comments |
54 | else if (val.equals("CHAP")) type = "incollection";//"inbook"; | | 54 | .add("Secondary Authors: " + entry.getValue().toString()); |
55 | | | 55 | else if (entry.getKey().equals("NT")) hm.put("note", entry |
56 | else type = "other"; | | 56 | .getValue().toString()); |
57 | }else if (lab.equals("T1") || lab.equals("TI")) { | | 57 | //else if (entry.getKey().equals("PP")) |
58 | String oldVal = hm.get("title"); | | 58 | // hm.put("",entry.getValue().toString()); |
59 | if (oldVal == null) | | 59 | else if (entry.getKey().equals("PB")) hm.put("publisher", entry |
60 | hm.put("title", val); | | 60 | .getValue().toString()); |
61 | else { | | 61 | else if (entry.getKey().equals("TA")) comments |
62 | if (oldVal.endsWith(":") || oldVal.endsWith(".") || oldVal.endsWith("?")) | | 62 | .add("Tertiary Authors: " + entry.getValue().toString()); |
63 | hm.put("title", oldVal+" "+val); | | 63 | else if (entry.getKey().equals("TT")) comments |
64 | else | | 64 | .add("Tertiary Title: " + entry.getValue().toString()); |
65 | hm.put("title", oldVal+": "+val); | | 65 | else if (entry.getKey().equals("ED")) hm.put("edition", entry |
66 | } | | 66 | .getValue().toString()); |
67 | } | | 67 | //else if (entry.getKey().equals("DP")) |
68 | // = | | 68 | // hm.put("",entry.getValue().toString()); |
69 | // val; | | 69 | else if (entry.getKey().equals("TW")) type[1] = entry.getValue() |
70 | else if (lab.equals("T2") || lab.equals("T3") || lab.equals("BT")) { | | 70 | .toString(); |
71 | hm.put("booktitle", val); | | 71 | else if (entry.getKey().equals("QA")) comments |
72 | } | | 72 | .add("Quaternary Authors: " + entry.getValue().toString()); |
73 | else if (lab.equals("AU") || lab.equals("A1")) { | | 73 | else if (entry.getKey().equals("QT")) comments |
74 | if (author.equals("")) // don't add " and " for the first author | | 74 | .add("Quaternary Title: " + entry.getValue().toString()); |
75 | author = val; | | 75 | else if (entry.getKey().equals("IS")) hm.put("isbn", entry |
76 | else author += " and " + val; | | 76 | .getValue().toString()); |
77 | } | | 77 | //else if (entry.getKey().equals("LA")) |
78 | else if (lab.equals("A2")){ | | 78 | // hm.put("",entry.getValue().toString()); |
79 | if (editor.equals("")) // don't add " and " for the first editor | | 79 | else if (entry.getKey().equals("AB")) hm.put("abstract", entry |
80 | editor = val; | | 80 | .getValue().toString()); |
81 | else editor += " and " + val; | | 81 | //else if (entry.getKey().equals("DI")) |
82 | } | | 82 | // hm.put("",entry.getValue().toString()); |
83 | else if (lab.equals("JA") || lab.equals("JF") || lab.equals("JO")) { | | 83 | //else if (entry.getKey().equals("DM")) |
84 | if (type.equals("inproceedings")) | | 84 | // hm.put("",entry.getValue().toString()); |
85 | hm.put("booktitle", val); | | 85 | //else if (entry.getKey().equals("AV")) |
86 | else | | 86 | // hm.put("",entry.getValue().toString()); |
87 | hm.put("journal", val); | | 87 | //else if (entry.getKey().equals("PR")) |
88 | } | | 88 | // hm.put("",entry.getValue().toString()); |
89 | | | 89 | //else if (entry.getKey().equals("LO")) |
90 | else if (lab.equals("SP")) startPage = val; | | 90 | // hm.put("",entry.getValue().toString()); |
91 | else if (lab.equals("PB")) { | | 91 | else if (entry.getKey().equals("AD")) address = entry.getValue() |
92 | if (type.equals("phdthesis")) | | 92 | .toString(); |
93 | hm.put("school", val); | | 93 | else if (entry.getKey().equals("LG")) hm.put("language", entry |
94 | else | | 94 | .getValue().toString()); |
95 | hm.put("publisher", val); | | 95 | else if (entry.getKey().equals("CO")) country = entry.getValue() |
96 | } | | 96 | .toString(); |
97 | else if (lab.equals("AD") || lab.equals("CY")) | | 97 | else if (entry.getKey().equals("UR") || entry.getKey().equals("AT")){ |
98 | hm.put("address", val); | | 98 | String s = entry.getValue().toString().trim(); |
99 | else if (lab.equals("EP")) endPage = val; | | 99 | hm.put(s.startsWith("http://") || s.startsWith("ftp://") ? "url" |
100 | else if (lab.equals("SN")) | | 100 | : "pdf", entry.getValue().toString()); |
101 | hm.put("issn", val); | | 101 | }else if (entry.getKey().equals("C1")) comments.add("Custom1: " |
102 | else if (lab.equals("VL")) hm.put("volume", val); | | 102 | + entry.getValue().toString()); |
103 | else if (lab.equals("IS")) hm.put("number", val); | | 103 | else if (entry.getKey().equals("C2")) comments.add("Custom2: " |
104 | else if (lab.equals("N2") || lab.equals("AB")) { | | 104 | + entry.getValue().toString()); |
105 | String oldAb = hm.get("abstract"); | | 105 | else if (entry.getKey().equals("C3")) comments.add("Custom3: " |
106 | if (oldAb == null) | | 106 | + entry.getValue().toString()); |
107 | hm.put("abstract", val); | | 107 | else if (entry.getKey().equals("C4")) comments.add("Custom4: " |
108 | else | | 108 | + entry.getValue().toString()); |
109 | hm.put("abstract", oldAb+"\n"+val); | | 109 | //else if (entry.getKey().equals("RD")) |
110 | } | | 110 | // hm.put("",entry.getValue().toString()); |
111 | | | 111 | //else if (entry.getKey().equals("MB")) |
112 | else if (lab.equals("UR")) hm.put("url", val); | | 112 | // hm.put("",entry.getValue().toString()); |
113 | else if ((lab.equals("Y1") || lab.equals("PY")) && val.length() >= 4) { | | 113 | else if (entry.getKey().equals("C5")) comments.add("Custom5: " |
114 | String[] parts = val.split("/"); | | 114 | + entry.getValue().toString()); |
115 | hm.put("year", parts[0]); | | 115 | else if (entry.getKey().equals("C6")) comments.add("Custom6: " |
116 | if ((parts.length > 1) && (parts[1].length() > 0)) { | | 116 | + entry.getValue().toString()); |
117 | try { | | 117 | //else if (entry.getKey().equals("FA")) |
118 | int month = Integer.parseInt(parts[1]); | | 118 | // hm.put("",entry.getValue().toString()); |
119 | if ((month > 0) && (month <= 12)) { | | 119 | //else if (entry.getKey().equals("CN")) |
120 | //System.out.println(Globals.MONTHS[month-1]); | | 120 | // hm.put("",entry.getValue().toString()); |
121 | hm.put("month", "#"+Globals.MONTHS[month-1]+"#"); | | 121 | else if (entry.getKey().equals("DE")) hm.put("annote", entry |
122 | } | | 122 | .getValue().toString()); |
123 | } catch (NumberFormatException ex) { | | 123 | //else if (entry.getKey().equals("RP")) |
124 | // The month part is unparseable, so we ignore it. | | 124 | // hm.put("",entry.getValue().toString()); |
125 | } | | 125 | //else if (entry.getKey().equals("DF")) |
126 | } | | 126 | // hm.put("",entry.getValue().toString()); |
127 | } | | 127 | //else if (entry.getKey().equals("RS")) |
128 | | | 128 | // hm.put("",entry.getValue().toString()); |
129 | else if (lab.equals("KW")){ | | 129 | else if (entry.getKey().equals("CA")) comments.add("Categories: " |
130 | if (!hm.containsKey("keywords")) hm.put("keywords", val); | | 130 | + entry.getValue().toString()); |
131 | else{ | | 131 | //else if (entry.getKey().equals("WP")) |
132 | String kw = hm.get("keywords"); | | 132 | // hm.put("",entry.getValue().toString()); |
133 | hm.put("keywords", kw + ", " + val); | | 133 | else if (entry.getKey().equals("TH")) comments.add("Short Title: " |
134 | } | | 134 | + entry.getValue().toString()); |
135 | } | | 135 | //else if (entry.getKey().equals("WR")) |
136 | else if (lab.equals("U1") || lab.equals("U2") || lab.equals("N1")) { | | 136 | // hm.put("",entry.getValue().toString()); |
137 | if (comment.length() > 0) | | 137 | //else if (entry.getKey().equals("EW")) |
138 | comment = comment+"\n"; | | 138 | // hm.put("",entry.getValue().toString()); |
139 | comment = comment+val; | | 139 | else if (entry.getKey().equals("SE")) hm.put("chapter", entry |
140 | } | | 140 | .getValue().toString()); |
141 | // Added ID import 2005.12.01, Morten Alver: | | 141 | //else if (entry.getKey().equals("AC")) |
142 | else if (lab.equals("ID")) | | 142 | // hm.put("",entry.getValue().toString()); |
143 | hm.put("refid", val); | | 143 | //else if (entry.getKey().equals("LP")) |
144 | // Added doi import (sciencedirect.com) 2011.01.10, Alexander Hug <alexander@alexanderhug.info> | | 144 | // hm.put("",entry.getValue().toString()); |
145 | else if (lab.equals("M3")){ | | 145 | } |
146 | String doi = val; | | 146 | |
147 | if (doi.startsWith("doi:")){ | | 147 | String bibtexType = "misc"; |
148 | doi = doi.replaceAll("(?i)doi:", "").trim(); | | 148 | // to find type, first check TW, then RT |
149 | hm.put("doi", doi); | | 149 | for (int i = 1; i >= 0 && bibtexType.equals("misc"); --i){ |
150 | } | | 150 | if (type[i] == null) continue; |
151 | } | | 151 | type[i] = type[i].toLowerCase(); |
152 | } | | 152 | if (type[i].indexOf("article") >= 0) bibtexType = "article"; |
153 | // fix authors | | 153 | else if (type[i].indexOf("journal") >= 0) bibtexType = "article"; |
154 | if (author.length() > 0) { | | 154 | else if (type[i].indexOf("book section") >= 0) bibtexType = "inbook"; |
155 | author = AuthorList.fixAuthor_lastNameFirst(author); | | 155 | else if (type[i].indexOf("book") >= 0) bibtexType = "book"; |
156 | hm.put("author", author); | | 156 | else if (type[i].indexOf("conference") >= 0) bibtexType = "inproceedings"; |
157 | } | | 157 | else if (type[i].indexOf("proceedings") >= 0) bibtexType = "inproceedings"; |
158 | if (editor.length() > 0) { | | 158 | else if (type[i].indexOf("report") >= 0) bibtexType = "techreport"; |
159 | editor = AuthorList.fixAuthor_lastNameFirst(editor); | | 159 | else if (type[i].indexOf("thesis") >= 0 |
160 | hm.put("editor", editor); | | 160 | && type[i].indexOf("master") >= 0) bibtexType = "mastersthesis"; |
161 | } | | 161 | else if (type[i].indexOf("thesis") >= 0) bibtexType = "phdthesis"; |
162 | if (comment.length() > 0) { | | 162 | } |
163 | hm.put("comment", comment); | | 163 | |
164 | } | | 164 | // depending on bibtexType, decide where to place the titleST and |
165 | | | 165 | // titleTI |
166 | hm.put("pages", startPage + "--" + endPage); | | 166 | if (bibtexType.equals("article")){ |
167 | } | | 167 | if (titleST != null) hm.put("journal", titleST); |
168 | BibtexEntry b = new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID, Globals | | 168 | if (titleTI != null) hm.put("title", titleTI); |
169 | .getEntryType(type)); // id assumes an existing database so don't | | 169 | }else if (bibtexType.equals("inbook")){ |
170 | | | 170 | if (titleST != null) hm.put("booktitle", titleST); |
171 | // Remove empty fields: | | 171 | if (titleTI != null) hm.put("title", titleTI); |
172 | ArrayList<Object> toRemove = new ArrayList<Object>(); | | 172 | }else{ |
173 | for (Iterator<String> it = hm.keySet().iterator(); it.hasNext();) { | | 173 | if (titleST != null) hm.put("booktitle", titleST); // should not |
174 | Object key = it.next(); | | 174 | // happen, I |
175 | String content = hm.get(key); | | 175 | // think |
176 | if ((content == null) || (content.trim().length() == 0)) | | 176 | if (titleTI != null) hm.put("title", titleTI); |
177 | toRemove.add(key); | | 177 | } |
178 | } | | 178 | |
179 | for (Iterator<Object> iterator = toRemove.iterator(); iterator.hasNext();) { | | 179 | // concatenate pages |
180 | hm.remove(iterator.next()); | | 180 | if (pages[0] != null || pages[1] != null) hm.put("pages", |
181 | | | 181 | (pages[0] != null ? pages[0] : "") |
182 | } | | 182 | + (pages[1] != null ? "--" + pages[1] : "")); |
183 | | | 183 | |
184 | // create one here | | 184 | // concatenate address and country |
185 | b.setField(hm); | | 185 | if (address != null) hm.put("address", address |
186 | bibitems.add(b); | | 186 | + (country != null ? ", " + country : "")); |
187 | | | 187 | |
188 | } | | 188 | if (comments.size() > 0){ // set comment if present |
189 | | | 189 | StringBuffer s = new StringBuffer(); |
190 | return bibitems; | | 190 | for (int i = 0; i < comments.size(); ++i) |
191 | | | 191 | s.append(i > 0 ? "; " : "").append(comments.elementAt(i).toString()); |
192 | } | | 192 | hm.put("comment", s.toString()); |
| | | 193 | } |
| | | 194 | BibtexEntry b = new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID, |
| | | 195 | Globals.getEntryType(bibtexType)); |
| | | 196 | b.setField(hm); |
| | | 197 | bibItems.add(b); |
| | | 198 | |
| | | 199 | hm.clear(); |
| | | 200 | lines.clear(); |
| | | 201 | previousLine = null; |
| | | 202 | |
| | | 203 | continue; |
| | | 204 | } |
| | | 205 | // new key |
| | | 206 | if (line.startsWith("--") && line.length() >= 7 |
| | | 207 | && line.substring(4, 7).equals("-- ")){ |
| | | 208 | lines.put(line.substring(2, 4), previousLine = new StringBuffer(line |
| | | 209 | .substring(7))); |
| | | 210 | continue; |
| | | 211 | } |
| | | 212 | // continuation (folding) of previous line |
| | | 213 | if (previousLine == null) // sanity check; should never happen |
| | | 214 | return null; |
| | | 215 | previousLine.append(line.trim()); |
| | | 216 | } |
| | | 217 | |
| | | 218 | return bibItems; |
| | | 219 | } |