1 | /** | | 1 | /** |
2 | * Implements grammer rule "Authors" | | 2 | * Parse the entries in the source, and return a List of BibtexEntry |
3 | * | | 3 | * objects. |
4 | * @param be | | 4 | */ |
5 | * @throws IOException | | 5 | public List<BibtexEntry> importEntries(InputStream stream, OutputPrinter status) throws IOException { |
6 | */ | | 6 | ArrayList<BibtexEntry> bibitems = new ArrayList<BibtexEntry>(); |
7 | private void parseAuthors(BibtexEntry be) throws IOException { | | 7 | StringBuffer sb = new StringBuffer(); |
8 | // read authors and institutions | | 8 | |
9 | String authors = ""; | | 9 | BufferedReader in = |
10 | String institutions = ""; | | 10 | new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream)); |
11 | while (this.lastLine != null && !this.lastLine.equals("") && !startsWithKeyword(recognizedFields)) { | | 11 | |
12 | | | 12 | String str; |
13 | // read single author | | 13 | |
14 | String author = null; | | 14 | while ((str = in.readLine()) != null) { |
15 | String institution = null; | | 15 | if (str.length() < 3) |
16 | boolean institutionDone = false; | | 16 | continue; |
17 | if (this.lastLine.indexOf('(') >= 0) { | | 17 | |
18 | author = this.lastLine.substring(0, this.lastLine.indexOf('(')).trim(); | | 18 | // begining of a new item |
19 | institutionDone = this.lastLine.indexOf(')') > 0; | | 19 | if (str.substring(0, 6).equals("PMID- ")) |
20 | institution = this.lastLine.substring(this.lastLine.indexOf('(') + 1, institutionDone && this.lastLine.indexOf(')') > this.lastLine.indexOf('(') + 1 ? this.lastLine.indexOf(')') : this.lastLine.length()).trim(); | | 20 | sb.append("::").append(str); |
21 | } else { | | 21 | else { |
22 | author = this.lastLine.substring(0, this.lastLine.length()).trim(); | | 22 | String beg = str.substring(0, 6); |
23 | institutionDone = true; | | 23 | |
24 | } | | 24 | if (beg.indexOf(" ") > 0) { |
25 | | | 25 | sb.append(" ## "); // mark the begining of each field |
26 | readLine(); | | 26 | sb.append(str); |
27 | while (!institutionDone && this.lastLine!= null) { | | 27 | } else { |
28 | institutionDone = this.lastLine.indexOf(')') > 0; | | 28 | sb.append("EOLEOL"); // mark the end of each line |
29 | institution += this.lastLine.substring(0, institutionDone ? this.lastLine.indexOf(')') : this.lastLine.length()).trim(); | | 29 | sb.append(str.trim()); |
30 | readLine(); | | 30 | } |
31 | } | | 31 | } |
32 | | | 32 | } |
33 | if (author != null) { | | 33 | |
34 | authors += !authors.equals("") ? " and " + author : "" + author; | | 34 | String[] entries = sb.toString().split("::"); |
35 | } | | 35 | |
36 | if (institution != null) { | | 36 | // skip the first entry as it is either empty or has document header |
37 | institutions += !institutions.equals("") ? " and " + institution : "" + institution; | | 37 | HashMap<String, String> hm = new HashMap<String, String>(); |
38 | } | | 38 | |
39 | } | | 39 | for (int i = 0; i < entries.length; i++) { |
40 | | | 40 | String[] fields = entries[i].split(" ## "); |
41 | if (!authors.equals("")) { | | 41 | |
42 | be.setField("author", authors); | | 42 | if (fields.length == 0) |
43 | } | | 43 | fields = entries[i].split("\n"); |
44 | if (!institutions.equals("")) { | | 44 | |
45 | be.setField("institution", institutions); | | 45 | String Type = ""; |
46 | } | | 46 | String pages = ""; |
47 | } | | 47 | String shortauthor = ""; |
| | | 48 | String fullauthor = ""; |
| | | 49 | hm.clear(); |
| | | 50 | |
| | | 51 | for (int j = 0; j < fields.length; j++) { |
| | | 52 | System.out.println(">>>"+fields[j]+"<<<"); |
| | | 53 | |
| | | 54 | //empty field don't do anything |
| | | 55 | if (fields[j].length() <= 2) |
| | | 56 | continue; |
| | | 57 | |
| | | 58 | String beg = fields[j].substring(0, 6); |
| | | 59 | String value = fields[j].substring(6); |
| | | 60 | value = value.trim(); |
| | | 61 | |
| | | 62 | if (beg.equals("PT - ")) { |
| | | 63 | // PT = value.replaceAll("JOURNAL ARTICLE", "article").replaceAll("Journal Article", "article"); |
| | | 64 | Type = "article"; //make all of them PT? |
| | | 65 | } else if (beg.equals("TY - ")) { |
| | | 66 | if ("CONF".equals(value)) |
| | | 67 | Type = "inproceedings"; |
| | | 68 | } else if (beg.equals("JO - ")) |
| | | 69 | hm.put("booktitle", value); |
| | | 70 | else if (beg.equals("FAU - ")) { |
| | | 71 | String tmpauthor = value.replaceAll("EOLEOL", " and "); |
| | | 72 | |
| | | 73 | // if there is already someone there then append with "and" |
| | | 74 | if (!"".equals(fullauthor)) |
| | | 75 | fullauthor = fullauthor + " and " + tmpauthor; |
| | | 76 | else |
| | | 77 | fullauthor = tmpauthor; |
| | | 78 | } else if (beg.equals("AU - ")) { |
| | | 79 | String tmpauthor = value.replaceAll("EOLEOL", " and ").replaceAll(" ", ", "); |
| | | 80 | |
| | | 81 | // if there is already someone there then append with "and" |
| | | 82 | if (!"".equals(shortauthor)) |
| | | 83 | shortauthor = shortauthor + " and " + tmpauthor; |
| | | 84 | else |
| | | 85 | shortauthor = tmpauthor; |
| | | 86 | } else if (beg.equals("TI - ")) |
| | | 87 | hm.put("title", value.replaceAll("EOLEOL", " ")); |
| | | 88 | else if (beg.equals("TA - ")) |
| | | 89 | hm.put("journal", value.replaceAll("EOLEOL", " ")); |
| | | 90 | else if (beg.equals("AB - ")) |
| | | 91 | hm.put("abstract", value.replaceAll("EOLEOL", " ")); |
| | | 92 | else if (beg.equals("PG - ")) |
| | | 93 | pages = value.replaceAll("-", "--"); |
| | | 94 | else if (beg.equals("IP - ")) |
| | | 95 | hm.put("number", value); |
| | | 96 | else if (beg.equals("DP - ")) { |
| | | 97 | String[] parts = value.split(" "); // sometimes this is just year, sometimes full date |
| | | 98 | hm.put("year", parts[0]); |
| | | 99 | } else if (beg.equals("VI - ")) |
| | | 100 | hm.put("volume", value); |
| | | 101 | else if (beg.equals("AID - ")) { |
| | | 102 | String[] parts = value.split(" "); |
| | | 103 | if ("[doi]".equals(parts[1])) { |
| | | 104 | hm.put("doi", parts[0]); |
| | | 105 | hm.put("url", "http://dx.doi.org/" + parts[0]); |
| | | 106 | } |
| | | 107 | } |
| | | 108 | } |
| | | 109 | |
| | | 110 | if (!"".equals(pages)) |
| | | 111 | hm.put("pages", pages); |
| | | 112 | if (!"".equals(fullauthor)) |
| | | 113 | hm.put("author", fullauthor); |
| | | 114 | else if (!"".equals(shortauthor)) |
| | | 115 | hm.put("author", shortauthor); |
| | | 116 | |
| | | 117 | BibtexEntry b = |
| | | 118 | new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID, Globals.getEntryType(Type)); // id assumes an existing database so don't |
| | | 119 | |
| | | 120 | // create one here |
| | | 121 | b.setField(hm); |
| | | 122 | |
| | | 123 | // the first bibitem is always empty, presumably as a result of trying |
| | | 124 | // to parse header informaion. So add only if we have at least author or |
| | | 125 | // title fields. |
| | | 126 | if (hm.get("author") != null || hm.get("title") != null) |
| | | 127 | bibitems.add(b); |
| | | 128 | } |
| | | 129 | |
| | | 130 | return bibitems; |
| | | 131 | } |