package marf.Classification.Stochastic;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.util.Hashtable;
import marf.Classification.ClassificationException;
import marf.Classification.Distance.DiffDistance;
import marf.FeatureExtraction.IFeatureExtraction;
import marf.Stats.StatisticalObject;
import marf.Stats.WordStats;
import marf.Storage.Result;
import marf.Storage.StorageException;
import marf.util.Arrays;
import marf.util.comparators.FrequencyComparator;

/* loaded from: input_file:marf/Classification/Stochastic/ZipfLaw.class */
/**
 * Zipf's-law style frequency/rank statistics collector that doubles as a
 * classifier.
 *
 * <p>Statistics are kept in a table keyed either by a boxed {@code Double}
 * (numeric feature values) or by a {@code String} (word tokens). After every
 * collection pass the entries are sorted with a {@code FrequencyComparator}
 * and ranked 1..N in the resulting order.</p>
 *
 * <p>NOTE(review): the comparator is constructed with the literal {@code 1};
 * this presumably selects descending frequency order -- confirm against
 * {@code FrequencyComparator}.</p>
 */
public class ZipfLaw extends Stochastic {
    /** Default number of entries per output page used by {@link #dumpAll()}. */
    public static final int DEFAULT_OUTPUT_PAGE_SIZE = 100;

    /**
     * Statistics table. Keys are {@code Double} (numeric input) or
     * {@code String} (token input); values are {@code StatisticalObject}
     * instances ({@code WordStats} for tokens). Kept as a raw type because
     * {@link #getStats()} exposes it as such.
     */
    private Hashtable oStats = new Hashtable();

    /** Values of {@link #oStats} in sorted order; {@code null} until the first sort. */
    private StatisticalObject[] aoSortedStatRefs = null;

    /** When {@code true}, {@link #dumpGraphValues()} writes natural logarithms of rank and frequency. */
    private boolean bDumpLogariphm = true;

    /** Longest token length seen so far (forced to 1 for numeric input). */
    private int iMaxWordLength = 0;

    /** Shortest token length seen so far (forced to 1 for numeric input). */
    private int iMinWordLength = Integer.MAX_VALUE;

    /** Number of entries printed per page by {@link #dumpAll()}. */
    private int iOutputPageSize = DEFAULT_OUTPUT_PAGE_SIZE;

    private static final long serialVersionUID = -7356104653992493029L;

    /**
     * Creates an instance that is not attached to a feature extraction module.
     *
     * @param str file name used as the base name for dumping/restoring
     */
    public ZipfLaw(String str) {
        super(null);
        this.strFilename = str;
        this.oObjectToSerialize = this;
        // Mode 0 is the one dump()/restore() route to the gzip-binary variants.
        this.iCurrentDumpMode = 0;
    }

    /**
     * Creates an instance attached to the given feature extraction module.
     * The storage file name is derived from the training set file name by
     * replacing {@code marf.Storage.TrainingSet} with this class's name.
     *
     * @param iFeatureExtraction feature extraction module passed to the superclass
     */
    public ZipfLaw(IFeatureExtraction iFeatureExtraction) {
        super(iFeatureExtraction);
        this.strFilename = getTrainingSetFilename().replaceAll("marf.Storage.TrainingSet", getClass().getName());
        this.oObjectToSerialize = this;
    }

    /**
     * Collects statistics for the incoming vector, snapshots the resulting
     * sorted entries, calls {@link #restore()}, and then adds a single result
     * (ID {@code 1}) whose outcome is the {@code DiffDistance} between the
     * relative-frequency distribution held after the restore and that of the
     * snapshot.
     *
     * <p>NOTE(review): the two distributions may differ in length; whether
     * {@code DiffDistance} tolerates that is not visible from this class.</p>
     *
     * @param dArr incoming feature vector
     * @return always {@code true} when no exception is thrown
     * @throws ClassificationException on any failure; non-classification
     *         exceptions are wrapped
     */
    @Override // marf.Classification.Stochastic.Stochastic, marf.Classification.IClassification
    public boolean classify(double[] dArr) throws ClassificationException {
        try {
            collectStatistics(dArr);

            // Snapshot the incoming statistics before restore() is given the
            // chance to replace the in-memory state.
            StatisticalObject[] incomingStats = (StatisticalObject[]) this.aoSortedStatRefs.clone();
            restore();

            double[] incomingDistribution = toRelativeFrequencies(incomingStats);
            double[] storedDistribution = toRelativeFrequencies(this.aoSortedStatRefs);

            this.oResultSet.addResult(1, new DiffDistance(null).distance(storedDistribution, incomingDistribution));
            return true;
        } catch (ClassificationException e) {
            throw e;
        } catch (Exception e2) {
            throw new ClassificationException(e2);
        }
    }

    /**
     * Converts absolute frequencies into fractions of their total.
     *
     * <p>The division is carried out in floating point; dividing the integer
     * frequency by the integer total would truncate every fraction below 1
     * to zero.</p>
     *
     * @param stats entries whose frequencies are to be normalised
     * @return array of the same length holding {@code frequency / total}
     */
    private static double[] toRelativeFrequencies(StatisticalObject[] stats) {
        long total = 0;
        for (int i = 0; i < stats.length; i++) {
            total += stats[i].getFrequency();
        }

        double[] fractions = new double[stats.length];
        for (int i = 0; i < stats.length; i++) {
            fractions[i] = (double) stats[i].getFrequency() / total;
        }
        return fractions;
    }

    /**
     * Calls {@link #restore()}, adds the statistics of the given vector and
     * calls {@link #dump()}.
     *
     * @param dArr training feature vector
     * @return always {@code true} when no exception is thrown
     * @throws ClassificationException on any failure; non-classification
     *         exceptions are wrapped
     */
    @Override // marf.Classification.Stochastic.Stochastic, marf.Classification.Classification, marf.Classification.IClassification
    public boolean train(double[] dArr) throws ClassificationException {
        try {
            restore();
            collectStatistics(dArr);
            dump();
            return true;
        } catch (ClassificationException e) {
            throw e;
        } catch (Exception e2) {
            throw new ClassificationException(e2);
        }
    }

    /**
     * Returns the superclass's result unchanged.
     *
     * @return result as reported by the superclass
     */
    @Override // marf.Classification.Stochastic.Stochastic, marf.Classification.IClassification
    public Result getResult() {
        return super.getResult();
    }

    /**
     * Counts occurrences of every distinct value of the vector, then re-sorts
     * and re-ranks all entries. Minimum and maximum word length are both set
     * to 1.
     *
     * @param dArr values to count
     * @throws ClassificationException wrapping any runtime exception raised
     *         while collecting (including a {@code null} argument)
     */
    public final void collectStatistics(double[] dArr) throws ClassificationException {
        try {
            this.iMaxWordLength = 1;
            this.iMinWordLength = 1;

            for (int i = 0; i < dArr.length; i++) {
                // Box once and reuse the key for both lookup and insertion.
                Double key = Double.valueOf(dArr[i]);
                StatisticalObject stats = (StatisticalObject) this.oStats.get(key);
                if (stats == null) {
                    this.oStats.put(key, new StatisticalObject(1));
                } else {
                    stats.incFrequency();
                }
            }

            sort();
            rankAll();
        } catch (RuntimeException e) {
            throw new ClassificationException(e);
        }
    }

    /**
     * Counts occurrences of every word token produced by the tokenizer until
     * end of stream, tracking minimum and maximum token length, then re-sorts
     * and re-ranks all entries. Tokens without a string value are reported on
     * {@code System.err} and skipped.
     *
     * @param streamTokenizer source of tokens
     * @throws ClassificationException wrapping any exception raised while
     *         reading or counting tokens, including the {@code IOException}
     *         that {@code nextToken()} may throw
     */
    public final void collectStatistics(StreamTokenizer streamTokenizer) throws ClassificationException {
        try {
            // nextToken() throws the checked IOException, so the whole loop
            // (not just its body) has to live inside the try block.
            while (streamTokenizer.nextToken() != StreamTokenizer.TT_EOF) {
                String str = streamTokenizer.sval;
                if (str == null) {
                    System.err.println("WARNING: null sval for token type: (" + streamTokenizer.ttype + "," + (char) streamTokenizer.ttype + ")");
                    continue;
                }

                int length = str.length();
                if (length > this.iMaxWordLength) {
                    this.iMaxWordLength = length;
                }
                if (length < this.iMinWordLength) {
                    this.iMinWordLength = length;
                }

                WordStats wordStats = (WordStats) this.oStats.get(str);
                if (wordStats == null) {
                    this.oStats.put(str, new WordStats(1, str));
                } else {
                    wordStats.incFrequency();
                }
            }
        } catch (Exception e) {
            throw new ClassificationException(e);
        }

        sort();
        rankAll();
    }

    /**
     * Rebuilds {@link #aoSortedStatRefs} from the current table values and
     * sorts it with {@code FrequencyComparator(1)}.
     */
    private void sort() {
        this.aoSortedStatRefs = (StatisticalObject[]) this.oStats.values().toArray(new StatisticalObject[0]);
        Arrays.sort(this.aoSortedStatRefs, new FrequencyComparator(1));
    }

    /** Assigns rank {@code index + 1} to every entry of the sorted array. */
    private final void rankAll() {
        for (int i = 0; i < this.aoSortedStatRefs.length; i++) {
            this.aoSortedStatRefs[i].setRank(i + 1);
        }
    }

    /**
     * Prints a human-readable report to {@code System.out}:
     * <ol>
     *   <li>one page of up to {@code iOutputPageSize} entries (rank, frequency,
     *       frequency*rank and, for word entries, the lexeme) starting at every
     *       {@code 10 * iOutputPageSize}-th entry;</li>
     *   <li>a "frequency of frequencies" table giving, for each frequency
     *       {@code f} in {@code 1..iOutputPageSize}, the number of entries
     *       having that frequency.</li>
     * </ol>
     * Entries whose frequency falls outside the table are reported on
     * {@code System.err} and not counted.
     */
    public final void dumpAll() {
        System.out.println("f = Frequency, r = Rank");

        final int entryCount = this.aoSortedStatRefs.length;

        // Only one page out of every ten is printed.
        for (int pageStart = 0; pageStart < entryCount; pageStart += 10 * this.iOutputPageSize) {
            System.out.println("\n---------------------------------\nWords from " + (pageStart + 1) + " to " + (pageStart + this.iOutputPageSize) + "\n" + "---------------------------------\n\n");
            System.out.println("Columns: r, f, f*r, word");

            // The last page may be shorter than a full page.
            int pageEnd = Math.min(entryCount, pageStart + this.iOutputPageSize);

            StringBuffer page = new StringBuffer();
            for (int i = pageStart; i < pageEnd; i++) {
                StatisticalObject entry = this.aoSortedStatRefs[i];
                page.append(entry.getRank()).append("\t").append(entry.getFrequency()).append("\t").append(entry.getFrequency() * entry.getRank()).append("\t");
                if (entry instanceof WordStats) {
                    page.append(((WordStats) entry).getLexeme());
                }
                page.append("\n");
            }
            System.out.print(page);
        }

        // frequencyCounts[f - 1] == number of entries with frequency f.
        int[] frequencyCounts = new int[this.iOutputPageSize];

        // Walk from the end of the sorted array down to and including index 0
        // so that every entry is accounted for.
        for (int i = entryCount - 1; i >= 0; i--) {
            int frequency = this.aoSortedStatRefs[i].getFrequency();
            if (frequency >= 1 && frequency <= this.iOutputPageSize) {
                frequencyCounts[frequency - 1]++;
            } else {
                System.err.println("WARNING: Occurence of a frequency (" + frequency + ") exceeds " + "output page size (" + this.iOutputPageSize + "), and, therefore, ignored.");
            }
        }

        System.out.println("\nFrequency of frequencies\n------------------------\nf\tC(f,w)");
        for (int i = 0; i < this.iOutputPageSize; i++) {
            System.out.println((i + 1) + "\t" + frequencyCounts[i]);
        }
    }

    /**
     * Writes rank/frequency pairs of all sorted entries to
     * {@code <strFilename>.csv}: a header line followed by one line per entry.
     * When logarithm dumping is on, natural logarithms of both values are
     * written instead of the raw values.
     *
     * @throws IOException if the file cannot be created or written
     */
    public final void dumpGraphValues() throws IOException {
        BufferedWriter writer = new BufferedWriter(new FileWriter(this.strFilename + ".csv"));
        try {
            writer.write(this.bDumpLogariphm ? "log(rank),log(frequency)" : "rank,frequency");
            // Terminate the header so it does not run into the first data row.
            writer.newLine();

            for (int i = 0; i < this.aoSortedStatRefs.length; i++) {
                StatisticalObject entry = this.aoSortedStatRefs[i];
                if (this.bDumpLogariphm) {
                    writer.write(Math.log(entry.getRank()) + "," + Math.log(entry.getFrequency()));
                } else {
                    writer.write(entry.getRank() + "," + entry.getFrequency());
                }
                writer.newLine();
            }
        } finally {
            // Release the file handle even when a write fails.
            writer.close();
        }
    }

    /**
     * Copies the statistics table, sorted array, logarithm flag and min/max
     * word lengths from {@code oObjectToSerialize} (cast to {@code ZipfLaw})
     * into this instance, then points {@code oObjectToSerialize} back at this
     * instance. The output page size is not copied.
     */
    @Override // marf.Storage.StorageManager
    public synchronized void backSynchronizeObject() {
        ZipfLaw restored = (ZipfLaw) this.oObjectToSerialize;
        this.oStats = restored.getStats();
        this.aoSortedStatRefs = restored.getSortedStatRefs();
        this.bDumpLogariphm = restored.isDumpLogariphmOn();
        this.iMaxWordLength = restored.getMaxWordLength();
        this.iMinWordLength = restored.getMinWordLength();
        this.oObjectToSerialize = this;
    }

    /**
     * Dumps this object according to {@code iCurrentDumpMode}:
     * 0 - {@code dumpGzipBinary()}, 1 - {@link #dumpCSV()},
     * 2 - {@code dumpBinary()}.
     *
     * @throws StorageException if the mode is none of the above or the
     *         delegated dump fails
     */
    @Override // marf.Classification.Classification, marf.Storage.StorageManager, marf.Storage.IStorageManager
    public synchronized void dump() throws StorageException {
        switch (this.iCurrentDumpMode) {
            case 0:
                dumpGzipBinary();
                return;
            case 1:
                dumpCSV();
                return;
            case 2:
                dumpBinary();
                return;
            default:
                throw new StorageException("Unsupported dump mode: " + this.iCurrentDumpMode);
        }
    }

    /**
     * Restores this object according to {@code iCurrentDumpMode}:
     * 0 - {@code restoreGzipBinary()}, 1 - {@code restoreCSV()},
     * 2 - {@code restoreBinary()}. None of the three is defined in this class.
     *
     * @throws StorageException if the mode is none of the above or the
     *         delegated restore fails
     */
    @Override // marf.Classification.Classification, marf.Storage.StorageManager, marf.Storage.IStorageManager
    public synchronized void restore() throws StorageException {
        switch (this.iCurrentDumpMode) {
            case 0:
                restoreGzipBinary();
                return;
            case 1:
                restoreCSV();
                return;
            case 2:
                restoreBinary();
                return;
            default:
                throw new StorageException("Unsupported dump mode: " + this.iCurrentDumpMode);
        }
    }

    /**
     * CSV dump: delegates to {@link #dumpGraphValues()}.
     *
     * @throws StorageException wrapping any {@code IOException} raised while writing
     */
    @Override // marf.Storage.StorageManager, marf.Storage.IStorageManager
    public synchronized void dumpCSV() throws StorageException {
        try {
            dumpGraphValues();
        } catch (IOException e) {
            throw new StorageException(e);
        }
    }

    /**
     * @return {@code true} if {@link #dumpGraphValues()} writes logarithms
     */
    public boolean isDumpLogariphmOn() {
        return this.bDumpLogariphm;
    }

    /**
     * @param z {@code true} to make {@link #dumpGraphValues()} write logarithms
     */
    public void setDumpLogariphm(boolean z) {
        this.bDumpLogariphm = z;
    }

    /**
     * @return the internal sorted array itself (not a copy); {@code null} if
     *         nothing has been sorted yet
     */
    public final StatisticalObject[] getSortedStatRefs() {
        return this.aoSortedStatRefs;
    }

    /**
     * @return the internal statistics table itself (not a copy)
     */
    public final Hashtable getStats() {
        return this.oStats;
    }

    /**
     * Looks up the statistics recorded for a word.
     *
     * @param str the word to look up
     * @return the entry stored under that key, or {@code null} if there is none
     */
    public final WordStats getWordStats(String str) {
        return (WordStats) this.oStats.get(str);
    }

    /**
     * @return the maximum word length recorded so far
     */
    public final int getMaxWordLength() {
        return this.iMaxWordLength;
    }

    /**
     * @return the minimum word length recorded so far
     */
    public final int getMinWordLength() {
        return this.iMinWordLength;
    }

    /**
     * @return multi-line summary: min/max word length, dictionary size and
     *         the string form of the statistics table
     */
    @Override // marf.Storage.StorageManager
    public String toString() {
        return "Minimum word length: " + this.iMinWordLength + "\n"
            + "Maximum word length: " + this.iMaxWordLength + "\n"
            + "Dictionary size: " + this.oStats.size() + "\n"
            + "Stats Dictionary:\n" + this.oStats;
    }

    /**
     * @return the revision string of this source file
     */
    public static String getMARFSourceCodeRevision() {
        return "$Revision: 1.32 $";
    }
}
