fehlende ordner geadded

2017-09-11 13:03:20 +02:00 · 2017-09-11 13:03:20 +02:00 · 67e6f8845c
parent 991353b1bb
commit 67e6f8845c
33 changed files with 2691 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -26,6 +26,8 @@ wheels/
 *.egg-info/
 .installed.cfg
 *.egg
+openthesaurus.csv
+

 # PyInstaller
 #  Usually these files are written by a python script from a template
--- a/java_LabledLDA/.idea/libraries/lib.xml
+++ b/java_LabledLDA/.idea/libraries/lib.xml
@ -0,0 +1,10 @@
+<component name="libraryTable">
+  <library name="lib">
+    <CLASSES>
+      <root url="file://$PROJECT_DIR$/lib" />
+    </CLASSES>
+    <JAVADOC />
+    <SOURCES />
+    <jarDirectory url="file://$PROJECT_DIR$/lib" recursive="false" />
+  </library>
+</component>
--- a/java_LabledLDA/.idea/misc.xml
+++ b/java_LabledLDA/.idea/misc.xml
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_1_7" default="true" project-jdk-name="1.7" project-jdk-type="JavaSDK">
+    <output url="file://$PROJECT_DIR$/out" />
+  </component>
+</project>
--- a/java_LabledLDA/.idea/modules.xml
+++ b/java_LabledLDA/.idea/modules.xml
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/LabledLDA.iml" filepath="$PROJECT_DIR$/LabledLDA.iml" />
+    </modules>
+  </component>
+</project>
--- a/java_LabledLDA/.idea/workspace.xml
+++ b/java_LabledLDA/.idea/workspace.xml
@ -0,0 +1,439 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ChangeListManager">
+    <list default="true" id="df2270cf-7b1b-40a3-9553-573e1ae67abd" name="Default" comment="" />
+    <ignored path="$PROJECT_DIR$/out/" />
+    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
+    <option name="TRACKING_ENABLED" value="true" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FileEditorManager">
+    <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
+      <file leaf-file-name="LDA.java" pinned="false" current-in-tab="true">
+        <entry file="file://$PROJECT_DIR$/src/jgibblda/LDA.java">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="77">
+              <caret line="38" column="5" lean-forward="true" selection-start-line="38" selection-start-column="5" selection-end-line="38" selection-end-column="5" />
+              <folding>
+                <element signature="imports" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file leaf-file-name="Inferencer.java" pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/src/jgibblda/Inferencer.java">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="792">
+              <caret line="71" column="0" lean-forward="false" selection-start-line="71" selection-start-column="0" selection-end-line="71" selection-end-column="0" />
+              <folding />
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file leaf-file-name="Estimator.java" pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/src/jgibblda/Estimator.java">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="221">
+              <caret line="113" column="0" lean-forward="false" selection-start-line="113" selection-start-column="0" selection-end-line="113" selection-end-column="0" />
+              <folding />
+            </state>
+          </provider>
+        </entry>
+      </file>
+    </leaf>
+  </component>
+  <component name="GradleLocalSettings">
+    <option name="externalProjectsViewState">
+      <projects_view />
+    </option>
+  </component>
+  <component name="IdeDocumentHistory">
+    <option name="CHANGED_PATHS">
+      <list>
+        <option value="$PROJECT_DIR$/src/jgibblda/LDA.java" />
+      </list>
+    </option>
+  </component>
+  <component name="JsBuildToolGruntFileManager" detection-done="true" sorting="DEFINITION_ORDER" />
+  <component name="JsBuildToolPackageJson" detection-done="true" sorting="DEFINITION_ORDER" />
+  <component name="JsGulpfileManager">
+    <detection-done>true</detection-done>
+    <sorting>DEFINITION_ORDER</sorting>
+  </component>
+  <component name="ProjectFrameBounds" extendedState="6">
+    <option name="x" value="1991" />
+    <option name="y" value="89" />
+    <option name="width" value="1838" />
+    <option name="height" value="810" />
+  </component>
+  <component name="ProjectView">
+    <navigator currentView="ProjectPane" proportions="" version="1">
+      <flattenPackages />
+      <showMembers />
+      <showModules />
+      <showLibraryContents />
+      <hideEmptyPackages />
+      <abbreviatePackageNames />
+      <autoscrollToSource />
+      <autoscrollFromSource />
+      <sortByType />
+      <manualOrder />
+      <foldersAlwaysOnTop value="true" />
+    </navigator>
+    <panes>
+      <pane id="ProjectPane">
+        <subPane>
+          <expand>
+            <path>
+              <item name="java_LabledLDA" type="b2602c69:ProjectViewProjectNode" />
+              <item name="java_LabledLDA" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="java_LabledLDA" type="b2602c69:ProjectViewProjectNode" />
+              <item name="java_LabledLDA" type="462c0819:PsiDirectoryNode" />
+              <item name="models" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="java_LabledLDA" type="b2602c69:ProjectViewProjectNode" />
+              <item name="java_LabledLDA" type="462c0819:PsiDirectoryNode" />
+              <item name="models" type="462c0819:PsiDirectoryNode" />
+              <item name="bla" type="462c0819:PsiDirectoryNode" />
+            </path>
+          </expand>
+          <select />
+        </subPane>
+      </pane>
+      <pane id="AndroidView" />
+      <pane id="Scratches" />
+      <pane id="Scope" />
+      <pane id="PackagesPane" />
+    </panes>
+  </component>
+  <component name="PropertiesComponent">
+    <property name="WebServerToolWindowFactoryState" value="false" />
+    <property name="aspect.path.notification.shown" value="true" />
+    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
+    <property name="project.structure.last.edited" value="Libraries" />
+    <property name="project.structure.proportion" value="0.15" />
+    <property name="project.structure.side.proportion" value="0.2" />
+  </component>
+  <component name="RecentsManager">
+    <key name="MoveFile.RECENT_KEYS">
+      <recent name="$PROJECT_DIR$" />
+    </key>
+  </component>
+  <component name="RunDashboard">
+    <option name="ruleStates">
+      <list>
+        <RuleState>
+          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
+        </RuleState>
+        <RuleState>
+          <option name="name" value="StatusDashboardGroupingRule" />
+        </RuleState>
+      </list>
+    </option>
+  </component>
+  <component name="RunManager" selected="Application.LDA">
+    <configuration name="LDA" type="Application" factoryName="Application" temporary="true" nameIsGenerated="true">
+      <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea">
+        <pattern>
+          <option name="PATTERN" value="jgibblda.*" />
+          <option name="ENABLED" value="true" />
+        </pattern>
+      </extension>
+      <option name="MAIN_CLASS_NAME" value="jgibblda.LDA" />
+      <option name="VM_PARAMETERS" value="" />
+      <option name="PROGRAM_PARAMETERS" value="-dfile test.txt.gz -dir models/test -est -ntopics 4" />
+      <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" />
+      <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
+      <option name="ALTERNATIVE_JRE_PATH" />
+      <option name="ENABLE_SWING_INSPECTOR" value="false" />
+      <option name="ENV_VARIABLES" />
+      <option name="PASS_PARENT_ENVS" value="true" />
+      <module name="LabledLDA" />
+      <envs />
+    </configuration>
+    <configuration default="true" type="Applet" factoryName="Applet">
+      <option name="HTML_USED" value="false" />
+      <option name="WIDTH" value="400" />
+      <option name="HEIGHT" value="300" />
+      <option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy" />
+      <module />
+    </configuration>
+    <configuration default="true" type="Application" factoryName="Application">
+      <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
+      <option name="MAIN_CLASS_NAME" />
+      <option name="VM_PARAMETERS" />
+      <option name="PROGRAM_PARAMETERS" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
+      <option name="ALTERNATIVE_JRE_PATH" />
+      <option name="ENABLE_SWING_INSPECTOR" value="false" />
+      <option name="ENV_VARIABLES" />
+      <option name="PASS_PARENT_ENVS" value="true" />
+      <module name="" />
+      <envs />
+    </configuration>
+    <configuration default="true" type="JUnit" factoryName="JUnit">
+      <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
+      <module name="" />
+      <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
+      <option name="ALTERNATIVE_JRE_PATH" />
+      <option name="PACKAGE_NAME" />
+      <option name="MAIN_CLASS_NAME" />
+      <option name="METHOD_NAME" />
+      <option name="TEST_OBJECT" value="class" />
+      <option name="VM_PARAMETERS" value="-ea" />
+      <option name="PARAMETERS" />
+      <option name="WORKING_DIRECTORY" value="$MODULE_DIR$" />
+      <option name="ENV_VARIABLES" />
+      <option name="PASS_PARENT_ENVS" value="true" />
+      <option name="TEST_SEARCH_SCOPE">
+        <value defaultName="singleModule" />
+      </option>
+      <envs />
+      <patterns />
+    </configuration>
+    <configuration default="true" type="#org.jetbrains.idea.devkit.run.PluginConfigurationType" factoryName="Plugin">
+      <module name="" />
+      <option name="VM_PARAMETERS" value="-Xmx512m -Xms256m -XX:MaxPermSize=250m -ea" />
+      <option name="PROGRAM_PARAMETERS" />
+      <predefined_log_file id="idea.log" enabled="true" />
+    </configuration>
+    <configuration default="true" type="Remote" factoryName="Remote">
+      <option name="USE_SOCKET_TRANSPORT" value="true" />
+      <option name="SERVER_MODE" value="false" />
+      <option name="SHMEM_ADDRESS" value="javadebug" />
+      <option name="HOST" value="localhost" />
+      <option name="PORT" value="5005" />
+    </configuration>
+    <configuration default="true" type="TestNG" factoryName="TestNG">
+      <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
+      <module name="" />
+      <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
+      <option name="ALTERNATIVE_JRE_PATH" />
+      <option name="SUITE_NAME" />
+      <option name="PACKAGE_NAME" />
+      <option name="MAIN_CLASS_NAME" />
+      <option name="METHOD_NAME" />
+      <option name="GROUP_NAME" />
+      <option name="TEST_OBJECT" value="CLASS" />
+      <option name="VM_PARAMETERS" value="-ea" />
+      <option name="PARAMETERS" />
+      <option name="WORKING_DIRECTORY" value="$MODULE_DIR$" />
+      <option name="OUTPUT_DIRECTORY" />
+      <option name="ANNOTATION_TYPE" />
+      <option name="ENV_VARIABLES" />
+      <option name="PASS_PARENT_ENVS" value="true" />
+      <option name="TEST_SEARCH_SCOPE">
+        <value defaultName="singleModule" />
+      </option>
+      <option name="USE_DEFAULT_REPORTERS" value="false" />
+      <option name="PROPERTIES_FILE" />
+      <envs />
+      <properties />
+      <listeners />
+    </configuration>
+    <recent_temporary>
+      <list size="1">
+        <item index="0" class="java.lang.String" itemvalue="Application.LDA" />
+      </list>
+    </recent_temporary>
+  </component>
+  <component name="ShelveChangesManager" show_recycled="false">
+    <option name="remove_strategy" value="false" />
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="df2270cf-7b1b-40a3-9553-573e1ae67abd" name="Default" comment="" />
+      <created>1503915939387</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1503915939387</updated>
+      <workItem from="1503915946354" duration="2197000" />
+      <workItem from="1503922346467" duration="827000" />
+    </task>
+    <servers />
+  </component>
+  <component name="TimeTrackingManager">
+    <option name="totallyTimeSpent" value="3024000" />
+  </component>
+  <component name="ToolWindowManager">
+    <frame x="1919" y="0" width="1922" height="1081" extended-state="6" />
+    <layout>
+      <window_info id="Palette" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
+      <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
+      <window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
+      <window_info id="Palette&#9;" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
+      <window_info id="Image Layers" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
+      <window_info id="Capture Analysis" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
+      <window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
+      <window_info id="Maven Projects" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
+      <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
+      <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.49428868" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
+      <window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
+      <window_info id="Capture Tool" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
+      <window_info id="Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
+      <window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
+      <window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
+      <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
+      <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
+      <window_info id="UI Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
+      <window_info id="Theme Preview" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
+      <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
+      <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.3997923" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
+      <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
+      <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
+      <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
+      <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
+      <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
+      <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
+    </layout>
+  </component>
+  <component name="TypeScriptGeneratedFilesManager">
+    <option name="version" value="1" />
+  </component>
+  <component name="VcsContentAnnotationSettings">
+    <option name="myLimit" value="2678400000" />
+  </component>
+  <component name="XDebuggerManager">
+    <breakpoint-manager />
+    <watches-manager />
+  </component>
+  <component name="editorHistoryManager">
+    <entry file="file://$PROJECT_DIR$/src/jgibblda/LDA.java">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="0">
+          <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
+          <folding>
+            <element signature="imports" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/src/jgibblda/Inferencer.java">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="792">
+          <caret line="71" column="0" lean-forward="false" selection-start-line="71" selection-start-column="0" selection-end-line="71" selection-end-column="0" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/src/jgibblda/Estimator.java">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="1548">
+          <caret line="113" column="0" lean-forward="false" selection-start-line="113" selection-start-column="0" selection-end-line="113" selection-end-column="0" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/src/LDA.java" />
+    <entry file="file://$PROJECT_DIR$/src/jgibblda/Inferencer.java">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="792">
+          <caret line="71" column="0" lean-forward="false" selection-start-line="71" selection-start-column="0" selection-end-line="71" selection-end-column="0" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/src/jgibblda/Estimator.java">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="221">
+          <caret line="113" column="0" lean-forward="false" selection-start-line="113" selection-start-column="0" selection-end-line="113" selection-end-column="0" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/src/jgibblda/LDA.java">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="77">
+          <caret line="38" column="5" lean-forward="true" selection-start-line="38" selection-start-column="5" selection-end-line="38" selection-end-column="5" />
+          <folding>
+            <element signature="imports" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+  </component>
+  <component name="masterDetails">
+    <states>
+      <state key="ArtifactsStructureConfigurable.UI">
+        <settings>
+          <artifact-editor />
+          <splitter-proportions>
+            <option name="proportions">
+              <list>
+                <option value="0.2" />
+              </list>
+            </option>
+          </splitter-proportions>
+        </settings>
+      </state>
+      <state key="FacetStructureConfigurable.UI">
+        <settings>
+          <last-edited>No facets are configured</last-edited>
+          <splitter-proportions>
+            <option name="proportions">
+              <list>
+                <option value="0.2" />
+              </list>
+            </option>
+          </splitter-proportions>
+        </settings>
+      </state>
+      <state key="GlobalLibrariesConfigurable.UI">
+        <settings>
+          <splitter-proportions>
+            <option name="proportions">
+              <list>
+                <option value="0.2" />
+              </list>
+            </option>
+          </splitter-proportions>
+        </settings>
+      </state>
+      <state key="JdkListConfigurable.UI">
+        <settings>
+          <last-edited>1.7</last-edited>
+          <splitter-proportions>
+            <option name="proportions">
+              <list>
+                <option value="0.2" />
+              </list>
+            </option>
+          </splitter-proportions>
+        </settings>
+      </state>
+      <state key="ModuleStructureConfigurable.UI">
+        <settings>
+          <last-edited>LabledLDA</last-edited>
+          <splitter-proportions>
+            <option name="proportions">
+              <list>
+                <option value="0.2" />
+                <option value="0.6" />
+              </list>
+            </option>
+          </splitter-proportions>
+        </settings>
+      </state>
+      <state key="ProjectLibrariesConfigurable.UI">
+        <settings>
+          <last-edited>lib</last-edited>
+          <splitter-proportions>
+            <option name="proportions">
+              <list>
+                <option value="0.2" />
+              </list>
+            </option>
+          </splitter-proportions>
+        </settings>
+      </state>
+    </states>
+  </component>
+</project>
--- a/java_LabledLDA/LICENSE
+++ b/java_LabledLDA/LICENSE
@ -0,0 +1,339 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
--- a/java_LabledLDA/LabledLDA.iml
+++ b/java_LabledLDA/LabledLDA.iml
@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="true">
+    <exclude-output />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/bin" isTestSource="false" generated="true" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="library" name="lib" level="project" />
+  </component>
+</module>
--- a/java_LabledLDA/README.md
+++ b/java_LabledLDA/README.md
@ -0,0 +1,109 @@
+Labeled LDA in Java (based on JGibbLDA)
+=======================================
+
+This is a Java implementation of Labeled LDA based on the popular
+[JGibbLDA](http://jgibblda.sourceforge.net/) package. The code has been heavily
+refactored and a few additional options have been added. See sections below for
+more details.
+
+Data Format
+-----------
+
+The input data format is similar to the [JGibbLDA input data
+format](http://jgibblda.sourceforge.net/#_2.3._Input_Data_Format), with some
+minor cosmetic changes and additional support for document labels necessary for
+Labeled LDA. We first describe the (modified) input format for unlabeled
+documents, followed by the (new) input format for labeled documents.
+
+**Changed from JGibbLDA**: All input/output files must be Gzipped.
+
+### Unlabeled Documents
+
+Unlabeled documents have the following format:
+
+    document_1
+    document_2
+    ...
+    document_m
+
+where each document is a space-separated list of terms, i.e.,:
+
+    document_i = term_1 term_2 ... term_n
+
+**Changed from JGibbLDA**: The first line *should not* be an integer indicating
+the number of documents in the file. The original JGibbLDA code has been
+modified to identify the number of documents automatically.
+
+**Note**: Labeled and unlabeled documents may be mixed in the input file, thus
+you must ensure that unlabeled documents do not begin with a left square bracket
+(see Labeled Document input format below). One easy fix is to prepend a space
+character (' ') to each unlabeled document line.
+
+### Labeled Documents
+
+Labeled documents follow a format similar to unlabeled documents, but with the
+labels given at the beginning of each line and surrounded by square brackets,
+e.g.:
+
+    [label_1,1 label_1,2 ... label_1,l_1] document_1
+    [label_2,1 label_2,2 ... label_2,l_2] document_2
+    ...
+    [label_m,1 label_m,2 ... label_m,l_m] document_m
+
+where each label is an integer in the range [0, K-1], for K equal to the number
+of topics (-ntopics).
+
+**Note**: Labeled and unlabeled documents may be mixed in the input file. An
+unlabeled document is equivalent to labeling a document with every label in the
+range [0, K-1].
+
+Usage
+-----
+
+Please see the [JGibbLDA usage](http://jgibblda.sourceforge.net/#_2.2._Command_Line_&_Input_Parameter), noting the following changes:
+
+*   All input files must be Gzipped. All output files are also Gzipped.
+
+*   New options have been added:
+
+    **-nburnin <int>**: Discard this many initial iterations when taking samples.
+
+    **-samplinglag <int>**: The number of iterations between samples.
+
+    **-infseparately**: Inference is done separately for each document, as if
+    inference for each document was performed in isolation.
+
+    **-unlabeled**: Ignore document labels, i.e., treat every document as
+    unlabeled.
+
+*   Some options have been deleted:
+
+    **-wordmap**: Filename is automatically built based on model path.
+
+Contact
+-------
+
+Please direct questions to [Myle Ott](mailto:myleott@gmail.com).
+
+License
+-------
+
+Following JGibbLDA, this code is licensed under the GPLv2. Please see the
+LICENSE file for the full license.
+
+Labeled LDA in Java
+Copyright (C) 2008-2013 Myle Ott (Labeled LDA), Xuan-Hieu Phan and Cam-Tu Nguyen (JGibbLDA)
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
--- a/java_LabledLDA/models/tickets/.others.gz
+++ b/java_LabledLDA/models/tickets/.others.gz
--- a/java_LabledLDA/models/tickets/.tassign.gz
+++ b/java_LabledLDA/models/tickets/.tassign.gz
--- a/java_LabledLDA/models/tickets/.theta.gz
+++ b/java_LabledLDA/models/tickets/.theta.gz
--- a/java_LabledLDA/models/tickets/.twords.gz
+++ b/java_LabledLDA/models/tickets/.twords.gz
--- a/java_LabledLDA/models/tickets/.wordmap.gz
+++ b/java_LabledLDA/models/tickets/.wordmap.gz
--- a/java_LabledLDA/models/tickets/tickets.gz
+++ b/java_LabledLDA/models/tickets/tickets.gz
--- a/java_LabledLDA/out/production/LabledLDA/jgibblda/Dictionary.class
+++ b/java_LabledLDA/out/production/LabledLDA/jgibblda/Dictionary.class
--- a/java_LabledLDA/out/production/LabledLDA/jgibblda/Document.class
+++ b/java_LabledLDA/out/production/LabledLDA/jgibblda/Document.class
--- a/java_LabledLDA/out/production/LabledLDA/jgibblda/Estimator.class
+++ b/java_LabledLDA/out/production/LabledLDA/jgibblda/Estimator.class
--- a/java_LabledLDA/out/production/LabledLDA/jgibblda/Inferencer.class
+++ b/java_LabledLDA/out/production/LabledLDA/jgibblda/Inferencer.class
--- a/java_LabledLDA/out/production/LabledLDA/jgibblda/LDA.class
+++ b/java_LabledLDA/out/production/LabledLDA/jgibblda/LDA.class
--- a/java_LabledLDA/out/production/LabledLDA/jgibblda/LDACmdOption.class
+++ b/java_LabledLDA/out/production/LabledLDA/jgibblda/LDACmdOption.class
--- a/java_LabledLDA/out/production/LabledLDA/jgibblda/LDADataset.class
+++ b/java_LabledLDA/out/production/LabledLDA/jgibblda/LDADataset.class
--- a/java_LabledLDA/out/production/LabledLDA/jgibblda/Model.class
+++ b/java_LabledLDA/out/production/LabledLDA/jgibblda/Model.class
--- a/java_LabledLDA/out/production/LabledLDA/jgibblda/Pair.class
+++ b/java_LabledLDA/out/production/LabledLDA/jgibblda/Pair.class
--- a/java_LabledLDA/src/jgibblda/Dictionary.java
+++ b/java_LabledLDA/src/jgibblda/Dictionary.java
@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+package jgibblda;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import gnu.trove.map.hash.TObjectIntHashMap;
+import gnu.trove.map.hash.TIntObjectHashMap;
+
+public class Dictionary {
+    public TObjectIntHashMap<String> word2id;
+    public TIntObjectHashMap<String> id2word;
+
+    //--------------------------------------------------
+    // constructors
+    //--------------------------------------------------
+
+    public Dictionary(){
+        word2id = new TObjectIntHashMap<String>();
+        id2word = new TIntObjectHashMap<String>();
+    }
+
+    //---------------------------------------------------
+    // get/set methods
+    //---------------------------------------------------
+
+    public String getWord(int id){
+        return id2word.get(id);
+    }
+
+    public int getID(String word){
+        return word2id.get(word);
+    }
+
+    //----------------------------------------------------
+    // checking methods
+    //----------------------------------------------------
+    /**
+     * check if this dictionary contains a specified word
+     */
+    public boolean contains(String word){
+        return word2id.containsKey(word);
+    }
+
+    public boolean contains(int id){
+        return id2word.containsKey(id);
+    }
+    //---------------------------------------------------
+    // manupulating methods
+    //---------------------------------------------------
+    /**
+     * add a word into this dictionary
+     * return the corresponding id
+     */
+    public int addWord(String word){
+        if (!contains(word)){
+            int id = word2id.size();
+
+            word2id.put(word, id);
+            id2word.put(id,word);
+
+            return id;
+        }
+        else return getID(word);		
+    }
+
+    //---------------------------------------------------
+    // I/O methods
+    //---------------------------------------------------
+    /**
+     * read dictionary from file
+     */
+    public boolean readWordMap(String wordMapFile)
+    {
+        try {
+            BufferedReader reader = new BufferedReader(new InputStreamReader(
+                        new GZIPInputStream(
+                            new FileInputStream(wordMapFile)), "UTF-8"));
+            String line;
+
+            for (int i = 0; (line = reader.readLine()) != null; i++) {
+                String word = line.trim();
+                id2word.put(i, word);
+                word2id.put(word, i);
+            }
+
+            reader.close();
+            return true;
+        }
+        catch (Exception e) {
+            System.out.println("Error while reading dictionary:" + e.getMessage());
+            e.printStackTrace();
+            return false;
+        }		
+    }
+
+    public boolean writeWordMap(String wordMapFile)
+    {
+        try {
+            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
+                        new GZIPOutputStream(
+                            new FileOutputStream(wordMapFile)), "UTF-8"));
+
+            //write word to id
+            for (int i = 0; i < id2word.size(); i++) {
+                writer.write(id2word.get(i) + "\n");
+            }
+
+            writer.close();
+            return true;
+        }
+        catch (Exception e) {
+            System.out.println("Error while writing word map " + e.getMessage());
+            e.printStackTrace();
+            return false;
+        }
+    }
+}
--- a/java_LabledLDA/src/jgibblda/Document.java
+++ b/java_LabledLDA/src/jgibblda/Document.java
@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+import gnu.trove.list.array.TIntArrayList;
+
+/**
+ * A single document: its word ids, the raw text it was built from, and an
+ * optional set of label ids.
+ */
+public class Document {
+
+    //----------------------------------------------------
+    //Instance Variables
+    //----------------------------------------------------
+    // word ids of the document, in order
+    public int[] words;
+    // raw text of the document; empty unless supplied to a constructor
+    public String rawStr = "";
+    // number of entries in words
+    public int length;
+    // label ids; stays null when no label list is supplied
+    public int[] labels = null;
+
+    /**
+     * Build a document from a list of word ids (the list is copied).
+     */
+    public Document(TIntArrayList doc){
+        this.words = doc.toArray();
+        this.length = this.words.length;
+    }
+
+    /**
+     * Build a document from a list of word ids and remember its raw text.
+     */
+    public Document(TIntArrayList doc, String rawStr)
+    {
+        this(doc);
+        this.rawStr = rawStr;
+    }
+
+    /**
+     * Build a document from word ids, raw text and an optional label list;
+     * a null label list leaves {@code labels} null.
+     */
+    public Document(TIntArrayList doc, String rawStr, TIntArrayList tlabels)
+    {
+        this(doc);
+        this.rawStr = rawStr;
+        if (tlabels != null) {
+            this.labels = tlabels.toArray();
+        }
+    }
+}
--- a/java_LabledLDA/src/jgibblda/Estimator.java
+++ b/java_LabledLDA/src/jgibblda/Estimator.java
@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+public class Estimator
+{
+    // output model
+    protected Model trnModel;
+    LDACmdOption option;
+
+    public Estimator(LDACmdOption option) throws FileNotFoundException, IOException
+    {
+        this.option = option;
+
+        trnModel = new Model(option);
+
+        if (option.est){
+            trnModel.init(true);
+        }
+        else if (option.estc){
+            trnModel.init(false);
+        }
+    }
+
+    public void estimate()
+    {
+        System.out.println("Sampling " + trnModel.niters + " iterations!");
+        System.out.print("Iteration");
+        for (int startIter = ++trnModel.liter; trnModel.liter <= startIter - 1 + trnModel.niters; trnModel.liter++){
+            System.out.format("%6d", trnModel.liter);
+
+            // for all z_i
+            for (int m = 0; m < trnModel.M; m++){				
+                for (int n = 0; n < trnModel.data.docs.get(m).length; n++){
+                    // z_i = z[m][n]
+                    // sample from p(z_i|z_-i, w)
+                    int topic = sampling(m, n);
+                    trnModel.z[m].set(n, topic);
+                }// end for each word
+            }// end for each document
+
+            if ((trnModel.liter == startIter - 1 + trnModel.niters) ||
+                    (trnModel.liter > trnModel.nburnin && trnModel.liter % trnModel.samplingLag == 0)) {
+                trnModel.updateParams();
+            }
+
+            System.out.print("\b\b\b\b\b\b");
+        }// end iterations
+        trnModel.liter--;
+
+        System.out.println("\nSaving the final model!");
+        trnModel.saveModel();
+    }
+
+    /**
+     * Do sampling
+     * @param m document number
+     * @param n word number
+     * @return topic id
+     */
+    public int sampling(int m, int n)
+    {
+        // remove z_i from the count variable
+        int topic = trnModel.z[m].get(n);
+        int w = trnModel.data.docs.get(m).words[n];
+
+        trnModel.nw[w][topic] -= 1;
+        trnModel.nd[m][topic] -= 1;
+        trnModel.nwsum[topic] -= 1;
+        trnModel.ndsum[m] -= 1;
+
+        double Vbeta = trnModel.V * trnModel.beta;
+
+        // get labels for this document
+        int[] labels = trnModel.data.docs.get(m).labels;
+
+        // determine number of possible topics for this document
+        int K_m = (labels == null) ? trnModel.K : labels.length;
+
+        // do multinominal sampling via cumulative method
+        double[] p = trnModel.p;
+        for (int k = 0; k < K_m; k++) {
+            topic = labels == null ? k : labels[k];
+
+            p[k] = (trnModel.nd[m][topic] + trnModel.alpha) *
+                (trnModel.nw[w][topic] + trnModel.beta) /
+                (trnModel.nwsum[topic] + Vbeta);
+        }
+
+        // cumulate multinomial parameters
+        for (int k = 1; k < K_m; k++) {
+            p[k] += p[k - 1];
+        }
+
+        // scaled sample because of unnormalized p[]
+        double u = Math.random() * p[K_m - 1];
+
+        for (topic = 0; topic < K_m; topic++){
+            if (p[topic] > u) //sample topic w.r.t distribution p
+                break;
+        }
+
+        // map [0, K_m - 1] topic to [0, K - 1] topic according to labels
+        if (labels != null) {
+            topic = labels[topic];
+        }
+
+        // add newly estimated z_i to count variables
+        trnModel.nw[w][topic] += 1;
+        trnModel.nd[m][topic] += 1;
+        trnModel.nwsum[topic] += 1;
+        trnModel.ndsum[m] += 1;
+
+        return topic;
+    }
+}
--- a/java_LabledLDA/src/jgibblda/Inferencer.java
+++ b/java_LabledLDA/src/jgibblda/Inferencer.java
@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+/**
+ * Infers topic assignments for a new data set by Gibbs sampling against the
+ * counts of a previously trained model.
+ */
+public class Inferencer
+{
+    // Train model
+    public Model trnModel;
+    public Dictionary globalDict;
+    private LDACmdOption option;
+
+    private Model newModel;
+
+    //-----------------------------------------------------
+    // Init method
+    //-----------------------------------------------------
+    /**
+     * Loads the trained model described by the options.
+     * @param option parsed command line options
+     * @throws FileNotFoundException if the data file does not exist
+     * @throws IOException if the data file cannot be read or the trained
+     *         model could not be loaded
+     */
+    public Inferencer(LDACmdOption option) throws FileNotFoundException, IOException
+    {
+        this.option = option;
+
+        trnModel = new Model(option);
+        // Model.init(false) reports a failed model load only through its return
+        // value; inference against an unloaded model would be meaningless.
+        if (!trnModel.init(false)) {
+            throw new IOException("Could not load trained model '" + option.modelName + "' for inference");
+        }
+
+        globalDict = trnModel.data.localDict;
+    }
+
+    /**
+     * Builds a new model over the data set named in the options, runs
+     * {@code niters} Gibbs sampling iterations against the trained model and
+     * saves the result using the data file name (without ".gz") as prefix.
+     * @return the inferred model
+     */
+    public Model inference() throws FileNotFoundException, IOException
+    {
+        newModel = new Model(option, trnModel);
+        newModel.init(true);
+        newModel.initInf();
+
+        System.out.println("Sampling " + newModel.niters + " iterations for inference!");		
+        System.out.print("Iteration");
+        for (newModel.liter = 1; newModel.liter <= newModel.niters; newModel.liter++){
+            System.out.format("%6d", newModel.liter);
+
+            // for all newz_i
+            for (int m = 0; m < newModel.M; ++m){
+                for (int n = 0; n < newModel.data.docs.get(m).length; n++){
+                    // sample from p(z_i|z_-1,w)
+                    int topic = infSampling(m, n);
+                    newModel.z[m].set(n, topic);
+                }
+            }//end foreach new doc
+
+            boolean lastIteration = newModel.liter == newModel.niters;
+            // a sampling lag of 0 would make the modulo below throw; treat it as
+            // "take no intermediate samples"
+            boolean lagSample = newModel.samplingLag != 0 &&
+                newModel.liter > newModel.nburnin &&
+                newModel.liter % newModel.samplingLag == 0;
+            if (lastIteration || lagSample) {
+                newModel.updateParams(trnModel);
+            }
+
+            // erase the 6-character iteration counter printed above
+            System.out.print("\b\b\b\b\b\b");
+        }// end iterations
+        newModel.liter--; // the loop leaves liter one past the last executed iteration
+
+        System.out.println("\nSaving the inference outputs!");
+        String outputPrefix = newModel.dfile;
+        if (outputPrefix.endsWith(".gz")) {
+            outputPrefix = outputPrefix.substring(0, outputPrefix.length() - 3);
+        }
+        if (!newModel.saveModel(outputPrefix + ".")) {
+            System.err.println("Failed to save the inference outputs!");
+        }
+
+        return newModel;
+    }
+
+    /**
+     * Do sampling for inference
+     * @param m document number
+     * @param n word number within the document
+     * @return topic id
+     * @throws IllegalStateException if the document has no candidate topics
+     *         (empty label set or K == 0)
+     */
+    protected int infSampling(int m, int n)
+    {
+        // get labels for this document
+        int[] labels = newModel.data.docs.get(m).labels;
+
+        // determine number of possible topics for this document
+        int K_m = (labels == null) ? newModel.K : labels.length;
+        if (K_m <= 0) {
+            // fail with a clear message before touching the counts instead of
+            // indexing p[-1] further down
+            throw new IllegalStateException("Document " + m + " has no candidate topics to sample from");
+        }
+
+        // remove z_i from the count variables
+        int topic = newModel.z[m].get(n);
+        int _w = newModel.data.docs.get(m).words[n]; // local word id
+        int w = newModel.data.lid2gid.get(_w);        // id in the trained model's dictionary
+
+        newModel.nw[_w][topic] -= 1;
+        newModel.nd[m][topic] -= 1;
+        newModel.nwsum[topic] -= 1;
+        newModel.ndsum[m] -= 1;
+
+        // per-document counts, only maintained with -infseparately
+        int[] nw_inf_m__w = null;
+        if (option.infSeparately) {
+            nw_inf_m__w = newModel.nw_inf.get(m).get(_w);
+            nw_inf_m__w[topic] -= 1;
+            newModel.nwsum_inf[m][topic] -= 1;
+        }
+
+        double Vbeta = trnModel.V * newModel.beta;
+
+        // do multinomial sampling via cumulative method
+        double[] p = newModel.p;
+        for (int k = 0; k < K_m; k++) {
+            topic = labels == null ? k : labels[k];
+
+            int nw_k, nwsum_k;
+            if (option.infSeparately) {
+                nw_k = nw_inf_m__w[topic];
+                nwsum_k = newModel.nwsum_inf[m][topic];
+            } else {
+                nw_k = newModel.nw[_w][topic];
+                nwsum_k = newModel.nwsum[topic];
+            }
+
+            p[k] = (newModel.nd[m][topic] + newModel.alpha) *
+                (trnModel.nw[w][topic] + nw_k + newModel.beta) /
+                (trnModel.nwsum[topic] + nwsum_k + Vbeta);
+        }
+
+        // cumulate multinomial parameters
+        for (int k = 1; k < K_m; k++){
+            p[k] += p[k - 1];
+        }
+
+        // scaled sample because of unnormalized p[]
+        double u = Math.random() * p[K_m - 1];
+
+        // first bucket strictly above u. The scan stops at the last bucket so
+        // that a rounded-up u (or a degenerate p[]) can never yield an index
+        // outside [0, K_m - 1].
+        topic = 0;
+        while (topic < K_m - 1 && !(p[topic] > u)) {
+            topic++;
+        }
+
+        // map [0, K_m - 1] topic to [0, K - 1] topic according to labels
+        if (labels != null) {
+            topic = labels[topic];
+        }
+
+        // add newly estimated z_i to count variables
+        newModel.nw[_w][topic] += 1;
+        newModel.nd[m][topic] += 1;
+        newModel.nwsum[topic] += 1;
+        newModel.ndsum[m] += 1;
+
+        if (option.infSeparately) {
+            nw_inf_m__w[topic] += 1;
+            newModel.nwsum_inf[m][topic] += 1;
+        }
+
+        return topic;
+    }
+}
--- a/java_LabledLDA/src/jgibblda/LDA.java
+++ b/java_LabledLDA/src/jgibblda/LDA.java
@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+import java.io.FileNotFoundException;
+
+import org.kohsuke.args4j.*;
+
+
+public class LDA
+{
+    public static void main(String args[])
+    {
+        LDACmdOption option = new LDACmdOption();
+        CmdLineParser parser = new CmdLineParser(option);
+
+        try {
+            if (args.length == 0){
+                showHelp(parser);
+                return;
+            }
+
+            parser.parseArgument(args);
+
+            if (option.est || option.estc){
+                Estimator estimator = new Estimator(option);
+                estimator.estimate();
+            }
+            else if (option.inf){
+                Inferencer inferencer = new Inferencer(option);
+                Model newModel = inferencer.inference();
+            }
+        } catch (CmdLineException cle){
+            System.out.println("Command line error: " + cle.getMessage());
+            showHelp(parser);
+            return;
+        } catch (FileNotFoundException e) {
+            e.printStackTrace();
+            return;
+        } catch (Exception e){
+            System.out.println("Error in main: " + e.getMessage());
+            e.printStackTrace();
+            return;
+        }
+    }
+
+    public static void showHelp(CmdLineParser parser){
+        System.out.println("LDA [options ...] [arguments...]");
+        parser.printUsage(System.out);
+    }
+
+}
--- a/java_LabledLDA/src/jgibblda/LDACmdOption.java
+++ b/java_LabledLDA/src/jgibblda/LDACmdOption.java
@ -0,0 +1,51 @@
+package jgibblda;
+
+import org.kohsuke.args4j.*;
+
+/**
+ * Command line options of the LDA tool, bound by args4j.
+ */
+public class LDACmdOption {
+
+    /** Estimate a model from scratch. */
+    @Option(name = "-est", usage = "Specify whether we want to estimate model from scratch")
+    public boolean est = false;
+
+    /** Continue the estimation of a previously saved model. */
+    @Option(name = "-estc", usage = "Specify whether we want to continue the last estimation")
+    public boolean estc = false;
+
+    /** Do inference; enabled by default, used when neither -est nor -estc is given. */
+    @Option(name = "-inf", usage = "Specify whether we want to do inference")
+    public boolean inf = true;
+
+    /** During inference, use per-document word/topic counts of the new data. */
+    @Option(name = "-infseparately", usage = "Do inference for each document separately")
+    public boolean infSeparately = false;
+
+    /** Do not parse the label section of the documents. */
+    @Option(name = "-unlabeled", usage = "Ignore document labels")
+    public boolean unlabeled = false;
+
+    /** Directory that is prepended to the data file name. */
+    @Option(name = "-dir", usage = "Specify directory")
+    public String dir = "";
+
+    /** Name of the gzip-compressed data file inside {@link #dir}. */
+    @Option(name = "-dfile", usage = "Specify data file (*.gz)")
+    public String dfile = "";
+
+    /** Name of the model. */
+    @Option(name = "-model", usage = "Specify the model name")
+    public String modelName = "";
+
+    /** Hyperparameter alpha; a negative value means "not specified". */
+    @Option(name = "-alpha", usage = "Specify alpha")
+    public double alpha = -1;
+
+    /** Hyperparameter beta; a negative value means "not specified". */
+    @Option(name = "-beta", usage = "Specify beta")
+    public double beta = -1;
+
+    /** Number of topics. */
+    @Option(name = "-ntopics", usage = "Specify the number of topics")
+    public int K = 100;
+
+    /** Number of Gibbs sampling iterations. */
+    @Option(name = "-niters", usage = "Specify the number of iterations")
+    public int niters = 1000;
+
+    /** Number of burn-in iterations before parameter samples are taken. */
+    @Option(name = "-nburnin", usage = "Specify the number of burn-in iterations")
+    public int nburnin = 500;
+
+    /** Number of iterations between two parameter samples after burn-in. */
+    @Option(name = "-samplinglag", usage = "Specify the sampling lag")
+    public int samplingLag = 5;
+
+    /** Number of most likely words to print for each topic. */
+    @Option(name = "-twords", usage = "Specify the number of most likely words to be printed for each topic")
+    public int twords = 100;
+}
--- a/java_LabledLDA/src/jgibblda/LDADataset.java
+++ b/java_LabledLDA/src/jgibblda/LDADataset.java
@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+package jgibblda;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.zip.GZIPInputStream;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntIntHashMap;
+import gnu.trove.set.hash.TIntHashSet;
+
+/**
+ * A collection of documents together with the dictionary that maps their
+ * words to local ids and, for test data, a mapping from those local ids to
+ * the ids of a global (training) dictionary.
+ */
+public class LDADataset {
+    //---------------------------------------------------------------
+    // Instance Variables
+    //---------------------------------------------------------------
+
+    public Dictionary localDict = new Dictionary();                 // local dictionary
+    public ArrayList<Document> docs = new ArrayList<Document>();    // a list of documents
+    public int M = 0;                                               // number of documents
+    public int V = 0;                                               // number of words
+
+    // map from local coordinates (id) to global ones 
+    // null if the global dictionary is not set
+    public TIntIntHashMap lid2gid = null; 
+
+    //link to a global dictionary (optional), null for train data, not null for test data
+    public Dictionary globalDict = null;
+
+    //-------------------------------------------------------------
+    //Public Instance Methods
+    //-------------------------------------------------------------
+    /**
+     * set the number of documents
+     * @param M number of documents
+     */
+    public void setM(int M)
+    {
+        this.M = M;
+    }
+
+    /**
+     * link this dataset to a global dictionary and start an empty
+     * local-id to global-id map
+     * @param globalDict dictionary of the trained model
+     */
+    public void setDictionary(Dictionary globalDict)
+    {
+        lid2gid = new TIntIntHashMap();
+        this.globalDict = globalDict;
+    }
+
+    /**
+     * replace the document at index idx if idx is below the current number of
+     * documents, otherwise insert it at idx (which then has to be equal to
+     * the current number of documents, i.e. an append)
+     * @param doc document to be set
+     * @param idx index in the document list
+     */	
+    public void setDoc(Document doc, int idx){
+        if (idx < docs.size()) {
+            docs.set(idx, doc);
+        } else {
+            docs.add(idx, doc);
+        }
+    }
+
+    /**
+     * add a new document
+     *
+     * A line may start with a label section "[l1 l2 ...]" followed by the
+     * words. Words are separated by spaces, tabs or newlines. When a global
+     * dictionary is set, words unknown to it are dropped.
+     *
+     * @param str string contains doc
+     * @param unlabeled true to strip the label section without parsing it
+     */
+    public void addDoc(String str, boolean unlabeled)
+    {
+        // read document labels (if provided)
+        TIntArrayList labels = null;
+        if (str.startsWith("[")) {
+            int close = str.indexOf(']');
+            if (close < 0) {
+                // no closing bracket, so there is no label section to split off
+                System.err.println("Missing ']' after the labels of document " + docs.size() + "; treating the whole line as unlabeled text.");
+            } else {
+                String[] labelStrs = str.substring(1, close).trim().split("[ \\t]");
+                str = str.substring(close + 1).trim();
+
+                // parse labels (unless we're ignoring the labels)
+                if (!unlabeled) {
+                    labels = parseLabels(labelStrs);
+                }
+            }
+        }
+
+        String[] words = str.split("[ \\t\\n]");
+        TIntArrayList ids = new TIntArrayList();
+        for (String word : words){
+            if (word.trim().equals("")) {
+                continue;
+            }
+
+            // id the word has, or will get when it is added to the local dictionary
+            int _id = localDict.word2id.size();
+
+            if (localDict.contains(word))		
+                _id = localDict.getID(word);
+
+            if (globalDict != null) {
+                //get the global id					
+                if (globalDict.contains(word)) {
+                    localDict.addWord(word);
+
+                    lid2gid.put(_id, globalDict.getID(word));
+                    ids.add(_id);
+                }
+            }
+            else {
+                localDict.addWord(word);
+                ids.add(_id);
+            }
+        }
+
+        setDoc(new Document(ids, str, labels), docs.size());
+
+        V = localDict.word2id.size();
+    }
+
+    /**
+     * parse the label tokens of the document that is about to be added
+     * @param labelStrs tokens found between '[' and ']'
+     * @return sorted unique label ids, or null when no token is a valid label
+     */
+    private TIntArrayList parseLabels(String[] labelStrs)
+    {
+        // store labels in a HashSet to ensure uniqueness
+        TIntHashSet label_set = new TIntHashSet();
+        for (String labelStr : labelStrs) {
+            String token = labelStr.trim();
+            if (token.length() == 0) {
+                // produced by repeated separators or an empty "[]" section
+                continue;
+            }
+            try {
+                label_set.add(Integer.parseInt(token));
+            } catch (NumberFormatException nfe) {
+                System.err.println("Unknown document label ( " + labelStr + " ) for document " + docs.size() + ".");
+            }
+        }
+
+        if (label_set.isEmpty()) {
+            // an empty label array would leave the sampler without candidate topics
+            System.err.println("No valid labels for document " + docs.size() + "; treating it as unlabeled.");
+            return null;
+        }
+
+        TIntArrayList labels = new TIntArrayList(label_set);
+        labels.sort();
+        return labels;
+    }
+
+    //---------------------------------------------------------------
+    // I/O methods
+    //---------------------------------------------------------------
+
+    /**
+     * read a dataset from a gzip-compressed, UTF-8 encoded file with one
+     * document per line
+     * @param filename path of the data file
+     * @param unlabeled true to ignore document labels
+     * @return true if success; failures are reported as exceptions
+     * @throws FileNotFoundException if the file cannot be opened
+     * @throws IOException if the file is not valid gzip data or cannot be read
+     */
+    public boolean readDataSet(String filename, boolean unlabeled) throws FileNotFoundException, IOException
+    {
+        FileInputStream fileStream = new FileInputStream(filename);
+        BufferedReader reader = null;
+        try {
+            // GZIPInputStream reads the gzip header in its constructor and may
+            // throw, so the file stream has to be covered by the finally below
+            reader = new BufferedReader(new InputStreamReader(
+                        new GZIPInputStream(fileStream), "UTF-8"));
+
+            String line;
+            while ((line = reader.readLine()) != null) {
+                addDoc(line, unlabeled);
+            }
+            setM(docs.size());
+
+            // debug output
+            System.out.println("Dataset loaded:");
+            System.out.println("\tM:" + M);
+            System.out.println("\tV:" + V);
+
+            return true;
+        } finally {
+            if (reader != null) {
+                reader.close();
+            } else {
+                fileStream.close();
+            }
+        }
+    }
+}
--- a/java_LabledLDA/src/jgibblda/Model.java
+++ b/java_LabledLDA/src/jgibblda/Model.java
@ -0,0 +1,669 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+package jgibblda;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.StringTokenizer;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntObjectHashMap;
+
+public class Model {	
+
+    //---------------------------------------------------------------
+    //	Class Variables
+    //---------------------------------------------------------------
+
+    public static String tassignSuffix = ".tassign.gz";	 // suffix for topic assignment file
+    public static String thetaSuffix   = ".theta.gz";    // suffix for theta (topic - document distribution) file
+    public static String phiSuffix     = ".phi.gz";      // suffix for phi file (topic - word distribution) file
+    public static String othersSuffix  = ".others.gz"; 	 // suffix for containing other parameters
+    public static String twordsSuffix  = ".twords.gz";	 // suffix for file containing words-per-topics
+    public static String wordMapSuffix  = ".wordmap.gz"; // suffix for file containing word to id map
+
+    //---------------------------------------------------------------
+    //	Model Parameters and Variables
+    //---------------------------------------------------------------
+
+
+    public String dir = "./";
+    public String dfile = "trndocs.dat";
+    public boolean unlabeled = false;
+    public String modelName = "model";
+    public LDADataset data; // link to a dataset
+
+    public int M = 0;          // dataset size (i.e., number of docs)
+    public int V = 0;          // vocabulary size
+    public int K = 100;        // number of topics
+    public double alpha;       // LDA hyperparameters
+    public double beta = 0.01; // LDA hyperparameters
+    public int niters = 1000;  // number of Gibbs sampling iteration
+    public int nburnin = 500;  // number of Gibbs sampling burn-in iterations
+    public int samplingLag = 5;// Gibbs sampling sample lag
+    public int numSamples = 1; // number of samples taken
+    public int liter = 0;      // the iteration at which the model was saved	
+    public int twords = 20;    // print out top words per each topic
+
+    // Estimated/Inferenced parameters
+    public double[][] theta = null; // theta: document - topic distributions, size M x K
+    public double[][] phi = null;   // phi: topic-word distributions, size K x V
+
+    // Temp variables while sampling
+    public TIntArrayList[] z = null; // topic assignments for words, size M x doc.size()
+    protected int[][] nw = null;       // nw[i][j]: number of instances of word/term i assigned to topic j, size V x K
+    protected int[][] nd = null;       // nd[i][j]: number of words in document i assigned to topic j, size M x K
+    protected int[] nwsum = null;      // nwsum[j]: total number of words assigned to topic j, size K
+    protected int[] ndsum = null;      // ndsum[i]: total number of words in document i, size M
+
+    protected ArrayList<TIntObjectHashMap<int[]>> nw_inf = null;       // nw[m][i][j]: number of instances of word/term i assigned to topic j in doc m, size M x V x K
+    protected int[][] nwsum_inf = null;      // nwsum[m][j]: total number of words assigned to topic j in doc m, size M x K
+
+    // temp variables for sampling
+    protected double[] p = null; 
+
+    //---------------------------------------------------------------
+    //	Constructors
+    //---------------------------------------------------------------	
+
+    /**
+     * Creates a model from the command line options alone and reads its
+     * data set.
+     */
+    public Model(LDACmdOption option) throws FileNotFoundException, IOException
+    {
+        this(option, null);
+    }
+
+    /**
+     * Creates a model from the command line options and reads its data set
+     * from {@code dir + File.separator + dfile}.
+     *
+     * @param option parsed command line options
+     * @param trnModel trained model to infer against, or null. When given,
+     *        its local dictionary becomes the global dictionary of the new
+     *        data set, its K is used, and its alpha / beta are used unless
+     *        the options specify non-negative values.
+     */
+    public Model(LDACmdOption option, Model trnModel) throws FileNotFoundException, IOException
+    {
+        // plain copies of the options
+        modelName = option.modelName;
+        niters = option.niters;
+        nburnin = option.nburnin;
+        samplingLag = option.samplingLag;
+        dfile = option.dfile;
+        unlabeled = option.unlabeled;
+        twords = option.twords;
+
+        // hyperparameters: negative option values mean "not specified"
+        K = option.K;
+        alpha = (option.alpha < 0.0) ? 50.0 / K : option.alpha;
+        if (option.beta >= 0) {
+            beta = option.beta;
+        }
+
+        // directory without a trailing separator
+        String directory = option.dir;
+        if (directory.endsWith(File.separator)) {
+            directory = directory.substring(0, directory.length() - 1);
+        }
+        dir = directory;
+
+        // initialize dataset
+        data = new LDADataset();
+
+        // process trnModel (if given)
+        if (trnModel != null) {
+            data.setDictionary(trnModel.data.localDict);
+            K = trnModel.K;
+
+            // use hyperparameters from model (if not overridden in options)
+            if (option.alpha < 0.0) {
+                alpha = trnModel.alpha;
+            }
+            if (option.beta < 0.0) {
+                beta = trnModel.beta;
+            }
+        }
+
+        // read in data
+        String dataPath = dir + File.separator + dfile;
+        data.readDataSet(dataPath, unlabeled);
+    }
+
+    //---------------------------------------------------------------
+    //	Init Methods
+    //---------------------------------------------------------------
+
+    /**
+     * Init parameters for estimation or inference.
+     *
+     * @param random true to size the model from the data set and draw a
+     *        uniformly random topic for every word; false to load a saved
+     *        model and rebuild the counts from its topic assignments
+     * @return false if the saved model could not be loaded, true otherwise
+     */
+    public boolean init(boolean random)
+    {
+        if (!random) {
+            if (!loadModel()) {
+                System.out.println("Fail to load word-topic assignment file of the model!"); 
+                return false;
+            }
+
+            // debug output
+            System.out.println("Model loaded:");
+            System.out.println("\talpha:" + alpha);
+            System.out.println("\tbeta:" + beta);
+            System.out.println("\tK:" + K);
+            System.out.println("\tM:" + M);
+            System.out.println("\tV:" + V);
+        } else {
+            M = data.M;
+            V = data.V;
+            z = new TIntArrayList[M];
+        }
+
+        p = new double[K];
+
+        initSS();
+
+        for (int m = 0; m < data.M; m++){
+            if (random) {
+                z[m] = new TIntArrayList();
+            }
+
+            Document doc = data.docs.get(m);
+            int N = doc.length;
+
+            // initialize z and the count variables word by word
+            for (int n = 0; n < N; n++){
+                int w = doc.words[n];
+
+                // random init a topic or load existing topic from z[m]
+                int topic;
+                if (!random) {
+                    topic = z[m].get(n);
+                } else {
+                    topic = (int)Math.floor(Math.random() * K);
+                    z[m].add(topic);
+                }
+
+                nw[w][topic] += 1; // number of instances of word assigned to topic j
+                nd[m][topic] += 1; // number of words in document i assigned to topic j
+                nwsum[topic] += 1; // total number of words assigned to topic j
+            }
+
+            ndsum[m] = N; // total number of words in document i
+        }
+
+        theta = new double[M][K];
+        phi = new double[K][V];
+
+        return true;
+    }
+
+    /**
+     * Builds the per-document counts used for separate inference from the
+     * current topic assignments z.
+     *
+     * @return always true
+     */
+    public boolean initInf()
+    {
+        nw_inf = new ArrayList<TIntObjectHashMap<int[]>>();
+
+        // freshly allocated int arrays are already filled with zeros
+        nwsum_inf = new int[M][K];
+
+        for (int m = 0; m < data.M; m++){
+            TIntObjectHashMap<int[]> docCounts = new TIntObjectHashMap<int[]>();
+            nw_inf.add(docCounts);
+
+            Document doc = data.docs.get(m);
+            int N = doc.length;
+            for (int n = 0; n < N; n++){
+                int w = doc.words[n];
+                int topic = z[m].get(n);
+
+                // lazily create the topic counts of a word on its first occurrence
+                int[] wordCounts = docCounts.get(w);
+                if (wordCounts == null) {
+                    wordCounts = new int[K];
+                    docCounts.put(w, wordCounts);
+                }
+
+                wordCounts[topic] += 1;      // number of instances of word assigned to topic j in doc m
+                nwsum_inf[m][topic] += 1;    // total number of words assigned to topic j in doc m
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Init sufficient stats: allocates the count arrays nw (V x K),
+     * nd (M x K), nwsum (K) and ndsum (M) with all counts at zero.
+     */
+    protected void initSS()
+    {
+        // Java zero-fills newly created int arrays, no explicit reset needed
+        nw = new int[V][K];
+        nd = new int[M][K];
+        nwsum = new int[K];
+        ndsum = new int[M];
+    }
+
+    //---------------------------------------------------------------
+    //	Update Methods
+    //---------------------------------------------------------------
+
+    /**
+     * Folds the current counts into theta and phi as one more sample and
+     * increases the sample counter.
+     */
+    public void updateParams()
+    {
+        updateTheta();
+        updatePhi();
+        numSamples += 1;
+    }
+
+    /**
+     * Inference variant: phi is computed from the counts of this model
+     * combined with the counts of the trained model.
+     * @param trnModel trained model
+     */
+    public void updateParams(Model trnModel)
+    {
+        updateTheta();
+        updatePhi(trnModel);
+        numSamples += 1;
+    }
+
+    /**
+     * Adds the current document-topic estimate to theta. From the second
+     * sample on, theta is kept as the running mean over numSamples samples.
+     */
+    public void updateTheta()
+    {
+        final double Kalpha = K * alpha;
+        final boolean averaging = numSamples > 1;
+
+        for (int m = 0; m < M; m++) {
+            for (int k = 0; k < K; k++) {
+                double value = theta[m][k];
+                if (averaging) {
+                    value *= numSamples - 1; // convert from mean to sum
+                }
+                value += (nd[m][k] + alpha) / (ndsum[m] + Kalpha);
+                if (averaging) {
+                    value /= numSamples; // convert from sum to mean
+                }
+                theta[m][k] = value;
+            }
+        }
+    }
+
+    /**
+     * Fold the current estimate (nw[w][k] + beta) / (nwsum[k] + V * beta)
+     * into phi[k][w]. While numSamples is greater than 1 the stored value
+     * is treated as a mean over numSamples - 1 samples and replaced by the
+     * mean over numSamples samples; otherwise the estimate is simply added.
+     */
+    public void updatePhi()
+    {
+        final double Vbeta = V * beta;
+        final boolean averaging = numSamples > 1;
+
+        for (int k = 0; k < K; k++) {
+            final double denominator = nwsum[k] + Vbeta;
+            for (int w = 0; w < V; w++) {
+                final double estimate = (nw[w][k] + beta) / denominator;
+                if (averaging) {
+                    // mean -> sum, add the new sample, sum -> mean
+                    phi[k][w] = (phi[k][w] * (numSamples - 1) + estimate) / numSamples;
+                } else {
+                    phi[k][w] += estimate;
+                }
+            }
+        }
+    }
+
+    /**
+     * Inference variant of updatePhi(): the counts of trnModel are added to
+     * this model's counts, and the smoothing term uses trnModel.V. A local
+     * word id is translated through data.lid2gid to index trnModel.nw; words
+     * without an entry in that map are skipped and their phi stays untouched.
+     *
+     * @param trnModel model whose nw, nwsum and V are read
+     */
+    public void updatePhi(Model trnModel)
+    {
+        final double Vbeta = trnModel.V * beta;
+        final boolean averaging = numSamples > 1;
+
+        for (int k = 0; k < K; k++) {
+            for (int _w = 0; _w < V; _w++) {
+                if (!data.lid2gid.containsKey(_w)) {
+                    continue; // ignore words that don't appear in training
+                }
+                final int id = data.lid2gid.get(_w);
+                final double estimate = (trnModel.nw[id][k] + nw[_w][k] + beta)
+                        / (trnModel.nwsum[k] + nwsum[k] + Vbeta);
+
+                if (averaging) {
+                    // mean -> sum, add the new sample, sum -> mean
+                    phi[k][_w] = (phi[k][_w] * (numSamples - 1) + estimate) / numSamples;
+                } else {
+                    phi[k][_w] += estimate;
+                }
+            }
+        }
+    }
+
+    //---------------------------------------------------------------
+    //	I/O Methods
+    //---------------------------------------------------------------
+
+    /**
+     * Save model using an empty file-name prefix.
+     *
+     * @return the result of saveModel("")
+     */
+    public boolean saveModel()
+    {
+        return saveModel("");
+    }
+    /**
+     * Save the model files under dir, each named
+     * modelPrefix + modelName + the matching suffix. The files are written in
+     * the order: topic assignments, others, theta, top words (only when
+     * twords is positive) and the word map. saveModelPhi is not invoked here.
+     *
+     * @param modelPrefix text placed in front of modelName in every file name
+     * @return true when every step succeeded; the first failing step stops
+     *         the sequence and yields false
+     */
+    public boolean saveModel(String modelPrefix)
+    {
+        // Path stem shared by all output files of this model.
+        final String stem = dir + File.separator + modelPrefix + modelName;
+
+        // && short-circuits, so nothing after a failed step is attempted.
+        return saveModelTAssign(stem + tassignSuffix)
+            && saveModelOthers(stem + othersSuffix)
+            && saveModelTheta(stem + thetaSuffix)
+            && (twords <= 0 || saveModelTwords(stem + twordsSuffix))
+            && data.localDict.writeWordMap(stem + wordMapSuffix);
+    }
+
+    /**
+     * Save word-topic assignments for this model as gzip-compressed UTF-8
+     * text: one line per document, each token written as "word:topic "
+     * (with a trailing space).
+     *
+     * @param filename path of the file to create
+     * @return true on success, false if anything went wrong (the error is
+     *         printed to standard output)
+     */
+    public boolean saveModelTAssign(String filename) {
+        // try-with-resources closes the writer even when a write fails, so
+        // the file handle is never leaked.
+        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
+                    new GZIPOutputStream(
+                        new FileOutputStream(filename)), "UTF-8"))) {
+
+            //write docs with topic assignments for words
+            for (int i = 0; i < data.M; i++) {
+                for (int j = 0; j < data.docs.get(i).length; ++j) {
+                    writer.write(data.docs.get(i).words[j] + ":" + z[i].get(j) + " ");
+                }
+                writer.write("\n");
+            }
+        }
+        catch (Exception e) {
+            System.out.println("Error while saving model tassign: " + e.getMessage());
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Save theta (topic distribution) for this model as gzip-compressed
+     * UTF-8 text: one line per document, containing "topic:value " for every
+     * topic whose theta value is greater than zero.
+     *
+     * @param filename path of the file to create
+     * @return true on success, false if anything went wrong (the error is
+     *         printed to standard output)
+     */
+    public boolean saveModelTheta(String filename) {
+        // try-with-resources closes the writer even when a write fails, so
+        // the file handle is never leaked.
+        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
+                    new GZIPOutputStream(
+                        new FileOutputStream(filename)), "UTF-8"))) {
+
+            for (int i = 0; i < M; i++) {
+                for (int j = 0; j < K; j++) {
+                    if (theta[i][j] > 0) {
+                        writer.write(j + ":" + theta[i][j] + " ");
+                    }
+                }
+                writer.write("\n");
+            }
+        }
+        catch (Exception e){
+            System.out.println("Error while saving topic distribution file for this model: " + e.getMessage());
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Save word-topic distribution as gzip-compressed UTF-8 text: one line
+     * per topic, containing "word:value " for every word whose phi value is
+     * greater than zero.
+     *
+     * @param filename path of the file to create
+     * @return true on success, false if anything went wrong (the error is
+     *         printed to standard output)
+     */
+    public boolean saveModelPhi(String filename)
+    {
+        // try-with-resources closes the writer even when a write fails, so
+        // the file handle is never leaked.
+        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
+                    new GZIPOutputStream(
+                        new FileOutputStream(filename)), "UTF-8"))) {
+
+            for (int i = 0; i < K; i++) {
+                for (int j = 0; j < V; j++) {
+                    if (phi[i][j] > 0) {
+                        writer.write(j + ":" + phi[i][j] + " ");
+                    }
+                }
+                writer.write("\n");
+            }
+        }
+        catch (Exception e) {
+            System.out.println("Error while saving word-topic distribution:" + e.getMessage());
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Save other information of this model as gzip-compressed UTF-8 text,
+     * one "key=value" line each for alpha, beta, ntopics, ndocs, nwords and
+     * liters.
+     *
+     * @param filename path of the file to create
+     * @return true on success, false if anything went wrong (the error is
+     *         printed to standard output)
+     */
+    public boolean saveModelOthers(String filename){
+        // try-with-resources closes the writer even when a write fails, so
+        // the file handle is never leaked.
+        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
+                    new GZIPOutputStream(
+                        new FileOutputStream(filename)), "UTF-8"))) {
+
+            writer.write("alpha=" + alpha + "\n");
+            writer.write("beta=" + beta + "\n");
+            writer.write("ntopics=" + K + "\n");
+            writer.write("ndocs=" + M + "\n");
+            writer.write("nwords=" + V + "\n");
+            writer.write("liters=" + liter + "\n");
+        }
+        catch(Exception e){
+            System.out.println("Error while saving model others:" + e.getMessage());
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Save the most likely words for each topic as gzip-compressed UTF-8
+     * text. For every topic a "Topic k:" header is written, followed by up to
+     * twords lines of the form "\tword\tprobability", ordered by the sort
+     * order of Pair. Word ids unknown to data.localDict are skipped.
+     *
+     * Side effect: when twords exceeds V the twords field is lowered to V.
+     *
+     * @param filename path of the file to create
+     * @return true on success, false if anything went wrong (the error is
+     *         printed to standard output)
+     */
+    public boolean saveModelTwords(String filename){
+        // try-with-resources closes the writer even when a write fails, so
+        // the file handle is never leaked.
+        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
+                    new GZIPOutputStream(
+                        new FileOutputStream(filename)), "UTF-8"))) {
+
+            if (twords > V){
+                twords = V;
+            }
+
+            for (int k = 0; k < K; k++){
+                ArrayList<Pair> wordsProbsList = new ArrayList<Pair>(V);
+                for (int w = 0; w < V; w++){
+                    // the 'false' flag selects Pair's non-natural ordering
+                    wordsProbsList.add(new Pair(w, phi[k][w], false));
+                }//end foreach word
+
+                //print topic
+                writer.write("Topic " + k + ":\n");
+                Collections.sort(wordsProbsList);
+
+                for (int i = 0; i < twords; i++){
+                    Pair entry = wordsProbsList.get(i);
+                    Integer wordId = (Integer) entry.first;
+                    if (data.localDict.contains(wordId)){
+                        String word = data.localDict.getWord(wordId);
+
+                        writer.write("\t" + word + "\t" + entry.second + "\n");
+                    }
+                }
+            } //end foreach topic
+        }
+        catch(Exception e){
+            System.out.println("Error while saving model twords: " + e.getMessage());
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Load a saved model from dir: first the "others" parameter file, then
+     * the topic-assignment file, and finally the word map, which is installed
+     * as data.localDict.
+     *
+     * @return true when all three files were read; false as soon as one
+     *         step fails (later steps are then not attempted)
+     */
+    public boolean loadModel(){
+        final String stem = dir + File.separator + modelName;
+
+        // || short-circuits: the assignments are only read once the
+        // parameters were loaded successfully.
+        if (!readOthersFile(stem + othersSuffix)
+                || !readTAssignFile(stem + tassignSuffix)) {
+            return false;
+        }
+
+        // read dictionary
+        Dictionary dict = new Dictionary();
+        if (!dict.readWordMap(stem + wordMapSuffix)) {
+            return false;
+        }
+        data.localDict = dict;
+
+        return true;
+    }
+
+    /**
+     * Load "others" file to get parameters. The file is gzip-compressed
+     * UTF-8 text with one "key=value" pair per line; lines that do not split
+     * into exactly two tokens, and unknown keys, are ignored.
+     *
+     * Recognised keys (case-insensitive): alpha, beta, ntopics, nwords,
+     * ndocs and the iteration count, accepted as either "liter" or "liters".
+     *
+     * @param otherFile path of the file to read
+     * @return true on success, false if the file could not be read or a
+     *         value could not be parsed (the error is printed to standard
+     *         output)
+     */
+    protected boolean readOthersFile(String otherFile){
+        // try-with-resources closes the reader on every exit path.
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
+                    new GZIPInputStream(
+                        new FileInputStream(otherFile)), "UTF-8"))) {
+            String line;
+            while((line = reader.readLine()) != null){
+                StringTokenizer tknr = new StringTokenizer(line,"= \t\r\n");
+
+                if (tknr.countTokens() != 2)
+                    continue;
+
+                String optstr = tknr.nextToken();
+                String optval = tknr.nextToken();
+
+                if (optstr.equalsIgnoreCase("alpha")){
+                    alpha = Double.parseDouble(optval);
+                }
+                else if (optstr.equalsIgnoreCase("beta")){
+                    beta = Double.parseDouble(optval);
+                }
+                else if (optstr.equalsIgnoreCase("ntopics")){
+                    K = Integer.parseInt(optval);
+                }
+                // saveModelOthers writes this value under the key "liters";
+                // matching only "liter" meant it was never restored.
+                else if (optstr.equalsIgnoreCase("liter")
+                        || optstr.equalsIgnoreCase("liters")){
+                    liter = Integer.parseInt(optval);
+                }
+                else if (optstr.equalsIgnoreCase("nwords")){
+                    V = Integer.parseInt(optval);
+                }
+                else if (optstr.equalsIgnoreCase("ndocs")){
+                    M = Integer.parseInt(optval);
+                }
+                // any other key is ignored
+            }
+        }
+        catch (Exception e){
+            System.out.println("Error while reading other file:" + e.getMessage());
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Load word-topic assignments for this model. The file is
+     * gzip-compressed UTF-8 text with one line per document, each line a
+     * whitespace-separated list of "word:topic" tokens. M and V must already
+     * be set: exactly M lines are read, and data is replaced by a new
+     * LDADataset holding the documents while z receives the topic ids.
+     *
+     * @param tassignFile path of the file to read
+     * @return true on success; false when the file cannot be read, holds
+     *         fewer than M lines, or contains a malformed token (the error
+     *         is printed to standard output)
+     */
+    protected boolean readTAssignFile(String tassignFile)
+    {
+        // try-with-resources closes the reader on every exit path, including
+        // the early returns below.
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
+                    new GZIPInputStream(
+                        new FileInputStream(tassignFile)), "UTF-8"))) {
+
+            z = new TIntArrayList[M];
+            data = new LDADataset();
+            data.setM(M);
+            data.V = V;
+
+            for (int i = 0; i < M; i++){
+                String line = reader.readLine();
+                if (line == null){
+                    // a truncated file would otherwise surface as an opaque
+                    // NullPointerException from StringTokenizer
+                    System.out.println("Invalid word-topic assignment file: expected "
+                            + M + " documents but found only " + i);
+                    return false;
+                }
+
+                StringTokenizer tknr = new StringTokenizer(line, " \t\r\n");
+
+                TIntArrayList words = new TIntArrayList();
+                TIntArrayList topics = new TIntArrayList();
+                while (tknr.hasMoreTokens()){
+                    String token = tknr.nextToken();
+
+                    StringTokenizer tknr2 = new StringTokenizer(token, ":");
+                    if (tknr2.countTokens() != 2){
+                        System.out.println("Invalid word-topic assignment line\n");
+                        return false;
+                    }
+
+                    words.add(Integer.parseInt(tknr2.nextToken()));
+                    topics.add(Integer.parseInt(tknr2.nextToken()));
+                }//end for each topic assignment
+
+                //allocate and add new document to the corpus
+                data.setDoc(new Document(words), i);
+
+                //assign values for z; topics is a fresh list owned by this document
+                z[i] = topics;
+            }//end for each doc
+        }
+        catch (Exception e){
+            System.out.println("Error while loading model: " + e.getMessage());
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+}
--- a/java_LabledLDA/src/jgibblda/Pair.java
+++ b/java_LabledLDA/src/jgibblda/Pair.java
@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+import java.util.Comparator;
+
+/**
+ * A (first, second) pair that is ordered by its second component.
+ *
+ * NOTE: the sort direction is held in the static field naturalOrder and is
+ * therefore shared by every Pair instance. The three-argument constructor
+ * overwrites it for all pairs; the two-argument constructor leaves it as is.
+ */
+public class Pair implements Comparable<Pair> {
+    public Object first;
+    public Comparable second;
+    // true: ascending by second; false: descending by second (applies to all pairs)
+    public static boolean naturalOrder = false;
+
+    /**
+     * Create a pair without touching the shared sort direction.
+     *
+     * @param k value stored in first
+     * @param v value stored in second; used for ordering
+     */
+    public Pair(Object k, Comparable v){
+        first = k;
+        second = v;
+    }
+
+    /**
+     * Create a pair and set the sort direction shared by all pairs.
+     *
+     * @param k value stored in first
+     * @param v value stored in second; used for ordering
+     * @param naturalOrder new value for the static naturalOrder flag
+     */
+    public Pair(Object k, Comparable v, boolean naturalOrder){
+        first = k;
+        second = v;
+        Pair.naturalOrder = naturalOrder;
+    }
+
+    /**
+     * Compare by second: ascending when naturalOrder is set, descending
+     * otherwise.
+     *
+     * @param p pair to compare against
+     * @return negative, zero or positive as this pair sorts before, equal
+     *         to or after p
+     */
+    public int compareTo(Pair p){
+        if (naturalOrder)
+            return this.second.compareTo(p.second);
+        // Swap the operands instead of negating the result: negating
+        // Integer.MIN_VALUE overflows back to Integer.MIN_VALUE and would
+        // report the wrong order.
+        return p.second.compareTo(this.second);
+    }
+}
--- a/old/testo.py
+++ b/old/testo.py
@ -0,0 +1,199 @@
+# -*- coding: utf-8 -*-
+import functools
+import re
+
+import spacy
+import textacy
+from spacy.tokens import Doc
+from spacy.tagger import Tagger
+
+import xml.etree.ElementTree as ET
+
+# spaCy pipeline loaded once at import time; used as the default parser below.
+PARSER = spacy.load('de')
+# Dynamically import the module "spacy.<PARSER.lang>" (the non-empty fromlist
+# makes __import__ return that submodule) and copy its STOP_WORDS into a list.
+stop_words = list(__import__("spacy." + PARSER.lang, globals(), locals(), ['object']).STOP_WORDS)
+
+def compose(*functions):
+    def compose2(f, g):
+        return lambda x: f(g(x))
+    return functools.reduce(compose2, functions, lambda x: x)
+
+
+def cleanTexts(textstream, parser, attr):
+
+    #input str-stream output str-stream
+    pipe = parser.pipe(textstream)
+
+    for doc in pipe:
+
+        tokens = [tok.text for tok in doc
+                  if tok.pos_ not in attr
+                  and tok.tag_ not in attr
+                  and tok.ent_ not in attr
+                  and tok.text not in attr
+                  and tok.lower_ not in attr]
+
+
+        yield " ".join(tokens)
+
+
+        """
+def cleanDoc_lemmatize(doc,parser=PARSER):
+    return parser(" ".join([tok.lemma_ for tok in doc ]))
+
+
+def cleanDoc_STOPS(doc,parser=PARSER, stop_words=None, keep=None):
+    if stop_words is None:
+        stop_words = list(__import__("spacy." + parser.lang, globals(), locals(), ['object']).STOP_WORDS)
+
+    if hasattr(keep, '__iter__'):
+        for k in keep:
+            try:
+                stop_words.remove(k)
+            except ValueError:
+                pass
+
+    return parser(" ".join([tok.text for tok in doc if tok.text not in stop_words]))
+
+
+
+def cleanDoc_ENT(doc,parser=PARSER, keeponly=False, attr=["WORK_OF_ART", "ORG", "PRODUCT", "LOC"]):
+    if keeponly:
+        return  parser(" ".join([tok.text for tok in doc if tok.ent_ in attr]))
+    else:
+        return  parser(" ".join([tok.text for tok in doc if tok.ent_ not in attr]))
+
+
+
+def cleanDoc_POS(doc,parser=PARSER, keeponly=False, attr=["SPACE", "PUNCT"]):
+    if keeponly:
+        return parser(" ".join([tok.text for tok in doc if tok.pos_ in attr]))
+    else:
+        return parser(" ".join([tok.text for tok in doc if tok.pos_ not in attr]))
+"""
+
+
+def cleanTexts_POS(spacypipe, keeponly=False, attr=["SPACE", "PUNCT"]):
+    """
+    :param spacypipe: spacypipe
+    :param keeponly: bool . If True, only attr will be kept. If false, all attr will be deleted
+    :param attr: [str] pos_ or ent_type_
+    :yields: stream of strings: full-length cleaned text
+    """
+    if keeponly:
+        for doc in spacypipe:
+            yield " ".join([tok.text for tok in doc if tok.pos_ in attr])
+
+    else:
+        for doc in spacypipe:
+            yield " ".join([tok.text for tok in doc if tok.pos_ not in attr])
+
+def cleanText_POS(text,parser=PARSER, keeponly=False, attr=["SPACE", "PUNCT"]):
+    """
+    :param txt: str
+    :param keeponly: bool . If True, only attr will be kept. If false, all attr will be deleted
+    :param attr: [str] pos_ or ent_type_
+    :return: str
+    """
+    doc = parser(text)
+
+    if keeponly:
+        return " ".join([tok.text for tok in doc if tok.pos_ in attr])
+    else:
+        return " ".join([tok.text for tok in doc if tok.pos_ not in attr])
+
+
+def removeWhitespace(string):
+    return re.sub(r'(\r\n|\r|\n|(\s)+)', ' ', string)
+
+def removeWords(string, words):
+    big_regex = re.compile('|'.join(map(re.escape, words)))
+    return big_regex.sub("", string)
+
+
+
+
+
+
+def generateMainTextfromTicketXML(path2xml, main_textfield='Beschreibung', cleaning_function=None):
+    """
+    generates strings from XML
+    :param path2xml:
+    :param main_textfield:
+    :param cleaning_function:
+    :yields strings
+    """
+    import xml.etree.ElementTree as ET
+
+    tree = ET.parse(path2xml, ET.XMLParser(encoding="utf-8"))
+    root = tree.getroot()
+
+
+    for ticket in root:
+        text = "ERROR"
+        for field in ticket:
+            if field.tag == main_textfield:
+                if cleaning_function:
+                    text = cleaning_function(field.text)
+                else:
+                    text = field.text
+        yield text
+
+def generateMetadatafromTicketXML(path2xml, key_function_pairs_to_clean, leave_out=['Beschreibung']):
+    import xml.etree.ElementTree as ET
+
+    tree = ET.parse(path2xml, ET.XMLParser(encoding="utf-8"))
+
+    root = tree.getroot()
+
+    for ticket in root:
+        metadata = {}
+        for field in ticket:
+            if field.tag not in leave_out:
+
+                if field.tag in key_function_pairs_to_clean:
+                    metadata[field.tag] = key_function_pairs_to_clean[field.tag](field.text)
+                else:
+                    metadata[field.tag] = field.text
+
+        yield metadata
+
+
+
+
+# Sample text; currently referenced only by the commented-out call below.
+string = "Frau Hinrichs überdenkt die tu Situation a@bc.de und 545453 macht ' dann neue Anträge. \n           Dieses Ticket wird geschlossen \n            \n test"
+
+#print(removeWords(string,["die", "neue"]))
+
+# in:str out:str
+# compose() applies its arguments right-to-left: textacy's replace_emails
+# (with replace_with=u'EMAIL') runs first, then cleanText_POS on its result.
+cleanString = compose(
+    cleanText_POS,
+    functools.partial(textacy.preprocess.replace_emails, replace_with=u'EMAIL')
+)
+
+# Field tag -> cleaning function, in the shape generateMetadatafromTicketXML
+# expects; only used by the commented-out metadata argument further down.
+key_function_pairs_to_clean = {
+    "Loesung":removeWhitespace,
+    "Zusammenfassung":cleanText_POS
+}
+"""
+# in:str-gen out:str-gen
+cleanStream = compose(
+    removeSTOP,
+    lemmatize,
+    cleanEnt
+)
+"""
+# content: xml -> stringCleaning -> pipe -> docCleaning -> corpus
+# metadata:xml -> -> stringCleaning -> corpus
+
+corpus = textacy.Corpus(PARSER)
+
+
+
+
+# Add the main text of every ticket in ticketSamples.xml, dropping tokens
+# matched by "PUNCT", "SPACE" or "PERSON"; metadata is currently disabled.
+corpus.add_texts(
+    cleanTexts(generateMainTextfromTicketXML("ticketSamples.xml"),PARSER,["PUNCT","SPACE","PERSON"])#,
+    #generateMetadatafromTicketXML("ticketSamples.xml",key_function_pairs_to_clean=key_function_pairs_to_clean)
+)
+
+# Print the cleaned text of the first document as a quick sanity check.
+print(corpus[0].text)
+