diff --git a/.gitignore b/.gitignore index d2a74a9..5fdb4fb 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,8 @@ wheels/ *.egg-info/ .installed.cfg *.egg +openthesaurus.csv + # PyInstaller # Usually these files are written by a python script from a template diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..a0fd830 --- /dev/null +++ b/config.ini @@ -0,0 +1,39 @@ +[default] + +thesauruspath = openthesaurus.csv +path2xml = ticketSamples.xml + +language = de + +[preprocessing] + +ents2keep = WORK_OF_ART,ORG,PRODUCT,LOC + +custom_words = grüßen,fragen + +#lemmatize = True + + +[topic modeling] + +ngrams = (1,2) + +min_df = 0 +max_df = 1.0 +no_below = 20 +no_above = 0.5 + +topicModel = lda + +top_topic_words = 5 + +top_document_labels_per_topic = 2 + + + + + + + + + diff --git a/java_LabledLDA/.idea/libraries/lib.xml b/java_LabledLDA/.idea/libraries/lib.xml new file mode 100644 index 0000000..fa8838a --- /dev/null +++ b/java_LabledLDA/.idea/libraries/lib.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/java_LabledLDA/.idea/misc.xml b/java_LabledLDA/.idea/misc.xml new file mode 100644 index 0000000..5182650 --- /dev/null +++ b/java_LabledLDA/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/java_LabledLDA/.idea/modules.xml b/java_LabledLDA/.idea/modules.xml new file mode 100644 index 0000000..3d6ae4f --- /dev/null +++ b/java_LabledLDA/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/java_LabledLDA/.idea/workspace.xml b/java_LabledLDA/.idea/workspace.xml new file mode 100644 index 0000000..d2f2495 --- /dev/null +++ b/java_LabledLDA/.idea/workspace.xml @@ -0,0 +1,439 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + DEFINITION_ORDER + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +