diff --git a/config.ini b/config.ini
index 453ac97..98ebd26 100644
--- a/config.ini
+++ b/config.ini
@@ -41,8 +41,8 @@ filename=topicModelTickets.log
 
 [de_corpus]
 
-#input=M42-Export/Tickets_small.csv
-input=M42-Export/de_tickets.csv
+input=M42-Export/Tickets_small.csv
+#input=M42-Export/de_tickets.csv
 
 path=corpi/
 
@@ -64,7 +64,7 @@ metaliste=TicketNumber,Subject,CreatedDate,categoryName,Impact,Urgency,BenutzerI
 
 #ents2keep=WORK_OF_ART,ORG,PRODUCT,LOC
 
-custom_words=geehrt,dr,not,frage,betreff,gerne,dame,herr,frau,hilfe,moeglichkeit,beste,freuen,voraus,problem,lauten,bedanken,voraus,hallo,gerne,freundlich,fragen,fehler,bitten,ehre,lieb,liebe,gruesse,helfen,versuchen,unbestimmt,woche,tadelos,klappen,mittlerweile,bekommen,erreichbar,gruss,auffahren,vorgang,hinweis,name,gruss,id,erfolg,folge,team,absender,versenden,vorname,strasse,prozess,portal,moeglichkeit,fremd,wende,rueckfrage,stehen,verfuegung,funktionieren,pruefen,hoffen,ok
+custom_words=eintrag,element,nutzer,einrichtung,abteilung,gebaeude,raum,ansprechpartner,geehrt,dr,not,frage,betreff,gerne,dame,herr,frau,hilfe,moeglichkeit,beste,freuen,voraus,problem,lauten,bedanken,voraus,hallo,gerne,freundlich,fragen,fehler,bitten,ehre,lieb,liebe,gruesse,helfen,versuchen,unbestimmt,woche,tadelos,klappen,mittlerweile,bekommen,erreichbar,gruss,auffahren,vorgang,hinweis,name,gruss,id,erfolg,folge,team,absender,versenden,vorname,strasse,prozess,portal,moeglichkeit,fremd,wende,rueckfrage,stehen,verfuegung,funktionieren,pruefen,hoffen,ok
 
diff --git a/java_LabledLDA/models/tickets/.others.gz b/java_LabledLDA/models/tickets/.others.gz
index dfc54ee..27f7097 100644
Binary files a/java_LabledLDA/models/tickets/.others.gz and b/java_LabledLDA/models/tickets/.others.gz differ
diff --git a/java_LabledLDA/models/tickets/.tassign.gz b/java_LabledLDA/models/tickets/.tassign.gz
index 5be480f..4a5b35b 100644
Binary files a/java_LabledLDA/models/tickets/.tassign.gz and b/java_LabledLDA/models/tickets/.tassign.gz differ
diff --git a/java_LabledLDA/models/tickets/.theta.gz b/java_LabledLDA/models/tickets/.theta.gz
index 21cecf7..ce505a0 100644
Binary files a/java_LabledLDA/models/tickets/.theta.gz and b/java_LabledLDA/models/tickets/.theta.gz differ
diff --git a/java_LabledLDA/models/tickets/.twords.gz b/java_LabledLDA/models/tickets/.twords.gz
index 7dfe763..9673a6e 100644
Binary files a/java_LabledLDA/models/tickets/.twords.gz and b/java_LabledLDA/models/tickets/.twords.gz differ
diff --git a/java_LabledLDA/models/tickets/.wordmap.gz b/java_LabledLDA/models/tickets/.wordmap.gz
index 0792c8d..4b38701 100644
Binary files a/java_LabledLDA/models/tickets/.wordmap.gz and b/java_LabledLDA/models/tickets/.wordmap.gz differ
diff --git a/java_LabledLDA/models/tickets/tickets.gz b/java_LabledLDA/models/tickets/tickets.gz
index 4a252a2..54fbfc5 100644
Binary files a/java_LabledLDA/models/tickets/tickets.gz and b/java_LabledLDA/models/tickets/tickets.gz differ
diff --git a/main.py b/main.py
index fe673c0..abbfb66 100644
--- a/main.py
+++ b/main.py
@@ -2,18 +2,21 @@
 import matplotlib
 matplotlib.use('Agg')
 
 import time
+import init
+
 import corporization
 import preprocessing
 import topicModeling
 import cleaning
-from miscellaneous import *
+from miscellaneous import *
 
 # ssh madonna "nohup /usr/bin/python3 -u /home/jannis.grundmann/PycharmProjects/topicModelingTickets/main.py &> /home/jannis.grundmann/PycharmProjects/topicModelingTickets/printout_main.log &"
 start = time.time()
 
-import init
-init.main()
+
+
+#init.main()
 logprint("")
 
 corporization.main()
@@ -25,14 +28,32 @@
logprint("") preprocessing.main() # ~5h logprint("") -#topicModeling.main(use_raw=False,algorithm="llda") + +""" +topicModeling.main(use_raw=False,algorithm="lsa") logprint("") -#topicModeling.main(use_raw=True) topicModeling.main(use_raw=False,algorithm="lda") logprint("") + +topicModeling.main(use_raw=False,algorithm="nmf") +logprint("") + + +topicModeling.main(use_raw=False,algorithm="llda") +logprint("") +""" + + + + + + + + + logprint("") end = time.time() diff --git a/miscellaneous.py b/miscellaneous.py index d528665..59c09e3 100644 --- a/miscellaneous.py +++ b/miscellaneous.py @@ -154,18 +154,6 @@ def printRandomDoc(textacyCorpus): print() -def corpus2Text(corpus): - for doc in corpus: - yield doc.text - -def corpus2Meta(corpus): - for doc in corpus: - yield doc.metadata - -def saveplaincorpustext(corpus,path): - textacy.fileio.write_file_lines(corpus2Text(corpus),filepath=path ) - - def save_corpus(corpus, corpus_path, corpus_name): """ @@ -175,42 +163,22 @@ def save_corpus(corpus, corpus_path, corpus_name): :param corpus_name: str (should content the language like "_de_") """ - """ - # save stringstore - stringstore_path = corpus_path + corpus_name + '_strings.json' - with open(stringstore_path, "w") as file: - parser.vocab.strings.dump(file) - - #todo save vocab? - """ - # save parser parser = corpus.spacy_lang parserpath = corpus_path + str(parser.lang) + '_parser' parser.save_to_directory(parserpath) - ## - - # save content - contentpath = corpus_path + corpus_name + "_content.bin" - textacy.fileio.write_spacy_docs((doc.spacy_doc for doc in corpus), contentpath) - - #save plain content + # save plain content + meta plainpath = corpus_path + corpus_name + "_content.json" - textacy.fileio.write_json_lines(({"index" : doc.corpus_index, "content" : doc.text} for doc in corpus), plainpath) - - # save meta - metapath = corpus_path + corpus_name + "_meta.json" - #meta_gen = (doc.metadata.update({"index": doc.corpus_index}) for doc in corpus) - meta_gen = gen_meta(corpus) - textacy.fileio.write_json_lines(meta_gen, metapath) + textacy.fileio.write_json_lines(gen_dicts(corpus), plainpath) -def gen_meta(corpus): + +def gen_dicts(corpus): for doc in corpus: - meta = doc.metadata - meta.update({"index": doc.corpus_index}) - yield meta + dict = {"index" : doc.corpus_index, "content" : doc.text} + dict.update(doc.metadata) + yield dict @@ -233,7 +201,6 @@ def load_corpus(corpus_path, corpus_name, lang="de"): # load parser parser = spacy.load(lang) - stringstorepath = corpus_path + str(lang) + '_parser'+'/vocab/strings.json' with open(stringstorepath) as file: parser.vocab.strings.load(file) @@ -244,46 +211,35 @@ def load_corpus(corpus_path, corpus_name, lang="de"): #load corpus corpus = textacy.Corpus(parser) - - contentpath = corpus_path + corpus_name + "_content.bin" plainpath = corpus_path + corpus_name + "_content.json" - metapath = corpus_path + corpus_name + "_meta.json" - """ - try: - spacy_docs = textacy.fileio.read_spacy_docs(corpus.spacy_vocab, contentpath) - metadata_stream = textacy.fileio.read_json_lines(metapath) - - for spacy_doc, metadata in zip(spacy_docs, metadata_stream): - corpus.add_doc( - textacy.Doc(spacy_doc, lang=corpus.spacy_lang, metadata=metadata)) - except: - """ - # neu init!! 
-    #corpus = textacy.Corpus(parser)
    plain_stream = textacy.fileio.read_json_lines(plainpath) # yields {int : str}
-    metadata_stream = textacy.fileio.read_json_lines(metapath)
-
-    for plain, metadata in zip(plain_stream, metadata_stream):
-        if plain["index"] == metadata["index"]:
-            corpus.add_doc(textacy.Doc(plain["content"], lang=corpus.spacy_lang, metadata=metadata))
-        else:
-            raise IndexError
+    for plain in plain_stream:
+        meta = {}
+        for key,value in plain.items():
+            if key != "content" and key != "index":
+                meta[key] = value
+        corpus.add_doc(textacy.Doc(plain["content"], lang=corpus.spacy_lang, metadata=meta))
 
    return corpus, corpus.spacy_lang
 
+"""
+def corpus2Text(corpus):
+    for doc in corpus:
+        yield doc.text
+
+def corpus2Meta(corpus):
+    for doc in corpus:
+        yield doc.metadata
+
+def saveplaincorpustext(corpus,path):
+    textacy.fileio.write_file_lines(corpus2Text(corpus),filepath=path )
 
 def save_corpusV2(corpus, corpus_path, corpus_name):
-    """
-    saves a textacy-corpus including spacy-parser
-    :param corpus: textacy-Corpus
-    :param corpus_path: str
-    :param corpus_name: str (should content the language like "_de_")
-    """
+
    # save parser
    parser = corpus.spacy_lang
@@ -302,13 +258,7 @@ def save_corpusV2(corpus, corpus_path, corpus_name):
            file.write(json.dumps(doc.metadata))
 
 
 def load_corpusV2(corpus_path, corpus_name, lang="de"):
-    """
-    Load textacy-Corpus including spacy-parser out from file
-    :param corpus_path: str
-    :param corpus_name: str (should content the language like "_de_")
-    :param lang: str (language code) ir spacy.Language
-    :return: texracy.Corpus, spacy.language
-    """
+
    # check for language
    if "de_" in corpus_name:
@@ -359,5 +309,5 @@ def yield_fromdir(path,spacy_vocab=None,type=".pkl"):
 
    else:
        for filename in filelist:
            yield load_obj(path+filename)
-
+"""
diff --git a/topicModeling.py b/topicModeling.py
index 7b16ae0..170943d 100644
--- a/topicModeling.py
+++ b/topicModeling.py
@@ -9,7 +9,7 @@
 import sys
 import json
 import os.path
 import subprocess
-from textacy import Vectorizer
+from textacy import Vectorizer, viz
 from miscellaneous import *
 import textacy
@@ -163,7 +163,8 @@ def jgibbsLLDA(corpus, path2save_results, top_topic_words=7, add_default_topic=F
    jgibbsLLDA_root = FILEPATH + "/java_LabledLDA/"
 
    LLDA_filepath = "{0}models/tickets/tickets.gz".format(jgibbsLLDA_root)
-    dict_path = "{0}models/tickets/labeldict.txt".format(jgibbsLLDA_root)
+    #dict_path = "{0}models/tickets/labeldict.txt".format(jgibbsLLDA_root)
+    dict_path = FILEPATH + "results/labeldict.txt"
 
    # printlog(str("LABELDICT: {0}".format(labeldict)))
    #logprint(str("LABELDICT-length: {0}".format(len(labeldict))))
@@ -243,6 +244,30 @@ def jgibbsLLDA(corpus, path2save_results, top_topic_words=7, add_default_topic=F
    textacy.fileio.write_file_lines(result, path2save_results)
    #####################################################################################################################
+
+    #todo llda termite plot
+    """
+    topic_inds=[] #: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+
+    # get topic and term labels
+    # : ('topic 0', 'topic 1', 'topic 2', 'topic 3', 'topic 4', 'topic 5', 'topic 6', 'topic 7', 'topic 8', 'topic 9', 'topic 10', 'topic 11', 'topic 12', 'topic 13', 'topic 14')
+    topic_labels = tuple('topic {}'.format(topic_ind) for topic_ind in topic_inds)
+
+    # : ('hardware', 'raum', 'adresse', 'gebaeude', 'tu', 'uni', 'ticket', 'email', 'account', 'nummer', 'mail', 'outlook', 'karte', 'drucker', 'server', 'service', 'antwort', 'verbindung', 'herzliche', 'einrichten', 'vergessen', 'wenden', 'ews', 'anhang', 'form', 'konto', 'nachricht', 'unterstuetzung', 'passwort', 'unicard', 'semester', 'system', 'aenderung', 'rueckmeldung', 'meldung', 'zugreifen', 'login', 'adressat', 'sender', 'kurs', 'made', 'mittwoch', 'note', 'our', 'korrespondenz', 'unbeschadet', 'boss', 'unterrichten', 'telefax', 'zugang', 'probleme', 'zugriff', 'mitarbeiterin', 'internet', 'daten', 'anmeldung', 'aendern', 'unterschrift', 'loeschen', 'anmelden', 'datei', 'message', 'laptop', 'benoetigt', 'link', 'montag', 'programm', 'ordner', 'personal', 'rechner', 'veranstaltung', 'august', 'lizenz', 'anschluss', 'mitarbeiter', 'erwuenscht', 'umzug', 'pc', 'uniaccount', 'amt', 'fax', 'it', 'institut', 'nutzer', 'bild', 'type', 'prof', 'verantwortlicher', 'bemerkung', 'antragsteller', 'element', 'hahn', 'eintrag', 'telefonbuch', 'ansprechpartner', 'universitaet', 'physik', 'abteilung', 'fakultaet', 'software', 'dezernat', 'einrichtung', 'telefon', 'lehrstuhl', 'buero')
+    term_labels = tuple(id2term[term_ind] for term_ind in term_inds)
+
+    # get topic-term weights to size dots
+    #[[ 0.02721858 -0.03898025  0.00047936 ...,  0.05862538 -0.07742336  0.04761928]
+    # [ 0.14977875 -0.24192522 -0.00620335 ..., -0.0497216   0.08269951 -0.05715901]
+    # [ 0.04977951  0.02296709  0.01214562 ...,  0.11444371 -0.15212482  0.21481788]
+    # ...,
+    # [
+    term_topic_weights = np.array([self.model.components_[topic_ind][term_inds]
+                                   for topic_ind in topic_inds]).T
+
+    viz.draw_termite_plot(
+        term_topic_weights, topic_labels, term_labels, save=path2save_results)
+    """
 
    logprint("")
 
    end = time.time()
@@ -275,9 +300,9 @@ def main(use_raw=False, algorithm="llda"):
    # idea https://en.wikipedia.org/wiki/Noisy_text_analytics
    # idea https://gate.ac.uk/family/
 
-    # todo gescheites tf(-idf) maß finden
-    # todo topics zusammenfassen
-    # frage wieviele tickets pro topic?
+    # todo llda: merge similar topics
+    # idea: train lda so that the term <-> topic assignment does not get too weak, while keeping as many topics as possible
+    # question (lda): how many tickets per topic?
""" ngrams = 1 @@ -300,25 +325,25 @@ def main(use_raw=False, algorithm="llda"): if algorithm == "llda": top_topic_words = 5 add_default_topic = False - path2save_results = resultspath + "_{}_{}.txt".format("top"+str(top_topic_words), "wdefault" if add_default_topic else "") + path2save_results = resultspath + "_{}_{}.txt".format("top"+str(top_topic_words), "wdef" if add_default_topic else "") jgibbsLLDA(de_corpus, path2save_results=path2save_results, top_topic_words=top_topic_words, add_default_topic=add_default_topic) top_topic_words = 5 add_default_topic = True - path2save_results = resultspath + "_{}_{}.txt".format("top"+str(top_topic_words), "wdefault" if add_default_topic else "") + path2save_results = resultspath + "_{}_{}.txt".format("top"+str(top_topic_words), "wdef" if add_default_topic else "") jgibbsLLDA(de_corpus, path2save_results=path2save_results, top_topic_words=top_topic_words, add_default_topic=add_default_topic) top_topic_words = 10 add_default_topic = False - path2save_results = resultspath + "_{}_{}.txt".format("top"+str(top_topic_words), "wdefault" if add_default_topic else "") + path2save_results = resultspath + "_{}_{}.txt".format("top"+str(top_topic_words), "wdef" if add_default_topic else "") jgibbsLLDA(de_corpus, path2save_results=path2save_results, top_topic_words=top_topic_words, add_default_topic=add_default_topic) top_topic_words = 10 add_default_topic = True - path2save_results = resultspath + "_{}_{}.txt".format("top"+str(top_topic_words), "wdefault" if add_default_topic else "") + path2save_results = resultspath + "_{}_{}.txt".format("top"+str(top_topic_words), "wdef" if add_default_topic else "") jgibbsLLDA(de_corpus, path2save_results=path2save_results, top_topic_words=top_topic_words, add_default_topic=add_default_topic) @@ -339,15 +364,70 @@ def main(use_raw=False, algorithm="llda"): labeldict = {k: v for v, k in enumerate(labelist)} - n_topics = 15 textacyTopicModeling(ngrams = 1, min_df = 1, - max_df = 0.8, + max_df = 0.9, topicModel = algorithm, - n_topics =n_topics, + n_topics =15, corpus=de_corpus) + + textacyTopicModeling(ngrams=1, + min_df=1, + max_df=0.9, + topicModel=algorithm, + n_topics=20, + corpus=de_corpus) + + textacyTopicModeling(ngrams=1, + min_df=1, + max_df=0.9, + topicModel=algorithm, + n_topics=25, + corpus=de_corpus) + + + textacyTopicModeling(ngrams=1, + min_df=1, + max_df=0.9, + topicModel=algorithm, + n_topics=30, + corpus=de_corpus) + + + + textacyTopicModeling(ngrams=(1, 2), + min_df=1, + max_df=0.9, + topicModel=algorithm, + n_topics=15, + corpus=de_corpus) + + textacyTopicModeling(ngrams = (1,2), + min_df = 1, + max_df = 0.9, + topicModel = algorithm, + n_topics =20, + corpus=de_corpus) + + textacyTopicModeling(ngrams = (1,2), + min_df = 1, + max_df = 0.9, + topicModel = algorithm, + n_topics =25, + corpus=de_corpus) + + + textacyTopicModeling(ngrams = (1,2), + min_df = 1, + max_df = 0.9, + topicModel = algorithm, + n_topics =30, + corpus=de_corpus) + + + """ textacyTopicModeling(ngrams = (1,2), min_df = 1,