diff --git a/config.ini b/config.ini
index 5612339..a0fd830 100644
--- a/config.ini
+++ b/config.ini
@@ -7,14 +7,27 @@ language = de
 
 [preprocessing]
 
-ents = WORK_OF_ART,ORG,PRODUCT,LOC
+ents2keep = WORK_OF_ART,ORG,PRODUCT,LOC
 
 custom_words = grüßen,fragen
 
 #lemmatize = True
 
-default_return_first_Syn = False
-
+
+[topic modeling]
+
+ngrams = (1,2)
+
+min_df = 0
+max_df = 1.0
+no_below = 20
+no_above = 0.5
+
+topicModel = lda
+
+top_topic_words = 5
+
+top_document_labels_per_topic = 2
 
 
 
diff --git a/preprocessing.py b/preprocessing.py
index 61c1709..7dda81c 100644
--- a/preprocessing.py
+++ b/preprocessing.py
@@ -6,7 +6,7 @@ import re
 import subprocess
 import time
 import xml.etree.ElementTree as ET
-
+import sys
 import spacy
 import textacy
 from scipy import *
@@ -113,9 +113,12 @@ def keepOnlyENT(ent_list,parser=PARSER):
 def removeAllENT(ent_list, parser=PARSER):
     return lambda doc:  parser(" ".join([tok.text for tok in doc if tok.ent_type_ not in ent_list]))
 
+def keepUniqueTokens(parser=PARSER):  # factory: returns a step that re-parses a doc keeping each distinct token text once
+    return lambda doc: parser(" ".join(dict.fromkeys(tok.text for tok in doc)))  # first-occurrence order; set() order varies between runs
 
+def lemmatize(parser=PARSER):  # factory: returns a step that re-parses a doc built from its tokens' lemmas
+    return lambda doc: parser(" ".join(token.lemma_ for token in doc))
 
-doc2Set = lambda doc: str(set([tok.text for tok in doc]))
 doc2String = lambda doc : doc.text
 
 
@@ -215,21 +218,24 @@ def generate_labled_lines(textacyCorpus):
 
 
 
-ents = config.get("preprocessing","ents").split(",")
+ents = [ent.strip() for ent in config.get("preprocessing","ents2keep").split(",") if ent.strip()]  # entity labels from config; tolerates "ORG, LOC" and stray commas
 
-clean_in_content=compose(
+
+clean_in_content=compose(   # note: the bottom-most function is executed first
 
     doc2String,
-    #normalizeSynonyms(default_return_first_Syn=config.get("preprocessing","default_return_first_Syn")),
+    keepUniqueTokens(),    # given the bottom-first order, this de-duplicates the already lemmatized text
+    #normalizeSynonyms(default_return_first_Syn=False),
+    lemmatize(),    # replaces every token with its lemma
     replaceEmails(),
     replaceURLs(),
     replaceTwitterMentions(),
-    removeWords(stop_words),
-    #removeAllPOS(["SPACE","PUNCT"]),
     #removeAllENT(ents),
     keepOnlyPOS(['NOUN'])
 )
 
+
+
 clean_in_meta = {
     "Loesung":removeAllPOS(["SPACE"]),
     "Zusammenfassung":removeAllPOS(["SPACE","PUNCT"])
@@ -238,9 +244,6 @@ clean_in_meta = {
 
 
 
-
-
-
 ## add files to textacy-corpus,
 print("add texts to textacy-corpus...")
 corpus.add_texts(
@@ -250,9 +253,18 @@ corpus.add_texts(
 
 printRandomDoc(corpus)
 
+# Idea: 3 different corpora
 
 
-####################'####################'      Variablen todo alles in config
+
+####################'####################'
+
+
+
+
+
+
+####################'####################'    TODO: move all of these variables into the config
 
 ngrams = (1,2)
 
@@ -268,7 +280,7 @@ weighting = ('tf' if topicModel == 'lda' else 'tfidf')
 top_topic_words = 5
 top_document_labels_per_topic = 2
 
-n_topics = 4
+n_topics = 1 + len(set(corpus[0].metadata))  # one topic per metadata key of the first document, plus 1 for a default topic