# topicModelingTickets/config.ini
# Web-view listing metadata: 40 lines, 370 B, INI

# Input file names and language code.
# NOTE(review): if this file is read with Python's configparser, this lowercase
# [default] is an ordinary section; only uppercase [DEFAULT] acts as the
# fallback section there. Confirm which one the consumer expects.
[default]
# Presumably the thesaurus CSV file to load; verify the path is resolved the way the consumer expects.
thesauruspath = openthesaurus.csv
# Presumably the XML file containing the ticket samples; verify against the consumer.
path2xml = ticketSamples.xml
# Language code; "de" presumably selects German resources - TODO confirm accepted values.
language = de
# Text preprocessing options.
[preprocessing]
# Comma-separated list, no spaces. The values look like named-entity labels
# to retain - TODO confirm against the consumer.
ents2keep = WORK_OF_ART,ORG,PRODUCT,LOC
# Comma-separated list of words, no spaces. How the consumer uses them is not
# visible in this file - TODO confirm.
custom_words = grüßen,fragen
# Currently disabled (commented out); the reason is not recorded in this file.
#lemmatize = True
# Settings for the topic-modeling step.
# NOTE(review): the section name contains a space; it must match the
# consumer's lookup string exactly, so do not rename it without checking.
[topic modeling]
# Written as a tuple literal; presumably (min_n, max_n) for the n-gram range -
# confirm how the consumer parses this string.
ngrams = (1,2)
# min_df / max_df resemble scikit-learn vectorizer document-frequency bounds -
# TODO confirm that is where they are passed.
min_df = 0
max_df = 1.0
# no_below / no_above resemble gensim Dictionary.filter_extremes arguments -
# TODO confirm that is where they are passed.
no_below = 20
no_above = 0.5
# Name of the topic model to run; "lda" presumably means Latent Dirichlet
# Allocation - confirm the set of accepted values.
topicModel = lda
# Presumably how many top words are reported per topic - TODO confirm.
top_topic_words = 5
# Presumably how many top document labels are reported per topic - TODO confirm.
top_document_labels_per_topic = 2