# Configuration for the topicModelingTickets project (INI format).
# Full-line comments start with '#'; values are plain strings.
# Locations of input data, resource files and the log file.
# NOTE(review): every path is absolute and points into one developer's home
# directory; adjust them for the machine the project runs on.
[filepath]
# Thesaurus CSV (openthesaurus.csv).
thesauruspath = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/openthesaurus.csv
# Ticket samples in XML form.
path2xml = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/ticketSamples.xml
# Ticket export CSV (file name carries the date 2017-09-13).
path2csv = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/M42-Export/Tickets_2017-09-13.csv
# Tickets_small.csv; presumably a reduced ticket set for quick runs - confirm.
small = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/M42-Export/Tickets_small.csv
# Log file.
logfile = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/topicModelTickets.log
# Lemmatization list (lemmatization-de.txt).
lemmas = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/lemmatization-de.txt
# Language code. NOTE(review): not a path, but kept in this section so code
# that reads it from [filepath] keeps working.
language = de

# Text preprocessing options.
# Lists are written as comma-separated values without spaces.
[preprocessing]
# Presumably the named-entity labels to keep during preprocessing - confirm
# against the code that reads this key.
ents2keep = WORK_OF_ART,ORG,PRODUCT,LOC
# Custom word list; presumably extra words to filter out - confirm.
# Contains non-ASCII characters, so the file has to be read as UTF-8.
custom_words = grüßen,fragen,damen,probleme,herren,dank
# Disabled option, left commented out as in the original file.
# NOTE(review): confirm whether the code still reads 'lemmatize'; delete if not.
#lemmatize = True

# Topic-modeling parameters.
# NOTE(review): the section name contains a space; it is kept because the
# reading code must look it up under exactly this name.
[topic modeling]
# Written as a tuple literal; presumably the (min_n, max_n) n-gram range -
# confirm how the reader parses this string.
ngrams = (1,2)
# Presumably lower/upper document-frequency bounds for the vectorizer - confirm.
min_df = 0
max_df = 1.0
# Presumably dictionary filtering thresholds (minimum document count /
# maximum document fraction) - confirm.
no_below = 20
no_above = 0.5
# Selects the topic-model algorithm.
topicModel = lda
# Presumably the number of top words reported per topic - confirm.
top_topic_words = 5
# Presumably the number of top document labels reported per topic - confirm.
top_document_labels_per_topic = 2