# topicModelingTickets/config.ini
# (file-listing metadata: 52 lines, 910 B, INI)

# Locations of the data files and the log file.
# NOTE(review): every path is absolute and hard-coded to /home/jannis.grundmann/...;
# adjust them when running on another machine.
[filepath]
# OpenThesaurus data as CSV.
thesauruspath = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/openthesaurus.csv
# Ticket samples as XML.
path2xml = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/ticketSamples.xml
# Ticket export as CSV (M42-Export directory, file dated 2017-09-13).
path2csv = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/M42-Export/Tickets_2017-09-13.csv
# Second ticket CSV in the same export directory; presumably a reduced set for quick runs (TODO confirm).
small = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/M42-Export/Tickets_small.csv
# Log file.
logfile = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/topicModelTickets.log
# Lemmatization list; the "-de" suffix suggests German (TODO confirm the expected file format).
lemmas = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/lemmatization-de.txt
# Language code, not a path, although it lives in [filepath].
# NOTE(review): check which code reads filepath/language before moving it to another section.
language = de
# Text preprocessing settings.
[preprocessing]
# Comma-separated list of entity labels (no spaces after the commas).
# Presumably named-entity types to retain during cleaning (TODO confirm against the reader).
ents2keep = WORK_OF_ART,ORG,PRODUCT,LOC
# Comma-separated list of German words.
# Presumably extra words to filter out, e.g. greetings and filler (TODO confirm).
custom_words = grüßen,fragen,damen,probleme,herren,dank
# Disabled setting: remove the leading "#" to set lemmatize = True.
# NOTE(review): while this stays commented out the key is absent; confirm the reader has a fallback.
#lemmatize = True
# Topic-model settings.
# The section name contains a space, so readers must look it up as exactly "topic modeling".
[topic modeling]
# Written like a tuple, but stored as the plain string "(1,2)"; the reader has to parse it.
# Presumably the (min, max) n-gram size (TODO confirm).
ngrams = (1,2)
# Presumably lower/upper document-frequency bounds for term selection (TODO confirm units: count vs. fraction).
min_df = 0
max_df = 1.0
# Presumably vocabulary filter thresholds: minimum document count and maximum document fraction (TODO confirm).
no_below = 20
no_above = 0.5
# Selects the model; only the value "lda" appears here (TODO confirm other accepted values).
# NOTE(review): if this file is read with Python configparser defaults, the key is seen as "topicmodel".
topicModel = lda
# Presumably how many top words to report per topic (TODO confirm).
top_topic_words = 5
# Presumably how many top document labels to report per topic (TODO confirm).
top_document_labels_per_topic = 2