# topicModelingTickets/config.ini

# Thesaurus resources.
# NOTE(review): presumably `input` is the raw source file and `pickle_file`
# the serialized dictionary built from it - confirm against the loader.
[thesaurus]
input = deWordNet.xml
pickle_file = thesaurus_dict.pkl
# Spell-checking resources.
# NOTE(review): `input` looks like a sentence corpus and `pickle_file` like
# the cached word dictionary derived from it - verify in the reading code.
[spellchecking]
input = deu_news_2015_1M-sentences.txt
pickle_file = words_dict.pkl
# Lemmatization resources.
# NOTE(review): presumably `input` is the lemma source list and `pickle_file`
# the pickled lookup built from it - confirm against the loader.
[lemmatization]
input = lemmas.txt
pickle_file = lemma_dict.pkl
# Noun list resources: two input files and one pickle file.
# NOTE(review): presumably both inputs are merged into the pickled list -
# confirm against the loader.
[nouns]
input1 = nomen.txt
input2 = nomen2.txt
pickle_file = nouns_list.pkl
# First-name list resources.
# NOTE(review): presumably `input` is the plain-text name list and
# `pickle_file` its serialized form - confirm against the loader.
[firstnames]
input = firstnames.txt
pickle_file = firstnames_list.pkl
# Stop-word resources (the `de_` prefix suggests German): three input files
# and one pickle file.
# NOTE(review): presumably all inputs are combined into the pickled list -
# confirm against the loader.
[de_stopwords]
input1 = de_stopwords_1.txt
input2 = de_stopwords_2.txt
input3 = de_stopwords_3.txt
pickle_file = stopwords_list.pkl
# Logging settings: a level name and a log file name.
# NOTE(review): `level` looks like a standard logging level name (e.g. INFO) -
# confirm which values the application accepts.
[logging]
level = INFO
filename = topicModelTickets.log
# Corpus settings for the `de` ticket export.
# NOTE(review): presumably `input` is the ticket CSV to read and `path` the
# directory used for corpus files - confirm against the corpus code.
[de_corpus]
# Alternative exports, currently disabled; only one `input` may be active.
#input = M42-Export/Tickets_med.csv
#input = M42-Export/Tickets_small.csv
#input = M42-Export/Tickets_mini.csv
input = M42-Export/de_tickets.csv

path = corpi/
# Corpus settings for the `en` ticket export.
# NOTE(review): presumably `input` is the ticket CSV to read and `path` the
# directory used for corpus files - confirm against the corpus code.
[en_corpus]
input = M42-Export/en_tickets.csv

path = corpi/
# Ticket column settings.
# NOTE(review): `content_collumn_name` is misspelled ("collumn"), but the key
# is left unchanged because the reading code presumably looks it up by this
# exact name. `metaliste` is a comma-separated list, apparently of metadata
# column names - confirm against the CSV reader.
[tickets]
content_collumn_name = Description
metaliste = TicketNumber,Subject,CreatedDate,categoryName,Impact,Urgency,BenutzerID,VerantwortlicherID,EigentuemerID,Solution
# Text preprocessing settings; both active values are comma-separated lists.
# NOTE(review): `ents2keep` looks like named-entity labels to retain and
# `custom_words` like additional words to filter - confirm against the
# preprocessing code.
[preprocessing]
ents2keep = WORK_OF_ART,ORG,PRODUCT,LOC
custom_words = grüßen,fragen,damen,probleme,herren,dank
# Disabled option, kept for reference.
#lemmatize = True
# Topic-modeling parameters.
# NOTE(review): the section name contains a space; it is kept as-is because
# the reading code presumably looks it up by this exact name.
# NOTE(review): `ngrams`, `min_df` and `max_df` resemble scikit-learn
# vectorizer options, and `no_below` / `no_above` resemble gensim dictionary
# filtering options - confirm which of these the code actually consumes.
[topic modeling]
ngrams = (1,2)
min_df = 0
max_df = 1.0
no_below = 20
no_above = 0.5
topicModel = lda
top_topic_words = 5
top_document_labels_per_topic = 2