98 lines
2.7 KiB
INI
98 lines
2.7 KiB
INI
# Thesaurus resources: `input` points at the source XML, `pickle_file` at a pickle file.
# NOTE(review): presumably the pickle is generated from `input` - confirm against the consuming script.
[thesaurus]
input = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/deWordNet.xml
pickle_file = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/thesaurus_dict.pkl

# Spell-checking resources: `input` is a sentence corpus text file, `pickle_file` a pickle file.
# NOTE(review): presumably the pickle holds a word dictionary built from `input` - confirm.
[spellchecking]
input = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/deu_news_2015_1M-sentences.txt
pickle_file = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/words_dict.pkl

# Lemmatization resources: `input` is the lemma text file, `pickle_file` a pickle file.
# NOTE(review): presumably the pickle holds a lemma dictionary built from `input` - confirm.
[lemmatization]
input = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/lemmas.txt
pickle_file = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/lemma_dict.pkl

# Noun resources: two input text files (`input1`, `input2`) and one pickle file.
# NOTE(review): presumably both inputs are merged into the pickled list - confirm.
[nouns]
input1 = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/nomen.txt
input2 = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/nomen2.txt
pickle_file = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/nouns_list.pkl

# First-name resources: `input` is the name text file, `pickle_file` a pickle file.
# NOTE(review): presumably the pickle holds the list read from `input` - confirm.
[firstnames]
input = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/firstnames.txt
pickle_file = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/firstnames_list.pkl

# German stop-word resources: three input text files (`input1`..`input3`) and one pickle file.
# NOTE(review): presumably all inputs are merged into the pickled list - confirm.
[de_stopwords]
input1 = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/de_stopwords_1.txt
input2 = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/de_stopwords_2.txt
input3 = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/de_stopwords_3.txt
pickle_file = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/stopwords_list.pkl

# Logging: `level` is the log level, `filename` the log file path.
# NOTE(review): `INFO` looks like a Python logging level name - confirm how the reader maps it.
[logging]
level = INFO
filename = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/topicModelTickets.log

# German ticket corpus: `input` is the CSV export, `path` a directory, `raw` / `pre` are names.
# NOTE(review): presumably `raw` and `pre` are base names of the raw and preprocessed
# corpus stored under `path` - confirm against the consuming script.
[de_corpus]
# Alternative exports, currently disabled. Keep exactly one active `input` line:
# strict INI parsers (e.g. Python's configparser) reject duplicate keys in a section.
#input = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/M42-Export/Tickets_med.csv
#input = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/M42-Export/Tickets_small.csv
#input = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/M42-Export/Tickets_mini.csv
input = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/M42-Export/de_tickets.csv

path = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/corpi/
raw = de_raw_ticket
pre = de_pre_ticket

# English ticket corpus: `input` is the CSV export, `path` a directory, `raw` / `pre` are names.
# NOTE(review): presumably `raw` and `pre` are base names of the raw and preprocessed
# corpus stored under `path` - confirm against the consuming script.
[en_corpus]
input = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/M42-Export/en_tickets.csv

path = /home/jannis.grundmann/PycharmProjects/topicModelingTickets/corpi/
raw = en_raw_ticket
pre = en_pre_ticket

# Ticket export layout: the name of the content column and a comma-separated list of names.
# NOTE(review): presumably `metaliste` lists the CSV columns kept as metadata - confirm.
[tickets]
# NOTE(review): the key is spelled "collumn"; it is kept as-is because the reader
# looks the option up by this exact name. Rename only together with the consuming code.
content_collumn_name = Description
# Comma-separated, no spaces between entries.
metaliste = TicketNumber,Subject,CreatedDate,categoryName,Impact,Urgency,BenutzerID,VerantwortlicherID,EigentuemerID,Solution

# Preprocessing options. Both values are comma-separated lists without spaces.
[preprocessing]
# NOTE(review): these look like named-entity labels to keep - confirm against the NER model used.
ents2keep = WORK_OF_ART,ORG,PRODUCT,LOC

# NOTE(review): presumably extra words to filter out in addition to the stop-word lists - confirm.
custom_words = grüßen,fragen,damen,probleme,herren,dank

# Disabled option, left for reference.
#lemmatize = True

# Topic-modeling parameters.
# The section name contains a space; it is kept as-is because the reader looks it up by this exact name.
[topic modeling]
# Written as a tuple literal; all INI values are strings, so the reader must parse it itself.
ngrams = (1,2)

# NOTE(review): min_df/max_df and no_below/no_above look like document-frequency
# filter bounds (two naming conventions) - confirm which pair the reader actually uses.
min_df = 0
max_df = 1.0
no_below = 20
no_above = 0.5

# Selected topic model.
topicModel = lda

# NOTE(review): presumably the number of top words reported per topic - confirm.
top_topic_words = 5

# NOTE(review): presumably the number of top document labels reported per topic - confirm.
top_document_labels_per_topic = 2