95 lines
1.6 KiB
INI
95 lines
1.6 KiB
INI
# Thesaurus resource.
# input: source file name; pickle_file: pickled dictionary file
# (presumably generated from `input` -- confirm in the loading code).
[thesaurus]
input=deWordNet.xml
pickle_file=thesaurus_dict.pkl

# Spell-checking resource.
# input: sentence corpus file; pickle_file: pickled word dictionary
# (presumably derived from `input` -- confirm in the loading code).
[spellchecking]
input=deu_news_2015_1M-sentences.txt
pickle_file=words_dict.pkl

# Lemmatization resource.
# input: lemma list file; pickle_file: pickled lemma dictionary
# (presumably derived from `input` -- confirm in the loading code).
[lemmatization]
input=lemmas.txt
pickle_file=lemma_dict.pkl

# Noun list resource.
# input1, input2: two source files; pickle_file: pickled noun list
# (presumably the merged result of both inputs -- confirm in the loading code).
[nouns]
input1=nomen.txt
input2=nomen2.txt
pickle_file=nouns_list.pkl

# First-name list resource.
# input: source file; pickle_file: pickled first-name list
# (presumably derived from `input` -- confirm in the loading code).
[firstnames]
input=firstnames.txt
pickle_file=firstnames_list.pkl

# German stop-word resource.
# input1..input3: three source files; pickle_file: pickled stop-word list
# (presumably the combined result of all inputs -- confirm in the loading code).
[de_stopwords]
input1=de_stopwords_1.txt
input2=de_stopwords_2.txt
input3=de_stopwords_3.txt
pickle_file=de_stopwords_list.pkl

# English stop-word resource.
# Unlike [de_stopwords], no input file is configured here; only the pickled
# list is named. NOTE(review): confirm where the English list comes from.
[en_stopwords]
pickle_file=en_stopwords_list.pkl

# Logging settings.
# level: log level name; filename: log file to write
# (presumably handed to the application's logging setup -- confirm).
[logging]
level=INFO
filename=topicModelTickets.log

# German ticket corpus.
# input: CSV export to read; path: corpus directory.
[de_corpus]
# Disabled alternative input (presumably a smaller sample for quick runs);
# uncomment it and comment out the line below to switch.
#input=M42-Export/Tickets_small.csv
input=M42-Export/de_tickets.csv
path=corpi/

# English ticket corpus.
# input: CSV export to read; path: corpus directory (same value as [de_corpus]).
[en_corpus]
input=M42-Export/en_tickets.csv
path=corpi/

# Ticket column layout.
[tickets]
# Presumably the name of the column holding the ticket text -- confirm.
# NOTE: the key is spelled "collumn" (sic). Do not correct it here without
# also updating the code that looks it up by this exact name.
content_collumn_name=Description
# Comma-separated column names (no spaces around the commas); presumably kept
# as per-ticket metadata -- confirm in the consuming code.
metaliste=TicketNumber,Subject,CreatedDate,categoryName,Impact,Urgency,BenutzerID,VerantwortlicherID,EigentuemerID,Solution

# Text preprocessing settings.
[preprocessing]
# Disabled setting: comma-separated entity labels (presumably named-entity
# types to keep). While commented out, the key is absent from this section.
#ents2keep=WORK_OF_ART,ORG,PRODUCT,LOC
# Comma-separated custom words (presumably extra words to filter out of the
# ticket text -- confirm). The list contains repeated entries (gerne, voraus,
# gruss, moeglichkeit); they are left untouched here.
custom_words=geehrt,dr,not,frage,betreff,gerne,dame,herr,frau,hilfe,moeglichkeit,beste,freuen,voraus,problem,lauten,bedanken,voraus,hallo,gerne,freundlich,fragen,fehler,bitten,ehre,lieb,liebe,gruesse,helfen,versuchen,unbestimmt,woche,tadelos,klappen,mittlerweile,bekommen,erreichbar,gruss,auffahren,vorgang,hinweis,name,gruss,id,erfolg,folge,team,absender,versenden,vorname,strasse,prozess,portal,moeglichkeit,fremd,wende,rueckfrage,stehen,verfuegung,funktionieren,pruefen,hoffen,ok

# Topic-modeling parameters. All values are plain strings in INI; the
# consuming code is responsible for converting them to numbers or tuples.
[topicmodeling]
# Written as a tuple literal; presumably the (min, max) n-gram sizes -- confirm
# how the consumer parses this string.
ngrams=(1,2)
# Presumably lower/upper document-frequency bounds for terms -- confirm.
min_df=0
max_df=1.0
# Presumably vocabulary filter thresholds (minimum document count / maximum
# document fraction) -- confirm against the consuming code.
no_below=20
no_above=0.5
# Name of the topic model to use.
topicModel=lda
# Presumably how many top words to report per topic -- confirm.
top_topic_words=5
# Presumably how many document labels to report per topic -- confirm.
top_document_labels_per_topic=2