topicModelingTickets/main.py

76 lines
1.5 KiB
Python
Raw Normal View History

2017-10-16 14:01:38 +02:00
# -*- coding: utf-8 -*-
2017-10-30 12:56:52 +01:00
import matplotlib
matplotlib.use('Agg')
2017-10-17 10:13:49 +02:00
import time
2017-11-03 11:49:26 +01:00
import init
2017-11-27 12:49:05 +01:00
from datetime import datetime
2017-10-16 14:01:38 +02:00
import corporization
import preprocessing
2017-10-18 17:37:20 +02:00
import topicModeling
2017-10-25 09:46:44 +02:00
import cleaning
2017-10-16 14:01:38 +02:00
2017-11-03 11:49:26 +01:00
from miscellaneous import *
2017-10-16 14:01:38 +02:00
2017-11-06 12:54:59 +01:00
# ssh madonna "nohup /usr/bin/python3 -u /home/jannis.grundmann/PycharmProjects/topicModelingTickets/main.py &> /home/jannis.grundmann/PycharmProjects/topicModelingTickets/log/printout_main.log &"
2017-10-17 10:13:49 +02:00
# Reference point for the total-runtime report at the end of the script.
# A monotonic clock is used (instead of wall-clock time.time()) so that system
# clock adjustments during a long run cannot distort the reported duration.
start = time.monotonic()

# idea: http://bigartm.org/
# idea: http://wiki.languagetool.org/tips-and-tricks
# idea: https://en.wikipedia.org/wiki/Noisy_text_analytics
# idea: https://gate.ac.uk/family/

# idea: drop frequent n-grams (e.g. "damen und herren")
# idea: merge LLDA topics
# idea: train LDA so that the term <-> topic association does not become too
#       weak, while still keeping as many topics as possible
# question: which employees handled which topics?
# idea: replace topics with employee numbers
# idea: check each word against a semantic network first; ignore it if it is
#       too far away
# TODO: test the models

logprint("main.py started at {}".format(datetime.now()))

# Pipeline stages that are currently disabled (they were previously parked in
# a bare string literal, which has no runtime effect). Uncomment to run them
# again before topic modeling.
# NOTE(review): presumably topic modeling relies on artifacts these stages
# produced in an earlier run -- confirm before running on a fresh checkout.
# init.main()
# logprint("")
# corporization.main()
# logprint("")
# cleaning.main()
# logprint("")
# preprocessing.main()
# logprint("")

# Topic-modeling runs: one call per algorithm, each followed by an empty log
# line as a separator. Only LDA is enabled; the separators of the disabled
# algorithms are still emitted, exactly as before.
# topicModeling.main(algorithm="lsa")
logprint("")

# topicModeling.main(algorithm="nmf")
logprint("")

# topicModeling.main(algorithm="llda")
logprint("")

topicModeling.main(algorithm="lda")
logprint("")

end = time.monotonic()
logprint("main.py finished at {}".format(datetime.now()))
# The difference of the two monotonic readings is in seconds; report minutes.
logprint("Total Time Elapsed: {0} min".format((end - start) / 60))
2017-10-18 17:37:20 +02:00