topicModelingTickets/main.py

# -*- coding: utf-8 -*-
import matplotlib
matplotlib.use('Agg')
import time
import init

import corporization
import preprocessing
import topicModeling
import cleaning

from miscellaneous import *

# ssh madonna "nohup /usr/bin/python3 -u /home/jannis.grundmann/PycharmProjects/topicModelingTickets/main.py &> /home/jannis.grundmann/PycharmProjects/topicModelingTickets/log/printout_main.log &"
# Wall-clock reference point; the total run time is logged at the very end.
start = time.time()

# Links noted as ideas:
# idea http://bigartm.org/
# idea http://wiki.languagetool.org/tips-and-tricks
# idea https://en.wikipedia.org/wiki/Noisy_text_analytics
# idea https://gate.ac.uk/family/


# Open points:
# todo consolidate (merge/summarize) the llda topics
# idea train lda so that the term <-> topic assignment does not become too weak, while keeping as many topics as possible
# question how many tickets per topic?
# todo test the models
# question which employees handled which topics? idea replace topics with employee numbers
# question if 155 different kb entries were used, why are there only 139 topics?
# idea check each word against a semantic network beforehand: if it is too far away, ignore it

# todo FRIDAY: draw (plots?), refactoring


# ---------------------------------------------------------------------------
# Stages ahead of topic modeling. They sit inside a bare string literal, so
# none of them is executed; move a call out of the string to re-enable it.
# ---------------------------------------------------------------------------
"""

init.main()
logprint("")

corporization.main()
logprint("")

cleaning.main()
logprint("")

preprocessing.main()   # ~5h
logprint("")


"""

# ---------------------------------------------------------------------------
# Topic modeling. Only the "llda" run is active; the other algorithms are
# commented out. The logprint("") after each call is NOT commented out and
# therefore still runs, even for the disabled algorithms.
# ---------------------------------------------------------------------------

# topicModeling.main(algorithm="lsa")
logprint("")

# topicModeling.main(algorithm="nmf")
logprint("")

# topicModeling.main(algorithm="lda")
logprint("")

topicModeling.main(algorithm="llda")
logprint("")


# Report the total run time in minutes.
end = time.time()
elapsed_minutes = (end - start) / 60
logprint("Total Time Elapsed: {0} min".format(elapsed_minutes))


#800*400
aufgeräumt 2017-10-16 14:01:38 +02:00			`# -- coding: utf-8 --`
termiteplot für lda 2017-10-30 12:56:52 +01:00			`import matplotlib`
			`matplotlib.use('Agg')`
weiter aufgeräumt 2017-10-17 10:13:49 +02:00			`import time`
commit vor refactoring 2017-11-03 11:49:26 +01:00			`import init`

aufgeräumt 2017-10-16 14:01:38 +02:00			`import corporization`
			`import preprocessing`
preprocessing abgeschlossen 2017-10-18 17:37:20 +02:00			`import topicModeling`
topicmodeling jgibbsllda lauffähig 2017-10-25 09:46:44 +02:00			`import cleaning`
aufgeräumt 2017-10-16 14:01:38 +02:00
commit vor refactoring 2017-11-03 11:49:26 +01:00			`from miscellaneous import *`
aufgeräumt 2017-10-16 14:01:38 +02:00
lauffähige version 2017-11-06 12:54:59 +01:00			`# ssh madonna "nohup /usr/bin/python3 -u /home/jannis.grundmann/PycharmProjects/topicModelingTickets/main.py &> /home/jannis.grundmann/PycharmProjects/topicModelingTickets/log/printout_main.log &"`
weiter aufgeräumt 2017-10-17 10:13:49 +02:00			`start = time.time()`
aufgeräumt 2017-10-16 14:01:38 +02:00
refactored 2017-11-17 11:46:57 +01:00			`# idee http://bigartm.org/`
			`# idee http://wiki.languagetool.org/tips-and-tricks`
			`# idee https://en.wikipedia.org/wiki/Noisy_text_analytics`
			`# idee https://gate.ac.uk/family/`
commit vor refactoring 2017-11-03 11:49:26 +01:00

refactored 2017-11-17 11:46:57 +01:00
			`# todo llda topics zusammenfassen`
			`# idee lda so trainieren, dass zuordnung term <-> topic nicht zu schwach wird, aber möglichst viele topics`
			`# frage wieviele tickets pro topic?`
			`# todo modelle testen`
			`# frage welche mitarbeiter bearbeiteten welche Topics? idee topics mit mitarbeiternummern erstzen`
			`# frage wenn 155 versch. kb-einträge benutzt wurden, wieso gibt es nur 139 topics?`
			`# idee word vorher mit semantischen netz abgleichen: wenn zu weit entfernt, dann ignore`

			`#todo FREITAG zeichnen, refactoring`



			`"""`

lauffähige version 2017-11-06 12:54:59 +01:00			`init.main()`
topicmodeling jgibbsllda lauffähig 2017-10-25 09:46:44 +02:00			`logprint("")`
aufgeräumt 2017-10-16 14:01:38 +02:00
			`corporization.main()`
topicmodeling jgibbsllda lauffähig 2017-10-25 09:46:44 +02:00			`logprint("")`

			`cleaning.main()`
			`logprint("")`
aufgeräumt 2017-10-16 14:01:38 +02:00
termiteplot für lda 2017-10-30 12:56:52 +01:00			`preprocessing.main() # ~5h`
topicmodeling jgibbsllda lauffähig 2017-10-25 09:46:44 +02:00			`logprint("")`
aufgeräumt 2017-10-16 14:01:38 +02:00
commit vor refactoring 2017-11-03 11:49:26 +01:00
			`"""`
refactored 2017-11-17 11:46:57 +01:00

			`#topicModeling.main(algorithm="lsa")`
topicmodeling jgibbsllda lauffähig 2017-10-25 09:46:44 +02:00			`logprint("")`
termiteplot für lda 2017-10-30 12:56:52 +01:00

refactored 2017-11-17 11:46:57 +01:00			`#topicModeling.main(algorithm="nmf")`
termiteplot für lda 2017-10-30 12:56:52 +01:00			`logprint("")`

commit vor refactoring 2017-11-03 11:49:26 +01:00
refactored 2017-11-17 11:46:57 +01:00			`#topicModeling.main(algorithm="lda")`
commit vor refactoring 2017-11-03 11:49:26 +01:00			`logprint("")`


refactored 2017-11-17 11:46:57 +01:00			`topicModeling.main(algorithm="llda")`
commit vor refactoring 2017-11-03 11:49:26 +01:00			`logprint("")`


weiter aufgeräumt 2017-10-17 10:13:49 +02:00			`end = time.time()`
topicmodeling jgibbsllda lauffähig 2017-10-25 09:46:44 +02:00			`logprint("Total Time Elapsed: {0} min".format((end - start) / 60))`
preprocessing abgeschlossen 2017-10-18 17:37:20 +02:00

refactored 2017-11-17 11:46:57 +01:00			`#800*400`