topicModelingTickets/test.py

# -*- coding: utf-8 -*-
import re

import spacy
import textacy

# Path of the thesaurus file, read below as a ";"-delimited CSV.
DATAPATH_thesaurus = "openthesaurus.csv"


# Read the .csv.
# NOTE(review): per the original author's note the reader yields the rows
# lazily as a generator of lists: [[a, b, c, ..], [a, b, c, ..], ...] -- confirm
# against the installed textacy version.
thesaurus = textacy.fileio.read_csv(DATAPATH_thesaurus, delimiter=";")

# Scratch check: print the sample word only when it does NOT start with a
# parenthesised group such as "(anmachen)".
wort = "(anmachen)"
if re.match(r'\([^)]+\)', wort) is None:
    print(wort)

# Alternative checks kept for reference:
#if "Pass" in wort:  # "Pass" only has to occur somewhere in the string
#    print(wort.lower())


#if "Passwort" in wort.split(" "):   # "Passwort" has to equal one whole space-separated word
#    print(wort.lower())


def getFirstSynonym(word, thesaurus_gen):
    """Return the canonical synonym for *word* from a thesaurus.

    The lookup is case-insensitive: *word* and every thesaurus entry are
    lower-cased, and the returned value is lower-case as well.

    The first synonym block that contains *word* as a whole,
    space-separated token decides the result, in this order of preference:

    1. the first usable token of an entry marked with "hauptform"
       (e.g. ``"Passwort (Hauptform)"`` yields ``"passwort"``),
    2. the first entry of the block that is a single usable word,
    3. the lower-cased input word itself.

    A token is "usable" when it is non-empty and does not start with a
    parenthesised group such as ``"(ugs.)"``.

    Args:
        word: The word to look up.
        thesaurus_gen: Iterable of synonym blocks, each block being an
            iterable of synonym strings. If a one-shot generator is passed
            it is (partially) consumed by this call.

    Returns:
        The chosen synonym as a lower-case string; the lower-cased input
        word if no block contains it.
    """
    word = word.lower()
    # TODO word cleaning https://stackoverflow.com/questions/3939361/remove-specific-characters-from-a-string-in-python

    # An empty word would otherwise "match" empty CSV cells.
    if not word:
        return word

    def is_usable(token):
        # Reject empty tokens and annotations such as "(hauptform)".
        return bool(token) and re.match(r'\([^)]+\)', token) is None

    for syn_block in thesaurus_gen:
        # Split every synonym into tokens so that multi-word entries
        # (phrases, annotated entries) can be told apart from single words.
        entries = [syn.lower().split(" ") for syn in syn_block]

        # Only a block that contains the word as a whole token is relevant.
        if not any(word in tokens for tokens in entries):
            continue

        # Prefer the entry flagged as main form anywhere in the block.
        # The marker is matched as a substring because the token carries
        # its parentheses: "(hauptform)".
        for tokens in entries:
            if any("hauptform" in token for token in tokens):
                for token in tokens:
                    if is_usable(token):
                        return token

        # No main form: fall back to the first single-word synonym.
        for tokens in entries:
            if len(tokens) == 1 and is_usable(tokens[0]):
                return tokens[0]

        # Block found, but it offers nothing usable.
        return word

    # Word is not part of the thesaurus at all.
    return word


# Demo call: look up the sample word in the loaded thesaurus and print the result.
print(getFirstSynonym(wort,thesaurus))
xml2Corpus openthesaurus eingebunden 2017-08-29 15:01:17 +02:00			`# -- coding: utf-8 --`
			`import re`

			`import spacy`
			`import textacy`

			`DATAPATH_thesaurus = "openthesaurus.csv"`


			`# read .csv`
			`thesaurus = textacy.fileio.read_csv(DATAPATH_thesaurus, delimiter=";") # generator [[a,b,c,..],[a,b,c,..],...]`

			`wort = "(anmachen)"`
			`if not re.match(r'\([^)]+\)', wort):`
			`print(wort)`

			`#if "Pass" in wort: # "Pass" muss irgendwo drin sein`
			`# print(wort.lower())`


			`#if "Passwort" in wort.split(" "): # Pass muss gleich einem Wort sein`
			`# print(wort.lower())`


			`def getFirstSynonym(word, thesaurus_gen):`

			`word = word.lower()`
			`#TODO word cleaning https://stackoverflow.com/questions/3939361/remove-specific-characters-from-a-string-in-python`


			`# durch den Thesaurus iterieren`
			`for syn_block in thesaurus_gen: # syn_block ist eine liste mit Synonymen`

			`# durch den synonymblock iterieren`
			`for syn in syn_block:`
			`syn = syn.lower().split(" ") # aus Synonym eine Liste machen (um evtl. Sätze zu identifizieren)`

			`# falls das wort in dem synonym enthalten ist (also == einem Wort in der liste ist)`
			`if word in syn:`

			`# Hauptform suchen`
			`if "auptform" in syn:`
			`#nicht ausgeben, falls es in Klammern steht`
			`for w in syn:`
			`if not re.match(r'\([^)]+\)',w):`
			`return w`


			`# falls keine hauptform enthalten ist, das erste Synonym zurückgeben, was kein satz ist und nicht in klammern steht`
			`if len(syn) == 1:`
			`w = syn[0]`
			`if not re.match(r'\([^)]+\)', w):`
			`return w`



			`return word #zur Not die eingabe ausgeben`



			`print(getFirstSynonym(wort,thesaurus))`