Stopword NLTK Python Indonesia

  Umum

Cara otomatis (memakai daftar stopword bawaan NLTK)

# Split the input sentence into word tokens using NLTK's tokenizer.
tokens = nltk.tokenize.word_tokenize(kalimat)

# NLTK's Indonesian stopword list, stored as a set for membership tests.
listStopword = set(stopwords.words('indonesian'))

# Keep, in their original order, only the tokens that are not stopwords.
removed = [token for token in tokens if token not in listStopword]

# Debug aid: uncomment to inspect the intermediate result.
# print(removed)

Cara manual (daftar kata dari file txt)

# Second pass: drop extra words listed in a user-maintained text file.
# The file is read as whitespace-separated words. `with` guarantees the
# handle is closed even if reading fails, and the explicit encoding keeps
# the result independent of the platform's locale default.
with open("/home/mfahri/Documents/GitHub/OpenData/buangkata.txt", encoding="utf-8") as katahapus:
    kt = katahapus.read()
kataenter = kt.split()

# Debug aid: uncomment to inspect the loaded word list.
# print(kataenter)

# Build the lookup set once so each membership test is O(1) instead of
# scanning the whole list for every token; `kataenter` itself stays a list.
kata_buang = set(kataenter)

# Filter the output of the previous stopword pass (`removed`), preserving order.
removed2 = [t for t in removed if t not in kata_buang]

print(removed2)

LEAVE A COMMENT