Basically, I have no idea why I’m getting this error.
To provide more than a screenshot, here is a similar, more recent error message in text form. Note that the message itself already mentions the fix given in the answer below:
Python
x
86
86
1
Preprocessing raw texts
2
3
---------------------------------------------------------------------------
4
5
LookupError Traceback (most recent call last)
6
7
<ipython-input-38-263240bbee7e> in <module>()
8
----> 1 main()
9
10
7 frames
11
12
<ipython-input-32-62fa346501e8> in main()
13
32 data = data.fillna('') # only the comments has NaN's
14
33 rws = data.abstract
15
---> 34 sentences, token_lists, idx_in = preprocess(rws, samp_size=samp_size)
16
35 # Define the topic model object
17
36 #tm = Topic_Model(k = 10), method = TFIDF)
18
19
<ipython-input-31-f75213289788> in preprocess(docs, samp_size)
20
25 for i, idx in enumerate(samp):
21
26 sentence = preprocess_sent(docs[idx])
22
---> 27 token_list = preprocess_word(sentence)
23
28 if token_list:
24
29 idx_in.append(idx)
25
26
<ipython-input-29-eddacbfa6443> in preprocess_word(s)
27
179 if not s:
28
180 return None
29
--> 181 w_list = word_tokenize(s)
30
182 w_list = f_punct(w_list)
31
183 w_list = f_noun(w_list)
32
33
/usr/local/lib/python3.7/dist-packages/nltk/tokenize/__init__.py in word_tokenize(text, language, preserve_line)
34
126 :type preserver_line: bool
35
127 """
36
--> 128 sentences = [text] if preserve_line else sent_tokenize(text, language)
37
129 return [token for sent in sentences
38
130 for token in _treebank_word_tokenizer.tokenize(sent)]
39
40
/usr/local/lib/python3.7/dist-packages/nltk/tokenize/__init__.py in sent_tokenize(text, language)
41
92 :param language: the model name in the Punkt corpus
42
93 """
43
---> 94 tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language))
44
95 return tokenizer.tokenize(text)
45
96
46
47
/usr/local/lib/python3.7/dist-packages/nltk/data.py in load(resource_url, format, cache, verbose, logic_parser, fstruct_reader, encoding)
48
832
49
833 # Load the resource.
50
--> 834 opened_resource = _open(resource_url)
51
835
52
836 if format == 'raw':
53
54
/usr/local/lib/python3.7/dist-packages/nltk/data.py in _open(resource_url)
55
950
56
951 if protocol is None or protocol.lower() == 'nltk':
57
--> 952 return find(path_, path + ['']).open()
58
953 elif protocol.lower() == 'file':
59
954 # urllib might not use mode='rb', so handle this one ourselves:
60
61
/usr/local/lib/python3.7/dist-packages/nltk/data.py in find(resource_name, paths)
62
671 sep = '*' * 70
63
672 resource_not_found = '\n%s\n%s\n%s\n' % (sep, msg, sep)
64
--> 673 raise LookupError(resource_not_found)
65
674
66
675
67
68
LookupError:
69
**********************************************************************
70
Resource punkt not found.
71
Please use the NLTK Downloader to obtain the resource:
72
73
>>> import nltk
74
>>> nltk.download('punkt')
75
76
Searched in:
77
- '/root/nltk_data'
78
- '/usr/share/nltk_data'
79
- '/usr/local/share/nltk_data'
80
- '/usr/lib/nltk_data'
81
- '/usr/local/lib/nltk_data'
82
- '/usr/nltk_data'
83
- '/usr/lib/nltk_data'
84
- ''
85
**********************************************************************
86
Advertisement
Answer
Perform the following:
Python
1
3
1
>>> import nltk
2
>>> nltk.download()
3
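Then, when the downloader window pops up, select punkt
under the Identifier
column, which is located in the Models
tab. If no window can be opened (for example, in a hosted notebook or on a remote machine), run nltk.download('punkt') directly instead, as the error message suggests.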