I have the following code:
Python
x
74
74
1
import re
2
3
def get_filename():
    """Prompt the user for a file name and return exactly what was typed."""
    return input("Please enter filename: ")
7
8
def get_words_from_file(filename):
    """Return the lowercase words found between the START and END markers.

    Lines before the first line beginning with "*** START OF" are skipped,
    and that marker line itself is not scanned. Scanning stops at the first
    line beginning with "*** END"; if there is no such line, it continues to
    the end of the file.

    Args:
        filename: Path of a UTF-8 text file to read.

    Returns:
        A list of words in file order. A word is a run of letters a-z,
        optionally with one internal hyphen or apostrophe, or one trailing
        apostrophe.
    """
    words = []
    reading = False
    # The context manager closes the file even on the early return below.
    with open(filename, 'r', encoding='utf-8') as infile:
        for line in infile.read().splitlines():
            if not reading and line.startswith("*** START OF"):
                reading = True
            elif line.startswith("*** END"):
                # Also covers "*** END OF SYNTHETIC TEST CASE ***".
                return words
            elif reading:
                words.extend(
                    re.findall("[a-z]+[-'][a-z]+|[a-z]+[']?|[a-z]+", line.lower())
                )
    return words
23
24
def calculate(words):
    """Compute summary statistics for a list of words.

    Args:
        words: List of word strings.

    Returns:
        A tuple (number_of_words, average, max_word_length, max_frequency):
        the word count, the mean word length, the length of the longest
        word, and the count of the most frequent word. All four are zero
        when words is empty.
    """
    number_of_words = len(words)
    if number_of_words == 0:
        # Avoid dividing by zero and calling max() on an empty sequence.
        return (0, 0.0, 0, 0)

    lengths = [len(word) for word in words]
    average = sum(lengths) / number_of_words
    max_word_length = max(lengths)

    frequency = {}
    for word in words:
        frequency[word] = frequency.get(word, 0) + 1
    max_frequency = max(frequency.values())

    return (number_of_words, average, max_word_length, max_frequency)
40
41
def get_frequency(words):
    """Count how often each word and each word length occurs.

    Args:
        words: List of word strings.

    Returns:
        A tuple (frequency_1, len_count). frequency_1 maps each word to the
        number of times it appears; len_count maps each word length to the
        number of words of that length. Both are empty dicts when words is
        empty.
    """
    frequency_1 = {}
    len_count = {}
    for word in words:
        frequency_1[word] = frequency_1.get(word, 0) + 1
        # No per-length list is needed: just bump the counter for this length.
        word_len = len(word)
        len_count[word_len] = len_count.get(word_len, 0) + 1
    return (frequency_1, len_count)
49
50
def print_results(stats_tuple, lengthy):
    """Print the word summary followed by a length/frequency table.

    Args:
        stats_tuple: (number_of_words, average, max_word_length,
            max_frequency).
        lengthy: (frequency_1, len_count). Only len_count is used here, as
            a mapping from word length to the number of words of that
            length.
    """
    _, len_count = lengthy
    number_of_words, average, max_word_length, max_frequency = stats_tuple
    print("")
    print("Word summary (all words):")
    print(" Number of words = {0}".format(number_of_words))
    print(" Average word length = {:.2f}".format(average))
    print(" Maximum word length = {0}".format(max_word_length))
    print(" Maximum frequency = {0}".format(max_frequency))
    print("")
    print(" Len Freq")
    # An empty table has no maximum length, so print no rows for it.
    longest = max(len_count) if len_count else 0
    for word_len in range(1, longest + 1):
        # Lengths with no words are still listed, with a frequency of 0.
        print(f'{word_len}\t{len_count.get(word_len, 0)}')
64
65
66
def main():
    """Ask for a file, collect its words, and print the summary and table."""
    words = get_words_from_file(get_filename())
    print_results(calculate(words), get_frequency(words))


main()
74
Without importing anything else, how would I make a table that prints each word length followed by its frequency?
For example, for a file containing the text “a blah ba ba”, it would print:
JavaScript
Len Freq
1 1
2 2
3 0
4 1
What confuses me is how to count the words of each length. Should I build a new list of all the words with the same length and then take the length of that list, or is there a better way to do it?
Advertisement
Answer
Python
1
10
10
1
# Map each word length to the number of whitespace-separated words of that
# length found in the file.
len_count = {}
with open(filename, "r") as file:
    for line in file:
        for word in line.split():
            word_len = len(word)
            len_count[word_len] = len_count.get(word_len, 0) + 1
10
Then you can print the two columns:
Python
1
4
1
# Header and rows are tab-separated; lengths with no words print 0.
print("Len\tFreq")
for word_len in range(1, max(len_count) + 1):
    print(f'{word_len}\t{len_count.get(word_len, 0)}')
4
EDIT: To handle empty files:
Python
1
4
1
# max() raises ValueError on an empty dict, so fall back to 0 (no rows).
max_len_count = max(len_count) if len_count else 0
for word_len in range(1, max_len_count + 1):
    print(f'{word_len}\t{len_count.get(word_len, 0)}')
4