I have the following code:
import re

# Matches hyphenated/apostrophised words ("well-known", "don't"), words with a
# trailing apostrophe ("dogs'"), and plain alphabetic words.
_WORD_PATTERN = re.compile("[a-z]+[-'][a-z]+|[a-z]+[']?|[a-z]+")


def get_filename():
    """Prompt the user for a filename and return what they typed."""
    return input("Please enter filename: ")


def get_words_from_file(filename):
    """Return the lower-cased words found between the start and end markers.

    Reading begins after the first line starting with "*** START OF" and
    stops at the first line starting with "*** END". An end marker that
    appears before any start marker stops processing with no words collected.
    """
    # The context manager guarantees the file is closed even if reading fails.
    with open(filename, 'r', encoding='utf-8') as infile:
        lines = infile.read().splitlines()

    words = []
    reading = False
    for line in lines:
        if line.startswith("*** START OF") and not reading:
            reading = True
        elif line.startswith("*** END"):
            break
        elif reading:
            words.extend(_WORD_PATTERN.findall(line.lower()))
    return words


def calculate(words):
    """Return summary statistics for a list of words.

    Returns a tuple ``(number_of_words, average_length, max_word_length,
    max_frequency)``. An empty list yields ``(0, 0.0, 0, 0)`` instead of
    raising on the division and the ``max()`` calls.
    """
    number_of_words = len(words)
    if number_of_words == 0:
        return (0, 0.0, 0, 0)

    average = sum(len(word) for word in words) / number_of_words
    max_word_length = max(len(word) for word in words)

    frequency = {}
    for word in words:
        frequency[word] = frequency.get(word, 0) + 1
    max_frequency = max(frequency.values())

    return (number_of_words, average, max_word_length, max_frequency)


def get_frequency(words):
    """Count how often each word and each word length occurs.

    Returns a tuple ``(frequency_1, len_count)`` where ``frequency_1`` maps
    each word to its number of occurrences and ``len_count`` maps each word
    length to the number of words having that length.
    """
    frequency_1 = {}
    len_count = {}
    for word in words:
        frequency_1[word] = frequency_1.get(word, 0) + 1
        word_len = len(word)
        len_count[word_len] = len_count.get(word_len, 0) + 1
    return (frequency_1, len_count)


def print_results(stats_tuple, lengthy):
    """Print the word summary followed by the length/frequency table.

    ``stats_tuple`` is the tuple returned by ``calculate`` and ``lengthy`` is
    the tuple returned by ``get_frequency``.
    """
    (_frequency_1, len_count) = lengthy
    (number_of_words, average, max_word_length, max_frequency) = stats_tuple
    print("")
    print("Word summary (all words):")
    print(" Number of words = {0}".format(number_of_words))
    print(" Average word length = {:.2f}".format(average))
    print(" Maximum word length = {0}".format(max_word_length))
    print(" Maximum frequency = {0}".format(max_frequency))
    print("")
    print(" Len Freq")
    # default=0 keeps an empty word list from raising; the table is then empty.
    longest = max(len_count, default=0)
    for word_len in range(1, longest + 1):
        # Lengths that never occur are missing from len_count; show them as 0.
        print(f'{word_len}\t{len_count.get(word_len, 0)}')


def main():
    """Ask for a file, gather its words, and print the statistics."""
    filename = get_filename()
    data = get_words_from_file(filename)
    stats = calculate(data)
    lengthy = get_frequency(data)
    print_results(stats, lengthy)


# Only run interactively when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Without importing anything else, how would I print a table that shows each word length followed by the number of words of that length?
For example: In a file with the text of “a blah ba ba” it would print:
Len Freq
1 1
2 2
3 0
4 1
What confuses me is how to count the words of each length: should I build a new list holding all the words of the same length and then take the length of that list, or is there a better way to do it?
Advertisement
Answer
# Tally how many whitespace-separated words of each length the file contains.
len_count = {}
with open(filename, "r") as handle:
    for text_line in handle:
        for length in map(len, text_line.split()):
            # A length seen for the first time starts from 0.
            len_count[length] = len_count.get(length, 0) + 1
Then you can print the two columns:
# Header and rows are tab-separated so the two columns line up.
print("Len\tFreq")
for word_len in range(1, max(len_count) + 1):
    # Lengths with no words are absent from len_count, so report them as 0.
    print(f'{word_len}\t{len_count.get(word_len, 0)}')
EDIT: To handle empty files:
# max() raises ValueError on an empty dict, so fall back to 0 when no words
# were counted; the loop then prints no rows.
max_len_count = max(len_count, default=0)
for word_len in range(1, max_len_count + 1):
    # Lengths with no words are absent from len_count, so report them as 0.
    print(f'{word_len}\t{len_count.get(word_len, 0)}')