I want to extract only the numbers that appear directly before “am” or “pm”. If “pm” is not available, then only the numbers before “am” should be extracted.
# Sample input: a one-element list holding a sentence that mixes day numbers
# ("23", "26th") with two clock times ("2 am" and "10am").
para = ["who done this on 23 october to 26th october from 2 am to 10am"]
def time_ext(para):
    """Return the numbers that directly precede an "am"/"pm" marker.

    A number counts when it is followed by lower-case "am" or "pm", either
    glued to it ("10am") or separated by whitespace ("2 am"). Other numbers
    in the text (dates, ordinals such as "26th") are ignored.

    Args:
        para: A list of strings to scan, or a single string.

    Returns:
        A list of ints in order of appearance; empty if nothing matches.
    """
    # Local import: the snippet has no module-level import block to extend.
    import re

    # Accept a bare string as well as a list of strings; iterating a bare
    # string would otherwise walk it character by character.
    lines = [para] if isinstance(para, str) else para

    # Digits, optional whitespace, then "am"/"pm" as a whole word. The
    # trailing \b keeps words like "10amazing" from being taken as a time.
    pattern = re.compile(r"(\d+)\s*[ap]m\b")

    vals = []
    for line in lines:
        vals.extend(int(num) for num in pattern.findall(line))
    return vals
print(time_ext(para))
# Output the asker reports getting: [23, 2]
# Output the asker expects:         [2, 10]
Advertisement
Answer
# Sample input: a one-element list holding the sentence to scan for times.
para = ["who done this on 23 october to 26th october from 2 am to 10am"]
def time_ext(t):
    """Collect the numeric tokens that sit right before "am" or "pm".

    Each string in ``t`` is split on whitespace. A token is kept when it is
    purely numeric and is either suffixed with "am"/"pm" (the suffix is
    stripped, so "10am" yields "10") or immediately followed by a separate
    "am"/"pm" token ("2 am" yields "2").

    Args:
        t: An iterable of strings to scan.

    Returns:
        A list of the matching numbers as strings, in order of appearance.
    """
    meridiems = ("am", "pm")
    hours = []
    for sentence in t:
        tokens = sentence.split()
        for pos, token in enumerate(tokens):
            if token.endswith(meridiems):
                # Glued form such as "10am": drop the two-letter suffix.
                candidate = token[:-2]
            elif pos + 1 < len(tokens) and tokens[pos + 1] in meridiems:
                # Spaced form such as "2 am": the marker is the next token.
                candidate = token
            else:
                continue
            # Discards leftovers like "" (from a bare "am") or "dream"[:-2].
            if candidate.isnumeric():
                hours.append(candidate)
    return hours
# For the sample sentence above this prints ['2', '10'] (matches are kept as strings).
print(time_ext(para))