I have a complicated string that includes a kilometer range and a fee for users that fall into that range. Ideally, I would like to transform the string into something that I could use to easily assign fees to users. I've taken the steps below to extract a list from the string, but the kilometer range is still not usable for classifying users:
import string
import unicodedata
import re
str = '{"row1":{"from":"0","to":"500","fee":"23100 "},"row2":{"from":"500","to":"1000","fee":"24100 "},"row3":{"from":"1000","to":"1500","fee":"25200 "},"row4":{"from":"1500","to":"2000","fee":"26200 "},"row5":{"from":"2000","to":"2500","fee":"27200 "},"row6":{"from":"2500","to":"3000","fee":"28300 "},"row7":{"from":"3000","to":"3500","fee":"29300 "},"row8":{"from":"3500","to":"4000","fee":"30400 "},"row9":{"from":"4000","to":"4500","fee":"31400 "},"row10":{"from":"4500","to":"5000","fee":"32400 "},"row11":{"from":"5000","to":"5500","fee":"33500 "},"row12":{"from":"5500","to":"6000","fee":"34600 "},"row13":{"from":"6000","to":"6500","fee":"35500 "},"row14":{"from":"6500","to":"7000","fee":"36600 "},"row15":{"from":"7000","to":"7500","fee":"37700 "},"row16":{"from":"7500","to":"8000","fee":"38600 "},"row17":{"from":"8000","to":"8500","fee":"39700 "},"row18":{"from":"8500","to":"9000","fee":"40300 "},"row19":{"from":"9000","to":"9500","fee":"41400 "},"row20":{"from":"9500","to":"10000","fee":"42700 "},"row21":{"from":"10000","to":"10500","fee":"43500 "},"row22":{"from":"10500","to":"11000","fee":"44500 "},"row23":{"from":"11000","to":"11500","fee":"45600 "},"row24":{"from":"11500","to":"12000","fee":"46600 "},"row25":{"from":"12000","to":"12500","fee":"47700 "},"row26":{"from":"12500","to":"13000","fee":"48700 "},"row27":{"from":"13000","to":"13500","fee":"49700 "},"row28":{"from":"13500","to":"14000","fee":"50800 "},"row29":{"from":"14000","to":"14500","fee":"51900 "},"row30":{"from":"14500","to":"15000","fee":"52800 "},"row31":{"from":"15000","to":"15500","fee":"52800 "},"row32":{"from":"15500","to":"16000","fee":"52800 "},"row33":{"from":"16000","to":"70000","fee":"52800 "'
str1 = unicodedata.normalize("NFKD", str)
str2 = str1.translate({ord(c): None for c in string.whitespace})
s1 = str2.replace('{', "")
s2 = s1.replace('"', ' ')
s3 = s2.split(sep='},')
for i in range(len(s3)):
ftm = re.search(r'from : (\d+) , to : (\d+)', s3[i])
fm = re.search(r'fee : (\d+)', s3[i])
if ftm and fm:
vfrom = int(ftm.group(1))
vto = int(ftm.group(2))
vfee= int(fm.group(1))
print(f"({vfrom}, {vto}), {vfee}")
I would like it to be instead transformed into something that extracts the lower and upper bounds of the range separately next to the fee, where I could later export into Excel and check which range the user falls between.
}}
at the end, this appears to be valid JSON. So it would probably make more sense, if you tried to get this data "complete", if you can, and then used a JSON parser ...