我有一个列表,其中既有嵌套的子列表,也有未嵌套的字符串和整数;我想从列表的每一项中提取大于 10 的最小数字。我尝试了几种不同的方法,但它们要么什么都不返回,要么抛出各种错误(例如 "expected string or buffer" 和 "'>' not supported between instances of 'int' and 'list'")。结果需要保持原有顺序,因为之后要把它们放入 pandas 数据框中。
# Sample input: nested lists mixed with a bare int and bare strings.
starting_list = [['4dfg', '12kfmgfg', 'dfgdf133'], [8, '16dgdfg'], 11, '', 'fdsf']
# Expected output, one entry per input item, in the same order. NaN marks
# items that contain no number above 10. ``float("nan")`` is used because a
# bare ``NaN`` is not a defined name in Python.
desired_result = [12, 16, 11, float("nan"), float("nan")]
以下是我尝试过的两个不同的函数。由于结果最终会放入数据框,基于 pandas 的解答也可以。
def min_int(data):
    """Replace each entry of ``data`` with its smallest number above 10.

    An entry may be an int, a string, or a list of ints and strings. A
    string is converted by deleting every non-digit character and reading
    the remainder as one integer (``'12kfmgfg'`` -> ``12``). An entry that
    yields no number above 10 becomes NaN, so the positions of the entries
    are preserved.

    Args:
        data: The list to process. It is modified in place.

    Returns:
        None; the result is written back into ``data``.
    """
    for index, entry in enumerate(data):
        # A bare value is handled as a one-element group so that nested and
        # un-nested entries share the same code path (and ``len()`` is never
        # called on an int).
        group = entry if isinstance(entry, list) else [entry]
        candidates = []
        for value in group:
            if isinstance(value, str):
                # Apply the regex to the single string, not the whole list.
                digits = re.sub(r'\D', "", value)
                value = int(digits) if digits else None
            if isinstance(value, int) and value > 10:
                candidates.append(value)
        # Assign by index instead of removing items, so the list is never
        # resized while it is being walked.
        data[index] = min(candidates) if candidates else float("nan")
def remove_text(data):
    """Convert digit-bearing strings inside nested lists to ints, in place.

    For every entry of ``data`` that is a list, each string element has its
    non-digit characters removed and, if any digits remain, is replaced by
    the resulting integer. Strings without digits, non-string elements and
    top-level entries that are not lists are left unchanged.

    Args:
        data: The list to process. Its nested lists are modified in place.

    Returns:
        None.
    """
    for entry in data:
        if not isinstance(entry, list):
            continue  # only nested lists support item assignment
        for position, value in enumerate(entry):
            if not isinstance(value, str):
                continue
            # Strip the text from this element itself; the earlier version
            # referenced an undefined name here and silently did nothing.
            digits = re.sub(r'\D', "", value)
            if digits:
                entry[position] = int(digits)
谢谢!
最佳答案
使用:
# One row per item of the input list; nested lists are still single cells here.
s = pd.Series(starting_list)
a = (pd.to_numeric(s.explode()  # one row per element; list members keep their parent's index
                    .astype(str)  # convert all values to strings
                    .str.replace(r'\D', '', regex=True),  # drop non-digits (regex must be explicit in pandas >= 2.0)
                   errors='coerce')  # strings left empty become NaN
       .loc[lambda x: x > 10]  # keep only values above 10
       .groupby(level=0).min()  # minimum per original item (Series.min(level=...) was removed in pandas 2.0)
       .reindex(s.index)  # restore items that had no value above 10, as NaN
       .tolist())  # convert to list
# Convert non-NaN values back to integers (NaN is the only value not equal to itself).
a = [int(x) if x == x else x for x in a]
print(a)
[12, 16, 11, nan, nan]
你的函数可以简化为:
def try_to_int(x):
    """Return the integer encoded in ``x``, or NaN when there is none.

    Ints are returned unchanged. For a string, every non-digit character is
    removed and the remaining digits are read as one integer, so
    ``'12kfmgfg'`` gives ``12``.

    Args:
        x: The value to convert.

    Returns:
        An int, or ``np.nan`` if ``x`` is neither an int nor a string
        containing at least one digit.
    """
    if isinstance(x, int):
        return x
    try:
        return int(re.sub(r'\D', "", x))
    except (TypeError, ValueError):
        # TypeError: x is not a string; ValueError: no digits were left.
        return np.nan
def min_int(x):
    """Return the smallest number above 10 found in ``x``, or NaN.

    ``x`` may be an int, a string, or a list of ints and strings. Strings
    are converted with ``try_to_int``. The "above 10" filter is applied to
    bare values as well as to list members, and NaN is returned when nothing
    qualifies (including for an empty list).

    Args:
        x: A single item of the input list.

    Returns:
        The smallest qualifying int, or ``np.nan``.
    """
    # A bare value is handled as a one-element list so every shape of input
    # goes through the same filter.
    values = x if isinstance(x, list) else [x]
    candidates = []
    for value in values:
        number = value if isinstance(value, int) else try_to_int(value)
        # ``number == number`` is False only for NaN, which must be skipped
        # before the comparison with 10.
        if number == number and number > 10:
            candidates.append(number)
    # Guard the empty case: min() of an empty sequence raises ValueError.
    return min(candidates) if candidates else np.nan
# Apply min_int to every item, keeping the input order, and show the result.
print(list(map(min_int, starting_list)))
[12, 16, 11, nan, nan]
https://stackoverflow.com/questions/64332997/