Ich habe versucht, die Cosinus-Ähnlichkeit zwischen zwei Spalten eines Datenrahmens mit spatial.distance.cosine
zu erzeugen. Ich möchte mit diesen beiden Funktionen eine weitere Spalte erstellen:
def cosine_sim(x):
li = []
for item in x["sent_emb"]:
li.append(spatial.distance.cosine(item,x["quest_emb"][0]))
return li
def predictions(train):
train["cosine_sim"] = train.apply(cosine_sim, axis = 1)
Die zwei Spalten sehen so aus:
sent_emb quest_emb
0 [[0.030376578, 0.044331014, 0.081356354, 0.062... [[0.01491953, 0.021973763, 0.021364095, 0.0393...
1 [[0.030376578, 0.044331014, 0.081356354, 0.062... [[0.04444952, 0.028005758, 0.030357722, 0.0375...
2 [[0.030376578, 0.044331014, 0.081356354, 0.062... [[0.03949683, 0.04509903, 0.018089347, 0.07667...
...
Aber ich habe einen TypeError
, anscheinend sind einige Werte NoneType
und float
. Weißt du, wie ich die Daten dieser Art filtern kann, um sie auf zro oder zwei etwas zu setzen, das mich nicht daran hindert, meine zu verwenden
TypeError: ("unsupported operand type(s) for *: 'NoneType' and 'float'", 'occurred at index 473')
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-23-af28fc11a9d3> in <module>()
----> 1 predicted = predictions(train)
<ipython-input-22-1699cf33d87c> in predictions(train)
1 def predictions(train):
2
----> 3 train["cosine_sim"] = train.apply(cosine_sim, axis = 1)
4 train["diff"] = (train["quest_emb"] - train["sent_emb"])**2
5 train["euclidean_dis"] = train["diff"].apply(lambda x: list(np.sum(x, axis = 1)))
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6012 args=args,
6013 kwds=kwds)
-> 6014 return op.get_result()
6015
6016 def applymap(self, func):
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/apply.py in get_result(self)
140 return self.apply_raw()
141
--> 142 return self.apply_standard()
143
144 def apply_empty_result(self):
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/apply.py in apply_standard(self)
246
247 # compute the result using the series generator
--> 248 self.apply_series_generator()
249
250 # wrap results
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/apply.py in apply_series_generator(self)
275 try:
276 for i, v in enumerate(series_gen):
--> 277 results[i] = self.f(v)
278 keys.append(v.name)
279 except Exception as e:
<ipython-input-20-276aa09bc25e> in cosine_sim(x)
2 li = []
3 for item in x["sent_emb"]:
----> 4 li.append(spatial.distance.cosine(item,x["quest_emb"][0]))
5 return li
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/scipy/spatial/distance.py in cosine(u, v, w)
742 # cosine distance is also referred to as 'uncentered correlation',
743 # or 'reflective correlation'
--> 744 return correlation(u, v, w=w, centered=False)
745
746
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/scipy/spatial/distance.py in correlation(u, v, w, centered)
693 u = u - umu
694 v = v - vmu
--> 695 uv = np.average(u * v, weights=w)
696 uu = np.average(np.square(u), weights=w)
697 vv = np.average(np.square(v), weights=w)
TypeError: ("unsupported operand type(s) for *: 'NoneType' and 'float'", 'occurred at index 473')