这可能有些过头了,而且是基于你的评论
Both the theoretical curves and the data points are arrays of
different length.
我会做以下事情:
- Truncate the data set so that its x values lie within the max and min values of the theoretical set.
- Interpolate the theoretical curve using
scipy.interpolate.interp1d
and the above truncated data x values. The reason for step (1) is to satisfy the constraints of interp1d
.
- Use
numpy.where
to find data y values that are out side the range of acceptable theory values.
- DONT discard these values, as was suggested in comments and other answers. If you want for clarity, point them out by plotting the inliners one color and the outliers an other color.
我想这是一个接近你想要的剧本。希望它能帮助你完成你想要的:
import numpy as np
import scipy.interpolate as interpolate
import matplotlib.pyplot as plt
# make up data
def makeUpData():
Make many more data points (x,y,yerr) than theory (x,y),
with theory yerr corresponding to a constant "sigma" in y,
about x,y value
NX= 150
dataX = (np.random.rand(NX)*1.1)**2
dataY = (1.5*dataX+np.random.rand(NX)**2)*dataX
dataErr = np.random.rand(NX)*dataX*1.3
theoryX = np.arange(0,1,0.1)
theoryY = theoryX*theoryX*1.5
theoryErr = 0.5
return dataX,dataY,dataErr,theoryX,theoryY,theoryErr
def makeSameXrange(theoryX,dataX,dataY):
Truncate the dataX and dataY ranges so that dataX min and max are with in
the max and min of theoryX.
minT,maxT = theoryX.min(),theoryX.max()
goodIdxMax = np.where(dataX<maxT)
goodIdxMin = np.where(dataX[goodIdxMax]>minT)
return (dataX[goodIdxMax])[goodIdxMin],(dataY[goodIdxMax])[goodIdxMin]
# take theory and get values at every data x point
def theoryYatDataX(theoryX,theoryY,dataX):
For every dataX point, find interpolated thoeryY value. theoryx needed
for interpolation.
f = interpolate.interp1d(theoryX,theoryY)
return f(dataX[np.where(dataX<np.max(theoryX))])
# collect valid points
def findInlierSet(dataX,dataY,interpTheoryY,thoeryErr):
Find where theoryY-theoryErr < dataY theoryY+theoryErr and return
valid indicies.
withinUpper = np.where(dataY<(interpTheoryY+theoryErr))
withinLower = np.where(dataY[withinUpper]
>(interpTheoryY[withinUpper]-theoryErr))
return (dataX[withinUpper])[withinLower],(dataY[withinUpper])[withinLower]
def findOutlierSet(dataX,dataY,interpTheoryY,thoeryErr):
Find where theoryY-theoryErr < dataY theoryY+theoryErr and return
valid indicies.
withinUpper = np.where(dataY>(interpTheoryY+theoryErr))
withinLower = np.where(dataY<(interpTheoryY-theoryErr))
return (dataX[withinUpper],dataY[withinUpper],
dataX[withinLower],dataY[withinLower])
if __name__ == "__main__":
dataX,dataY,dataErr,theoryX,theoryY,theoryErr = makeUpData()
TruncDataX,TruncDataY = makeSameXrange(theoryX,dataX,dataY)
interpTheoryY = theoryYatDataX(theoryX,theoryY,TruncDataX)
inDataX,inDataY = findInlierSet(TruncDataX,TruncDataY,interpTheoryY,
theoryErr)
outUpX,outUpY,outDownX,outDownY = findOutlierSet(TruncDataX,
TruncDataY,
interpTheoryY,
theoryErr)
#print inlierIndex
fig = plt.figure()
ax = fig.add_subplot(211)
ax.errorbar(dataX,dataY,dataErr,fmt= . ,color= k )
ax.plot(theoryX,theoryY, r- )
ax.plot(theoryX,theoryY+theoryErr, r-- )
ax.plot(theoryX,theoryY-theoryErr, r-- )
ax.set_xlim(0,1.4)
ax.set_ylim(-.5,3)
ax = fig.add_subplot(212)
ax.plot(inDataX,inDataY, ko )
ax.plot(outUpX,outUpY, bo )
ax.plot(outDownX,outDownY, ro )
ax.plot(theoryX,theoryY, r- )
ax.plot(theoryX,theoryY+theoryErr, r-- )
ax.plot(theoryX,theoryY-theoryErr, r-- )
ax.set_xlim(0,1.4)
ax.set_ylim(-.5,3)
fig.savefig( findInliers.png )
This figure is the result: