如何通过python预测模型下一组数据?

这里利用一组数据——8 对入湖河流主要污染物的浓度和通量数据——作为训练数据,进行一元线性回归,建立回归模型,并用模拟的浓度值 0.8 进行预测,得到的通量预测值为 114.27。感觉用 Python 进行数据处理和建模还是很有意思的。
# 19.11.14
# One-variable linear regression demo: fit flux against concentration for
# eight observations, predict the flux at a concentration of 0.8, and plot
# the observations, the prediction and the fitted line.
import numpy as np
from pandas import read_csv  # originally split over two lines (SyntaxError)
from matplotlib import pyplot as plt
# from sklearn.linear_model import LinearRegression
from sklearn import linear_model
# from UdxDataset import *
import pandas as pd
import requests

# The legend/title below are Chinese; Matplotlib's default font has no CJK
# glyphs, so put some common CJK-capable fonts in front of the defaults.
# Fonts that are not installed are skipped (Matplotlib may log a warning).
plt.rcParams['font.sans-serif'] = (
    ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS', 'Noto Sans CJK SC']
    + list(plt.rcParams['font.sans-serif'])
)
# Keep the minus sign renderable when a CJK font is selected.
plt.rcParams['axes.unicode_minus'] = False

# Concentration observations (the explanatory variable).
nd_data = [1.040000, 0.940000, 0.530000, 1.930000,
           0.430000, 0.330000, 0.690000, 0.140000]
# Flux observations (the response variable), paired with nd_data by index.
tl_data = [151.0, 87.000000, 148.000000, 159.000000,
           111.000000, 47.000000, 147.000000, 44.000000]

# scikit-learn expects 2-D inputs; a single-column DataFrame provides that.
x = pd.DataFrame(nd_data)
y = pd.DataFrame(tl_data)

# Fit the ordinary least-squares model.
clf = linear_model.LinearRegression()
clf.fit(x, y)

# y was 2-D, so coef_ has shape (1, 1) and intercept_ has shape (1,).
print("模型斜率", clf.coef_[0][0])
print("模型截距", clf.intercept_)

# Predict the flux for a hypothetical concentration of 0.8.
predict_nd = [[0.8]]
predict_tl = clf.predict(predict_nd)
print("当假设浓度为 0.8mg/l 时,利用模型预测通量为:", predict_tl[0][0], "t")

# Scatter the observations and the single predicted point.
plt.scatter(nd_data, tl_data, label='观测值')
plt.scatter(predict_nd[0], predict_tl[0], label='预测值')

# Draw the fitted line through the observed concentrations.
plt.plot(x, x * clf.coef_[0][0] + clf.intercept_, color='r', label='模拟曲线')
plt.axis([0, 2, 30, 280])
plt.legend(loc='upper left')  # show the legend in the upper-left corner
plt.title("通量与浓度的回归关系", fontsize=20)
# The old ``b=`` keyword was renamed to ``visible`` and later removed; the
# positional form works on every Matplotlib version.
plt.grid(True)
plt.show()
}
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 17 21:14:44 2018

@author: Luove
"""
# KRR (kernel ridge regression): a non-linear method suited to classification
# and regression when the training set is small.
import os
import numpy as np
import matplotlib.pyplot as plt
# dateutil mainly offers parser (string -> datetime) and rrule (generate
# datetimes from a rule); https://blog.csdn.net/cherdw/article/details/55224341
import dateutil.parser as dparser
from pylab import *  # bundles matplotlib and numpy to mimic a MATLAB-style environment
try:
    # Current location of train_test_split.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Very old scikit-learn releases only had the cross_validation module.
    from sklearn.cross_validation import train_test_split
from sklearn import linear_model
from sklearn import datasets
import mlpy
from mlpy import KernelRidge

# np.hamming builds a Hamming window: a function that is non-zero inside an
# interval and zero elsewhere, so only the data inside the window is processed.
# Step plots (plot.step()) can be used to analyse how figures such as costs or
# sales change and what they are made of.
x1 = np.linspace(1, 100, 500)
x2 = np.linspace(1, 100, 50)
y1 = np.cos(x1)
y2 = np.cos(x2)

axs1 = plt.subplot(211)
axs2 = plt.subplot(212)
axs1.step(x1, y1)
axs2.step(x2, y2)
plt.show()


# Raw string: the backslashes are path separators, not escape sequences.
goldfile = r"D:\Analyze\Python Matlab\Python\BookCodes\PDA_Book-master\PDA_Book-master\Chapter7\Gold.csv"


# Time-series analysis: smooth the series (which itself contains trend T,
# seasonality/periodicity S and volatility V).
def smooth(x, window_length):
    """Return ``x`` smoothed by a normalised Hamming-window moving average.

    The series is extended at both ends by reflecting it about its first and
    last sample, convolved with the unit-sum window, and then trimmed back to
    the span of the original samples.

    Parameters
    ----------
    x : 1-D numpy array
        The series to smooth.
    window_length : int
        Number of points in the Hamming window.

    Returns
    -------
    numpy.ndarray
        The smoothed series.
    """
    s = np.r_[2 * x[0] - x[window_length - 1::-1],
              x,
              2 * x[-1] - x[-1:-window_length:-1]]
    w = np.hamming(window_length)
    # Moving-average filter via convolution. np.convolve swaps its arguments
    # when the first is shorter than the second; mode is one of
    # {'full', 'same', 'valid'} (default 'full').
    y = np.convolve(w / w.sum(), s, mode='same')
    # Explicit end index: identical to ``-window_length + 1`` for windows of
    # two or more points, but a 1-point window no longer slices to ``[1:0]``.
    end = len(y) - window_length + 1
    return y[window_length:end]


# Gold price trend. Note the dtype difference below: 'object' for the dates,
# None for the values (so each column's content type is detected).
# First column: dates, parsed from strings by dateutil.parser.parse.
x = np.genfromtxt(goldfile, dtype='object', delimiter=',', skip_header=1,
                  usecols=(0), converters={0: dparser.parse})
# Second column: the values.
y = np.genfromtxt(goldfile, dtype=None, delimiter=',', skip_header=1,
                  usecols=(1))
y_smoothed = smooth(y, len(y))
plt.step(x, y, 'r*', label='raw data')
plt.step(x, y_smoothed, label='smoothed data')
plt.legend()
#x = [2,3,9,634,32,4,676,4,234,43,7,-13,0]
#x = np.array(x)
#np.round(smooth(x,len(x)))
#[ 33., 80., 124., 165., 189., 199., 192., 169., 137., 104., 66., 35., 16.]
#plt.plot(x)
#plt.plot(np.round(smooth(x,len(x))))  # with pylab loaded, is plt.show() unnecessary?
##plt.show()
#window_length=x.shape[0]

# NOTE(review): load_boston was removed in scikit-learn 1.2; this call only
# works on older releases. Confirm the installed version or pick a substitute.
house = datasets.load_boston()
# Insert a new axis: (506, 13) becomes (506, 1, 13).
houseX = house.data[:, np.newaxis]
# Keep only the feature at column index 2, giving shape (506, 1).
houseX_temp = houseX[:, :, 2]

x_train, xtest, ytrain, ytest = train_test_split(houseX_temp, house.target,
                                                 test_size=1.0 / 3)
lreg = linear_model.LinearRegression()
lreg.fit(x_train, ytrain)
plt.scatter(xtest, ytest, color='green')
plt.plot(xtest, lreg.predict(xtest), color='blue', linewidth=2)

np.random.seed(0)
# usecols selects the column of interest.
targetvalues = np.genfromtxt(goldfile, skip_header=1, dtype=None,
                             delimiter=',', usecols=(1))
type(targetvalues)  # bare expression: only echoes in an interactive session
# Reshape to a single column; the row count is inferred.
# NOTE(review): 125 is hard-coded; presumably it matches the number of rows
# in Gold.csv - verify against the data file.
trainingpoints = np.arange(125).reshape(-1, 1)
testpoint = np.arange(126).reshape(-1, 1)
# Training kernel matrix, symmetric positive semi-definite, (125, 125).
knl = mlpy.kernel_gaussian(trainingpoints, trainingpoints, sigma=1)
# Testing kernel matrix, (126, 125).
knltest = mlpy.kernel_gaussian(testpoint, trainingpoints, sigma=1)

knlridge = KernelRidge(lmb=0.01)
knlridge.learn(knl, targetvalues)
resultpoints = knlridge.pred(knltest)

fig = plt.figure(1)
plt.plot(trainingpoints, targetvalues, 'o')
plt.plot(testpoint, resultpoints)
#plt.show()
len(resultpoints)  # bare expression: only echoes in an interactive session
resultpoints[-5:-1]

# Repeat with the smoothed data, i.e. targetvalues after smooth().
targetvalues_smoothed = smooth(targetvalues, len(targetvalues))
knlridge.learn(knl, targetvalues_smoothed)
resultpoints_smoothed = knlridge.pred(knltest)
plt.step(trainingpoints, targetvalues_smoothed, 'o')
plt.step(testpoint, resultpoints_smoothed)
#plt.show()
len(resultpoints_smoothed)
# Author's recorded results: period-126 forecast before smoothing 1389.8,
# after smoothing 1388.6.
resultpoints_smoothed[-5:-1]
#x = np.arange(0, 2, 0.05).reshape(-1, 1) # training points
#y = np.ravel(np.exp(x)) + np.random.normal(1, 0.2, x.shape[0]) # target values
#xt = np.arange(0, 2, 0.01).reshape(-1, 1) # testing points
#K = mlpy.kernel_gaussian(x, x, sigma=1) # training kernel matrix
#Kt = mlpy.kernel_gaussian(xt, x, sigma=1) # testing kernel matrix
#krr = KernelRidge(lmb=0.01)
#krr.learn(K, y)
#yt = krr.pred(Kt)
#fig = plt.figure(1)
#plot1 = plt.plot(x[:, 0], y, 'o')
#plot2 = plt.plot(xt[:, 0], yt)
#plt.show()

我要回帖

更多关于 python预测模型 的文章

更多推荐

版权声明:文章内容来源于网络,版权归原作者所有,如有侵权请点击这里与我们联系,我们将及时删除。

点击添加站长微信