网站首页 > 技术文章 正文
废话也不多说了,首先导入相关模块
import pandas as pd
import numpy as np
import pywt
import scipy as sp
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
导入数据
df = pd.read_pickle('shuju.pkl')
df.head()
进行下数据转换
df['fMag'] = df['Mag.']
df['Mag.'] = df['Mag.'].apply(lambda x :int(x) if (x-int(x)) < 0.5 else int(x)+1)
删除不需要的列
df.drop(columns=['Date(yyyy/mm/dd)', 'Time(UTC)', 'Time', 'unit'], inplace=True)
df.dropna(inplace=True)
df.head()
训练集划分
train, test = train_test_split(df, test_size=0.3, random_state=38)
print('train data shape: ', train.shape)
print('test data shape: ', test.shape)
train data shape: (2228, 6)
test data shape: (955, 6)
训练集
为了便于训练,进行数据转换,得到train_wave_mat和test_wave_mat
绘制一些样本
fig, axs = plt.subplots(3, 3, figsize=(15, 10))
plt.subplots_adjust(hspace=0.5, wspace=0.2)
n = 0
for i in range(3):
for j in range(3):
axs[i,j].plot(train_wave_mat[n], color='black')
axs[i,j].set_title('train wave sample ' + str(n+1))
axs[i,j].set_xlabel('time')
axs[i,j].set_ylabel('amplitude')
axs[i,j].set_ylim(0, 0.4)
axs[i,j].set_xticks([])
n += 1
提取标签
train_labels_mat = train['Mag.'].tolist()
test_labels_mat = test['Mag.'].tolist()
定义特征提取函数(小波变换)
def features(data, labels, waveletname):
features_list = []
list_unique_labels = list(set(labels))
list_labels = [list_unique_labels.index(elem) for elem in labels]
for signal in data:
coeff_list = pywt.wavedec(signal, waveletname)
features = []
for coeff in coeff_list:
features += get_features(coeff)
features_list.append(features)
return features_list, list_labels
def get_features(list_values):
entropy = calculate_entropy(list_values)
crossings = calculate_crossings(list_values)
statistics = calculate_statistics(list_values)
return [entropy] + crossings + statistics
def calculate_entropy(list_values):
from collections import Counter
counter_values = Counter(list_values).most_common()
probabilities = [elem[1]/len(list_values) for elem in counter_values]
entropy=sp.stats.entropy(probabilities)
return entropy
def calculate_statistics(list_values):
n5 = np.nanpercentile(list_values, 5)
n25 = np.nanpercentile(list_values, 25)
n75 = np.nanpercentile(list_values, 75)
n95 = np.nanpercentile(list_values, 95)
median = np.nanpercentile(list_values, 50)
mean = np.nanmean(list_values)
std = np.nanstd(list_values)
var = np.nanvar(list_values)
rms = np.nanmean(np.sqrt(list_values**2))
return [n5, n25, n75, n95, median, mean, std, var, rms]
def calculate_crossings(list_values):
zero_crossing_indices = np.nonzero(np.diff(np.array(list_values) > 0))[0]
no_zero_crossings = len(zero_crossing_indices)
mean_crossing_indices = np.nonzero(np.diff(np.array(list_values) > np.nanmean(list_values)))[0]
no_mean_crossings = len(mean_crossing_indices)
return [no_zero_crossings, no_mean_crossings]
利用小波变换进行特征提取
X_train, y_train = features(train_wave_mat, train_labels_mat, 'db4')
X_test, y_test = features(test_wave_mat, test_labels_mat, 'db4')
准备绘制小波变换幅值
trainset = np.array(X_train)
testset = np.array(X_test)
train_magitude = np.array(train['fMag'])
test_magitude = np.array(test['fMag'])
train_wavelet_amplitude = trainset.max(axis=1) - trainset.min(axis=1)
test_wavelet_amplitude = testset.max(axis=1) - testset.min(axis=1)
绘制
size = 10
alpha = 0.3
plt.figure(figsize=(10,8))
plt.scatter(train_magitude, train_wavelet_amplitude, s=size, c='r', alpha=alpha)
plt.scatter(test_magitude, test_wavelet_amplitude, s=size, c='b', alpha=alpha)
plt.ylabel('Wavelet Amplitude')
plt.xlabel('Magnitude')
plt.show()
绘制小波分解系数
from scipy import signal
a = pywt.wavedec(X_train, 'db4', mode='sym', level=None)
(cA, cD) = pywt.dwt(X_train, 'db4', mode='sym')
fig, axs = plt.subplots(2, 2, figsize=(15, 10))
axs[0][0].plot(cA[0], color='darkred')
axs[0][0].set_xlabel('Time')
axs[0][0].set_ylabel('Approximation Coefficient')
axs[0][0].set_xticks([])
axs[0][1].plot(cD[0], color='darkblue')
axs[0][1].set_xlabel('Time')
axs[0][1].set_ylabel('Detail Coefficient')
axs[0][1].set_xticks([])
axs[1][0].plot(X_train[0], color='darkgreen')
axs[1][0].set_xlabel('Time')
axs[1][0].set_ylabel('Wavelet Coefficient')
axs[1][0].set_xticks([])
axs[1][1].plot(X_train[:])
axs[1][1].set_xlabel('Wavelet Samples')
axs[1][1].set_ylabel('Wavelet Coefficients')
plt.show()
fig, axs = plt.subplots(3, 3, figsize=(21, 15))
for i in range(3):
axs[i][0].plot(cA[i], color='darkred')
axs[i][0].set_xlabel('Time')
axs[i][0].set_ylabel('Approximation Coefficient')
axs[i][0].set_xticks([])
axs[i][1].plot(cD[i], color='darkblue')
axs[i][1].set_xlabel('Time')
axs[i][1].set_ylabel('Detail Coefficient')
axs[i][1].set_xticks([])
axs[i][2].plot(X_train[i], color='darkgreen')
axs[i][2].set_xlabel('Time')
axs[i][2].set_ylabel('Wavelet Coefficient')
axs[i][2].set_xticks([])
看看3D图
import matplotlib as mpl
ax = plt.figure(figsize=(10,10)).add_subplot(projection='3d')
x = np.linspace(0,108,108)
y = np.asarray(X_train)
z = range(len(y))
cmap = mpl.cm.get_cmap('ocean')
for i in z:
ax.plot(x,y[i],zs=i,zdir='x', color=cmap(i/2500))
ax.set_xlabel('Wavelet Samples')
ax.set_ylabel('Time')
ax.set_yticks([])
ax.set_zlabel('Wavelet Coefficient')
plt.show()
创建一个 pipeline
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
模型拟合
clf.fit(X_train,y_train)
Pipeline(steps=[('standardscaler', StandardScaler()),
('svc', SVC(gamma='auto'))])
预测
y_pred = clf.predict(X_test)
绘制散点图
labels = clf.named_steps['svc'].classes_
size = 10
alpha = 0.5
colors = range(len(labels))
plt.figure(figsize=(10,8))
for i in range(len(labels)):
plt.scatter(test_wavelet_amplitude[y_pred == i], test_magitude[y_pred == i], label=labels[i]+1, s=size, alpha=alpha)
plt.xlabel('Wavelet Amplitude')
plt.ylabel('Magnitude')
plt.legend()
plt.show()
计算准确率
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print('accuracy: ', accuracy)
绘制预测/实际类别
plt.figure(figsize=(20,7))
plt.plot(y_pred[:500], color='orange', label='Classified', linewidth=1, alpha=1)
plt.plot(y_test[:500], color='green', label='Actual', alpha=0.5)
plt.xlabel('Samples')
plt.ylabel('Class')
plt.xticks([0,500])
plt.yticks([1,2,3,4,5,6,7])
plt.legend()
plt.show()
计算混淆矩阵
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print('confusion matrix: ', cm)
绘制混淆矩阵
import seaborn as sns
plt.figure(figsize=(10, 10))
sns.heatmap(cm, annot=True, square=True, cmap='viridis', fmt='d')
plt.xlabel('Actual label')
plt.ylabel('Predicted label')
plt.show()
详细代码
https://mianbaoduo.com/o/bread/Yp2bl5xt
或者
https://m.tb.cn/h.fzeRMqF?tk=ik7R2tMxlDy CZ3457
- 上一篇: 用机器学习来提升你的用户增长:第五步,预测客户的下一个购买日
- 下一篇: 浅谈支持向量机(2)
猜你喜欢
- 2024-11-24 7000字,Python分析:泰坦尼克号中女生更容易生还?
- 2024-11-24 SVM 算法 和 梅尔倒谱系数 结合使用噪音检测的应用
- 2024-11-24 scikit-learn的5大新功能
- 2024-11-24 机器学习集成方法:Bagging, Boosting, Stacking, Voting, Blending
- 2024-11-24 Kaggle练习赛---Titanic的分析与整理
- 2024-11-24 超参数自动调参库介绍
- 2024-11-24 支持向量机SVM(Support Vector Machine) Ⅰ原创 Yu
- 2024-11-24 集成学习小介
- 2024-11-24 如何利用手机远训练机器学习模型
- 2024-11-24 使用SVC支持向量机算法来进行人脸识别的CNN神经网络训练
- 02-21走进git时代, 你该怎么玩?_gits
- 02-21GitHub是什么?它可不仅仅是云中的Git版本控制器
- 02-21Git常用操作总结_git基本用法
- 02-21为什么互联网巨头使用Git而放弃SVN?(含核心命令与原理)
- 02-21Git 高级用法,喜欢就拿去用_git基本用法
- 02-21Git常用命令和Git团队使用规范指南
- 02-21总结几个常用的Git命令的使用方法
- 02-21Git工作原理和常用指令_git原理详解
- 最近发表
- 标签列表
-
- cmd/c (57)
- c++中::是什么意思 (57)
- sqlset (59)
- ps可以打开pdf格式吗 (58)
- phprequire_once (61)
- localstorage.removeitem (74)
- routermode (59)
- vector线程安全吗 (70)
- & (66)
- java (73)
- org.redisson (64)
- log.warn (60)
- cannotinstantiatethetype (62)
- js数组插入 (83)
- resttemplateokhttp (59)
- gormwherein (64)
- linux删除一个文件夹 (65)
- mac安装java (72)
- reader.onload (61)
- outofmemoryerror是什么意思 (64)
- flask文件上传 (63)
- eacces (67)
- 查看mysql是否启动 (70)
- java是值传递还是引用传递 (58)
- 无效的列索引 (74)