Python - 目录操作

目录名中包含中文

1
2
3
4
5
root_dir = "D:/工作留档"
root_dir = unicode(root_dir, "utf-8")
if not os.path.exists(root_dir):
print("Error: %s does not exist!" % root_dir)
exit(-1)

文件内容中包含中文

1
2
3
info_path = os.path.join(root_dir, "info_file")
info_fd = codecs.open(info_path, "w", "utf-8")
info_fd.close()

清空目录

1
2
3
4
5
plot_dir = os.path.join(root_dir, "plot")
if os.path.exists(plot_dir):
import shutil
shutil.rmtree(plot_dir)
os.mkdir(plot_dir)

计数文件夹中内容

1
2
3
4
5
6
7
8
9
def count_dir(root_dir):
cur_items = 0
for filename in os.listdir(root_dir):
path = os.path.join(root_dir, filename)
if os.path.isdir(path):
cur_items += count_dir(path)
elif path.endswith(".data"):
cur_items += 1
return cur_items

处理文件夹中内容

1
2
3
4
5
6
7
8
9
10
def check_dir(root_dir, info_path, checked_items, total_items):
for filename in os.listdir(root_dir):
path = os.path.join(root_dir, filename)
if os.path.isdir(path):
checked_items = check_dir(path, info_path, checked_items, total_items)
elif path.endswith(".data"):
parse_data(path, info_path)
checked_items += 1
print("(%d/%d)\t%0.2f%%" % (checked_items, total_items, checked_items * 100.0 / total_items))
return checked_items

Python - XMeans实现

XMeans聚类、画图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# -*- coding: UTF-8 -*-


import math
from sklearn.cluster import KMeans
import numpy as np


DEBUG_PRINT = False

def cluster_by_xmeans(data, min_k=1, max_k=10, activation_bic_ratio=0.8, init="k-means++", n_init=10, max_iter=300, tol=0.0001):
"""
:param data: list of points
:param min_k, max_k: the min and max clusters for xmeans
:param activation_bic_ratio: the bic threshold for k=1
:param init, n_init, max_iter, tol: see sklearn.cluster.KMeans
:return: KMeans object
"""

if min_k > max_k:
return None

cur_k = min_k
while cur_k < max_k:
if DEBUG_PRINT: print("cur_k = %d" % cur_k)
new_k = 0

kmeans = KMeans(n_clusters=cur_k, init=init, n_init=n_init, max_iter=max_iter, tol=tol).fit(data)
data_predicted = kmeans.predict(data)
cluster_data_map = create_cluster_data_map(cur_k, data, data_predicted)

# 对各个簇进行二分裂尝试
for i in range(cur_k):
cur_cluster_data = cluster_data_map[i]
cur_cluster_center = kmeans.cluster_centers_[i]
R = len(cur_cluster_data)
M = len(cur_cluster_center)

if R >= 2:
if DEBUG_PRINT: print("\tcheck cluster %d of %d" % (i, cur_k))
# 检查当前簇的点是否全相同
equal_flag = True
for j in range(1, R):
if np.linalg.norm(np.array(cur_cluster_data[j]) - np.array(cur_cluster_data[0]) != 0):
equal_flag = False
# 当前簇的点全相同,不做二分裂检查,加速(此处优化是针对:sklearn.cluster.KMeans对完全相同的一组点做2means很慢的问题)
if equal_flag:
if DEBUG_PRINT: print("\t\tequal flag!")
new_k += 1
else:
child_kmeans = KMeans(n_clusters=2, init=init, n_init=n_init, max_iter=max_iter, tol=tol).fit(cur_cluster_data)
cur_cluster_data_predicted = child_kmeans.predict(cur_cluster_data)
child_cluster_data_map = create_cluster_data_map(2, cur_cluster_data, cur_cluster_data_predicted)
if DEBUG_PRINT: print("\t\tnot equal")

if len(child_cluster_data_map[0]) != 0 and len(child_cluster_data_map[1]) != 0:
bic1 = cal_BIC([cur_cluster_center], {0: cur_cluster_data}, R, M)
bic2 = cal_BIC(child_kmeans.cluster_centers_, child_cluster_data_map, R, M)
# 对cur_k的kmeans每个簇,尝试二分裂。
# 如果根据BIC,分裂成功,则一个原簇对应二个新簇。
# 如果根据BIC,分裂不成功,对原簇数量不产生影响,即一个簇仍对应一个簇;
if DEBUG_PRINT: print("\t\tbic1 = %.2f" % bic1)
if DEBUG_PRINT: print("\t\tbic2 = %.2f" % bic2)
if bic1 <= bic2:
new_k += 2
if DEBUG_PRINT:
print("\t\tsplit:")
for d in cur_cluster_data:
print(d)
elif cur_k == 1 and bic2 / bic1 >= activation_bic_ratio:
new_k += 2
if DEBUG_PRINT:
print("\t\tsplit (init):")
for d in cur_cluster_data:
print(d)
else:
new_k += 1
if DEBUG_PRINT:
print("\t\tno split")
for d in cur_cluster_data:
print(d)
else:
# 无法进行2means,也是分裂不成功的场景
new_k += 1
elif R == 1:
if DEBUG_PRINT: print("\tno need check cluster %d of %d, count = 1" % (i, cur_k))
if DEBUG_PRINT: print(cur_cluster_data[0])
# 该簇只有1个元素,无法做2means,只好保留当前簇
new_k += 1
else:
if DEBUG_PRINT: print("\tshould not create this cluster %d of %d, count = 0" % (i, cur_k))
# 该簇为空,意味着cur_k-means不成功,不应聚cur_k了
# 不应考虑本簇,此时new_k不应再自增
if cur_k != min_k:
# 但是注意:若是在cur_k的迭代中,本次cur_k是由上次cur_k-1中某个簇2means决定分裂的,
# 故本次cur_k中该簇也会要分裂,对此场景将new_k自减以抵消影响;
# 若不是在cur_k的迭代中,则为最初场景cur_k == min_k,仅需不考虑本簇即可,即new_k不再自增。
new_k -= 1
# 迭代到cur_k的某个簇(尝试二分裂的过程中)已经达到max_k
if new_k >= max_k:
new_k = max_k
break
# 迭代到cur_k的所有簇,经过二分裂尝试后,仍聚cur_k簇(或甚至不足cur_k簇),即已经无法再向下分裂
if new_k <= cur_k:
if new_k >= min_k:
cur_k = new_k
else:
cur_k = min_k
break
else:
cur_k = new_k

if DEBUG_PRINT: print("cur_k = %d" % cur_k)

return KMeans(n_clusters=cur_k, init=init, n_init=n_init, max_iter=max_iter, tol=tol).fit(data)


def create_cluster_data_map(k, data, data_predicted):
"""
create a dict: {label: [points_of_this_label]}
"""

cluster_data_map = {}
for i in range(k):
cluster_data_map[i] = []
for i in range(len(data)):
cluster_data_map[data_predicted[i]].append(data[i])
return cluster_data_map


def cal_BIC(centers, labelled_data, R, M):
"""
calculate the bayesian information criterion
"""

K = len(centers)
sigma2 = cal_cluster_variance(centers, labelled_data, R, K)
if sigma2 <= 0:
sigma_multiplier = float("-inf")
else:
sigma_multiplier = M * 0.5 * math.log(sigma2)
l = 0.0

for i in range(K):
Rn = len(labelled_data[i])
l += Rn * math.log(Rn) - Rn * math.log(R) - Rn * 0.5 * math.log(2.0 * math.pi) \
- Rn * sigma_multiplier - (Rn - K) * 0.5

bic = l - K * (K * (M + 1.0)) / 2.0 * math.log(R)

return bic


def cal_cluster_variance(centers, labelled_data, R, K):
"""
calculate the variance for a set of clusters
"""

sum = 0
for cur_label in range(K):
cur_center = centers[cur_label]
cur_data = labelled_data[cur_label]
for cur_point in cur_data:
#dist = np.linalg.norm(np.array(cur_point) - np.array(cur_center))
dist2 = np.sum(np.square(np.array(cur_point) - np.array(cur_center)))
sum += dist2

v = sum * 1.0
if R - K > 0:
v = v / (R - K)

return v


import colorsys
def get_colors(color_count):
hues = np.linspace(0, 1, color_count, endpoint=False)
for i in range(color_count):
#hue = i * 1. / color_count
hue = hues[i]
rgb = [int(x * 255.0) for x in colorsys.hsv_to_rgb(hue, 1.0, 1.0)]
yield "#{0:02x}{1:02x}{2:02x}".format(*rgb)


import matplotlib.pyplot as plt
def plot_by_clusters(cluster_data_map, savepath=None):
plt.figure(figsize=(8, 6))

actual_cluster_count = 0
for c in cluster_data_map:
if len(cluster_data_map[c]) > 0:
actual_cluster_count += 1

colors = get_colors(actual_cluster_count)

for c in cluster_data_map:
if len(cluster_data_map[c]) > 0:
col = next(colors)
for d in cluster_data_map[c]:
plt.plot(range(1, len(d) + 1), d, color=col)

plt.title("actually %d clusters" % actual_cluster_count, fontsize=16)
plt.xlabel("time")
plt.ylabel("kpi value")

if savepath is not None:
plt.savefig(savepath)
plt.close()
else:
plt.show()


if __name__ == '__main__':
import time

"""
# 5 clusters
data = [[3.487966, 2.617258], [3.052439, 2.939565], [2.541804, 2.855116],
[2.993872, 2.741651], [2.645149, 2.988544], [3.275658, 2.734759],
[3.4383, 3.126239], [3.149475, 2.774026], [2.808158, 2.987228],
[2.609178, 2.608235], [2.89733, 2.502805], [2.599489, 3.317544],
[3.330802, 3.329103], [2.504006, 3.13618], [2.598317, 2.838451],
[3.468053, -3.205529], [3.473971, -2.881551], [2.915968, -3.146783],
[3.287449, -2.818057], [3.199882, -3.082526], [3.33998, -2.892212],
[2.652254, -3.284869], [3.062117, -3.377962], [3.439667, -2.904208],
[3.131923, -2.860522], [2.770989, -2.585313], [2.678803, -3.25333],
[3.20751, -2.602797], [2.548267, -3.350991], [3.488662, -2.793679],
[-3.419303, -3.460712], [-2.66534, -3.327664], [-2.843735, -2.520843],
[-3.256643, -2.505888], [-2.723389, -3.145575], [-2.819643, -2.775133],
[-3.074187, -3.408583], [-3.286655, -2.857967], [-2.650737, -2.697036],
[-3.152726, -3.137763], [-2.999019, -2.686993], [-2.688528, -2.741138],
[-2.592045, -3.197096], [-3.160052, -2.52554], [-3.301692, -2.993403],
[-2.946691, 3.19919], [-3.408824, 2.648383], [-3.148617, 3.378614],
[-2.563794, 2.702014], [-3.157149, 2.622525], [-2.975722, 2.604931],
[-2.518403, 3.039607], [-3.140483, 3.32115], [-2.619992, 2.968638],
[-2.875432, 3.374492], [-2.555934, 2.997142], [-3.254758, 2.814866],
[-2.947021, 3.04663], [-3.177994, 2.874514], [-3.110401, 3.071354],
[0.11111, 0.111111], [0.11111, -0.111111], [-0.11111, 0.111111],
[-0.11111, -0.11111], [0.234232, 0.34322], [-0.23223, 0.3323]]

# use kmeans directly
time1 = time.time()
kmeans = KMeans(n_clusters=5)
kmeans.fit(data)
print("use kmeans directly".center(80, "-"))
print("cur kmeans model: %s" % kmeans)
print("cur centers: %s" % kmeans.cluster_centers_)
print("labels of data: %s" % kmeans.predict(data))
time2 = time.time()
print("timecost: %0.3f" % (time2 - time1))
print("".center(80, "-"))

print
print

# use xmeans to find the right k
time3 = time.time()
xmeans = cluster_by_xmeans(data, n_init=20)
print("use xmeans to find the right k".center(80, "-"))
print("cur kmeans model: %s" % xmeans)
print("cur centers: %s" % xmeans.cluster_centers_)
print("labels of data: %s" % xmeans.predict(data))
time4 = time.time()
print("timecost: %0.3f" % (time4 - time3))
print("".center(80, "-"))
"""


# another test
"""
data0 = [(1.2, 0, 0), (1, 0, 0), (1.1, 0, 0),
(0, 0.9, 0), (0, 0.8, 0), (0, 1, 0),
(0, 0, 1), (0, 0, 1.3), (0, 0, 1.1)]

data1 = [(1, 0, 0), (1, 0, 0), (1, 0, 0),
(0, 1, 0), (0, 1, 0), (0, 1, 0),
(0, 0, 1), (0, 0, 1), (0, 0, 1)]

data2 = [(1.2, 0, 0), (1, 0, 0), (1.1, 0, 0),
(0, 0.9, 0), (0, 0.8, 0), (0, 1, 0),
(0, 0, 1), (0, 0, 1.3), (0, 0, 1.1), (1, 0, 1)]

time1 = time.time()
xmeans = cluster_by_xmeans(data0)
print("cur kmeans model: %s" % xmeans)
print("cur centers: %s" % xmeans.cluster_centers_)
print("labels of data: %s" % xmeans.predict(data0))
time2 = time.time()
print("timecost: %0.3f" % (time2 - time1))

print
print

time3 = time.time()
xmeans = cluster_by_xmeans(data1)
print("cur kmeans model: %s" % xmeans)
print("cur centers: %s" % xmeans.cluster_centers_)
print("labels of data: %s" % xmeans.predict(data1))
time4 = time.time()
print("timecost: %0.3f" % (time4 - time3))

print
print

time5 = time.time()
xmeans = cluster_by_xmeans(data2)
print("cur kmeans model: %s" % xmeans)
print("cur centers: %s" % xmeans.cluster_centers_)
print("labels of data: %s" % xmeans.predict(data2))
time6 = time.time()
print("timecost: %0.3f" % (time6 - time5))
"""


# plot result
"""
xmeans = cluster_by_xmeans(data, max_k=4)
data_predicted = xmeans.predict(data)
cluster_data_map = create_cluster_data_map(len(xmeans.cluster_centers_), data, data_predicted)
plot_by_clusters(cluster_data_map)
"""

求下一个2的幂

最直观

1
2
3
4
5
6
7
8
9
10
11
12
def getNextPowerOf2(x: Int): Int = {
if (x > (Int.MaxValue >> 1) + 1) {
return 0
}

if ((x & (x - 1)) == 0) {
return x
}

val k = (math.log10(x)/math.log10(2.0)).ceil
math.pow(2.0, k).toInt
}

移位操作

1
2
3
4
5
6
7
n--;           // 1101 1101 --> 1101 1100
n |= n >> 1; // 1101 1100 | 0110 1110 = 1111 1110
n |= n >> 2; // 1111 1110 | 0011 1111 = 1111 1111
n |= n >> 4; // ...
n |= n >> 8;
n |= n >> 16; // 1111 1111 | 1111 1111 = 1111 1111
n++; // 1111 1111 --> 1 0000 0000
  • 右移1位,做或,保证最高位、最高位下一位为“1”;这样有2位为“1”了,进而右移2位,做或,保证最高4位为“1”;进而在右移4位(不停的乘2),做或……最后得到所有位都为“1”,再n++,可得一个2的幂。
  • n--是为了处理n本身是2的幂的场景。如不做n--,会得到n的2倍作为结果。
  • 处理32位整数,可扩展,如处理64位整数,添加n >> 32
1
2
3
4
5
6
7
8
9
10
11
12
13
14
def getNextPowerOf2(x: Int): Int = {
if (x > (Int.MaxValue >> 1) + 1) {
return 0
}

var n = x - 1
n |= n >> 1
n |= n >> 2
n |= n >> 4
n |= n >> 8
n |= n >> 16
n += 1
n
}

使用FFT加速卷积求算

原理

时域卷积 <-> 频域相乘

假设序列$x1$和$x2$的长度分别为$N1$和$N2$

  1. 将$x1$和$x2$通过在序列末补0方式,而使得序列的长度都为$N=(N1+N2-1)$。
  2. 对$x1$和$x2$求FFT,得$f1$和$f2$。
  3. 将$f1$和$f2$相乘(各个点对应相乘),得$f$。
  4. 对$f$进行IFFT。

代码示例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# -*- coding: UTF-8 -*-

if __name__ == '__main__':
x1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
x2 = [5, 6, 7]

from numpy import convolve
print(convolve(x1, x2))

from numpy import zeros,real
from numpy.fft import fft, ifft
N = len(x1) + len(x2) - 1
f1 = fft(x1, N)
f2 = fft(x2, N)
f = f1 * f2
y = ifft(f)
y = real(y)
print(y)

自相关函数相关内容

以下根据统计定义,暂不考虑概率论定义。
$A_{t} = A_{1}, A_{2}, …, A_{n}$表示一组数据(时间序列)。

均值

平均数

$$\begin{aligned} \mu = E(A_{t}) = \frac{\sum_{i=1}^{n}A_{i}}{n} \end{aligned}$$

方差

方差用来衡量数据的离散程度。
每个数据点与数据均值差方的平均数。

$$\begin{aligned} \sigma^{2} = var(A_{t}) = E(A_{t} - \mu)^{2} = \frac{\sum_{i=1}^{n}(A_{i} - \mu )^{2}}{n} \end{aligned}$$

方差中除以n和除以n-1的区别
我们通常所说的方差有两种,一种是样本方差,一种是总体方差。
当求总体方差的时候,分母是$n$;当求样本方差的时候,分母是$n-1$。
在数理统计中,一般所求的都是样本方差,这就需要构造一个统计量样本方差(注意这是一个随机变量),需要使构造的统计量的期望与总体方差相等,这样才能使统计量具有无偏性。

  • 求总体方差的情形:比如,一个班50个人,每个人的数学成绩都知道,让你求平均数和方差。这时所说的方差就是总体方差了,这里不存在任何采样的问题,所以没有随机变量,也没有期望这个概念。
  • 求样本方差的情形:比如,有一亿个数据,我们想知道这个总体的方差是多少。去统计这一亿个数据是非常困难的,所以我们希望通过对总体抽取一万个样本,通过对样本方差的计算来估计出总体方差。这就需要我们来构造一个合适的统计量(样本方差)来估计总体方差。

标准差

方差开方,量纲与数据一致。
各数据点到均值的距离之平均。

$$\begin{aligned} \sigma = \sqrt{\sigma^{2}} \end{aligned}$$

协方差

协方差用来衡量两组数据(在各个相应时刻)“偏离各自均值”的一致程度。
如果两组数据的变化趋势一致,也就是说如果其中一个大于自身的期望值,另外一个也大于自身的期望值,那么两组数据之间的协方差就是正值(变化趋势相同)。
如果两组数据的变化趋势相反,即其中一个大于自身的期望值,另外一个却小于自身的期望值,那么两组数据之间的协方差就是负值(变化趋势相反)。
如果两组数据是统计独立的,那么二者之间的协方差就是0。但是反过来并不成立,即如果两组的协方差为0,二者并不一定是统计独立的(只能看出二者不是线性关系)。

$$\begin{aligned} r = cov(A_{t}, B_{t}) = E((A_{t} - \mu)(B_{t} - \nu)) = \frac{\sum_{i=1}^{n}(A_{i} - \mu )(B_{i} - \nu)}{n} \end{aligned}$$

协方差中除以n和除以n-1的区别,与方差相同

上述为二维数据(两组)。若数据为多维,则需要计算多个协方差。使用矩阵来组织这些协方差,给出协方差矩阵的定义$c_{i,j} = cov(Dom_{i}, Dom_{j})$。

相关系数

皮尔逊相关系数
将协方差去除量纲影响,进行标准化。

$$\begin{aligned} \rho = \frac{cov(A_{t}, B_{t})}{\sigma_{A} \sigma_{B}} \end{aligned}$$

相关系数取值$[-1, 1]$,绝对值越大,相关性越强。

自协方差

数据与其经过时间平移的数据之间的协方差。
衡量数据在两个不同时期之间的相关程度。

$$\begin{aligned} &r_{k} = cov(A_{t}, A_{t + k}) = E((A_{t} - \mu)(A_{t + k} - \mu)) = \frac{\sum_{i=1}^{n - k}(A_{i} - \mu )(A_{i + k} - \mu)}{n - k}\\ &k = 0,1, ..., n-1 \end{aligned}$$

平移后,参与自协方差计算的数据长度为$n-k$。
将$k$视为变量,则$r_{k}, k = 0,1, …, n-1$为自协方差函数
当$k=0$时,$r_{0} = \sigma^{2}$。

自相关系数

将自协方差去除量纲影响,进行标准化。

$$\begin{aligned} &\rho_{k} = \frac{cov(A_{t}, A_{t + k})}{\sigma^{2}} = \frac{r_{k}}{r_{0}}\\ &k = 0,1, ..., n-1 \end{aligned}$$

将$k$视为变量,则$\rho_{k}, k = 0,1, …, n-1$为自相关函数

MORE

在信号处理中,往往提前对信号进行无偏(均值为0)归一化(标准差为1)处理,此时计算自协方差系数不需要减去均值。
这样好处是可以通过点积计算离散序列的自协方差、自相关系数(见下文)。

对于离散信号,
$$\begin{aligned} r_{k} = \frac{\sum_{i=0}^{n-k-1} y(i)\bar{y}(i+k)}{n-k}, k = 0,1, ..., n-1 \end{aligned}$$
注意,这里分子上的运算其实是向量点乘(内积)运算。
下标为了对应程序代码,调整为$[0, n-k-1]$。

MORE

相关函数与卷积

上文离散信号为例,其自协方差函数结果,其实是一个卷积计算与$k$所决定的权重乘积的结果。(卷积算出来的是一个序列,而非一个值。乘以权重后,对应不同$k$值时,数据的自协方差,即自协方差函数值)

$$\begin{aligned} &r_{k} = \frac{\sum_{i=0}^{n-k-1}y(i)\bar{y}(i+k)}{n-k} \\ &\sum_{i=0}^{n-k-1} y(i)\bar{y}(i+k) = y(k) * \bar{y}(-k) \\ &k = 0,1, ..., n-1 \end{aligned}$$

$k$是自变量,表示延迟。$n$为序列长度。
$*$表示卷积,${f}(-k)$表示翻转$f(k)$。根据卷积含义(先翻转、再平移),可得上式。

而对应每个$k$值,求算自协方差$r_{k}$时,做的向量点乘(向量对应项相乘,再全部相加,得到一个数),其实就是在计算卷积结果中的一项

代码示例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# -*- coding: UTF-8 -*-
import numpy as np

if __name__ == '__main__':
# 本示例数据未做标准化

# f(t)
x1 = [1, 7, 5, 1, 7, 5]
n = len(x1)

# k = 0, 1, 2, ..., n - 1
r = [0] * n
# acovf
for k in range(0, n):
x1_1 = x1[:n - k]
x1_2 = x1[k:]
sum = 0
for i in range(0, len(x1_1)):
sum = sum + x1_1[i] * x1_2[i]
r[k] = sum / (n - k)
print(r)

# f(-t)
x2 = [5, 7, 1, 5, 7, 1]
# convolution
y = np.convolve(x1, x2)
# weight: n - k
xi = range(1, n + 1)
d = xi + xi[:-1][::-1]
# acovf
r = (y / d)[n - 1:]
print(r)

对求得的自协方差函数值,全体除以$r_{0}$,即可得自相关函数

此外,