python 進階分析 - 4.1 時間序列 | M5 資料簡介 ( M5 Time Series in python )

Поделиться
HTML-код
  • Опубликовано: 3 фев 2025

Комментарии • 1

  • @play_data
    @play_data  11 дней назад

    # 完整程式
    import sys
    sys.path.append(r'D:\Users\Pu_chang\Desktop\資料分析\UsefulML')
    from PreProcessing import TS_PreProcess, External_Variable
    import datetime
    import itertools # grid_search
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from prophet import Prophet
    from prophet.diagnostics import performance_metrics, cross_validation
    import warnings
    warnings.filterwarnings('ignore')
    '每個產品於每個商店銷量,時間 = d_1 ~ d_1913'
    # The bare string above is a no-op note; it says roughly "sales of every
    # product in every store, time = d_1 ~ d_1913".

    # Directory holding the M5 CSV files (machine-specific absolute path).
    path = 'D:/Users/Pu_chang/Desktop/資料分析/3. 推論分析/時間序列/data/M5/'

    # Read the three tables; each file's first row is used as the header.
    validation = pd.read_csv(f'{path}sales_train_validation.csv', header=0, encoding='utf-8')
    calender = pd.read_csv(f'{path}calendar.csv', header=0, encoding='utf-8')
    evaluation = pd.read_csv(f'{path}sales_train_evaluation.csv', header=0, encoding='utf-8')

    # For every row, count how many columns from position 6 onward hold a
    # value above zero, then turn the resulting Series into a DataFrame.
    greater_than_zero = validation.iloc[:, 6:].gt(0).sum(axis=1).reset_index()

    '易 : 全 > 0的跑,5859,2011-01-29 ~ 2016-04-24'
    # No-op note as well; presumably "easy case: run a series whose values are
    # all > 0, row 5859, 2011-01-29 ~ 2016-04-24" -- TODO confirm with author.
    def split_train_test(item, df1, df2, calendar=None, n_id_cols=6, test_start=1914):
        """Build calendar-enriched train and test frames for one row of sales data.

        The row labelled ``item`` is taken from each wide table, turned into a
        two-column frame (``d``, ``y``), and inner-joined with the calendar
        table on ``d``. Both results are sorted by the calendar ``date`` column.

        Args:
            item: Row label looked up with ``.loc`` in both ``df1`` and ``df2``.
            df1: Wide table used for the training frame.
            df2: Wide table used for the test frame.
            calendar: Table with at least the columns ``d`` and ``date``. When
                omitted, the module-level ``calender`` table is used.
            n_id_cols: Number of leading entries in a row that are skipped
                before the per-day values start.
            test_start: Smallest day number (the integer after ``d_``) kept in
                the test frame.

        Returns:
            ``(train, test)``. ``train`` keeps ``d`` as the original label
            (e.g. ``'d_1'``); ``test`` has ``d`` converted to an integer and
            only contains rows with ``d >= test_start``.
        """
        if calendar is None:
            # Fall back to the script's global table (spelled `calender` there).
            calendar = calender

        def _row_to_long(wide):
            # One row -> (d, y) frame joined with the calendar. The explicit
            # copy makes the column assignment below act on an independent
            # frame instead of on a slice of a temporary.
            long_df = wide.loc[item].reset_index().iloc[n_id_cols:, :].copy()
            long_df.columns = ['d', 'y']
            long_df['y'] = long_df['y'].astype(int)
            return pd.merge(long_df, calendar, on='d', how='inner')

        train = _row_to_long(df1).sort_values(by='date', ascending=True)

        test = _row_to_long(df2)
        # Strip the 'd_' marker so the day can be compared numerically.
        test['d'] = test['d'].replace(to_replace='d_', value='', regex=True).astype(int)
        test = test.loc[test['d'] >= test_start]
        test = test.sort_values(by='date', ascending=True)
        return train, test
    # Build the train/test frames for the row labelled 5859: training data from
    # the validation table, test data from the evaluation table.
    train, test = split_train_test(item=5859, df1=validation, df2=evaluation)