8-1. 원자재가격과 주가의 관계 분석_1 [Code]

각종 원자재 가격과 주가의 상관관계를 알아보기위해 5년간 KOSPI 주가의 등락과 원자재가격의 등락을 비교하고 해당기업과 원자재 간의 관계를 해석해 보았다.

[main code]

원자재가격과 주가의 관계 분석

import pandas as pd
from pandas import Series,DataFrame
import numpy as np
from dateutil.parser import parse
import dateutil

# For Visualization
import matplotlib.pyplot as plt
from matplotlib import pyplot
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

# For time stamps
from datetime import datetime

import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from datetime import datetime

path = "c:/Windows/Fonts/malgun.ttf"
import platform
from matplotlib import font_manager, rc
if platform.system() == 'Darwin':
    rc('font', family='AppleGothic')
elif platform.system() == 'Windows':
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
else:
    print('Unknown system... sorry~~~~')    

plt.rcParams['axes.unicode_minus'] = False

데이터 전처리

KOSPI만 선택

item_Charge = pd.read_csv("st_data_itemChargeFull.tsv", sep = '\t')
item_Charge['date'] = item_Charge['date'].apply(str)
item_Charge['date'] = item_Charge['date'].apply(dateutil.parser.parse)
KOSPI = item_Charge.loc[item_Charge[list(item_Charge.columns)[2]]=="KOSPI",:]
KOSPI.head()
itemcodeitemnamemarketdateclose_valchange_valchange_rateacc_quantdebt_totalface_val...operating_profitoperating_profit_increasing_rateprev_quantproperty_totalreserve_ratioroaroesalessales_increasing_ratetrade_stop_yn
0000020동화약품KOSPI2015-01-075540.070.01.2642517.0732.01000...112.0NaN62744.03706.0967.43NaNNaN3066.0NaNN
1000020동화약품KOSPI2015-01-135600.040.00.7139527.0732.01000...112.0NaN62744.03706.0967.43NaNNaN3066.0NaNN
2000020동화약품KOSPI2015-01-165640.0-110.0-1.9557341.0732.01000...112.0NaN62744.03706.0967.43NaNNaN3066.0NaNN
3000020동화약품KOSPI2015-01-195680.040.00.7049574.0732.01000...112.0NaN62744.03706.0967.43NaNNaN3066.0NaNN
4000020동화약품KOSPI2015-01-225550.0-60.0-1.0877386.0732.01000...112.0NaN62744.03706.0967.43NaNNaN3066.0NaNN

5 rows × 27 columns

KOSPI_=KOSPI.pivot_table('close_val',index='date',columns='itemname')
KOSPI_clo = KOSPI_.dropna(axis = 1 ).fillna(method='ffill').pct_change().fillna(0)
KOSPI_val = KOSPI_.dropna(axis = 1 ).fillna(method='ffill')
KOSPI_val.head()
itemnameAJ렌터카AK홀딩스BGFBNK금융지주BYCBYC우CJCJ CGVCJ대한통운CJ씨푸드...후성휠라코리아휴니드휴비스휴스틸휴켐스흥국화재흥국화재2우B흥국화재우흥아해운
date
2015-01-0717750.086600.074500.013500.0252000.0168500.0158500.059800.0183500.02595.0...3080.0119000.04510.011600.017350.025550.03730.06140.02460.02805.0
2015-01-1317650.088000.073700.014000.0254000.0170000.0161500.060500.0192000.02590.0...3245.0118000.04820.011750.017600.025200.03900.06200.02465.02655.0
2015-01-1617300.092300.075100.014000.0252500.0169000.0164500.059300.0182000.02560.0...3215.0115000.04970.011800.017150.026800.04230.06160.02410.02710.0
2015-01-1917250.088200.075100.014250.0252500.0170500.0164500.059300.0182000.02540.0...3160.0115500.05050.011800.016850.027050.04190.06300.02450.02575.0
2015-01-2217000.090300.075000.013950.0251000.0175000.0168500.059600.0172000.02520.0...3205.0116000.05090.011650.016800.026800.04175.06180.02495.02570.0

5 rows × 802 columns

st_data_goodsForeign = pd.read_csv("st_data_goodsForeign.tsv", sep = '\t')
st_data_goodsForeign['date'] = st_data_goodsForeign['date'].apply(str)
st_data_goodsForeign['date'] = st_data_goodsForeign['date'].apply(dateutil.parser.parse)
st_data_goodsForeign.head()
itemcodeitemnamedateclose_valchange_valopen_valhigh_vallow_val
0LME@AAY알루미늄합금2015-01-071790.0-15.01790.01790.01780.0
1LME@AAY알루미늄합금2015-01-131835.0NaN1835.01835.01825.0
2LME@AAY알루미늄합금2015-01-161820.0NaN1820.01820.01810.0
3LME@AAY알루미늄합금2015-01-191820.0NaN1820.01820.01810.0
4LME@AAY알루미늄합금2015-01-221870.010.01870.01870.01860.0
goodsForeign=st_data_goodsForeign.pivot_table('close_val',index='date',columns='itemname')
goodsForeign_clo = goodsForeign.fillna(method='ffill').pct_change().fillna(0)
goodsForeign_val = goodsForeign.fillna(method='ffill')
goodsForeign_val.head()
itemname구리난방유니켈대두대두박대두유설탕소맥아연알루미늄합금옥수수주석천연가스
date
2015-01-076170.01.69991820.015455.01065.6368.532.9814.78579.411.4102145.51790.0411.419680.02.871
2015-01-135915.01.63301800.514780.01015.6349.732.3914.87548.011.2402083.01835.0400.219555.02.943
2015-01-165660.01.66561766.514360.01003.0326.233.3915.33532.611.3252056.51820.0401.019325.03.127
2015-01-195671.01.64111827.514400.01003.0326.233.3915.33532.611.3252068.51820.0401.019280.02.995
2015-01-225742.01.63791881.514720.0988.6330.131.9715.91533.611.1602144.01870.0399.219580.02.835
TD = pd.concat([KOSPI_val,goodsForeign_val],axis=1)
TD = TD.reset_index().fillna(method='ffill')
for i in range(0,336):
    if i%3 != 0:
        TD.drop( i,inplace = True)
FTD = TD.set_index('date')
FTD.head()
itemnameAJ렌터카AK홀딩스BGFBNK금융지주BYCBYC우CJCJ CGVCJ대한통운CJ씨푸드...대두박대두유설탕소맥아연알루미늄합금옥수수주석천연가스
date
2015-01-0717750.086600.074500.013500.0252000.0168500.0158500.059800.0183500.02595.0...368.532.9814.78579.411.4102145.51790.0411.419680.02.871
2015-01-1917250.088200.075100.014250.0252500.0170500.0164500.059300.0182000.02540.0...326.233.3915.33532.611.3252068.51820.0401.019280.02.995
2015-02-0317350.098000.085000.014450.0282000.0171500.0173000.060200.0200000.02525.0...340.630.8014.47513.610.3852146.51840.0401.018950.02.754
2015-02-1216500.091500.088000.014450.0295000.0170500.0177500.064700.0197000.02600.0...330.532.0115.04521.210.3302129.51785.0398.217900.02.713
2015-02-2716800.0101000.089800.014900.0304500.0186500.0173500.064300.0181000.02610.0...353.732.8013.77517.410.4702060.51800.0401.217920.02.734

5 rows × 817 columns

del TD['date']
TD.head()
corr = TD.corr(method = 'pearson')
corr_TD = corr.iloc[:-16,-15:]
corr_TD.head()
itemname구리난방유니켈대두대두박대두유설탕소맥아연알루미늄합금옥수수주석천연가스
itemname
AJ렌터카0.2534340.357430-0.1806750.491654-0.1159870.033026-0.198628-0.5519200.5781820.134875-0.1179260.3547600.391342-0.2387370.049452
AK홀딩스0.2841430.319556-0.0289060.5720110.1771180.423597-0.119239-0.4495310.4801140.331932-0.0063320.6689360.434461-0.125864-0.080484
BGF-0.840062-0.796421-0.665365-0.6636810.135000-0.1717800.1128960.434287-0.213475-0.261622-0.742201-0.2378370.009903-0.649587-0.506187
BNK금융지주0.0858140.101259-0.2387050.3524530.0750160.130878-0.052747-0.3650240.4460030.125338-0.2641360.5367340.446926-0.389719-0.135423
BYC-0.787079-0.844172-0.445330-0.7396220.107104-0.3275910.2825400.573646-0.448948-0.358966-0.559299-0.355283-0.213558-0.434541-0.579782

데이터 시각화

원자재 가격과 양의 상관관계를 갖는 주식

corr_TD.iloc[:,0].sort_values(ascending=False).head()
itemname
POSCO     0.890243
SK하이닉스    0.885247
하나금융지주    0.865733
NH투자증권    0.858859
KB금융      0.854524
Name: 구리, dtype: float64
from sklearn import preprocessing

col = ['POSCO', 'SK하이닉스', '하나금융지주', 'NH투자증권', 'KB금융', '구리']

x = FTD[col].values
min_max_scaler = preprocessing.MinMaxScaler()

x_scaled = min_max_scaler.fit_transform(x.astype(float))
cu = pd.DataFrame(x_scaled, columns = col, index = FTD.index)

cu.head()
POSCOSK하이닉스하나금융지주NH투자증권KB금융구리
date
2015-01-070.5462960.3302610.3092780.1551070.1541610.644206
2015-01-190.5138890.3241170.3711340.2749050.1950890.471332
2015-02-030.4282410.3195080.3446240.2749050.2346520.479647
2015-02-120.4629630.3187400.2430040.3064310.2032740.479647
2015-02-270.4907410.3179720.2842420.4388400.2633020.543911
plt.plot(cu['POSCO'],ls = ":", label = 'POSCO')
plt.plot(cu['SK하이닉스'],ls = ":", label = 'SK하이닉스')
plt.plot(cu['하나금융지주'],ls = ":", label = '하나금융지주')
plt.plot(cu['NH투자증권'],ls = ":", label = 'NH투자증권')
plt.plot(cu['KB금융'],ls = ":", label = 'KB금융')
plt.plot(cu['구리'], label = '구리')

plt.legend(loc = 'best')
plt.grid()
plt.show()
plt.savefig('구리.jpg')

png

<Figure size 432x288 with 0 Axes>
corr_TD.iloc[:,1].sort_values(ascending=False).head()
itemname
금호석유      0.849365
신대양제지     0.792479
SK하이닉스    0.788649
LS산전      0.786274
KG케미칼     0.779705
Name: 난방유, dtype: float64
from sklearn import preprocessing

col = ['금호석유', '신대양제지', 'SK하이닉스', 'LS산전', 'KG케미칼', '난방유']

x = FTD[col].values
min_max_scaler = preprocessing.MinMaxScaler()

x_scaled = min_max_scaler.fit_transform(x.astype(float))
cu = pd.DataFrame(x_scaled, columns = col, index = FTD.index)

cu.head()
금호석유신대양제지SK하이닉스LS산전KG케미칼난방유
date
2015-01-070.5081480.1419160.3302610.4865530.2413790.524168
2015-01-190.5555560.1125750.3241170.5452320.2959770.483911
2015-02-030.6296300.1437130.3195080.5452320.2183910.624538
2015-02-120.5407410.1598800.3187400.5599020.1781610.670546
2015-02-270.5511110.1712570.3179720.5794620.2212640.711625
plt.plot(cu['금호석유'],ls = ":", label = '금호석유')
plt.plot(cu['신대양제지'],ls = ":", label = '신대양제지')
plt.plot(cu['SK하이닉스'],ls = ":", label = 'SK하이닉스')
plt.plot(cu['LS산전'],ls = ":", label = 'LS산전')
plt.plot(cu['KG케미칼'],ls = ":", label = 'KG케미칼')
plt.plot(cu['난방유'], label = '난방유')

plt.legend(loc = 'best')
plt.grid()
plt.show()

png

corr_TD.iloc[:,13].sort_values(ascending=False).head()
itemname
카프로      0.838551
LS       0.837574
이구산업     0.816536
POSCO    0.803074
황금에스티    0.779738
Name: 주석, dtype: float64
from sklearn import preprocessing

col = ['카프로', 'LS', '이구산업', 'POSCO', '황금에스티', '주석']

x = FTD[col].values
min_max_scaler = preprocessing.MinMaxScaler()

x_scaled = min_max_scaler.fit_transform(x.astype(float))
cu = pd.DataFrame(x_scaled, columns = col, index = FTD.index)

cu.head()
카프로LS이구산업POSCO황금에스티주석
date
2015-01-070.0611840.4106690.0620350.5462960.0244150.741765
2015-01-190.2184210.3471630.0645160.5138890.0000000.694706
2015-02-030.1796050.3590180.0967740.4282410.0188200.655882
2015-02-120.1875000.3327690.1141440.4629630.0345880.532353
2015-02-270.2289470.4022020.1687340.4907410.0722280.534706
plt.plot(cu['카프로'],ls = ":", label = '카프로')
plt.plot(cu['LS'],ls = ":", label = 'LS')
plt.plot(cu['이구산업'],ls = ":", label = '이구산업')
plt.plot(cu['POSCO'],ls = ":", label = 'POSCO')
plt.plot(cu['황금에스티'],ls = ":", label = '황금에스티')
plt.plot(cu['주석'], label = '주석')

plt.legend(loc = 'best')
plt.grid()
plt.show()

png

원자재 가격과 음의 상관관계를 갖는 주식

corr_TD.iloc[:,0].sort_values(ascending=True).head()
itemname
미래아이앤지   -0.871926
BGF      -0.840062
CJ대한통운   -0.820994
한국전력     -0.798898
하이트론     -0.793636
Name: 구리, dtype: float64
col = ['미래아이앤지', 'BGF', 'CJ대한통운', '한국전력', '하이트론', '구리']

x = FTD[col].values
min_max_scaler = preprocessing.MinMaxScaler()

x_scaled = min_max_scaler.fit_transform(x.astype(float))
cu = pd.DataFrame(x_scaled, columns = col, index = FTD.index)

cu.head()
미래아이앤지BGFCJ대한통운한국전력하이트론구리
date
2015-01-070.4019040.3062230.5477390.4837400.2791880.644206
2015-01-190.3812090.3089710.5326630.3794040.2318100.471332
2015-02-030.4019040.3543070.7135680.4607050.2005080.479647
2015-02-120.3543050.3680450.6834170.5311650.3705580.479647
2015-02-270.3770700.3762880.5226130.5257450.4940780.543911
plt.plot(cu['미래아이앤지'],ls = ":", label = '미래아이앤지')
plt.plot(cu['BGF'],ls = ":", label = 'BGF')
plt.plot(cu['CJ대한통운'],ls = ":", label = 'CJ대한통운')
plt.plot(cu['한국전력'],ls = ":", label = '한국전력')
plt.plot(cu['하이트론'],ls = ":", label = '하이트론')
plt.plot(cu['구리'], label = '구리')

plt.legend(loc = 'best')
plt.grid()
plt.show()

png

corr_TD.iloc[:,1].sort_values(ascending=True).head()
itemname
BYC      -0.844172
NPC      -0.827650
동성화학     -0.823397
조일알미늄    -0.821647
미래아이앤지   -0.814216
Name: 난방유, dtype: float64
col = ['BYC', 'NPC', '동성화학', '조일알미늄', '미래아이앤지', '난방유']

x = FTD[col].values
min_max_scaler = preprocessing.MinMaxScaler()

x_scaled = min_max_scaler.fit_transform(x.astype(float))
cu = pd.DataFrame(x_scaled, columns = col, index = FTD.index)

plt.plot(cu['BYC'],ls = ":", label = 'BYC')
plt.plot(cu['NPC'],ls = ":", label = 'NPC')
plt.plot(cu['동성화학'],ls = ":", label = '동성화학')
plt.plot(cu['조일알미늄'],ls = ":", label = '조일알미늄')
plt.plot(cu['미래아이앤지'],ls = ":", label = '미래아이앤지')
plt.plot(cu['난방유'], label = '난방유')

plt.legend(loc = 'best')
plt.grid()
plt.show()

png

corr_TD.iloc[:,13].sort_values(ascending=True).head()
itemname
한세예스24홀딩스   -0.901462
쿠쿠홀딩스       -0.885435
S&T홀딩스      -0.884960
전방          -0.870637
사조해표        -0.868335
Name: 주석, dtype: float64
col = ['한세예스24홀딩스', '쿠쿠홀딩스', 'S&T홀딩스', '전방', '사조해표', '주석']

x = FTD[col].values
min_max_scaler = preprocessing.MinMaxScaler()

x_scaled = min_max_scaler.fit_transform(x.astype(float))
cu = pd.DataFrame(x_scaled, columns = col, index = FTD.index)

plt.plot(cu['한세예스24홀딩스'],ls = ":", label = '한세예스24홀딩스')
plt.plot(cu['쿠쿠홀딩스'],ls = ":", label = '쿠쿠홀딩스')
plt.plot(cu['S&T홀딩스'],ls = ":", label = 'S&T홀딩스')
plt.plot(cu['전방'],ls = ":", label = '전방')
plt.plot(cu['사조해표'],ls = ":", label = '사조해표')
plt.plot(cu['주석'], label = '주석')

plt.legend(loc = 'best')
plt.grid()
plt.show()

png


© 2020. All rights reserved.