diff --git a/.gitignore b/.gitignore index 11614af..692947d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,9 @@ __pycache__/ *.py[cod] *$py.class +*.hdf +.idea + # C extensions *.so diff --git a/Pipfile b/Pipfile index 3746950..a167b13 100644 --- a/Pipfile +++ b/Pipfile @@ -8,6 +8,7 @@ verify_ssl = true [packages] yfinance = "*" get-all-tickers = "*" +tables = "*" [requires] python_version = "3.8" diff --git a/Pipfile.lock b/Pipfile.lock index f9362ba..1c0ce2f 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "6d68c8391a95cc9d4c416bc8550fc794d91f1fb51e7aa93621ff3ca52666ecaf" + "sha256": "070cf218be5255d1f48d5ec540dd5f7178f9bbe64faf22ff7d66168641440198" }, "pipfile-spec": 6, "requires": { @@ -51,6 +51,38 @@ ], "version": "==0.0.9" }, + "numexpr": { + "hashes": [ + "sha256:0eb8d1a949dcc3eea633438af939f406aaaf240ae69b4ab85ed0c11b8d5e77ee", + "sha256:208597cacb191fe983b4ae05dc9ae8177b17d82a0d9f34719d71ac614744f53b", + "sha256:280c316d56903d20a474c5e03c073371b8879842b8070606cef0c1ea7371933a", + "sha256:2cb778a74f315aafc8eded19781e444269bd45f4ce3095697595e5000dc20f8a", + "sha256:33a610bb775a84ab8ded0af4041df2e931ce7edf5b465ccd9851511429c86d0f", + "sha256:3d83f6f3d6d449eb82a4a5bd56b9d61c9e1ade65b1188052700171051329888b", + "sha256:4655276892b5274015377a4487e1c57cc257c666e5578e12679029cc1124fb08", + "sha256:49f835568c864b444fa6fccf64cc01ff51a6171311742451ac4a176df471f9d8", + "sha256:57b7fcf2d0a1370bc9a380f3a96f6d10e4dfab5081b61a198a8d23b80c33e634", + "sha256:57d9ccd0820b7f5b1bed5100dd54a5ae52c39eb5b7e54317ae29e31ed9bd9edf", + "sha256:583fcf614521edf6eb1326e982d6fe3951dbd451d63e51f7438f0142b491d43f", + "sha256:59984617a50369670a88a0f0b6decdf59a93828dc42e29c8851bcffcedf0695b", + "sha256:659cee220ebe4bf88cb527ca9723d7cb390e93cbae8729ff5e927d06713bad26", + "sha256:687fa9521dbafb130f42d61462f968f211f7eb364f2789c5fbe65d82809ad6b2", + "sha256:78c7040baf20036f0d85308fd5f8322e30d553b8daff1de264394014feb62cc0", + 
"sha256:7cd5369c2f8cb4bac57571e52bca1a9ccc0260567cefa39ac40680dad0e9df4c", + "sha256:81ff83abc969288673ad37055fef3e5e80cdc87f90245b76c0af9bdef6d5c509", + "sha256:841c23811b00f35b4ce2c330b57c4398ff4a61af4488ce0e013e5039bba68188", + "sha256:84d10e27833a5be6c9a61350cba2acb2f36af1e71c4d47c390b4cc80704ccb55", + "sha256:9e7dbf2a849c34f5e61f9b8119688108f7b5dec97ee8ea2946440bc69a4b28d0", + "sha256:9f91ea6385f743d5ef5ef0a074270a057115d8a4c57625800dd25b5912f563b2", + "sha256:a478e224a23609e1bef45b44a65aad2f158a3072947fc0085c231953b1fafdcd", + "sha256:b0d239d9827e1bcee08344fd05835823bc60aff97232e35a928214d03ff802b1", + "sha256:c169e1424d495b7efefe69c046cbf89ae0dc7a071a89b6b844ae328ac48fccbc", + "sha256:e518918a077478523d89060a8eb59178fd80f7f1273fe1a74088c46163fa49b5", + "sha256:e6a7d0c269a3d9e117072551e78ec5332ece7297f80acf6447d701de0328e7df", + "sha256:eb2bd8656ee2a92b2e928904d6b7ad434f559b1f74a381ff5f36ad987badd1a6" + ], + "version": "==2.7.1" + }, "numpy": { "hashes": [ "sha256:04c7d4ebc5ff93d9822075ddb1751ff392a4375e5885299445fcebf877f179d5", @@ -136,6 +168,34 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.15.0" }, + "tables": { + "hashes": [ + "sha256:169450bd11959c0e1c43137e768cf8b60b2a4f3b2ebf9a620e21865dc0c2d059", + "sha256:361da30289ecdcb39b7648c786d8185f9ab08879c0a58a3fc56dab026e122d8e", + "sha256:4628e762a8aacfa038cdae118d2d1df9a9ddd9b4a82d6993f4bcbfa7744a9f8a", + "sha256:49a972b8a7c27a8a173aeb05f67acb45fe608b64cd8e9fa667c0962a60b71b49", + "sha256:6055dd1d3ec03fd25c60bb93a4be396464f0640fd5845884230dae1deb7e6cc6", + "sha256:6e13a3eaf86f9eba582a04b44929ee1585a05dd539d207a10a22374b7e4552ca", + "sha256:7acbf0e2fb7132a40f441ebb53b53c97cee05fb88ce743afdd97c681d1d377d7", + "sha256:8c96c5d4a9ebe34b72b918b3189954e2d5b6f87cb211d4244b7c001661d8a861", + "sha256:8ea87231788cfd5773ffbe33f149f778f9ef4ab681149dec00cb88e1681bd299", + "sha256:94d7ccac04277089e3bb466bf5c8f7038dd53bb8f19ea9679b7fea62c5c3ae8f", + 
"sha256:950167d56b45ece117f79d839d5d55f0cb45bfca20290fa9dcd70255282f969e", + "sha256:9d06c5fda6657698bae4fbe841204625b501ddf2e2a77131c23f3d3ac072db82", + "sha256:acb3f905c63e437023071833744b3e5a83376dc457f413f0840d8d50dd5d402b", + "sha256:bfdbcacffec122ce8d1b0dd6ffc3c6051bedd6081e20264fa96165d43fc78f52", + "sha256:c0b97a7363941d9518573c217cb5bfe4b2b456748aac1e9420d3979f7d5e82d2", + "sha256:d95faa1174653a738ac8183a95f050a29a3f69efac6e71f70cde8d717e31af17", + "sha256:db163df08ded7804d596dee14d88397f6c55cdf4671b3992cb885c0b3890a54d", + "sha256:ea4b41ed95953ad588bcd6e557577414e50754011430c27934daf5dbd2d52251", + "sha256:eed1e030bb077476d585697e37f2b8e37db4157ff93b485b43f374254cff8698", + "sha256:f1327aeef8b6c0fec5aae9f5f5a57b2d8ec98c08495fd09471b749ea46de9eb0", + "sha256:f9c88511483c8fd39e7841fc60bc7038c96eeb87fe776092439172e1e6330f49", + "sha256:fd63c94960f8208cb13d41033a3114c0242e7737cb578f2454c6a087c5d246ec" + ], + "index": "pypi", + "version": "==3.6.1" + }, "urllib3": { "hashes": [ "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a", diff --git a/main.py b/main.py index ab8ae52..3c32127 100644 --- a/main.py +++ b/main.py @@ -3,9 +3,10 @@ import os import pandas import yfinance as yf from get_all_tickers.get_tickers import get_tickers +from pandas import DataFrame PROJ_PATH = os.path.dirname(__file__) -ticker_data_file = os.path.join(PROJ_PATH, "ticker_data_NYSE.csv") +ticker_data_file = os.path.join(PROJ_PATH, "ticker_data_NYSE.hdf") def main(): @@ -23,10 +24,72 @@ def main(): # data.to_csv(ticker_data_file) -def transform_data(): - data = pandas.read_csv(ticker_data_file) +def test(): + nyse_tickers = get_tickers(NASDAQ=False, AMEX=False) + print(nyse_tickers) + # data = yf.download(tickers="SPL AAPL", start="2017-01-01", end="2017-04-30", interval="60m", ) + data = yf.download(tickers="SPL AAPL", start="2017-01-01", end="2017-04-30", interval="1d", ) + print(data) + print(data.keys()) + data.to_hdf(ticker_data_file, key='Date') + exit() + + 
def download_test_data():
    """Download daily price history and cache it in ``ticker_data_file``.

    The ticker list comes from ``get_tickers(NASDAQ=False, AMEX=False)`` and
    is passed to ``yf.download`` as one space-separated string.  When the
    cache file already exists nothing is downloaded and the function returns
    after printing a notice.
    """
    if os.path.exists(ticker_data_file):
        print("file already exists, won't download")
        return

    nyse_tickers = get_tickers(NASDAQ=False, AMEX=False)
    history = yf.download(tickers=" ".join(nyse_tickers), interval="1d")

    print("storing data ...")
    history.to_hdf(ticker_data_file, key="Date")
    print("storing data Done")


def transform_data() -> DataFrame:
    """Read the cached ticker data from ``ticker_data_file``.

    Returns:
        Whatever ``pandas.read_hdf`` loads from the cache file.

    Raises:
        FileNotFoundError: propagated from ``pandas.read_hdf`` when the cache
            file is missing; the script entry point below relies on this.
    """
    print("reading data ...")
    data = pandas.read_hdf(ticker_data_file)
    print("reading done ...")

    return data


def get_day_x_delta_data(df: DataFrame, day_delta: int = 1,
                         absolute: bool = False) -> DataFrame:
    """Return the row-over-row change of every column of *df*.

    Args:
        df: Frame whose rows are ordered observations (one row per step).
        day_delta: Number of rows to look back when computing the change.
        absolute: When true, return the plain difference; otherwise return
            the change as a percentage.

    Returns:
        A frame shaped like *df*.  Column names get the suffix
        ``_<day_delta>d`` (absolute) or ``_<day_delta>d%`` (percentage).
        The first ``day_delta`` rows are NaN because they have no earlier
        row to compare against.
    """
    delta = df.diff(day_delta)
    if absolute:
        return delta.add_suffix(f"_{day_delta}d")

    # A percentage change is measured against the *earlier* value (the base),
    # not the current one: going from 1 to 2 is +100%, not +50%.
    base = df.shift(day_delta)
    return (delta * 100 / base).add_suffix(f"_{day_delta}d%")


if __name__ == '__main__':
    # main()
    try:
        data = transform_data()
    except FileNotFoundError:
        # No cache yet: fetch it once, then read it back.
        download_test_data()
        data = transform_data()

    print("keys:")
    print(data.keys())
    print("index:")
    print(data.index)

    print(data)

    print(get_day_x_delta_data(data, 1, True))
    print(get_day_x_delta_data(data, 1))
    # The script ends here.  The original called the interactive-only
    # ``exit()`` helper at this point; the statements after it could never
    # run and have been dropped.