#!/usr/bin/env python
# coding: utf-8

# # Get tables with date conversion
# ## Examples national accounts
# The national accounts tables have the property that they have the same structure but different frequency.
# Then we can easily use the same query and post it towards different tables' metadata in order to retrieve figuress
# Makroøkonomiske hovedstørrelser:
# - [09189](https://www.ssb.no/statbank/table/09189) - årlig
# - [09190](https://www.ssb.no/statbank/table/09190) - kvartalsvis
# - [11721](https://www.ssb.no/statbank/table/11721) - månedlig
#

# Import packages
import requests
import pandas as pd
from pyjstat import pyjstat  # pyjstat er for behandling av JSON-stat


# URL to tablel's metadata i PxWebApi, where we can post the query
tabid = "09190" # try to run using ID for year (09189), quarter (09190)  and month (11721)
lang = "en"  # vi kan velge "en"
url1 = "https://data.ssb.no/api/v0/" + lang + "/table/" + tabid;


# We query for annual volume change for GDP total and GDP mainland Norway for the period 2010-2029,
# API query formulated as JSON, using filter "all" for time.

query1 = {
  "query": [
    {
      "code": "Makrost",
      "selection": {
        "filter": "item",
        "values": [
          "bnpb.nr23_9",
          "bnpb.nr23_9fn"
        ]
      }
    },
    {
      "code": "ContentsCode",
      "selection": {
        "filter": "item",
        "values": [
          "Volum"
        ]
      }
    },
    {
      "code": "Tid",
      "selection": {
        "filter": "all",
        "values": [
         "201*",
          "202*"
        ]
      }
    }
  ],
  "response": {
    "format": "json-stat2"
  }
}
# --- End query --- #

# ### General function to identify frequency
# so that we can convert to date format and set [**Pandas PeriodIndex**](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#time-span-representation)

def dateConv(dataframe):
    frekvens, frek_no, frek_en, fmt = findFrequency(dataframe)
    setPeriodIndex(frekvens, frek_no, frek_en, fmt, dataframe)

# function to identify frequency
def findFrequency(dataframe):
    frekvenser = ['måned', 'kvartal', 'uke', 'år', 'year', 'quarter', 'month', 'week']
    frek_no = ''  #statistics Norway unfortunately uses Norwegian and not English frequency codes U - week etc.
    frek_en = '' # English frequency code
    fmt = '' # python date format
    for w in frekvenser:
        if w in dataframe.columns:
            if w in ['måned', 'month']:
                frek_no = 'M'
                frek_en = 'M'
                fmt = '%YM%m'
            elif w in ['kvartal', 'quarter']:
                frek_no = 'K'
                frek_en = 'Q'
            elif w in ['uke', 'week']:
                frek_no = 'U';
                frek_en = 'W'
                fmt = '%YW%W-%w'
            else:
                frek_no = ''
                frek_en = 'A'
            return w, frek_no, frek_en, fmt

# function to convert to date format and set the Pandas PeriodIndex.
def setPeriodIndex(frekvens, frek_no, freq_en, fmt, df):
    if frekvens in ['kvartal', 'quarter']:
        # Replace K with Q, convert to date format and set frequency to Pandas PeriodIndex.
        # Pandas recognizes frequency codes in English, but Statistics Norway's are in Norwegian
        df.index = pd.PeriodIndex(pd.to_datetime(df[frekvens].str.replace(frek_no, freq_en), errors='coerce'), freq='Q-DEC')
    elif frekvens in ['uke', 'week']:
        df.index = pd.PeriodIndex(pd.to_datetime(df[frekvens].str.replace(frek_no, freq_en).add('-0'), format= fmt, errors='coerce'), freq='W-MON')
    else:
        df.index =  pd.PeriodIndex(pd.to_datetime(df[frekvens], format= fmt, errors='coerce'), freq=freq_en)
# --- End functions --- #

# ### Here we post the query
res1 = requests.post(url1, json=query1)

# Reads the JSON-stat result using the library [pyjstat](https://pypi.org/project/pyjstat/)
ds1 = pyjstat.Dataset.read(res1.text)

# Write the resulting dataset to a Pandas dataframe
df1 = ds1.write('dataframe')

df1.head(7)

# Call the function dateConv that converts from category to date format.
dateConv(df1)

# This is now set as an index using Pandas PeriodIndex.
df1.index

df1.info()

# Plot using pandas groupby - gives two charts
df1.groupby('makrostørrelse').plot( use_index=True, y='value', marker="o", markersize=3)

# pivot dataset in order to get lines in one chart
df2 = df1.pivot(columns = 'makrostørrelse', values='value')

# Increase the size and set Statistics Norway's colors
df2.plot(figsize=(16, 8), color=['#1a9d49', '#1d9de2'], title="BNP årlig volumendring %")
