rm(list=ls())
library(tidyverse)
## -- Attaching packages ------------------------------------------------------ tidyverse 1.3.0 --
## √ ggplot2 3.2.1     √ purrr   0.3.3
## √ tibble  2.1.3     √ dplyr   0.8.3
## √ tidyr   1.0.0     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.4.0
## -- Conflicts --------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
#path to the folder that contains all the datasets
datafolder <- "oecd-data/"
( filesinfolder <- list.files(datafolder) )
##  [1] "Albania"            "Algeria"            "Armenia"           
##  [4] "Austria"            "Belarus"            "Belgium"           
##  [7] "Bulgaria"           "Cyprus"             "Egypt"             
## [10] "France"             "Germany"            "Hondura"           
## [13] "Hungary"            "Iran"               "Iraq"              
## [16] "Israel"             "Italy"              "Jordan"            
## [19] "Kosovo"             "Kuwait"             "Lebanon"           
## [22] "Libya"              "Malta"              "Morocco"           
## [25] "Oman"               "Poland"             "Portugal"          
## [28] "Quatar"             "Romania"            "Russian Federetion"
## [31] "Saudi"              "secondstudygroup"   "Servia"            
## [34] "Slovak Republic"    "Slovania"           "Spain"             
## [37] "Syria"              "Tunisia"            "Ukraine"
#Checking all the country files through the for loop
for(i in 1:length(filesinfolder)) {
  this_folder <- filesinfolder[i]
  this_folder_path <- paste(datafolder, this_folder,sep="")
  this_folder_files <- list.files(this_folder_path)
    for(j in 1:length(this_folder_files)){
      print(this_folder_files[j])
    }
}
## [1] "~$Albania.xlsm"
## [1] "92686511-4ee6-405b-a5e2-57246e69e3f3.xlsx"
## [1] "a7b96d74-521b-4ea8-b2b4-a8ddfb789530.xlsx"
## [1] "c6ca1ce3-f839-4d6e-abe2-0cda0e1d9c42.xlsx"
## [1] "c926cb2f-61bc-46e6-a572-126ccec076ff.xlsx"
## [1] "d5aa03f8-6fa9-4e87-8449-21d83f3bd646.xlsx"
## [1] "06cac31f-5ca9-4dce-bb14-54a2bc2c788e.xlsx"
## [1] "3277d5f5-3c39-4713-9321-2318ed8439b0.xlsx"
## [1] "597e9686-dd6f-496e-a5b0-f91c14b1cc25.xlsx"
## [1] "e75fb9fd-6fda-4dcf-91d3-bca924588944.xlsx"
## [1] "e7f0c675-ebc6-4fdd-a5a2-3ebf95041e47.xlsx"
## [1] "3c7db986-2f29-4619-9370-79a20da74fcb.xlsx"
## [1] "60644764-faea-4449-b5d6-4c44c7410987.xlsx"
## [1] "81f8636c-1f9a-4859-a886-6bfafb5f08aa.xlsx"
## [1] "a9276f77-ad1c-4a2f-b2ba-4df78397aa29.xlsx"
## [1] "d04e6c95-ae6a-4cf9-9e0e-191cc5e914b9.xlsx"
## [1] "45bcbb29-9ae8-4e20-906d-961618423476.xlsx"
## [1] "66d317b7-2f7f-4760-a955-de2e794b0497.xlsx"
## [1] "a06bc1ea-bb9a-42ac-8dad-a6ad2f58a6e8.xlsx"
## [1] "bb02d3d2-56a8-4a99-9cb4-7c0e0a40c1d2.xlsx"
## [1] "f575a792-e669-4179-a4b9-ec96a380c138.xlsx"
## [1] "1e4a0ec1-e1b3-49fc-9e25-330808adb6bf.xlsx"
## [1] "8d9f7276-e5db-4bab-aad6-0855fe3ace5c.xlsx"
## [1] "c94bff56-5ff8-4ce1-a863-73ed778e49af.xlsx"
## [1] "ee974651-8db8-496e-b01a-b9b38b830752.xlsx"
## [1] "f7d5bcad-4241-4e4f-83c4-6cfc9331ee78.xlsx"
## [1] "02af3eb7-f807-43bd-948a-0d2a816da0ad.xlsx"
## [1] "0b1d17e4-154b-49af-876c-d690ca4aed80.xlsx"
## [1] "234890a4-c871-401c-b04d-32c00a451ad8.xlsx"
## [1] "d293052c-a9cc-46f9-a2f4-e028a7d79958.xlsx"
## [1] "f368c5d3-f2d3-4c4b-bb1d-a4f39c15f862.xlsx"
## [1] "1f25460d-dc8d-4f29-be2e-48bdb4ad00bf.xlsx"
## [1] "1f694aa9-275e-4f72-be8c-52a8ef5e6b54.xlsx"
## [1] "7da8c8c7-0c86-44be-9c6c-d292ccd2d4f0.xlsx"
## [1] "80d397c4-fd5c-423c-afdc-f652e805287d.xlsx"
## [1] "f4b9890b-8bc6-4e5e-843f-eebb0862f7a4.xlsx"
## [1] "37351b7e-c29a-4e76-a375-886dede2959b.xlsx"
## [1] "476c4a6d-7443-4d05-bc7b-0042907d98cc.xlsx"
## [1] "d50bd147-fe70-4c1f-a802-ce87a6f52267.xlsx"
## [1] "d6b84cdf-a54e-43cb-999a-6ea8478d58b5.xlsx"
## [1] "f2028226-9beb-4548-be36-4ce15f3b4575.xlsx"
## [1] "01be735c-61dd-4bce-b5cf-9419cd2686f8.xlsx"
## [1] "0a7f9435-dcca-49c0-9350-960b0e092de6.xlsx"
## [1] "4cf3375d-c634-44bc-a9ab-890ee574279b.xlsx"
## [1] "5a81e301-fd82-4f36-aedf-b72562178dff.xlsx"
## [1] "a4246aea-adf8-4798-a504-8dda5639583a.xlsx"
## [1] "1ce7e491-dbb5-4636-a1b0-9ab6fb5a107e.xlsx"
## [1] "34bf6bc1-b3a9-4306-a493-9d7d56478b03.xlsx"
## [1] "480585ac-f6c1-4a78-9bdb-07251bf20821.xlsx"
## [1] "6205ddd4-677c-4615-a339-711f9b2ceceb.xlsx"
## [1] "f3facbb7-4994-49f0-ac26-d5b837be887c.xlsx"
## [1] "35133d99-a53a-4384-8af7-f5b22384a000.xlsx"
## [1] "765de670-826f-4af1-8fab-3a5d0a89cba7.xlsx"
## [1] "97a5bb1c-89c4-45be-a4c4-ad98ad2c56f8.xlsx"
## [1] "e5162968-84d4-4598-9d77-354a83b8001a.xlsx"
## [1] "f46d3807-0be3-4962-a9f8-d68a400902e7.xlsx"
## [1] "0ae43ea2-4986-4b4c-9ca8-36f43710692c.xlsx"
## [1] "1a8e51ba-975d-4f84-8bef-9a307ddb81cb.xlsx"
## [1] "4425c09e-c4c2-4555-8669-6bede08ada26.xlsx"
## [1] "4b8b7266-93f6-451d-a2d4-ff382c902fee.xlsx"
## [1] "9e0df853-aad5-46e9-8ad8-e5df59c89b65.xlsx"
## [1] "2b2c7a7b-ecc1-42f5-bdb2-4a293fb70c0a.xlsx"
## [1] "4f9580c1-6139-4846-baa3-94812e1e642d.xlsx"
## [1] "b3d5faf8-8175-475e-8c80-a87f826a2a72.xlsx"
## [1] "ead88e08-eab0-483d-b9b3-7a2f93c82679.xlsx"
## [1] "f56ab13c-38d7-4816-9acc-94f84ea83a47.xlsx"
## [1] "2eb3b345-e663-482a-b9d7-6c3eec2823da.xlsx"
## [1] "3c56fc47-a589-40fd-b586-9372a39b1a8a.xlsx"
## [1] "786e0b12-ca8b-48fc-ab13-095c7655e136.xlsx"
## [1] "9ef6a3d2-861a-4f6c-b749-64a5d8577a07.xlsx"
## [1] "c80a7151-78f8-47f0-9789-0b388b19000d.xlsx"
## [1] "05d4f86a-babf-4864-a4de-170aee6157ad.xlsx"
## [1] "3a9cb239-f1f7-45fa-ad96-567547f1e11b.xlsx"
## [1] "45f9a39b-bb5c-493e-b807-8b64f1f39310.xlsx"
## [1] "a6f8d093-9b85-4f23-af28-e859ea57292b.xlsx"
## [1] "b0dbfeb8-634c-47f2-bea5-eade7b07dfc2.xlsx"
## [1] "2ffc3de9-b58d-4216-8ca3-c6c174fdc245.xlsx"
## [1] "5b778cab-93d4-4d84-b3bc-10b0d1030e0b.xlsx"
## [1] "62b2dc7c-133c-4933-8354-99d23bd383ae.xlsx"
## [1] "89cc3d72-63e7-4a8e-9ec1-bd2aeb746dbe.xlsx"
## [1] "9e2abb70-5a86-4712-9a7f-39194c6c60d1.xlsx"
## [1] "054fc2cf-d19f-426c-b42c-d3d58fd974eb.xlsx"
## [1] "075e591d-1790-4879-b6ca-cabfe1f97fa8.xlsx"
## [1] "22c12d8e-0708-47ca-8ea9-b5403068bec7.xlsx"
## [1] "3b6aeef9-ad5f-4895-9255-0a75907acb3e.xlsx"
## [1] "f5ad25d9-c494-4505-9f23-478d9bdcaaaf.xlsx"
## [1] "0df8069d-8bcf-4ebb-bea7-2cb1bd0af630.xlsx"
## [1] "2949f63f-0c55-4557-84e9-db4fdf4c9324.xlsx"
## [1] "93798aea-e62c-40a9-935e-b8b76beb54b1.xlsx"
## [1] "9b0d5dfd-1785-495c-bcca-39b99b03f92e.xlsx"
## [1] "a8c7123e-9bc3-4a98-8712-ee30be771d54.xlsx"
## [1] "04c16fe5-ad16-45c9-b32d-ffc3f66881f1.xlsx"
## [1] "6721aadd-35be-4e8a-9e1d-b04eabba5f98.xlsx"
## [1] "97ecdcff-62c3-4cae-b2b7-2e13d6360387.xlsx"
## [1] "c2348e42-73ba-4f7e-b667-cac6b3e6aa06.xlsx"
## [1] "c43bde60-08ff-4de0-b494-6b44366aaef2.xlsx"
## [1] "09c58218-5e93-4391-bf34-75b7347844b3.xlsx"
## [1] "33cb38ae-574c-462a-9bbd-82920aca1183.xlsx"
## [1] "bb8ac175-8353-4095-a278-488758721c6d.xlsx"
## [1] "c8fd3775-0f63-4d57-bd8d-69c05dd806b2.xlsx"
## [1] "e57bab21-28c2-4bd4-892a-9712f215eb53.xlsx"
## [1] "53b28cf5-35d3-4ec8-8d8e-eb6378c9e4a3.xlsx"
## [1] "7d4a096c-af7d-40f6-ac74-d4465a6d963c.xlsx"
## [1] "82b69a3e-2e17-4f6a-b8d0-e5260e744576.xlsx"
## [1] "9fbefac0-6572-43cc-93ef-ea26556024c3.xlsx"
## [1] "c51fbfbb-32d8-465d-8232-2f41591441cd.xlsx"
## [1] "081d7e25-b59c-4d4a-9122-5aea755a9e5d.xlsx"
## [1] "143992c3-237c-4cb3-97b5-f4df0929bae4.xlsx"
## [1] "21e95e82-256b-4bcb-aecf-3c048a4abbd0.xlsx"
## [1] "b50fcf23-1141-4678-93aa-67cccd8b0936.xlsx"
## [1] "cc25f523-a395-470f-a7f0-88df64ce519b.xlsx"
## [1] "2689efd8-a8d4-44c2-9f1e-ce925b974d79.xlsx"
## [1] "67f0fad3-26f1-4f8b-8b31-66516514d7e0.xlsx"
## [1] "7ba0f914-b65c-411e-b1b7-9749dd6caca9.xlsx"
## [1] "a8613797-c149-4f07-9d8d-d83026519bb5.xlsx"
## [1] "d5a6a1db-09f8-46fb-9c80-0f1770619160.xlsx"
## [1] "20562d2a-be31-43bc-9af0-c7a8754da5bc.xlsx"
## [1] "36c79a06-e85e-4fb0-9db5-3a3087d6bfb0.xlsx"
## [1] "6999c66a-40ad-440f-ae31-8505c2702fb2.xlsx"
## [1] "751050d0-adfa-4125-be60-abcfc230ee66.xlsx"
## [1] "8b4ec3f9-7e55-4656-bc3f-af39598d5f42.xlsx"
## [1] "23115df3-6ae9-4fb6-ae78-1b0c9a26f7c6.xlsx"
## [1] "5ad12929-74c9-4aec-a4df-b235a469f975.xlsx"
## [1] "7f8b9d38-0657-4164-94d5-99872ab98735.xlsx"
## [1] "b03b1bae-d86d-45b6-a840-5d792b6fcb3c.xlsx"
## [1] "ee1e6432-a8d4-4802-a5ab-1cace4df4bdd.xlsx"
## [1] "19910347-445a-45bc-864f-459d9a21bde2.xlsx"
## [1] "75037c16-1aa8-404e-96e0-cf0076bda9cc.xlsx"
## [1] "90c7991c-8836-4b35-bb38-499941a41bfc.xlsx"
## [1] "d316f9e2-cbf1-4fda-8d23-e7d93e63270a.xlsx"
## [1] "e4444e39-1ae8-4516-bae7-77f8f5fdbcf0.xlsx"
## [1] "331aa43e-4851-4d3f-a7dd-cdd4ab5f5694.xlsx"
## [1] "3ed54652-21b3-41d6-b42b-5f4a3fd62574.xlsx"
## [1] "532d9d07-ef5e-41d4-be48-187249e38204.xlsx"
## [1] "8cc7bdbf-5402-40e4-b310-e1dda2bcb064.xlsx"
## [1] "c8570cc4-6ac3-461b-baaa-048de5cd2238.xlsx"
## [1] "1ae43514-1c76-46df-988a-7351547ce8f0.xlsx"
## [1] "2437bd20-5d22-4b12-8a1c-8e574a6ffa7d.xlsx"
## [1] "260e1744-494b-4754-bd61-b8c391592a67.xlsx"
## [1] "d29fa061-6bf3-4742-b60d-d92de707c279.xlsx"
## [1] "ea52a9ee-4403-4678-8da6-368a9b542aed.xlsx"
## [1] "27b4efc5-63ef-46ba-9564-8054b1180684.xlsx"
## [1] "70dc5779-c596-42cd-ba8f-ac7ffbf39232.xlsx"
## [1] "98f8e50c-2692-4060-bf65-3df72a0bdbc6.xlsx"
## [1] "9b3aa487-fc03-45ef-bfe8-6f04231d4824.xlsx"
## [1] "d0a56f0c-9af4-4df7-bf58-c12632cd4515.xlsx"
## [1] "4ab27b56-0918-453e-b5d0-cc0ffa2b1786.xlsx"
## [1] "6852000d-d1a3-447e-b533-af2b99df7971.xlsx"
## [1] "7c011891-9024-4419-bec1-3f5bd988aa9b.xlsx"
## [1] "9d4fccc1-54f2-459a-825a-f20f7e7e4dd2.xlsx"
## [1] "d4d075bd-d264-434d-9315-41e1f33291b0.xlsx"
## [1] "1919c8f4-efee-4825-add0-05af3aa8bcf9.xlsx"
## [1] "36ec70e6-e425-4532-94d4-dad89221af53.xlsx"
## [1] "85b1c7bb-07a8-4d22-88e1-c4d7b5c82ef5.xlsx"
## [1] "9e2e4ec1-7d29-4d14-872a-878407a29020.xlsx"
## [1] "ef3e01fa-c7c0-4cf8-8e98-f84c16ac7f83.xlsx"
## [1] NA
## character(0)
## [1] "25783787-ea19-4b3e-9bd8-3d67967c70f1.xlsx"
## [1] "4249c60e-3598-42e4-b541-9337b9c07796.xlsx"
## [1] "51c6ecf4-5684-4aba-864d-bc822c07e90c.xlsx"
## [1] "7e4f2304-fea7-4a87-823a-d656d821cf4f.xlsx"
## [1] "f8600021-f10c-453f-9b8b-9034449c7200.xlsx"
## [1] "0836b083-929c-4615-8250-1855b8927904.xlsx"
## [1] "3a1ff966-e37b-4b6d-9476-e0afdd264aff.xlsx"
## [1] "40ba69c6-d316-4009-ae29-230f2f3fdfcf.xlsx"
## [1] "6a522c1c-7286-461a-bcba-1ee91a21edfe.xlsx"
## [1] "f1c2dab1-c022-4131-beda-1ff8dc0a919e.xlsx"
## [1] "17687bef-d17d-46a5-90ae-3104f8d431bf.xlsx"
## [1] "3f21f55a-0bc6-403e-8e6f-d815b9121094.xlsx"
## [1] "44a993a1-8f51-4f1c-bdb3-fc0cf436fe4d.xlsx"
## [1] "4c1af220-9a95-4c6a-b6dd-5abe0a2b55e5.xlsx"
## [1] "6e21903e-8caa-4e76-a810-79b51641880f.xlsx"
## [1] "4da1b377-5ed2-4351-b7d2-4b3fd27989e6.xlsx"
## [1] "788bf587-a65e-40e2-b1ff-6ee3af87cfbf.xlsx"
## [1] "b6cd014c-584a-4ee2-b668-b45b2bf9a168.xlsx"
## [1] "eba9d40f-affa-4a3e-9ff3-043302412bab.xlsx"
## [1] "fc7f8de7-6e2b-47d6-8a4c-844715261fb4.xlsx"
## [1] "2c9e7b9b-1aa7-4969-b27a-ba3f6226692c.xlsx"
## [1] "50424b5b-b2ed-4fb4-b403-8ade8095b3aa.xlsx"
## [1] "9453702b-5b55-4256-93aa-cecac92e412f.xlsx"
## [1] "aaf65f32-135a-45da-bc31-159974f8e535.xlsx"
## [1] "bde34577-29b9-45ae-949a-0179ed3f5a8e.xlsx"
## [1] "50646ee3-2f15-45e2-b027-60d45d08418d.xlsx"
## [1] "5b84a526-2e9a-40df-959f-00fc9693339b.xlsx"
## [1] "989bdce8-33ec-4288-8f6c-b7ac5ca3bae3.xlsx"
## [1] "e0dfe81b-f8da-4f76-b63e-53b764cb5486.xlsx"
## [1] "f92251bd-b6b5-4923-846b-2e6b923fb021.xlsx"
## [1] "3b740b98-5e10-45f4-b36c-502cb2c19129.xlsx"
## [1] "924a4bb9-2a25-4551-a5dc-5596c44a62a1.xlsx"
## [1] "ac3f1438-29aa-4a2b-b30d-6fbf35cc6be3.xlsx"
## [1] "c27803df-11e4-430a-bcfb-ec05f6edb223.xlsx"
## [1] "d2aa0d27-49a4-4f8c-a538-d3c7c3ec5d13.xlsx"
Problems found
secondstudygroup$Albania.xlsmThis report will now read all the excel files in each country folder while solving the two problems mentioned above at the same time. In order to combine all the excel file, this report will store each dataframe of excel file to an empty list called l and then combine all the data in the list. Flow data will be read in the same way and stored in the other list called flow, which will be used to assign flow information to each section later. Meanwhile, two empty vectors will be made: number_of_rows and filesinfolder_real. number_of_rows vector stores the number of rows of each excel file so that it can be used to divide the sectors for flow and country information. filesinfolder_real vector stores country names dropping the unnecessary folder name “secondstudygroup”. Then, it can be used to assign the country names to each section of country.
l <- list()
number_of_rows <- vector('numeric')
flow <- list()
filesinfolder_real <- vector("character")
#The first for loop to open each country folder in the datafolder
for(i in 1:length(filesinfolder)) {
  this_folder <- filesinfolder[i]
  this_folder_path <- paste(datafolder, this_folder,sep="")
  this_folder_files <- list.files(this_folder_path)
  #if statement to skip the empty folder "secondstudygroup"
  if (!length(this_folder_files) == 0) {
    filesinfolder_real <- c(filesinfolder_real, this_folder)
    #The second for loop to load each excel file in a country folder
    for(j in 1:length(this_folder_files)){
      #if statement not to read unnecessary file "~$Albania.xlsm"
      if (grepl(".xlsx", this_folder_files[j])) {
        data <- read_excel(paste(this_folder_path,"/",this_folder_files[j], sep=""), skip = 5)
        data_flow <- read_excel(paste(this_folder_path,"/",this_folder_files[j], sep=""), range = cell_rows(3), col_names = FALSE)[,3]
        
        l <- append(l, list(data))
        # there are 4 unnecessary rows in each excel file which must be dropped later
        number_of_rows <- c(number_of_rows, nrow(data)-4)
        flow <- append(flow, list(data_flow))
      }
    }
  }
}
#binding all dataframes in the list in a row wise
dt <- do.call("rbind", l)
#binding all flow dataframes 
flow_dt <- do.call("rbind", flow)
print(head(dt))
## # A tibble: 6 x 67
##   Product ...2  `Additives/blen~ Anthracite `Aviation gasol~ BKB   Biodiesels
##   <chr>   <chr> <chr>            <chr>      <chr>            <chr> <chr>     
## 1 Time    <NA>  <NA>             <NA>       <NA>             <NA>  <NA>      
## 2 1971    <NA>  0                ..         0                0     0         
## 3 1972    <NA>  0                ..         0                0     0         
## 4 1973    <NA>  0                ..         0                0     0         
## 5 1974    <NA>  0                ..         0                0     0         
## 6 1975    <NA>  0                ..         0                0     0         
## # ... with 60 more variables: Biogases <chr>, Biogasoline <chr>, Bitumen <chr>,
## #   `Blast furnace gas` <chr>, `Brown coal (if no detail)` <chr>,
## #   Charcoal <chr>, `Coal tar` <chr>, `Coke oven coke` <chr>, `Coke oven
## #   gas` <chr>, `Coking coal` <chr>, `Crude oil` <chr>, `Crude/NGL/feedstocks
## #   (if no detail)` <chr>, `Elec/heat output from non-specified manufactured
## #   gases` <chr>, Electricity <chr>, Ethane <chr>, `Fuel oil` <chr>, `Gas
## #   coke` <chr>, `Gas works gas` <chr>, `Gas/diesel oil excl. biofuels` <chr>,
## #   `Gasoline type jet fuel` <chr>, Geothermal <chr>, `Hard coal (if no
## #   detail)` <chr>, Heat <chr>, `Heat output from non-specified combustible
## #   fuels` <chr>, Hydro <chr>, `Industrial waste` <chr>, `Kerosene type jet
## #   fuel excl. biofuels` <chr>, Lignite <chr>, `Liquefied petroleum gases
## #   (LPG)` <chr>, Lubricants <chr>, `Motor gasoline excl. biofuels` <chr>,
## #   `Municipal waste (non-renewable)` <chr>, `Municipal waste
## #   (renewable)` <chr>, Naphtha <chr>, `Natural gas` <chr>, `Natural gas
## #   liquids` <chr>, `Non-specified primary biofuels and waste` <chr>,
## #   Nuclear <chr>, `Oil shale and oil sands` <chr>, `Other bituminous
## #   coal` <chr>, `Other hydrocarbons` <chr>, `Other kerosene` <chr>, `Other
## #   liquid biofuels` <chr>, `Other oil products` <chr>, `Other recovered
## #   gases` <chr>, `Other sources` <chr>, `Paraffin waxes` <chr>, `Patent
## #   fuel` <chr>, Peat <chr>, `Peat products` <chr>, `Petroleum coke` <chr>,
## #   `Primary solid biofuels` <chr>, `Refinery feedstocks` <chr>, `Refinery
## #   gas` <chr>, `Solar photovoltaics` <chr>, `Solar thermal` <chr>,
## #   `Sub-bituminous coal` <chr>, `Tide, wave and ocean` <chr>, `White spirit &
## #   SBP` <chr>, Wind <chr>
print(head(flow_dt))
## # A tibble: 6 x 1
##   ...3                       
##   <chr>                      
## 1 Imports                    
## 2 Losses                     
## 3 Production                 
## 4 Exports                    
## 5 Total primary energy supply
## 6 Imports
This report will manipulate the combined dataframe in an appropriate form to be analysed in the future.
#dropping unnecessary 4 rows using the values in the "Product" column and and the second column which is empty
row_to_drop <- which(grepl("Time|Data|Legend|x", dt$Product))
dt <- dt[-row_to_drop, -2]
#Changing the column name "Product" to the appropriate name "Year"
colnames(dt)[1] <- "Year"
print(head(dt))
## # A tibble: 6 x 66
##   Year  `Additives/blen~ Anthracite `Aviation gasol~ BKB   Biodiesels Biogases
##   <chr> <chr>            <chr>      <chr>            <chr> <chr>      <chr>   
## 1 1971  0                ..         0                0     0          0       
## 2 1972  0                ..         0                0     0          0       
## 3 1973  0                ..         0                0     0          0       
## 4 1974  0                ..         0                0     0          0       
## 5 1975  0                ..         0                0     0          0       
## 6 1976  0                ..         0                0     0          0       
## # ... with 59 more variables: Biogasoline <chr>, Bitumen <chr>, `Blast furnace
## #   gas` <chr>, `Brown coal (if no detail)` <chr>, Charcoal <chr>, `Coal
## #   tar` <chr>, `Coke oven coke` <chr>, `Coke oven gas` <chr>, `Coking
## #   coal` <chr>, `Crude oil` <chr>, `Crude/NGL/feedstocks (if no
## #   detail)` <chr>, `Elec/heat output from non-specified manufactured
## #   gases` <chr>, Electricity <chr>, Ethane <chr>, `Fuel oil` <chr>, `Gas
## #   coke` <chr>, `Gas works gas` <chr>, `Gas/diesel oil excl. biofuels` <chr>,
## #   `Gasoline type jet fuel` <chr>, Geothermal <chr>, `Hard coal (if no
## #   detail)` <chr>, Heat <chr>, `Heat output from non-specified combustible
## #   fuels` <chr>, Hydro <chr>, `Industrial waste` <chr>, `Kerosene type jet
## #   fuel excl. biofuels` <chr>, Lignite <chr>, `Liquefied petroleum gases
## #   (LPG)` <chr>, Lubricants <chr>, `Motor gasoline excl. biofuels` <chr>,
## #   `Municipal waste (non-renewable)` <chr>, `Municipal waste
## #   (renewable)` <chr>, Naphtha <chr>, `Natural gas` <chr>, `Natural gas
## #   liquids` <chr>, `Non-specified primary biofuels and waste` <chr>,
## #   Nuclear <chr>, `Oil shale and oil sands` <chr>, `Other bituminous
## #   coal` <chr>, `Other hydrocarbons` <chr>, `Other kerosene` <chr>, `Other
## #   liquid biofuels` <chr>, `Other oil products` <chr>, `Other recovered
## #   gases` <chr>, `Other sources` <chr>, `Paraffin waxes` <chr>, `Patent
## #   fuel` <chr>, Peat <chr>, `Peat products` <chr>, `Petroleum coke` <chr>,
## #   `Primary solid biofuels` <chr>, `Refinery feedstocks` <chr>, `Refinery
## #   gas` <chr>, `Solar photovoltaics` <chr>, `Solar thermal` <chr>,
## #   `Sub-bituminous coal` <chr>, `Tide, wave and ocean` <chr>, `White spirit &
## #   SBP` <chr>, Wind <chr>
#making empty column for Country and Flow
dt <- dt %>% mutate(Country = NA, Flow = NA)
#Each country folder has 5 excel files
number_of_rows_per_country <- tapply(number_of_rows, (seq_along(number_of_rows)-1) %/% 5, sum)
cumsum_rows_per_country <- cumsum(number_of_rows_per_country)
#Assigning country names to each section
dt[1:215,]$Country <- filesinfolder_real[1]
for (i in 2:length(filesinfolder_real)) {
  start <- cumsum_rows_per_country[i-1]+1
  end <- cumsum_rows_per_country[i]
  dt[start:end,]$Country <- filesinfolder_real[i]
} 
cumsum_rows <- cumsum(number_of_rows)
#Assigning flow values to each section
flow1 <- pull(flow_dt[1,])
dt[1:43,]$Flow <- flow1
for (k in 2:length(number_of_rows)){
  start <- cumsum_rows[k-1]+1
  end <- cumsum_rows[k]
  dt[start:end,]$Flow <- pull(flow_dt[k,])
}
#gathering the column names of product into the single column "product"
dt_gathered <- gather(dt, "product", "value", 2:66)
print(head(dt_gathered))
## # A tibble: 6 x 5
##   Year  Country Flow    product                       value
##   <chr> <chr>   <chr>   <chr>                         <chr>
## 1 1971  Albania Imports Additives/blending components 0    
## 2 1972  Albania Imports Additives/blending components 0    
## 3 1973  Albania Imports Additives/blending components 0    
## 4 1974  Albania Imports Additives/blending components 0    
## 5 1975  Albania Imports Additives/blending components 0    
## 6 1976  Albania Imports Additives/blending components 0
#Assign NA values and changing columns order
dt_gathered[dt_gathered == ".."] <- NA
dt_gathered <- dt_gathered[,c(2,1,3,4,5)]
print(head(dt_gathered))
## # A tibble: 6 x 5
##   Country Year  Flow    product                       value
##   <chr>   <chr> <chr>   <chr>                         <chr>
## 1 Albania 1971  Imports Additives/blending components 0    
## 2 Albania 1972  Imports Additives/blending components 0    
## 3 Albania 1973  Imports Additives/blending components 0    
## 4 Albania 1974  Imports Additives/blending components 0    
## 5 Albania 1975  Imports Additives/blending components 0    
## 6 Albania 1976  Imports Additives/blending components 0
#the total number of records in the dataset
print(nrow(dt_gathered))
## [1] 573950
#the total number of records for each product across countries across years
head(dt_gathered %>% 
  group_by(Country, Year, product) %>%
    summarize(number_of_records = n()))
## # A tibble: 6 x 4
## # Groups:   Country, Year [1]
##   Country Year  product                       number_of_records
##   <chr>   <chr> <chr>                                     <int>
## 1 Albania 1971  Additives/blending components                 5
## 2 Albania 1971  Anthracite                                    5
## 3 Albania 1971  Aviation gasoline                             5
## 4 Albania 1971  Biodiesels                                    5
## 5 Albania 1971  Biogases                                      5
## 6 Albania 1971  Biogasoline                                   5