-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfront_end_script.R
More file actions
77 lines (64 loc) · 3.67 KB
/
front_end_script.R
File metadata and controls
77 lines (64 loc) · 3.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
div_code <- FALSE
asip_code <- FALSE
`%notin%` <- Negate(`%in%`)
#need to ask you a few questions
#where do you want the file saved?
setwd(rstudioapi::selectDirectory())
#what do you want the file named?
file_name <- dlg_input(message = "Name of file? (without extension)")$res
#what type of file?
current_export_list <- c('Stata', 'CSV', 'SPSS','SAS')
file_type <- dlg_list(current_export_list, multiple = F)$res
#sample start year
year_start <- dlg_input(message = "What year should your sample start? (e.g., 2000)")$res %>% as.numeric()
year_end <- dlg_input(message = "What year should your sample end? (e.g., 2000)")$res %>% as.numeric()
#enter exchanges you would like to pull from
#exchg_list <- dlg_input(message = "Enter Stock Exchange Codes -- default is NYSE and US NASDAQ", default = "11,14,15", gui = .GUI)$res
#exchg_list <- regmatches(exchg_list, gregexpr("[[:digit:]]+", exchg_list))
#exchg_list <- as.numeric(unlist(exchg_list))
print('*******Database pull has begun. This will take some time (5-10 minutes), please be patient.*******')
###adapted from https://wrds-www.wharton.upenn.edu/, connects you to WRDS DB
print("Please enter your WRDS credentials.")
wrds <- dbConnect(Postgres(),
host='wrds-pgdata.wharton.upenn.edu',
port=9737,
dbname='wrds',
sslmode='require',
user=rstudioapi::askForPassword("WRDS Username") %>% tolower(),
password=rstudioapi::askForPassword("WRDS password"))
if(!exists('wrds')){
print('You did not enter your WRDS credentials accurately. Please try again.')
}
if(!exists('wrds')){
wrds <- dbConnect(Postgres(),
host='wrds-pgdata.wharton.upenn.edu',
port=9737,
dbname='wrds',
sslmode='require',
user=rstudioapi::askForPassword("WRDS username") %>% tolower(),
password=rstudioapi::askForPassword("WRDS password"))
}
if(!exists('wrds')){
print('Your WRDS creentials are still not accurate. Please verify your username and password with WRDS. The script will not work otherwise.')
}
###connect and pull compustat data based on year
pulled <- tbl(wrds, sql("select * from comp.funda where datafmt = 'STD' and consol = 'C' and indfmt = 'INDL' and popsrc = 'D'"))
pulled <- pulled %>% filter(between(fyear, year_start,year_end))
#pulled <- pulled %>% filter(exchg %in% exchg_list)
df <- pulled %>% collect()
rm(pulled)
df$year <- df$fyear
#get vector of unique gvkeys
u_gvkey <- unique(df$gvkey)
#this replaces missing sich codes with SIC codes from company information dataset
missing_sich <- unique(df$gvkey)
sich_pull <- tbl(wrds, sql('select * from comp.company'))
sich_pull <- sich_pull %>% filter(gvkey %in% missing_sich)
sich_pull <- sich_pull %>% collect()
sich_pull <- sich_pull %>% select(gvkey, sic)
df <- merge(df, sich_pull, by='gvkey', all.x = T)
df$sich <- ifelse(is.na(df$sich), df$sic, df$sich)
df <- df %>% select(-sic)
#this will be used to create the 'small' dataframe at the end of the script
cols_to_remove <- names(df)
cols_to_remove <- cols_to_remove[cols_to_remove %notin% c('gvkey','fyear','year','tic','sich','cusip','cik')]