DESCRIPTIVE STATS AND VISUALIZATION
#READING DATASET
# A tibble: 6 x 23
Sr_no Trans_date_Trans~ cc_num Merchant Category Amount first last
<dbl> <chr> <dbl> <chr> <chr> <dbl> <chr> <chr>
1 0 6/21/2020 2.29e15 fraud_K~ persona~ 3 Jeff Elli~
2 1 6/21/2020 3.57e15 fraud_S~ persona~ 30 Joan~ Will~
3 2 6/21/2020 3.60e15 fraud_S~ health_~ 41 Ashl~ Lopez
4 3 6/21/2020 3.59e15 fraud_H~ misc_pos 60 Brian Will~
5 4 6/21/2020 3.53e15 fraud_J~ travel 3 Nath~ Mass~
6 5 6/21/2020 3.04e13 fraud_D~ kids_pe~ 20 Dani~ Evans
# ... with 15 more variables: gender <chr>, street <chr>, city <chr>,
# state <chr>, zip <dbl>, lat <dbl>, long <dbl>, city_pop <dbl>,
# job <chr>, dob <chr>, trans_num <chr>, unix_time <dbl>,
# merch_lat <dbl>, merch_long <dbl>, is_fraud <dbl>
# Summary of the dataset: min/quartiles/mean/max for numeric columns,
# and length/class/mode for character columns.
summary(fraudTest)
Sr_no Trans_date_Trans_time cc_num
Min. : 0 Length:555719 Min. :6.042e+10
1st Qu.:138930 Class :character 1st Qu.:1.800e+14
Median :277859 Mode :character Median :3.521e+15
Mean :277859 Mean :4.178e+17
3rd Qu.:416789 3rd Qu.:4.635e+15
Max. :555718 Max. :4.992e+18
Merchant Category Amount
Length:555719 Length:555719 Min. : 1.0
Class :character Class :character 1st Qu.: 10.0
Mode :character Mode :character Median : 47.0
Mean : 69.4
3rd Qu.: 83.0
Max. :22768.0
first last gender
Length:555719 Length:555719 Length:555719
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
street city state
Length:555719 Length:555719 Length:555719
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
zip lat long city_pop
Min. : 1257 Min. :20.03 Min. :-165.67 Min. : 23
1st Qu.:26292 1st Qu.:34.67 1st Qu.: -96.80 1st Qu.: 741
Median :48174 Median :39.37 Median : -87.48 Median : 2408
Mean :48843 Mean :38.54 Mean : -90.23 Mean : 88222
3rd Qu.:72011 3rd Qu.:41.89 3rd Qu.: -80.18 3rd Qu.: 19685
Max. :99921 Max. :65.69 Max. : -67.95 Max. :2906700
job dob trans_num
Length:555719 Length:555719 Length:555719
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
unix_time merch_lat merch_long
Min. :1.372e+09 Min. :19.03 Min. :-166.67
1st Qu.:1.376e+09 1st Qu.:34.76 1st Qu.: -96.91
Median :1.381e+09 Median :39.38 Median : -87.45
Mean :1.381e+09 Mean :38.54 Mean : -90.23
3rd Qu.:1.386e+09 3rd Qu.:41.95 3rd Qu.: -80.26
Max. :1.389e+09 Max. :66.68 Max. : -66.95
is_fraud
Min. :0.00000
1st Qu.:0.00000
Median :0.00000
Mean :0.00386
3rd Qu.:0.00000
Max. :1.00000
# A tibble: 555,719 x 2
state Category
<chr> <chr>
1 SC personal_care
2 UT personal_care
3 NY health_fitness
4 FL misc_pos
5 MI travel
6 NY kids_pets
7 CA health_fitness
8 SD personal_care
9 PA shopping_pos
10 TX food_dining
# ... with 555,709 more rows
# Preview of the column names in the dataset.
colnames(fraudTest)
[1] "Sr_no" "Trans_date_Trans_time"
[3] "cc_num" "Merchant"
[5] "Category" "Amount"
[7] "first" "last"
[9] "gender" "street"
[11] "city" "state"
[13] "zip" "lat"
[15] "long" "city_pop"
[17] "job" "dob"
[19] "trans_num" "unix_time"
[21] "merch_lat" "merch_long"
[23] "is_fraud"
# Preview of the data type (class) of each column.
# vapply() guarantees a named character vector (one class per column);
# sapply() would silently change its return type on unusual input.
vapply(fraudTest, class, character(1))
Sr_no Trans_date_Trans_time cc_num
"numeric" "character" "numeric"
Merchant Category Amount
"character" "character" "numeric"
first last gender
"character" "character" "character"
street city state
"character" "character" "character"
zip lat long
"numeric" "numeric" "numeric"
city_pop job dob
"numeric" "character" "character"
trans_num unix_time merch_lat
"character" "numeric" "numeric"
merch_long is_fraud
"numeric" "numeric"
DESCRIPTIVE STATISTICS
# A tibble: 2 x 2
is_fraud n
<dbl> <int>
1 0 553574
2 1 2145
# Number of transactions per gender.
fraudTest %>%
  count(gender)
# A tibble: 2 x 2
gender n
<chr> <int>
1 F 304886
2 M 250833
# Number of transactions per state.
fraudTest %>%
  count(state)
# A tibble: 50 x 2
state n
<chr> <int>
1 AK 843
2 AL 17532
3 AR 13484
4 AZ 4592
5 CA 24135
6 CO 5886
7 CT 3277
8 DC 1517
9 FL 18104
10 GA 11277
# ... with 40 more rows
# Median transaction amount, ignoring missing values.
median(fraudTest[["Amount"]], na.rm = TRUE)
[1] 47
# Standard deviation of the transaction amount, ignoring missing values.
sd(fraudTest[["Amount"]], na.rm = TRUE)
[1] 156.7456
# Mean transaction amount, ignoring missing values.
mean(fraudTest[["Amount"]], na.rm = TRUE)
[1] 69.39616
# Smallest and largest transaction amount.
# na.rm = TRUE matches the median/sd/mean calls, so a missing Amount
# cannot turn the result into NA.
range(fraudTest$Amount, na.rm = TRUE)
[1] 1 22768
# Smallest transaction amount.
# na.rm = TRUE matches the median/sd/mean calls, so a missing Amount
# cannot turn the result into NA.
min(fraudTest$Amount, na.rm = TRUE)
[1] 1
# Largest transaction amount.
# na.rm = TRUE matches the median/sd/mean calls, so a missing Amount
# cannot turn the result into NA.
max(fraudTest$Amount, na.rm = TRUE)
[1] 22768
# Frequency table of transactions by gender.
table(fraudTest[["gender"]])
F M
304886 250833
# Frequency table of transactions by state.
table(fraudTest[["state"]])
AK AL AR AZ CA CO CT DC FL GA HI
843 17532 13484 4592 24135 5886 3277 1517 18104 11277 1090
IA ID IL IN KS KY LA MA MD ME MI
11819 2490 18960 11959 9943 12506 8988 5186 11152 6928 19671
MN MO MS MT NC ND NE NH NJ NM NV
13719 16501 8833 5052 12868 6397 10257 3449 10528 7020 2451
NY OH OK OR PA RI SC SD TN TX UT
35918 20147 11379 7811 34326 195 12541 5250 7359 40393 4658
VA VT WA WI WV WY
12506 5044 8116 12370 10838 8454
# Frequency table of transactions by merchant category.
table(fraudTest[["Category"]])
entertainment food_dining gas_transport grocery_net
40104 39268 56370 19426
grocery_pos health_fitness home kids_pets
52553 36674 52345 48692
misc_net misc_pos personal_care shopping_net
27367 34574 39327 41779
shopping_pos travel
49791 17449
# Frequency table of transactions by city (one entry per distinct city).
table(fraudTest[["city"]])
Achille Acworth
208 828
Adams Afton
223 897
Akron Albany
222 423
Albuquerque Alder
454 441
Aledo Alexandria
435 417
Allenhurst Allentown
450 1480
Alpharetta Altair
1084 865
Alton Altona
466 891
Altonah Alva
837 414
Amanda American Fork
667 417
Amorita Amsterdam
228 420
Andrews Annapolis
1508 217
Apison Arcadia
219 1754
Arlington Armagh
666 1109
Armonk Arnold
453 905
Arvada Ash Flat
222 685
Ashfield Ashford
435 426
Atglen Athena
454 644
Atlantic Auburn
858 419
Aurora Avera
238 437
Avoca Azusa
900 238
Bagley Bailey
1070 684
Ballwin Barnard
643 440
Barneveld Barnstable
821 834
Baroda Basye
652 828
Baton Rouge Battle Creek
1308 944
Bauxite Bay City
1291 457
Bay Minette Beasley
634 419
Beaver Falls Beaverdam
633 442
Belfast Belgrade
684 877
Belle Fourche Bellmore
431 1073
Belmond Belmont
654 470
Benton Bessemer
220 1097
Bethel Bethel Springs
222 433
Big Creek Big Indian
681 640
Bigelow Birmingham
219 2423
Blackville Blairsden-Graeagle
611 441
Blairstown Blooming Grove
682 1094
Bolivar Bolton
446 877
Bonfield Bonita Springs
666 214
Boonton Boulder
421 250
Bowdoin Bowersville
1267 656
Boyd Bradley
218 1263
Brainard Brandon
204 1800
Brantley Brashear
449 225
Breesport Bridgeport
873 679
Bridger Brinson
842 419
Bristol Bristow
688 418
Bronx Brooklin
857 453
Brooklyn Broomfield
896 228
Browning Brownville
649 859
Brunson Bryant
1151 234
Burbank Burke
1672 715
Burlington Burns Flat
878 215
Burrton Bynum
503 230
Cadiz Camden
666 1143
Campbell Canton
691 452
Cape Coral Cardwell
440 218
Carlisle Carlotta
419 886
Carroll Cascade Locks
429 870
Cass Cassatt
637 217
Catawba Cazenovia
853 653
Cecilton Cedar
672 657
Center Point Center Tuftonboro
664 440
Centerview Central
1293 441
Chatham Cherokee Village
242 637
Chester Chester Heights
452 664
Christine Churubusco
397 453
Cisco Claremont
852 205
Clarinda Clarks Mills
901 1474
Clarksville Clay Center
631 648
Clayton Clearwater
887 200
Cleveland Clifton
1968 426
Clune Clutier
446 210
Cochranton Coffeeville
1101 834
Cokeburg Coleharbor
848 874
Coleman Collegeville
635 232
Collettsville Colorado Springs
681 646
Colton Columbia
416 640
Comfort Comfrey
656 841
Conway Cord
1961 906
Corona Corriganville
441 856
Corsica Cottekill
679 1289
Cowlesville Coyle
681 674
Craig Cranks
14 677
Creedmoor Creola
848 691
Cressona Cromona
442 887
Cross Cross Plains
218 440
Crownpoint Curlew
458 195
Cuthbert Cuyahoga Falls
440 641
Dadeville Dallas
210 1512
Dalton Daly City
213 662
Damascus Daniels
215 209
Darien Dayton
660 219
De Lancey De Queen
255 218
De Soto De Witt
1275 1279
Deadwood Deane
630 733
Delhi Dell City
598 253
Deltona Denham Springs
434 1098
Des Moines Desdemona
653 230
Detroit Dexter
1540 218
Diamond Dieterich
1118 641
Doe Hill Dongola
905 445
Downsville Drakes Branch
427 1095
Dresden Du Pont
232 220
Dublin Dubre
184 413
Dumont Duncan
685 423
Dunlevy Eagarville
856 436
Early East Andover
247 641
East Canaan East Rochester
1292 201
East Troy Easton
429 431
Edinburg Edisto Island
420 1466
Edmond Egan
1090 874
Ehrhardt El Paso
213 201
Elberta Eldridge
1251 662
Elizabeth Elizabethtown
1091 634
Elk Rapids Elkhart
215 692
Emmons Emporium
221 450
Enola Esbon
886 1075
Espanola Etlan
1042 226
Eugene Eureka
631 676
Fairhope Fairview
212 629
Falconer Falls Church
846 454
Falls City Falmouth
471 891
Farmington Fayetteville
694 1129
Fenelton Ferney
1112 222
Fiddletown Fields Landing
876 648
Florence Ford
1325 682
Fordoche Fort Myers
879 1126
Fort Washakie Freedom
1589 873
Fullerton Fulton
637 1864
Gadsden Gainesville
1327 200
Gaithersburg Galatia
227 214
Gardiner Garfield
672 237
Garrattsville Georgetown
923 878
Gibsonville Girard
419 229
Glade Spring Glen Rock
454 438
Glendale Goodrich
1045 1325
Goreville Grand Bay
440 442
Grand Junction Grand Ridge
654 442
Grandview Graniteville
1336 882
Grant Grantham
861 428
Grassflat Great Mills
828 624
Greenbush Greendale
217 646
Greenview Greenville
635 402
Greenwich Greenwood
899 452
Gregory Grenada
456 634
Gretna Grifton
666 10
Grimesland Grover
471 1053
Guthrie Hahira
9 670
Haines City Halma
455 422
Halstad Hampton
436 409
Hancock Hannawa Falls
1134 1088
Harborcreek Harmony
635 221
Harper Harrington Park
900 679
Harrodsburg Hartford
820 229
Harwood Hatch
457 683
Haw River Hawley
1097 666
Hawthorne Haynes
649 455
Hazel Heart Butte
209 1291
Hedley Hedrick
446 467
Heidelberg Heiskell
446 851
Heislerville Helm
420 920
Henderson Hewitt
653 899
Higganum High Rolls Mountain Park
432 852
Highland Hills
666 224
Hinckley Hinesburg
1270 1271
Holcomb Holliday
876 859
Holloway Holstein
665 842
Honokaa Hooper
431 872
Hopewell Hopkins
233 667
Houston Hovland
1697 488
Howells Howes Cave
642 239
Hudson Humble
1576 461
Humboldt Huntington Beach
258 228
Huntsville Hurley
1543 423
Hurricane Huslia
866 194
Iliff Independence
420 637
Indian Wells Indianapolis
640 1765
Ironton Irvine
222 656
Irwinton Iselin
645 227
Issaquah Jackson
1049 239
Jaffrey Jay
202 242
Jefferson Jelm
663 669
Jermyn Johns Island
647 1057
Joliet Jones
666 1297
Jordan Valley Jordanville
216 436
Juliette June Lake
632 666
Kansas City Karnack
879 666
Keisterville Keller
854 667
Kenner Kensington
675 1092
Kent Key West
427 1071
Kilgore Kings Bay
655 241
Kingsford Heights Kingsport
1462 462
Kingsville Kirby
224 909
Kirk Kirkwood
426 12
Kirtland Kirtland Afb
10 449
Kissee Mills Kittery Point
621 215
Knowlesville Knoxville
217 1066
Lagrange Laguna Hills
464 652
Lahoma Lake Jackson
1470 1105
Lake Oswego Lakeland
620 1485
Lakeport Lakeview
929 438
Lamberton Lamy
662 231
Lanark Village Lane
1074 237
Laramie Laredo
888 659
Lawn Lawrence
852 1055
Lebanon Leetsdale
460 466
Leo Leonard
886 1094
Lepanto Lexington
228 11
Liberty Mills Lima
226 647
Linthicum Heights Lithopolis
199 257
Littleton Livonia
1285 213
Llano Loami
442 241
Logan Lohrville
871 852
Lolita Lomax
680 1386
Lonetree Lonsdale
418 895
Lorenzo Los Angeles
234 216
Louisiana Louisville
231 870
Loving Lowell
247 644
Lowville Loxahatchee
663 636
Lubbock Luray
432 628
Luzerne Malad City
472 232
Mallie Manchester
909 648
Manderson Manistique
892 1350
Manley Manor
651 461
Manquin Mansfield
230 657
Manville Marathon
921 871
Margaretville Maria Stein
657 206
Marienville Marietta
671 227
Marion Matawan
431 867
Matthews May
682 894
Mayersville Maysville
885 408
Mc Clellandtown Mc Cracken
232 642
Mc Intosh Mc Nabb
204 863
Mc Veytown Meadville
874 1106
Melbourne Mendon
436 463
Meredith Meridian
878 2229
Mesa Metairie
883 440
Methuen Miamisburg
861 463
Michigan Mifflin
243 897
Milford Mill Creek
1129 677
Milner Milwaukee
434 619
Minneapolis Minnesota Lake
682 441
Moab Mobile
696 1116
Monetta Monitor
231 216
Monmouth Beach Montandon
889 639
Montgomery Montrose
451 434
Moores Hill Mooresville
234 671
Moorhead Moravian Falls
1251 899
Moriarty Moriches
1096 239
Moro Morrisdale
658 1273
Morrowville Moscow
12 229
Moulton Mound City
636 888
Mounds Moundsville
673 1074
Mount Clemens Mount Hope
429 832
Mount Morris Mount Perry
621 661
Mount Saint Joseph Mountain Center
846 452
Mountain Park Mulberry Grove
893 1112
Munith Murfreesboro
667 1076
Murrayville Muskegon
1187 1093
Napa Naples
825 1694
Nazareth Nelson
881 444
New Boston New Ellenton
857 638
New Franken New Goshen
247 218
New Holstein New Memphis
1079 211
New Waverly New York City
648 1117
Newark Valley Newberg
651 453
Newhall Newport
1340 224
Newton Nobleboro
209 881
Nokomis Noonan
232 843
Norman Norman Park
1338 671
North Augusta North Brookfield
669 219
North Haverhill North Judson
204 635
North Las Vegas North Loup
213 900
North Prairie North Tonawanda
846 407
North Washington North Wilkesboro
882 1072
Northport Norwalk
241 1312
Norwich Notrees
459 866
O Brien Oak Hill
878 650
Oakdale Oakford
225 208
Oakland Oaks
1040 626
Oconto Falls Odessa
858 209
Ogdensburg Oklahoma City
888 435
Old Hickory Ollie
205 673
Olmsted Omaha
434 221
Oolitic Oran
1112 412
Orangeburg Orient
682 1548
Oriskany Falls Orr
448 429
Owensville Oxford
240 643
Ozawkie Paauilo
650 659
Paint Rock Palermo
666 659
Palmdale Palmyra
233 240
Paradise Valley Paris
434 212
Parker Parker Dam
227 228
Parkers Lake Parks
457 414
Parsonsfield Paulding
894 641
Paxton Payson
219 693
Pea Ridge Pearlington
242 215
Pecos Pelham
195 409
Pembroke Pembroke Township
425 1458
Pewee Valley Phenix City
419 918
Phil Campbell Philadelphia
441 1596
Phoenix Pikesville
2222 428
Pittsburgh Plainfield
862 1302
Plantersville Plymouth
876 902
Pointe Aux Pins Pomona
843 416
Port Charlotte Port Costa
440 906
Port Ewen Port Gibson
671 651
Port Richey Port Saint Lucie
433 224
Portland Powell
447 669
Powell Butte Prairie Creek
209 207
Prairie Hill Premier
897 229
Preston Princeton
220 651
Prosperity Providence
464 195
Pueblo Purmela
416 672
Putnam Quanah
223 642
Queenstown Ragland
428 234
Randolph Ranier
667 1403
Ratcliff Ravenna
886 220
Red Cliff Red River
215 1079
Redford Remer
1124 218
Reno Republic
1375 220
Reynolds Rhame
472 702
Rice Richland
8 220
Ridgeland Ringwood
413 446
River Riverton
673 920
Riverview Rochester
237 243
Rock Glen Rock Springs
237 1081
Rock Tavern Rockwell
875 222
Rockwood Rocky Mount
461 1334
Roma Romulus
1324 1099
Ronceverte Roosevelt
1046 892
Roseland Rosewood
446 224
Rossville Ruckersville
238 1036
Ruidoso Rule
660 644
Rumely Ruth
435 641
Sachse Sacramento
1060 876
Saint Amant Saint Bonaventure
1119 868
Saint Francis Saint James City
217 653
Saint Louis Saint Paul
435 866
Saint Petersburg San Angelo
892 224
San Antonio San Diego
2182 1069
San Jose Santa Monica
903 653
Sardis Sauk Rapids
222 428
Saxon Scarborough
838 669
Schaefferstown Schaumburg
887 452
Scotia Scotland
869 1074
Scotts Mills Sea Island
436 246
Seattle Sebring
9 1358
Seligman Senatobia
10 10
Seneca Shedd
638 236
Sheffield Shelter Island
900 639
Shenandoah Junction Sherman
905 1054
Shields Shippingport
1325 877
Shrewsbury Sixes
444 424
Skytop Slayden
212 428
Smackover Smith River
636 434
Smithfield Smiths Grove
202 1083
Smock Sontag
417 840
South Hero South Londonderry
445 457
South Richmond Hill Southfield
241 876
Spearsville Spencer
425 1107
Spirit Lake Sprague
232 203
Spring Spring Church
211 699
Springfield Springfield Gardens
1110 220
Springville Stanchfield
12 1088
Stayton Stephensport
426 657
Sterling City Steuben
449 842
Stillwater Stirling
433 649
Stittville Stoneham
259 219
Sturgis Sula
618 684
Summerfield Sun City
855 1352
Sunflower Superior
226 1264
Surrency Sutherland
907 1317
Syracuse Tallmansville
225 867
Tamaroa Tampa
451 437
Tekoa Texarkana
1188 412
Thida Thomas
1426 1937
Thompson Thornville
1346 247
Thrall Tickfaw
1298 458
Timberville Tiptonville
681 458
Titusville Tomahawk
663 646
Tomales Topeka
882 1373
Tower Hill Trenton
666 223
Tryon Tulsa
468 1314
Tupper Lake Turner
1319 667
Tuscarora Tyaskin
207 912
Tyler Uledi
827 1261
Umatilla Union
424 441
Unionville University
438 660
Utica Valdosta
2204 669
Valentine Vancouver
233 230
Vanderbilt Varnell
459 688
Veedersburg Vero Beach
454 640
Vienna Vinton
203 439
Viola Wales
648 635
Walnut Ridge Warren
1314 1985
Washington Washington Court House
1517 640
Washoe Valley Watertown
222 440
Wauchula Waukesha
461 470
Waupaca Waynesfield
690 220
Webster City Weeping Water
245 431
Wendel West Bethel
223 223
West Chazy West Columbia
219 639
West Decatur West Eaton
439 444
West Finley West Green
268 431
West Harrison West Hartford
671 872
West Henrietta West Long Branch
723 218
West Monroe West Palm Beach
448 636
West Sayville Westerville
1072 874
Westfir Westhampton Beach
223 434
Westport Wetmore
1428 1300
Wever Whaleyville
6 1301
Wheaton Whigham
878 671
White Sulphur Springs Whittemore
215 240
Wichita Williams
1098 221
Williamsburg Wilmette
218 835
Wilmington Wilton
1268 882
Winfield Winger
1099 858
Winslow Winter
427 410
Winthrop Wittenberg
200 224
Woods Cross Woodville
216 813
Yellowstone National Park Zaleski
217 638
Zavalla
661
# Group the transactions by state, then calculate the average, minimum,
# and maximum Amount within each state.
summarise(
  group_by(fraudTest, state),
  Average = mean(Amount),
  lower = min(Amount),
  Upper = max(Amount)
)
# A tibble: 50 x 4
state Average lower Upper
<chr> <dbl> <dbl> <dbl>
1 AK 78.4 1 1617
2 AL 64.3 1 5030
3 AR 76.2 1 8181
4 AZ 75.8 1 7321
5 CA 73.3 1 16837
6 CO 76.0 1 5187
7 CT 62.6 1 4120
8 DC 71.7 1 1121
9 FL 71.4 1 21438
10 GA 69.2 1 7886
# ... with 40 more rows
library(tidyverse)
library(ggplot2)
# Bar chart of transaction counts by gender, restricted to Massachusetts.
ggplot(
  filter(fraudTest, state == "MA"),
  aes(x = gender)
) +
  geom_bar()
Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".
For attribution, please cite this work as
Gogula (2022, March 23). Data Analytics and Computational Social Science: HW_4. Retrieved from https://github.com/DACSS/dacss_course_website/posts/httpsrpubscommanikanta881033/
BibTeX citation
@misc{gogula2022hw_4, author = {Gogula, Mani kanta}, title = {Data Analytics and Computational Social Science: HW_4}, url = {https://github.com/DACSS/dacss_course_website/posts/httpsrpubscommanikanta881033/}, year = {2022} }