diff --git a/data/datasets/DASL.json b/data/datasets/DASL.json index 088f2cce1..5f8111df9 100644 --- a/data/datasets/DASL.json +++ b/data/datasets/DASL.json @@ -1,6130 +1,2946 @@ { "name": "DASL", "categories": [ { "name": "Medicine", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A group of female college students took a test that measured their verbal IQs and also underwent an MRI scan to measure the size of their brains (in 1000s of pixels)", "url": "https://dasl.datadescription.com/download/data/3084", "filename": "Brain-size", "name": "Brain size", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "An experiment was performed to see whether sensory deprivation over an extended period of time has any effect on the alpha-wave patterns produced by the brain. To determine this, 20 subjects, inmates in a Canadian prison, were randomly split into two groups. Members of one group were placed in solitary confinement. Those in the other […] ", + "description": "An experiment was performed to see whether sensory deprivation over an extended period of time has any effect on the alpha-wave patterns produced by the brain. To determine this, 20 subjects, inmates in a Canadian prison, were randomly split into two groups. Members of one group were placed in solitary confinement. Those in the other group were allowed to remain in their own cells. 
Seven days later, alpha-wave frequencies were measured for all subjects.", "url": "https://dasl.datadescription.com/download/data/3085", "filename": "Brain-waves", "name": "Brain waves", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A study examined brain size (measured as pixels counted in a digitized magnetic resonance image [MRI] of a cross section of the brain) and IQ (4 performance scales of the Wechsler IQ test) for college students. The data give the Performance IQ scores and Brain Size. ", + "description": "A study examined brain size (measured as pixels counted in a digitized magnetic resonance image [MRI] of a cross section of the brain) and IQ (4 performance scales of the Wechsler IQ test) for college students. The data give the Performance IQ scores and Brain Size.", "url": "https://dasl.datadescription.com/download/data/3301", "filename": "IQ-Brain", "name": "IQ Brain", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Neurology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Framingham Heart Study is one of the longest running health studies. 
It has followed original subjects, their children, and their grand children, looking for factors that affect cardiac health.\nThese data only include\nsubjects whose cholesterol was measured in the first exam.\nSource: “Statistical Methods in Epidemiology” by H.A.Kahn and C.T.Sempos\nSBP: Systolic blood pressure at first exam\nDBP: Diastolic blood pressure at first exam\nCHOL: Serum choloesterol at first exam\nFRW : Framingham relative weight; a standardized measure of weight adjusted for sex and height\nCIG: Number of cigarettes smoked/day at first exam\nDEATH: First biannual exam missed due to death; 0=”alive at tenth biannual exam.” (This exam wasgiven in the 18th year of the study.)\nCAUSE: 0=aliv e at exam 10, 1=Coronary Heart Disease (sudden), 2=CHD (not sudden), 3=Stroke,4=Other cardiovascular disease, 5=cancer, 6=other", + "description": "The Framingham Heart Study is one of the longest running health studies. It has followed original subjects, their children, and their grand children, looking for factors that affect cardiac health.\nThese data only include\nsubjects whose cholesterol was measured in the first exam.\nSource: \"Statistical Methods in Epidemiology\" by H.A.Kahn and C.T.Sempos\nSBP: Systolic blood pressure at first exam\nDBP: Diastolic blood pressure at first exam\nCHOL: Serum cholesterol at first exam\nFRW : Framingham relative weight; a standardized measure of weight adjusted for sex and height\nCIG: Number of cigarettes smoked/day at first exam\nDEATH: First biannual exam missed due to death; 0=\"alive at tenth biannual exam.\" (This exam was given in the 18th year of the study.)\nCAUSE: 0=alive at exam 10, 1=Coronary Heart Disease (sudden), 2=CHD (not sudden), 3=Stroke, 4=Other cardiovascular disease, 5=cancer, 6=other", "url": "https://dasl.datadescription.com/download/data/3217", "filename": "Framingham", "name": "Framingham", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - 
"skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "number of days spent in hospital by patients admitted to hospitals in New York during one year with a primary diagnosis of acute myocardial infarction (heart attack). Data are from public medicare records. Consider the distribution of stays. The data also include the age and sex of the patient and the diagnostic (DRG) code. \n", "url": "https://dasl.datadescription.com/download/data/3263", "filename": "Heart-attack-charges", "name": "Heart attack charges", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Number of days spent in hospital by female patients admitted to hospitals in New York during one year with a primary diagnosis of acute myocardial infarction (heart attack). Data are from public medicare records. Consider the distribution of stays. 
The data also include the age of the patient", "url": "https://dasl.datadescription.com/download/data/3264", "filename": "Heart-attack-stays", "name": "Heart attack stays", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A medical researcher measured the pulse rates (beats per minute) of a sample of randomly selected adults.", "url": "https://dasl.datadescription.com/download/data/3413", "filename": "Pulse-rates", "name": "Pulse rates", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Cardiology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Does blood pressure, on average, change with age. The data here are two categorical variables: Blood pressure categorized as High, Normal, Low, and Age categorized as under 30, 30-49, and over 50", "url": "https://dasl.datadescription.com/download/data/3077", "filename": "Blood-Pressure", "name": "Blood Pressure", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Thirteen overweight women volunteered for a study to determine whether eating specially prepared crackers before a meal could help them lose weight. The subjects were randomly assigned to eat crackers with different types of fiber (bran fiber, gum fiber, both, and a control cracker) and cycled through several of the cracker alternatives. 
Unfortunately, some of the women developed uncomfortable bloating and upset stomachs. Researchers suspected that some of the crackers might be at fault. The study was paid for by the manufacturers of the gum fiber, who hoped this would be a new diet tool. What would you recommend to them about the prospects for marketing their new diet cracker?", "url": "https://dasl.datadescription.com/download/data/3163", "filename": "Diet", "name": "Diet", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Medical researchers followed 6272 Swedish men for 30 years to see whether there\nwas any association between the amount of fish in their diet and prostate cancer. The original study actually used pairs of twins, which enabled the researchers to discern that the risk of cancer for those who never ate fish actually was substantially greater.", "url": "https://dasl.datadescription.com/download/data/3207", "filename": "Fish-diet", "name": "Fish diet", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A student decided to investigate just how effective washing with soap is in eliminating bacteria. To do this she tested four different methods—washing with water only, washing with regular soap, washing with antibacterial soap (ABS), and spraying hands with antibacterial spray (AS) (containing 65% ethanol as an active ingredient). 
Her experiment consisted of one experimental factor, the washing Method, at four levels.\nShe suspected that the number of bacteria on her hands before washing might vary considerably from day to day. To help even out the effects of those changes, she generated random numbers to determine the order of the four treatments. Each morning, she washed her hands according to the treatment randomly chosen. Then she placed her right hand on a sterile media plate designed to encourage bacteria growth. She incubated each plate for 2 days at 36°C, after which she counted the bacteria colonies. She replicated this procedure 8 times for each of the four treatments.", + "description": "A student decided to investigate just how effective washing with soap is in eliminating bacteria. To do this she tested four different methods - washing with water only, washing with regular soap, washing with antibacterial soap (ABS), and spraying hands with antibacterial spray (AS) (containing 65% ethanol as an active ingredient). Her experiment consisted of one experimental factor, the washing Method, at four levels.\nShe suspected that the number of bacteria on her hands before washing might vary considerably from day to day. To help even out the effects of those changes, she generated random numbers to determine the order of the four treatments. Each morning, she washed her hands according to the treatment randomly chosen. Then she placed her right hand on a sterile media plate designed to encourage bacteria growth. She incubated each plate for 2 days at 36°C, after which she counted the bacteria colonies. 
She replicated this procedure 8 times for each of the four treatments.", "url": "https://dasl.datadescription.com/download/data/3254", "filename": "Hand-washing", "name": "Hand washing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The heights and weights of students in a statistics class were recorded. ", + "description": "The heights and weights of students in a statistics class were recorded.", "url": "https://dasl.datadescription.com/download/data/3265", "filename": "Heights-weights", "name": "Heights and weights", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Canadian researcher John Coates took saliva samples in\nthe morning, twice a day for eight days, from 17 men working on a London\nmid-size trading f loor (trading a wide range of assets, with largest exposure to\nGerman interest rate futures), in June 2005, and classified each trader according\nto whether his testosterone level was high or low on that day (compared\nwith the trader’s median over the period). High testosterone days differed from\ntrader to trader, and high days differed from low days on average by 25% in\ntestosterone level. 
He also recorded the profits or losses (P&L) in pounds sterling\nof each trader during 11 am–4 pm daily.", + "description": "Canadian researcher John Coates took saliva samples in\nthe morning, twice a day for eight days, from 17 men working on a London\nmid-size trading floor (trading a wide range of assets, with largest exposure to\nGerman interest rate futures), in June 2005, and classified each trader according\nto whether his testosterone level was high or low on that day (compared\nwith the trader's median over the period). High testosterone days differed from\ntrader to trader, and high days differed from low days on average by 25% in\ntestosterone level. He also recorded the profits or losses (P&L) in pounds sterling\nof each trader during 11 am - 4 pm daily.", "url": "https://dasl.datadescription.com/download/data/3272", "filename": "Hormones", "name": "Hormones", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Since the 1960s, the Centers for Disease Control and Prevention’s National Center for Health Statistics has been collecting health and nutritional information on people of all ages and backgrounds. The National Health and Nutrition Examination Survey (NHANES) of 2001–2002, measured a wide variety of variables, including body measurements, cardiovascular fitness, blood chemistry, and demographic information on more than 11,000 individuals.\nThe file holds data on the weights of 80 men between 19 and 24 years old of average height (between 5′8″ and 5′10″ tall).", + "description": "Since the 1960s, the Centers for Disease Control and Prevention's National Center for Health Statistics has been collecting health and nutritional information on people of all ages and backgrounds. 
The National Health and Nutrition Examination Survey (NHANES) of 2001-2002, measured a wide variety of variables, including body measurements, cardiovascular fitness, blood chemistry, and demographic information on more than 11,000 individuals.\nThe file holds data on the weights of 80 men between 19 and 24 years old of average height (between 5'8'' and 5'10'' tall).", "url": "https://dasl.datadescription.com/download/data/3337", "filename": "Mens-Weights", "name": "Mens Weights", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 1879, A. A. Michelson made 100 determinations of the velocity\nof light in air using a modification of a method proposed by the French\nphysicist Foucault. The data are given here as reported by Stigler.\nThe measurements are derived from sets of often widely disparate\nnumbers of observations. The numbers are in km/sec, and have had\n299,000 subtracted from them. The currently accepted “true”\nvelocity of light in vacuum is 299,792.5 km/sec. Stigler has\napplied the corrections used by Michelson and reports that the\n“true” value appropriate for comparison to these measurements\nis 734.5. Each trial may be a summary of several experimental\nobservations.", + "description": "In 1879, A. A. Michelson made 100 determinations of the velocity\nof light in air using a modification of a method proposed by the French\nphysicist Foucault. The data are given here as reported by Stigler.\nThe measurements are derived from sets of often widely disparate\nnumbers of observations. The numbers are in km/sec, and have had\n299,000 subtracted from them. The currently accepted \"true\"\nvelocity of light in vacuum is 299,792.5 km/sec. 
Stigler has\napplied the corrections used by Michelson and reports that the\n\"true\" value appropriate for comparison to these measurements\nis 734.5. Each trial may be a summary of several experimental\nobservations.", "url": "https://dasl.datadescription.com/download/data/3338", "filename": "Michelson_", "name": "Michelson", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The National Health and Nutrition Examination Survey (NHANES) is a program of studies designed to assess the health and nutritional status of adults and children in the United States. The survey is unique in that it combines interviews and physical examinations. ", + "description": "The National Health and Nutrition Examination Survey (NHANES) is a program of studies designed to assess the health and nutritional status of adults and children in the United States. The survey is unique in that it combines interviews and physical examinations.", "url": "https://dasl.datadescription.com/download/data/3365", "filename": "NHANES", "name": "NHANES", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Body temperatures of a random sample of 52 healthy adults, reported in degrees Fahrenheit. 
", + "description": "Body temperatures of a random sample of 52 healthy adults, reported in degrees Fahrenheit.", "url": "https://dasl.datadescription.com/download/data/3368", "filename": "Normal-temperature", "name": "Normal temperature", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Obesity and exercise", "url": "https://dasl.datadescription.com/download/data/3372", "filename": "Obesity-and-exercise", "name": "Obesity and exercise", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Story: \nThe Pima Indians of southern Arizona are a unique community. Their ancestors were among the first people to cross over into the Americas some 30,000 years ago. For at least two millennia, they have lived in the Sonoran Desert near the Gila River. Known throughout history as a generous people, they have given of themselves for the past 30 years helping researchers at the National Institutes of Health study certain diseases like diabetes and obe-sity. Young Pima Indians often marry other Pimas, making them an ideal group for genetic researchers to study. Pimas also have an extremely high incidence of diabetes.\nResearchers investigating factors for increased risk of diabetes examined data on 768 adult women of Pima Indian heritage. One possible predictor is the body mass index, BMI, calculated as weight/height2, where weight is measured in kilograms and height in meters. We are interested in the relationship between BMI and the incidence of diabetes. 
", + "description": "Story: \nThe Pima Indians of southern Arizona are a unique community. Their ancestors were among the first people to cross over into the Americas some 30,000 years ago. For at least two millennia, they have lived in the Sonoran Desert near the Gila River. Known throughout history as a generous people, they have given of themselves for the past 30 years helping researchers at the National Institutes of Health study certain diseases like diabetes and obe-sity. Young Pima Indians often marry other Pimas, making them an ideal group for genetic researchers to study. Pimas also have an extremely high incidence of diabetes.\nResearchers investigating factors for increased risk of diabetes examined data on 768 adult women of Pima Indian heritage. One possible predictor is the body mass index, BMI, calculated as weight/height2, where weight is measured in kilograms and height in meters. We are interested in the relationship between BMI and the incidence of diabetes.", "url": "https://dasl.datadescription.com/download/data/3394", "filename": "Pima-indians", "name": "Pima indians", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Pregnancies", "url": "https://dasl.datadescription.com/download/data/3404", "filename": "Pregnancies", "name": "Pregnancies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Sleep Foundation (www.sleepfoundation.org) says that adults should get at least 7 hours of sleep each night. A survey of students at a small school in the northeast U.S. 
asked, among other things, “How much did you sleep last night?” The data are the responses. ", + "description": "The Sleep Foundation (www.sleepfoundation.org) says that adults should get at least 7 hours of sleep each night. A survey of students at a small school in the northeast U.S. asked, among other things, \"How much did you sleep last night?\" The data are the responses.", "url": "https://dasl.datadescription.com/download/data/3453", "filename": "Sleep", "name": "Sleep", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Common" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A pharmaceutical company tested three formulations of a pain relief medicine for migraine headache sufferers. For the experiment, 27 volunteers were selected and 9 were randomly assigned to one of three drug formulations. The subjects were instructed to take the drug during their next migraine headache episode and to report their pain on a scale […] ", + "description": "A pharmaceutical company tested three formulations of a pain relief medicine for migraine headache sufferers. For the experiment, 27 volunteers were selected and 9 were randomly assigned to one of three drug formulations. 
The subjects were instructed to take the drug during their next migraine headache episode and to report their pain on a scale of 1 = no pain to 10 = extreme pain 30 minutes after taking the drug.", "url": "https://dasl.datadescription.com/download/data/3053", "filename": "Analgesics", "name": "Analgesics", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A study compared the effectiveness of several antidepressants by examining the experiments in which they had passed the FDA requirements. Each of those experiments compared the active drug with a placebo, an inert pill given to some of the subjects. In each experiment some patients treated with the placebo had improved, a phenomenon called the […] ", + "description": "A study compared the effectiveness of several antidepressants by examining the experiments in which they had passed the FDA requirements. Each of those experiments compared the active drug with a placebo, an inert pill given to some of the subjects. In each experiment some patients treated with the placebo had improved, a phenomenon called the placebo effect. Patients' depression levels were evaluated on the Hamilton Depression Rating Scale, where larger numbers indicate greater improvement. (The Hamilton scale is a widely accepted standard that was used in each of the independently run studies.) 
It is well-understood that placebos can have a strong therapeutic effect on depression, but separating the placebo effect from the medical effect can be difficult.", "url": "https://dasl.datadescription.com/download/data/3054", "filename": "Antidepressants", "name": "Antidepressants", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A student investigated just how effective washing with soap is in eliminating bacteria. To do this she tested four different methods—washing with water only, washing with regular soap, washing with antibacterial soap (ABS), and spraying hands with antibacterial spray (AS) (containing 65% ethanol as an active ingredient). Her experiment consisted of one experimental factor, the […] ", + "description": "A student investigated just how effective washing with soap is in eliminating bacteria. To do this she tested four different methods - washing with water only, washing with regular soap, washing with antibacterial soap (ABS), and spraying hands with antibacterial spray (AS) (containing 65% ethanol as an active ingredient). Her experiment consisted of one experimental factor, the washing Method, at four levels. She suspected that the number of bacteria on her hands before washing might vary considerably from day to day. To help even out the effects of those changes, she generated random numbers to determine the order of the four treatments. Each morning, she washed her hands according to the treatment randomly chosen. Then she placed her right hand on a sterile media plate designed to encourage bacteria growth. She incubated each plate for 2 days at 36°C, after which she counted the bacteria colonies. 
She replicated this procedure 8 times for each of the four treatments.", "url": "https://dasl.datadescription.com/download/data/3561", "filename": "Baterial-soap", "name": "Baterial soap", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Measurements of 250 men of various ages. The percent of a man’s body that is fat is a matter of concern for health and fitness. But the %bodyfat is difficult and expensive to measure accurately. These data offer correct %bodyfat measurements along with a variety of easier to find measures. Can you build a model ", + "description": "Measurements of 250 men of various ages. The percent of a man's body that is fat is a matter of concern for health and fitness. But the %bodyfat is difficult and expensive to measure accurately. These data offer correct %bodyfat measurements along with a variety of easier to find measures. Can you build a model ", "url": "https://dasl.datadescription.com/download/data/30790", "filename": "Bodyfat", "name": "Bodyfat", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Burger King publishes full nutrition information on its menu. These data are for the foods on the menu recently. (Visit the site listed as the reference for the most current list.) 
", "url": "https://dasl.datadescription.com/download/data/3089", "filename": "Burger-King-items", "name": "Burger King items", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Nutritionists are concerned that people have a good breakfast. But what does that mean? students collected nutrition information from the nutrition labels of cereals in one supermarket. ", + "description": "Nutritionists are concerned that people have a good breakfast. But what does that mean? students collected nutrition information from the nutrition labels of cereals in one supermarket.", "url": "https://dasl.datadescription.com/download/data/3107", "filename": "Cereals", "name": "Cereals", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Researchers at the University of Denver Infant Study Center wondered whether temperature might influence the age at which babies learn to crawl. Perhaps the extra clothing that babies wear in cold weather would restrict movement and delay the age at which they started crawling. Data were collected on 208 boys and 206 girls. Parents reported the month of the baby’s birth and the age (in weeks) at which their child first crawled. The table gives the average Temperature (°F) when the babies were 6 months old and average Crawling Age (in weeks) for each month of the year.", + "description": "Researchers at the University of Denver Infant Study Center wondered whether temperature might influence the age at which babies learn to crawl. 
Perhaps the extra clothing that babies wear in cold weather would restrict movement and delay the age at which they started crawling. Data were collected on 208 boys and 206 girls. Parents reported the month of the baby's birth and the age (in weeks) at which their child first crawled. The table gives the average Temperature (°F) when the babies were 6 months old and average Crawling Age (in weeks) for each month of the year.", "url": "https://dasl.datadescription.com/download/data/3143", "filename": "Crawling", "name": "Crawling", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Life expectancy at birth, TV’s per capita, and doctor’s per capita for countries of the world. Doctors predict life expectancy, but is that causal? TVs also predict life expectancy. ", + "description": "Life expectancy at birth, TV's per capita, and doctor's per capita for countries of the world. Doctors predict life expectancy, but is that causal? TVs also predict life expectancy.", "url": "https://dasl.datadescription.com/download/data/3169", "filename": "life-expectancy", "name": "Doctors and life expectancy", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Fertility (births/woman) and Female life expectancy for 219 countries of the world. (Data is available on both variables for only 200). How is life expectancy related to fertility? 
Are there any outliers and, if so, what do they indicate", "url": "https://dasl.datadescription.com/download/data/3202", "filename": "Fertility-and-life-expectancy-2014", "name": "Fertility and life expectancy 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Gossett says in his seminal 1908 paper: “Before I had succeeded in solving my problem analytically, I had endeavoured to do so empirically. The material used was a correlation table containing the height and left middle finger measurements of 3000 criminals, from a paper by W. R. MacDonell (Biometrika, Vol. I., p. 219).” His method was to write the 3000 finger length values on cards, shuffle them thoroughly, and the deal out 750 hands of 4 cards. For each hand he then calculated (with a mechanical calculator) the mean and standard deviation. (Note; He divided by n (= 4) and not by n-1 (= 3).) He then found values of ybar – the population mean (which he knew because he had the population; it is 11.5474) and divided each by the standard deviation. The resulting values formed the distribution which he then correctly described.\nThe finger measurements were originally given in mm and the heights in feet and inches. They have been converted to cm (at https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/crimtab.html). The midpoint of intervals are used where MacDonnel gives a range of values.", + "description": "Gossett says in his seminal 1908 paper: \"Before I had succeeded in solving my problem analytically, I had endeavoured to do so empirically. The material used was a correlation table containing the height and left middle finger measurements of 3000 criminals, from a paper by W. R. MacDonell (Biometrika, Vol. I., p. 
219).\" His method was to write the 3000 finger length values on cards, shuffle them thoroughly, and then deal out 750 hands of 4 cards. For each hand he then calculated (with a mechanical calculator) the mean and standard deviation. (Note; He divided by n (= 4) and not by n-1 (= 3).) He then found values of ybar - the population mean (which he knew because he had the population; it is 11.5474) and divided each by the standard deviation. The resulting values formed the distribution which he then correctly described.\nThe finger measurements were originally given in mm and the heights in feet and inches. They have been converted to cm (at https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/crimtab.html). The midpoints of intervals are used where MacDonell gives a range of values.", "url": "https://dasl.datadescription.com/download/data/3204", "filename": "Fingers-and-Heights", "name": "Fingers and Heights", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Is it true that students\ntend to gain weight during their first year in college? Cornell Professor of Nutrition David Levitsky recruited students from two large sections\nof an introductory health course. Although they were\nvolunteers, they appeared to match the rest of the freshman\nclass in terms of demographic variables such as sex\nand ethnicity. The students were weighed during the first\nweek of the semester, then again 12 weeks later. Based\non Professor Levitsky’s data, estimate the mean weight\ngain in first-semester freshmen and comment on the\n“freshman 15.” (Weights are in pounds.)", + "description": "Is it true that students\ntend to gain weight during their first year in college? 
Cornell Professor of Nutrition David Levitsky recruited students from two large sections\nof an introductory health course. Although they were\nvolunteers, they appeared to match the rest of the freshman\nclass in terms of demographic variables such as sex\nand ethnicity. The students were weighed during the first\nweek of the semester, then again 12 weeks later. Based\non Professor Levitsky's data, estimate the mean weight\ngain in first-semester freshmen and comment on the\n\"freshman 15\". (Weights are in pounds.)", "url": "https://dasl.datadescription.com/download/data/3218", "filename": "Freshman-15", "name": "Freshman 15", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For humans, pregnancy lasts about 280 days. In other species of animals, the length of time from conception to birth varies. Is there any evidence that the gestation period is related to the animal’s life span? The data give Gestation Period (in days) and Life Expectancy (in years) for 18 species of mammals.", + "description": "For humans, pregnancy lasts about 280 days. In other species of animals, the length of time from conception to birth varies. Is there any evidence that the gestation period is related to the animal's life span? 
The data give Gestation Period (in days) and Life Expectancy (in years) for 18 species of mammals.", "url": "https://dasl.datadescription.com/download/data/3241", "filename": "Gestation_", "name": "Gestation", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Physical therapists measure a patient’s manual dexterity with a simple task. The patient\npicks up small cylinders from a 4 * 4 frame with one hand, flips them over (still with one\nhand), and replaces them in the frame. The task is timed for all 16 cylinders. The tool was originally normed for adults. In a follow-up study, researchers\nused this tool to study how dexterity improves with age in children and establish norms against which to compare a patient’s dexterity.", + "description": "Physical therapists measure a patient's manual dexterity with a simple task. The patient\npicks up small cylinders from a 4 * 4 frame with one hand, flips them over (still with one\nhand), and replaces them in the frame. The task is timed for all 16 cylinders. The tool was originally normed for adults. In a follow-up study, researchers\nused this tool to study how dexterity improves with age in children and establish norms against which to compare a patient's dexterity.", "url": "https://dasl.datadescription.com/download/data/3253", "filename": "Hand-dexterity", "name": "Hand dexterity", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Fitting someone for a hearing aid requires assessing the patient’s hearing ability. 
In one method of assessment, the patient listens to a tape of 50 English words. The tape is played at low volume, and the patient is asked to repeat the words. The patient’s hearing ability score is the number of words perceived correctly. Four tapes of equivalent difficulty are available so that each ear can be tested with more than one hearing aid. These lists were created to be equally difficult to perceive in silence, but hearing aids must work in the presence of background noise. Researchers had 24 subjects with normal hearing compare two of the tapes when a background noise was present, with the order of the tapes randomized. Is it reasonable to assume that the two lists are still equivalent for purposes of the hearing test when there is background noise? Base your decision on a confidence interval for the mean difference in the number of words people might misunderstand.", + "description": "Fitting someone for a hearing aid requires assessing the patient's hearing ability. In one method of assessment, the patient listens to a tape of 50 English words. The tape is played at low volume, and the patient is asked to repeat the words. The patient's hearing ability score is the number of words perceived correctly. Four tapes of equivalent difficulty are available so that each ear can be tested with more than one hearing aid. These lists were created to be equally difficult to perceive in silence, but hearing aids must work in the presence of background noise. Researchers had 24 subjects with normal hearing compare two of the tapes when a background noise was present, with the order of the tapes randomized. Is it reasonable to assume that the two lists are still equivalent for purposes of the hearing test when there is background noise? 
Base your decision on a confidence interval for the mean difference in the number of words people might misunderstand.", "url": "https://dasl.datadescription.com/download/data/3261", "filename": "Hearing", "name": "Hearing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Fitting someone for a hearing aid requires assessing the patient’s hearing ability. In one method of assessment, the patient listens to a tape of 50 English words. The tape is played at low volume, and the patient is asked to repeat the words. The patient’s hearing ability score is the number of words perceived correctly. Four tapes of equivalent difficulty are available so that each ear can be tested with more than one hearing aid. These lists were created to be equally difficult to perceive in silence, but hearing aids must work in the presence of background noise. Researchers had 24 subjects with normal hearing compare two of the tapes when a background noise was present, with the order of the tapes randomized. Is it reasonable to assume that the two lists are still equivalent for purposes of the hearing test when there is background noise? Base your decision on a confidence interval for the mean difference in the number of words people might misunderstand.", + "description": "Fitting someone for a hearing aid requires assessing the patient's hearing ability. In one method of assessment, the patient listens to a tape of 50 English words. The tape is played at low volume, and the patient is asked to repeat the words. The patient's hearing ability score is the number of words perceived correctly. Four tapes of equivalent difficulty are available so that each ear can be tested with more than one hearing aid. 
These lists were created to be equally difficult to perceive in silence, but hearing aids must work in the presence of background noise. Researchers had 24 subjects with normal hearing compare two of the tapes when a background noise was present, with the order of the tapes randomized. Is it reasonable to assume that the two lists are still equivalent for purposes of the hearing test when there is background noise? Base your decision on a confidence interval for the mean difference in the number of words people might misunderstand.", "url": "https://dasl.datadescription.com/download/data/3262", "filename": "Hearing-4-lists", "name": "Hearing 4 lists", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data hold measurements on people of various ages. The main variable of interest is the level of insulin-like growth factor (igƒ) (J. Clin. Endocrinol. Metab. 78(3): 744–752, March 1994). Each row in the data set corresponds to one individual. See also Igf13, which concentrates on children. ", + "description": "The data hold measurements on people of various ages. The main variable of interest is the level of insulin-like growth factor (ig\u0192) (J. Clin. Endocrinol. Metab. 78(3): 744-752, March 1994). Each row in the data set corresponds to one individual. See also Igf13, which concentrates on children.", "url": "https://dasl.datadescription.com/download/data/3562", "filename": "Igf", "name": "Igf", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Measurements on children under 13 years of age. 
Most of the data was collected from physical examinations in schools. The main variable of interest is the level of insulin-like growth factor (igƒ) (J. Clin. Endocrinol. Metab. 78(3): 744–752, March 1994). Each row in the data set corresponds to one individual. See also the dataset Igf, which includes adults.", + "description": "Measurements on children under 13 years of age. Most of the data was collected from physical examinations in schools. The main variable of interest is the level of insulin-like growth factor (ig\u0192) (J. Clin. Endocrinol. Metab. 78(3): 744-752, March 1994). Each row in the data set corresponds to one individual. See also the dataset Igf, which includes adults.", "url": "https://dasl.datadescription.com/download/data/3563", "filename": "Igf13", "name": "Igf13", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Homer’s Iliad is an epic poem, compiled around 800 BCE, that describes several weeks of the last year of the 10-year siege of Troy (Ilion) by the Achaeans. The story centers on the rage of the great warrior Achilles. But it includes many details of injuries and outcomes, and is thus the oldest record of Greek medicine. The data report 146 recorded injuries for which both injury site and outcome are provided in the Illiad. Are some kinds of injuries more lethal than others?", + "description": "Homer's Iliad is an epic poem, compiled around 800 BCE, that describes several weeks of the last year of the 10-year siege of Troy (Ilion) by the Achaeans. The story centers on the rage of the great warrior Achilles. But it includes many details of injuries and outcomes, and is thus the oldest record of Greek medicine. 
The data report 146 recorded injuries for which both injury site and outcome are provided in the Iliad. Are some kinds of injuries more lethal than others?", "url": "https://dasl.datadescription.com/download/data/3281", "filename": "Illiad-Injuries", "name": "Illiad Injuries", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 1974, the Bellevue-Stratford Hotel in Philadelphia was the scene of an outbreak of\nwhat later became known as legionnaires’ disease. The cause of the disease was finally discovered to be bacteria that thrived in the air-conditioning units of the hotel.\nOwners of the Rip Van Winkle Motel, hearing about the Bellevue-Stratford, replace their air-conditioning system. The data are the bacteria counts in the air of eight rooms, before and after a new air-conditioning system was installed (measured in colonies per cubic foot of air). Has the new system has succeeded in lowering the bacterial count?", + "description": "In 1974, the Bellevue-Stratford Hotel in Philadelphia was the scene of an outbreak of\nwhat later became known as legionnaires' disease. The cause of the disease was finally discovered to be bacteria that thrived in the air-conditioning units of the hotel.\nOwners of the Rip Van Winkle Motel, hearing about the Bellevue-Stratford, replaced their air-conditioning system. The data are the bacteria counts in the air of eight rooms, before and after a new air-conditioning system was installed (measured in colonies per cubic foot of air). 
Has the new system succeeded in lowering the bacterial count?", "url": "https://dasl.datadescription.com/download/data/3310", "filename": "Legionnaires-disease", "name": "Legionnaires disease", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 2015 the Council of Europe published a report entitled The European School Survey Project on Alcohol and Other Drugs (www.espad.org). Among other issues, the survey investigated the percent-ages of 16-year-olds who had used marijuana. The data are the results for 38 European countries. ", + "description": "In 2015 the Council of Europe published a report entitled The European School Survey Project on Alcohol and Other Drugs (www.espad.org). Among other issues, the survey investigated the percentages of 16-year-olds who had used marijuana. The data are the results for 38 European countries.", "url": "https://dasl.datadescription.com/download/data/3326", "filename": "Marijuana-2015", "name": "Marijuana 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Researchers in Food Science studied how big people’s mouths tend to be. They measured mouth volume by pouring water into the mouths of subjects who lay on their backs. Unless this is your idea of a good time, it would be helpful to have a model to estimate mouth volume more simply. Fortunately, mouth volume is related to height. (Mouth volume is measured in cubic centimeters and height in meters.)", + "description": "Researchers in Food Science studied how big people's mouths tend to be. 
They measured mouth volume by pouring water into the mouths of subjects who lay on their backs. Unless this is your idea of a good time, it would be helpful to have a model to estimate mouth volume more simply. Fortunately, mouth volume is related to height. (Mouth volume is measured in cubic centimeters and height in meters.)", "url": "https://dasl.datadescription.com/download/data/3345", "filename": "Mouth-volume", "name": "Mouth volume", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A hospital in Nashville is considering changes to the prenatal care they offer. They collected the gestation times of 70 pregnancies that ended in live births. The established human gestation time is 266 days. ", + "description": "A hospital in Nashville is considering changes to the prenatal care they offer. They collected the gestation times of 70 pregnancies that ended in live births. 
The established human gestation time is 266 days.", "url": "https://dasl.datadescription.com/download/data/3359", "filename": "Nashville", "name": "Nashville", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Neck size", "url": "https://dasl.datadescription.com/download/data/3360", "filename": "Neck-size", "name": "Neck size", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Paralyzed veterans", "url": "https://dasl.datadescription.com/download/data/3388", "filename": "Paralyzed-veterans", "name": "Paralyzed veterans", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Paralyzed Veterans of America (PVA) is a Congressionally chartered veterans’ service organization that represents the interests of paralyzed veterans. The agency provides a range of services to veterans who have spinal cord injury or dysfunction. It derives most of its funding from contributions. The data set PVA contains a sample of the data on donors who recently gave money to the organization.", + "description": "The Paralyzed Veterans of America (PVA) is a Congressionally chartered veterans' service organization that represents the interests of paralyzed veterans. The agency provides a range of services to veterans who have spinal cord injury or dysfunction. 
It derives most of its funding from contributions. The data set PVA contains a sample of the data on donors who recently gave money to the organization.", "url": "https://dasl.datadescription.com/download/data/3415", "filename": "PVA", "name": "PVA", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "People with spinal cord injuries may lose function in some, but not all, of their muscles. The ability to push oneself up is particularly important for shifting position when seated and for transferring into and out of wheelchairs. Surgeons compared two operations to restore the ability to push up in children. ", + "description": "People with spinal cord injuries may lose function in some, but not all, of their muscles. The ability to push oneself up is particularly important for shifting position when seated and for transferring into and out of wheelchairs. Surgeons compared two operations to restore the ability to push up in children.", "url": "https://dasl.datadescription.com/download/data/3479", "filename": "Tendon-transf", "name": "Tendon transfers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Other" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": " In a random sample of U.S. adults surveyed in December 2011, Pew Research asked how important it is “to you personally” to be successful in a high-paying career or profession. Responses are recorded by sex and age. ", + "description": " In a random sample of U.S. 
adults surveyed in December 2011, Pew Research asked how important it is \"to you personally\" to be successful in a high-paying career or profession. Responses are recorded by sex and age.", "url": "https://dasl.datadescription.com/download/data/3071", "filename": "Being-successful", "name": "Being successful", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A researcher at Cornell University wanted to know how friendship might affect simple sales such as this. She randomly divided subjects into two groups and gave each group descriptions of items they might want to buy. One group was told to imagine buying from a friend whom they expected to see again. The other group […] ", + "description": "A researcher at Cornell University wanted to know how friendship might affect simple sales such as this. She randomly divided subjects into two groups and gave each group descriptions of items they might want to buy. One group was told to imagine buying from a friend whom they expected to see again. The other group was told to imagine buying from a stranger. The data are the prices offered by the experiment participants.", "url": "https://dasl.datadescription.com/download/data/3090", "filename": "Buy-from-a-friend", "name": "Buy from a friend", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The September 1998 issue of the American T\nPsychologist published an article by Kraut et al. 
that\nreported on an experiment examining “the social and\npsychological impact of the Internet on 169 people in\n73 households during their first 1 to 2 years online.” In the\nexperiment, 73 households were offered free Internet access\nfor 1 or 2 years in return for allowing their time and activity\nonline to be tracked. The members of the households who\nparticipated in the study were also given a battery of tests\nat the beginning and again at the end of the study. The\nconclusion of the study made news headlines: Those who\nspent more time online tended to be more depressed at the\nend of the experiment.\nThe news reports about this study clearly concluded that\nusing the Internet causes depression. Is such a conclusion warranted?", + "description": "The September 1998 issue of the American\nPsychologist published an article by Kraut et al. that\nreported on an experiment examining \"the social and\npsychological impact of the Internet on 169 people in\n73 households during their first 1 to 2 years online.\" In the\nexperiment, 73 households were offered free Internet access\nfor 1 or 2 years in return for allowing their time and activity\nonline to be tracked. The members of the households who\nparticipated in the study were also given a battery of tests\nat the beginning and again at the end of the study. The\nconclusion of the study made news headlines: Those who\nspent more time online tended to be more depressed at the\nend of the experiment.\nThe news reports about this study clearly concluded that\nusing the Internet causes depression. 
Is such a conclusion warranted?", "url": "https://dasl.datadescription.com/download/data/3158", "filename": "Depression-and-the-internet", "name": "Depression and the internet", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A Harvard psychologist recruited 75 female hotel maids to participate in a study. She randomly selected 41 of them, whom she informed (truthfully) that the work they do satisfies the Surgeon General’s recommendations for an active lifestyle, providing examples to show that their work is good exercise. The other 34 were told nothing. Various characteristics, such as weight, body fat, body mass index and blood pressure were recorded at the start of the study and again after four weeks. The researcher was interested in whether the information she provided would result in measurable physical changes. If there is a difference, it might challenge our understanding of the placebo effect because being informed could make a difference.", + "description": "A Harvard psychologist recruited 75 female hotel maids to participate in a study. She randomly selected 41 of them, whom she informed (truthfully) that the work they do satisfies the Surgeon General's recommendations for an active lifestyle, providing examples to show that their work is good exercise. The other 34 were told nothing. Various characteristics, such as weight, body fat, body mass index and blood pressure were recorded at the start of the study and again after four weeks. The researcher was interested in whether the information she provided would result in measurable physical changes. 
If there is a difference, it might challenge our understanding of the placebo effect because being informed could make a difference.", "url": "https://dasl.datadescription.com/download/data/3273", "filename": "Hotel-maids", "name": "Hotel maids", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In an experiment to test ginkgo bloba, subjects were assigned randomly to take ginkgo biloba supplements or a placebo. Their memory was tested to see whether it improved. ", + "description": "In an experiment to test ginkgo biloba, subjects were assigned randomly to take ginkgo biloba supplements or a placebo. Their memory was tested to see whether it improved.", "url": "https://dasl.datadescription.com/download/data/3335", "filename": "Memory", "name": "Memory", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The New York Times combined survey data (economix.blogs.nytimes.com/2013/\n07/10/working-parents-wanting-fewer-hours/) with data from\nthe U.S. Bureau of Labor Statistics (BLS) (www.bls.gov/news\n.release/archives/famee_04262013.htm) comparing how mothers\nand fathers would like to allocate their time compared with\nwhat they actually do. 
They asked a sample of parents with\nchildren 18 or under:\n“If money were no object, and you were free to do whatever\nyou wanted, would you stay at home, would you work full\ntime, or would you work part time?”\nPercent of respondents to this question choosing each\nalternative are reported in the “Desire” columns of the table.\nData in the “Actual” column are from the BLS. (Note:\n“Unemployed” = unemployed and actively seeking work.)\nThe table reports column percents (which may not add to\n100% due to rounding)", + "description": "The New York Times combined survey data (economix.blogs.nytimes.com/2013/\n07/10/working-parents-wanting-fewer-hours/) with data from\nthe U.S. Bureau of Labor Statistics (BLS) (www.bls.gov/news\n.release/archives/famee_04262013.htm) comparing how mothers\nand fathers would like to allocate their time compared with\nwhat they actually do. They asked a sample of parents with\nchildren 18 or under:\n\"If money were no object, and you were free to do whatever\nyou wanted, would you stay at home, would you work full\ntime, or would you work part time?\"\nPercent of respondents to this question choosing each\nalternative are reported in the \"Desire\" columns of the table.\nData in the \"Actual\" column are from the BLS. 
(Note:\n\"Unemployed\" = unemployed and actively seeking work.)\nThe table reports column percents (which may not add to\n100% due to rounding)", "url": "https://dasl.datadescription.com/download/data/3344", "filename": "Mothers-fathers-aspirations", "name": "Mothers and fathers aspirations", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In a study published in the journal Psychological Science, Rauscher, Shaw, and Ky reported that when students were given a spatial reasoning section of a standard IQ test, those who listened to Mozart for 10 minutes improved their scores more than those who simply sat quietly. ", + "description": "In a study published in the journal Psychological Science, Rauscher, Shaw, and Ky reported that when students were given a spatial reasoning section of a standard IQ test, those who listened to Mozart for 10 minutes improved their scores more than those who simply sat quietly.", "url": "https://dasl.datadescription.com/download/data/3350", "filename": "Mozart", "name": "Mozart", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Researchers interviewed participants to find some who reliably fell asleep and awoke on one side and who could remember their dreams. They found 63 participants, of whom 41 were right-side sleepers and 22 slept on their left side. Then they interviewed them about their dreams. Of the 41 right-side sleepers, only 6 reported often having nightmares. But of the 22 left-side sleepers 9 reported nightmares. 
Is the difference significant?", "url": "https://dasl.datadescription.com/download/data/3366", "filename": "Nightmares", "name": "Nightmares", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Stereograms appear to be composed entirely of\nrandom dots. However, they contain separate images that a\nviewer can “fuse” into a three-dimensional (3D) image by staring\nat the dots while defocusing the eyes. An experiment was\nperformed to determine whether knowledge of the embedded\nimage affected the time required for subjects to fuse the images.\nOne group of subjects (group NV) received no information or\njust verbal information about the shape of the embedded object.\nA second group (group VV) received both verbal information\nand visual information (specifically, a drawing of the object).\nThe experimenters measured how many seconds it took for the\nsubject to report that he or she saw the 3D image.", + "description": "Stereograms appear to be composed entirely of\nrandom dots. However, they contain separate images that a\nviewer can \"fuse\" into a three-dimensional (3D) image by staring\nat the dots while defocusing the eyes. 
An experiment was\nperformed to determine whether knowledge of the embedded\nimage affected the time required for subjects to fuse the images.\nOne group of subjects (group NV) received no information or\njust verbal information about the shape of the embedded object.\nA second group (group VV) received both verbal information\nand visual information (specifically, a drawing of the object).\nThe experimenters measured how many seconds it took for the\nsubject to report that he or she saw the 3D image.", "url": "https://dasl.datadescription.com/download/data/3459", "filename": "Stereograms", "name": "Stereograms", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Psychology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A study examined the health risks of smoking measured the cholesterol levels of people who had smoked for at least 25 years and people of similar ages who had smoked for no more than 5 years and then stopped", "url": "https://dasl.datadescription.com/download/data/3111", "filename": "Cholesterol-and-smoking", "name": "Cholesterol and smoking", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on 816 brands of cigarettes. What relationships are there among the nicotine content, tars, and CO? Are any brands unusually high or low in nicotine? Can you account for that? 
", "url": "https://dasl.datadescription.com/download/data/3113", "filename": "Cigarettes", "name": "Cigarettes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Researchers measured the concentration (nanograms per milliliter) of cotinine in the blood\nof three groups of people: nonsmokers who have not been exposed to smoke, nonsmokers\nwho have been Exposed To Smoke (ETS), and smokers. Cotinine is left in the blood when\nthe body metabolizes nicotine, so its value is a direct measurement of the effect of passive smoke exposure.", "url": "https://dasl.datadescription.com/download/data/3389", "filename": "Passive-smoke", "name": "Passive smoke", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Centers for Disease Control and Prevention\ntrack cigarette smoking in the United States. How has the percentage of people who smoke changed since the danger became clear during the last half of the 20th\ncentury? The data give percentages of smokers among\nmen 18–24 years of age, as estimated by surveys, from 1965\nthrough 2014.", + "description": "The Centers for Disease Control and Prevention\ntrack cigarette smoking in the United States. How has the percentage of people who smoke changed since the danger became clear during the last half of the 20th\ncentury? 
The data give percentages of smokers among\nmen 18-24 years of age, as estimated by surveys, from 1965\nthrough 2014.", "url": "https://dasl.datadescription.com/download/data/3455", "filename": "Smoking-2014", "name": "Smoking 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "There has been a steady decline the the percentage of pregnant mothers who smoke. These data document the trend. The run only until 2011, which appears to be the latest date for which the CDC has data. ", + "description": "There has been a steady decline in the percentage of pregnant mothers who smoke. These data document the trend. They run only until 2011, which appears to be the latest date for which the CDC has data.", "url": "https://dasl.datadescription.com/download/data/3456", "filename": "Smoking-and-Pregnancy-2011", "name": "Smoking and Pregnancy 2011", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Smoking" } ] }, { "name": "Nature", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Froliger and Kane measured the pH (a scale on which a value of 7 is neutral and values below 7 are acidic) of water collected from precipitation events in Allegheny County, Pennsylvania between December 20, 1973 and May 23, 1974. Display the distribution of these values and describe with words and numbers what you see. 
", + "description": "Froliger and Kane measured the pH (a scale on which a value of 7 is neutral and values below 7 are acidic) of water collected from precipitation events in Allegheny County, Pennsylvania between December 20, 1973 and May 23, 1974. Display the distribution of these values and describe with words and numbers what you see.", "url": "https://dasl.datadescription.com/download/data/3041", "filename": "acid-rain", "name": "Acid rain", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the average January Temperature (in degrees Fahrenheit) and Latitude (in degrees north of the equator) for 59 U.S. cities. How are they related? ", "url": "https://dasl.datadescription.com/download/data/3114", "filename": "City-climate", "name": "City climate", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3115", "filename": "City-temperatures", "name": "City temperatures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Global temperature from https://www.ncdc.noaa.gov/cag/data-info/global Global temperature anomaly data come from the Global Historical Climatology Network-Monthly (GHCN-M) data set and International Comprehensive Ocean-Atmosphere Data Set (ICOADS), which have data from 1880 to the present. 
These two datasets are blended into a single product to produce the combined global land and ocean temperature anomalies. The available timeseries of global-scale temperature anomalies are calculated with respect to the 20th century average, while the mapping tool displays global-scale temperature anomalies with respect to the 1981-2010 base period. For more information on these anomalies, please visit Global Surface Temperature Anomalies. CO2 from ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_annmean_mlo.txt DJIA from https://www.measuringworth.com\n\nScientists claim that changes in the mean global temperature are primarily due to changes in CO2 levels. Both trends are here from 1959 to 2016. For an alternative, the data includes the annual closing price of the Dow Jones Industrial Average. Can it predict global temperature?", "url": "https://dasl.datadescription.com/download/data/3116", "filename": "Climate-change-2016", "name": "Climate change 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Hurricane frequencies", "url": "https://dasl.datadescription.com/download/data/3279", "filename": "Hurricane-frequencies", "name": "Hurricane frequencies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Hurricane history", "url": "https://dasl.datadescription.com/download/data/3280", "filename": "Hurricane-history", "name": "Hurricane history", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, 
"use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The barometric pressure at the center of a hurricane is often used to measure the strength of the hurricane because it can predict the maximum wind speed of the storm. How well is the wind speed predicted by the barometric pressure? ", "url": "https://dasl.datadescription.com/download/data/3278", "filename": "Hurricanes-2015", "name": "Hurricanes 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Los Angeles Almanac reports a number of variables about the weather in LA. Among them is the annual rainfall, reported here for 1991-2018. It is worthwhile to look up any outliers. ", + "description": "The Los Angeles Almanac reports a number of variables about the weather in LA. Among them is the annual rainfall, reported here for 1991-2018. It is worthwhile to look up any outliers.", "url": "https://dasl.datadescription.com/download/data/3555", "filename": "LA-rainfall", "name": "LA rainfall", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Is global climate change leading to an increase in the number of major hurricanes? 
The data gives the number of hurricanes classified as major hurricanes in the Atlantic Ocean each year from 1944 through 2013, as reported by NOAA: ", "url": "https://dasl.datadescription.com/download/data/3323", "filename": "Major-hurricane-2013", "name": "Major hurricanes 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Tornadoes 2015\nSource: www.nws.noaa.gov/om/hazstats/resources/weather_fatalities.pdf", "url": "https://dasl.datadescription.com/download/data/3488", "filename": "Tornadoes", "name": "Tornadoes 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Tracking hurricanes 2015", "url": "https://dasl.datadescription.com/download/data/3493", "filename": "Tracking-hurricanes-2015", "name": "Tracking hurricanes 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The National Hurricane Center (NHC) of the National Oceanic and Atmospheric\nAdministration (NOAA) tries to predict the path each hurricane will take. But hurricanes\ntend to wander around aimlessly and are pushed by fronts and other weather\nphenomena in their area, so they are notoriously difficult to predict. Even relatively small changes in a hurricane’s track can make big differences in the damage it causes. 
The data give the mean error in nautical miles of the NHC’s 72-hour predictions of Atlantic hurricanes for 1970-2017. NOAA refers to these errors as the Forecast\nerror or the Prediction error and reports annual results.", + "description": "The National Hurricane Center (NHC) of the National Oceanic and Atmospheric\nAdministration (NOAA) tries to predict the path each hurricane will take. But hurricanes\ntend to wander around aimlessly and are pushed by fronts and other weather\nphenomena in their area, so they are notoriously difficult to predict. Even relatively small changes in a hurricane's track can make big differences in the damage it causes. The data give the mean error in nautical miles of the NHC's 72-hour predictions of Atlantic hurricanes for 1970-2017. NOAA refers to these errors as the Forecast\nerror or the Prediction error and reports annual results.", "url": "https://dasl.datadescription.com/download/data/3494", "filename": "Tracking-hurricanes-2016", "name": "Tracking hurricanes 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Tsunamis 2016", "url": "https://dasl.datadescription.com/download/data/3500", "filename": "Tsunamis-2016", "name": "Tsunamis 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "http://www.ngdc.noaa.gov/hazard/tsu_db.shtml Extracted Event Validity 3 and 4 Cause Codes 1-5 Event Validity: 4 = definite tsunami 3 = probable tsunami 2 = questionable tsunami 1 = very doubtful tsunami 0 = event that only caused a seiche or disturbance in an inland river -1 = 
erroneous entry Cause Code: Valid values: 0 to 11 The source of the tsunami: 0 = Unknown 1 = Earthquake 2 = Questionable Earthquake 3 = Earthquake and Landslide 4 = Volcano and Earthquake 5 = Volcano, Earthquake, and Landslide 6 = Volcano 7 = Volcano and Landslide 8 = Landslide 9 = Meteorological 10 = Explosion 11 = Astronomical Tide", "url": "https://dasl.datadescription.com/download/data/3501", "filename": "Tsunamis-2018", "name": "Tsunamis 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Weather forecasts", "url": "https://dasl.datadescription.com/download/data/3519", "filename": "Weather-forecasts", "name": "Weather forecasts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Wind speed", "url": "https://dasl.datadescription.com/download/data/3528", "filename": "Wind-speed", "name": "Wind speed", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Weather" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The annual number of deaths from floods in the United states from 1995 through 2015. 
Years are not provided, but the data values are in time order.", "url": "https://dasl.datadescription.com/download/data/3211", "filename": "Floods-2015", "name": "Floods 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Climate scientists have been observing the extent of sea ice using satellite observations. Many have expressed concern because, since 1980, the extent of sea ice has declined precipitously—possibly due to global climate change. But a multiple regression of Extent on temp and year gives a coefficient for temp that is essentially zero. ", + "description": "Climate scientists have been observing the extent of sea ice using satellite observations. Many have expressed concern because, since 1980, the extent of sea ice has declined precipitously - possibly due to global climate change. But a multiple regression of Extent on temp and year gives a coefficient for temp that is essentially zero.", "url": "https://dasl.datadescription.com/download/data/3443", "filename": "Sea-ice", "name": "Sea ice", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "As part of the course work, a class at an upstate\nNY college collects data on streams each year. 
Students\nrecord a number of biological, chemical, and physical variables,\nincluding the stream name, the substrate of the stream\n(limestone (L), shale (S), or mixed (M)), the pH, the temperature\n(\u001dC), and the BCI, a measure of biological diversity.", + "description": "As part of the course work, a class at an upstate\nNY college collects data on streams each year. Students\nrecord a number of biological, chemical, and physical variables,\nincluding the stream name, the substrate of the stream\n(limestone (L), shale (S), or mixed (M)), the pH, the temperature\n(°C), and the BCI, a measure of biological diversity.", "url": "https://dasl.datadescription.com/download/data/3463", "filename": "Streams", "name": "Streams", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Waters" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3074", "filename": "Bird-Species-2013", "name": "Bird-Species-2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ranges inhabited by the Indian gharial\ncrocodile and the Australian saltwater crocodile overlap in\nBangladesh. 
Suppose a very large crocodile skeleton is found\nthere, and we wish to determine the species of the animal.\nWildlife scientists have measured the lengths of the heads\nand the complete bodies of several crocs (in centimeters) of\neach species.\n", "url": "https://dasl.datadescription.com/download/data/3147", "filename": "Crocodile-lengths", "name": "Crocodile lengths", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In 2004, a team of researchers published a study of contaminants in farmed salmon. Fish from many sources were analyzed for 14 organic contaminants. The study\nexpressed concerns about the level of contaminants found. One of those was the\ninsecticide mirex, which has been shown to be carcinogenic and is suspected to be\ntoxic to the liver, kidneys, and endocrine system. The dataset holds 153 observed salmon samples and reports concentrations of a number of contaminant.", "url": "https://dasl.datadescription.com/download/data/3199", "filename": "Farmed-salmon", "name": "Farmed salmon", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Wildlife researchers monitor many wildlife populations by taking aerial photographs. Can they estimate the weights of alligators accurately from the air? Here are data on the Weight of alligators (in pounds) and their Length (in inches). ", + "description": "Wildlife researchers monitor many wildlife populations by taking aerial photographs. Can they estimate the weights of alligators accurately from the air? 
Here are data on the Weight of alligators (in pounds) and their Length (in inches).", "url": "https://dasl.datadescription.com/download/data/3236", "filename": "Gators", "name": "Gators", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Maine lobster fishing industry is carefully controlled and licensed, and facts about it have been recorded for more than a century, so it is an important industry that we can examine in detail. The dataset holds annual data ", "url": "https://dasl.datadescription.com/download/data/3317", "filename": "Lobsters-2016", "name": "Lobsters 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Manatees are gentle mammals that live in the waters off the coast of Florida and a few other places. Unfortunately, many are killed each year in collisions with powerboats. Marine biologists warn that the growing number of powerboats registered in Florida threatens the existence of manatees. The data here are the number of manatees killed each year since 1982 and the number of powerboats registered in Florida (in thousands) for those years. 
Is there a relationship?", "url": "https://dasl.datadescription.com/download/data/3325", "filename": "Manatees-2015", "name": "Manatees 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Psychology experiments sometimes involve testing the\nability of rats to navigate mazes. The mazes are classified\naccording to difficulty, as measured by the mean length of\ntime it takes rats to find the food at the end. One researcher\nneeded a maze that will take rats an average of about one minute\nto solve. He tested one maze on several rats, collecting the\ndata provided.", "url": "https://dasl.datadescription.com/download/data/3333", "filename": "Maze", "name": "Maze", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Can pleasant smells improve learning? Researchers timed 21 subjects as they tried to complete paper-and-pencil mazes. Each subject attempted a maze both with and without the presence of a floral aroma. Subjects were randomized with respect to whether they did the scented trial first or second. Is there any evidence that the floral scent improved the subjects’ ability to complete the mazes?", + "description": "Can pleasant smells improve learning? Researchers timed 21 subjects as they tried to complete paper-and-pencil mazes. Each subject attempted a maze both with and without the presence of a floral aroma. Subjects were randomized with respect to whether they did the scented trial first or second. 
Is there any evidence that the floral scent improved the subjects' ability to complete the mazes?", "url": "https://dasl.datadescription.com/download/data/3334", "filename": "Mazes-smells", "name": "Mazes and smells", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Emperor penguins are the most accomplished divers among birds, making routine\ndives of 5–12 minutes, with the longest recorded dive over 27 minutes. These\nbirds can also dive to depths of over 500 meters! Since air-breathing animals like\npenguins must hold their breath while submerged, the duration of any given dive\ndepends on how much oxygen is in the bird’s body at the beginning of the dive, how\nquickly that oxygen gets used, and the lowest level of oxygen the bird can tolerate.\nThe rate of oxygen depletion is primarily determined by the penguin’s heart rate.\nConsequently, studies of heart rates during dives can help us understand how these\nanimals regulate their oxygen consumption in order to make such impressive dives.The researchers equipped emperor penguins with devices that record their heart rates during\ndives. The dataset reports Dive Heart Rate (beats per minute), the Duration\n(minutes) of dives, and other related variables.", + "description": "Emperor penguins are the most accomplished divers among birds, making routine\ndives of 5-12 minutes, with the longest recorded dive over 27 minutes. These\nbirds can also dive to depths of over 500 meters! 
Since air-breathing animals like\npenguins must hold their breath while submerged, the duration of any given dive\ndepends on how much oxygen is in the bird's body at the beginning of the dive, how\nquickly that oxygen gets used, and the lowest level of oxygen the bird can tolerate.\nThe rate of oxygen depletion is primarily determined by the penguin's heart rate.\nConsequently, studies of heart rates during dives can help us understand how these\nanimals regulate their oxygen consumption in order to make such impressive dives. The researchers equipped emperor penguins with devices that record their heart rates during\ndives. The dataset reports Dive Heart Rate (beats per minute), the Duration\n(minutes) of dives, and other related variables.", "url": "https://dasl.datadescription.com/download/data/3391", "filename": "Penguins", "name": "Penguins", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Salmon", "url": "https://dasl.datadescription.com/download/data/3435", "filename": "Salmon", "name": "Salmon", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The number of storks in Oldenburg, Germany, plotted against the population of the town for 7 years in the 1930s. Do storks bring babies? 
", "url": "https://dasl.datadescription.com/download/data/3462", "filename": "Storks", "name": "Storks", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Large herds of wild horses can become a problem on some federal lands in the West. Researchers hoping to improve the management of these herds collected data to see if they could predict the number of foals that would be born based on the size of the current herd. ", + "description": "Large herds of wild horses can become a problem on some federal lands in the West. Researchers hoping to improve the management of these herds collected data to see if they could predict the number of foals that would be born based on the size of the current herd.", "url": "https://dasl.datadescription.com/download/data/3524", "filename": "Wild-horses", "name": "Wild horses", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Animals" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A biology student studied the effect of10 different fertilizers on the growth of mung bean sprouts. She sprouts 12 beans in each of 10 different petri dishes, and adds the same amount of fertilizer to each dish. After one week she measures the heights of the 120 sprouts in millimeters. ", + "description": "A biology student studied the effect of10 different fertilizers on the growth of mung bean sprouts. She sprouts 12 beans in each of 10 different petri dishes, and adds the same amount of fertilizer to each dish. 
After one week she measures the heights of the 120 sprouts in millimeters.", "url": "https://dasl.datadescription.com/download/data/3203", "filename": "Fertilizers", "name": "Fertilizers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 1936 Sir Ronald Fisher presented data on irises as the example in a famous statistics paper. Ever since, “Fisher’s Iris data” have been a feature of statistics texts. Fisher presents 4 measurements of Iris flowers of three species. Can we differentiate the species? If so, how best to do that?", + "description": "In 1936 Sir Ronald Fisher presented data on irises as the example in a famous statistics paper. Ever since, \"Fisher's Iris data\" have been a feature of statistics texts. Fisher presents 4 measurements of Iris flowers of three species. Can we differentiate the species? If so, how best to do that?", "url": "https://dasl.datadescription.com/download/data/3206", "filename": "Fisher-Irises", - "name": "Fisher’s Irises", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, + "name": "Fisher's Irises", "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Hopkins Memorial Forest is a 2500-acre reserve in Massachusetts, New York, and Vermont managed by the Williams College Center for Environmental Studies (CES). As part of its mission, the CES monitors forest resources and conditions over the long term. ", + "description": "The Hopkins Memorial Forest is a 2500-acre reserve in Massachusetts, New York, and Vermont managed by the Williams College Center for Environmental Studies (CES). 
As part of its mission, the CES monitors forest resources and conditions over the long term.", "url": "https://dasl.datadescription.com/download/data/3271", "filename": "Hopkins-Forest", "name": "Hopkins Forest", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "One can determine how old a tree is by counting its rings, but that requires either cutting the tree down or extracting a sample from the tree’s core. Can we estimate the tree’s age simply from its diameter?A forester measured 27 trees of the same species that had been cut down, and counted the rings to determine the ages of the trees. ", + "description": "One can determine how old a tree is by counting its rings, but that requires either cutting the tree down or extracting a sample from the tree's core. Can we estimate the tree's age simply from its diameter? A forester measured 27 trees of the same species that had been cut down, and counted the rings to determine the ages of the trees.", "url": "https://dasl.datadescription.com/download/data/3277", "filename": "old-tree", "name": "How old is that Tree", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "As the number of oranges on a tree increases, the fruit tends to get smaller. 
The dataset gives numbers of oranges/tree and average weight/orange (in pounds).", "url": "https://dasl.datadescription.com/download/data/3385", "filename": "Oranges", "name": "Oranges", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An experiment on mung beans was performed\nto investigate the environmental effects of salinity and\nwater temperature on sprouting. Forty beans were randomly\nallocated to each of 36 petri dishes that were subject\nto one of four levels of Salinity (0, 4, 8, and 12 ppm)\nand one of three Temperatures (32°, 34°, or 36° C). After\n48 hours, the biomass of the sprouts in gm was measured. The percent of beans germinating is also recorded.", "url": "https://dasl.datadescription.com/download/data/3458", "filename": "Sprouts", "name": "Sprouts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Tree growth", "url": "https://dasl.datadescription.com/download/data/3497", "filename": "Tree-growth", "name": "Tree growth", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Vineyards", "url": "https://dasl.datadescription.com/download/data/3513", "filename": "Vineyards", "name": "Vineyards", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true 
} ], "name": "Plants" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Meteor Crater in Arizona was the first recognized impact crater and was identified as such only in the 1920s. With the help of satellite images, more and more craters have been identified; now more than 180 are known. These, of course, are only a small sample of all the impacts the earth has experienced: Only 29% of earth’s surface is land, and many craters have been covered or eroded away. Astronomers have recog-nized a roughly 35 million-year cycle in the frequency of cratering, although the cause of this cycle is not fully understood.\nThe data hold information about craters. craters from the most recent 35Ma (million years) may be the more reliable data, and are suitable for analyses relating age and diameter.", + "description": "Meteor Crater in Arizona was the first recognized impact crater and was identified as such only in the 1920s. With the help of satellite images, more and more craters have been identified; now more than 180 are known. These, of course, are only a small sample of all the impacts the earth has experienced: Only 29% of earth's surface is land, and many craters have been covered or eroded away. Astronomers have recognized a roughly 35 million-year cycle in the frequency of cratering, although the cause of this cycle is not fully understood.\nThe data hold information about craters. 
Craters from the most recent 35Ma (million years) may be the more reliable data, and are suitable for analyses relating age and diameter.", "url": "https://dasl.datadescription.com/download/data/3142", "filename": "Craters", "name": "Craters", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Geology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3094", "filename": "Carbon-footprint", "name": "Carbon footprint", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3095", "filename": "Carbon-footprint-2015", "name": "Carbon footprint 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3240", "filename": "Gemstones", "name": "Gemstones", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "It is a common belief that Yellowstone’s most famous geyser erupts once an hour at very predictable intervals. But, in fact, the intervals between eruptions can vary greatly. 
Can we predict the interval from, for example, the duration of the previous eruption? Are there other patterns in the data worth noting? ", + "description": "It is a common belief that Yellowstone's most famous geyser erupts once an hour at very predictable intervals. But, in fact, the intervals between eruptions can vary greatly. Can we predict the interval from, for example, the duration of the previous eruption? Are there other patterns in the data worth noting? ", "url": "https://dasl.datadescription.com/download/data/3380", "filename": "Old-Faithful", "name": "Old Faithful", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Ozone levels (in parts per billion, ppb) were recorded at sites in New Jersey monthly between 1926 and 1971. Here are boxplots of the data for each month (over the 46 years), lined up in order (January = 1): ", "url": "https://dasl.datadescription.com/download/data/3386", "filename": "Ozone", "name": "Ozone", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The National Interagency Fire Center reports statistics about wildfires. They report data from 1960, but the years 1960-1984 are so different from subsequent years that they can’t be analyzed together. These data are for 1985-2015. Is there a pattern over time? What is the relationship between the number of fires and the acres affected? Are fires getting larger or smaller on average?", + "description": "The National Interagency Fire Center reports statistics about wildfires. 
They report data from 1960, but the years 1960-1984 are so different from subsequent years that they can't be analyzed together. These data are for 1985-2015. Is there a pattern over time? What is the relationship between the number of fires and the acres affected? Are fires getting larger or smaller on average?", "url": "https://dasl.datadescription.com/download/data/3523", "filename": "Wildfires-2015", "name": "Wildfires 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Other" } ] }, { "name": "Statistics", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "To shorten the time it takes him to make his favorite pizza, a student designed an experiment to test the effect of sugar and milk on the activation times for baking yeast. Specifically, he tested four different recipes and measured how many seconds it took for the same amount of dough to rise to the […] ", + "description": "To shorten the time it takes him to make his favorite pizza, a student designed an experiment to test the effect of sugar and milk on the activation times for baking yeast. Specifically, he tested four different recipes and measured how many seconds it took for the same amount of dough to rise to the top of a bowl. 
He randomized the order of the recipes and replicated each treatment 4 times.", "url": "https://dasl.datadescription.com/download/data/3042", "filename": "activating-baking-yeast", "name": "Activating baking yeast", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The American International Group (AIG) was once the 18th largest corporation in the world. By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency […] ", + "description": "The American International Group (AIG) was once the 18th largest corporation in the world. By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency government assistance. Between 2007 and 2009 AIG stock lost more than 99% of its value, hitting $0.35 in early March. Could the crash have been predicted?", "url": "https://dasl.datadescription.com/download/data/3046", "filename": "AIG-daily", "name": "AIG daily", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The American International Group (AIG) was once the 18th largest corporation in the world. By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. 
Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency […]", + "description": "The American International Group (AIG) was once the 18th largest corporation in the world. By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency [\u2026]", "url": "https://dasl.datadescription.com/download/data/3047", "filename": "AIG-monthly", "name": "AIG monthly", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A sample of model 2011 cars from an online information service colleted to see how fuel efficiency (as highway mpg) relates to the cost (MSRP) ", "url": "https://dasl.datadescription.com/download/data/3050", "filename": "All-the-efficiency", "name": "All the efficiency", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The price of delicious apples and regular gas are components of the Consumer Price Index. 
The data give those prices monthly for the year 2006 ", "url": "https://dasl.datadescription.com/download/data/3055", "filename": "Apples-and-gas", "name": "Apples and gas", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "You have decided to invest in a bond fund and plan to limit your choice of funds to Morningstar “medalist” funds. But now you must choose between a taxable fund and a municipal bond fund that is at least partially tax-free. Which is better? Here are the % returns for the three-year period leading up", + "description": "You have decided to invest in a bond fund and plan to limit your choice of funds to Morningstar \"medalist\" funds. But now you must choose between a taxable fund and a municipal bond fund that is at least partially tax-free. Which is better? Here are the % returns for the three-year period leading up to spring of 2013.", "url": "https://dasl.datadescription.com/download/data/3080", "filename": "Bond-funds", "name": "Bond funds", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Number of sales people working in a bookstore and sales (in $1000) that day. These are realistic but invented data. ", + "description": "Number of sales people working in a bookstore and sales (in $1000) that day. 
These are realistic but invented data.", "url": "https://dasl.datadescription.com/download/data/3081", "filename": "Bookstore-sales", "name": "Bookstore sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In 2015, the website NewGeography.com listed its ranking of the best cities for job growth in the United States. Nonfarm employment is also provided", "url": "https://dasl.datadescription.com/download/data/3082", "filename": "Boomtowns-2015", "name": "Boomtowns 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Home prices in two neighborhoods near San Francisco. Palo Alto is an older neighborhood and Foster City, a newer one. 
How do prices compare?", "url": "https://dasl.datadescription.com/download/data/3104", "filename": "CA-House-Prices", "name": "CA House Prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3097", "filename": "Car-discounts", "name": "Car discounts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3098", "filename": "Car-origins", "name": "Car origins", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The S&P/Case-Shiller Home Price Indices track changes in the value of residential real estate nationally and in 20 metropolitan regions. (Some of these indices are actually traded on the Chicago Mercantile Exchange.) The data set Case-Shiller by City gives the monthly index values for each of the 20 cities tracked by the Case-Shiller index and […] ", + "description": "The S&P/Case-Shiller Home Price Indices track changes in the value of residential real estate nationally and in 20 metropolitan regions. (Some of these indices are actually traded on the Chicago Mercantile Exchange.) 
The data set Case-Shiller by City gives the monthly index values for each of the 20 cities tracked by the Case-Shiller index and [\u2026] ", "url": "https://dasl.datadescription.com/download/data/3102", "filename": "Case-Shiller-by-city", "name": "Case-Shiller by city", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Beginning in 2017, public companies will be required to disclose the ratio of CEO pay to median worker pay. The Glassdoor Economic Research Blog has published the data for 2014. The data includes CEO identities, companies, CEO compensation, median worker compensation (compiled by Glassdoor), and the ratio of CEO to worker compensation.", "url": "https://dasl.datadescription.com/download/data/3105", "filename": "CEO-Compensation-2014", "name": "CEO Compensation 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3106", "filename": "CEO-Salary-2012", "name": "CEO Salary 2012", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Coffee is the world’s second largest\nlegal export commodity (after oil) and is the second largest\nsource of foreign exchange for developing nations. 
The\nUnited States consumes about one-fifth of the world’s coffee.\nThe International Coffee Organization (ICO) computes\na coffee price index using Colombian, Brazilian, and\na mixture of other coffee data. Data are provided for the\nmonthly average ICO price index (in $US) from Jan 2009 to December 2017c", + "description": "Coffee is the world's second largest\nlegal export commodity (after oil) and is the second largest\nsource of foreign exchange for developing nations. The\nUnited States consumes about one-fifth of the world's coffee.\nThe International Coffee Organization (ICO) computes\na coffee price index using Colombian, Brazilian, and\na mixture of other coffee data. Data are provided for the\nmonthly average ICO price index (in $US) from Jan 2009 to December 2017.", "url": "https://dasl.datadescription.com/download/data/3119", "filename": "Coffee-prices-2017", "name": "Coffee-prices-2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The cost of a variety of common items in 576 cities around the world in $, adjusted so that New York, U.S.A. is 100.", "url": "https://dasl.datadescription.com/download/data/3120", "filename": "COLall-2016", "name": "COLall 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Facts about companies selected from the Forbes 500 list for 1986. This is a 1/10 systematic sample from the alphabetical list of companies. 
The Forbes 500 includes all companies in the top 500 on any of the criteria, and thus has almost 800 companies in the list.", "url": "https://dasl.datadescription.com/download/data/3125", "filename": "Companies", "name": "Companies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Facts about companies selected from the Forbes 500 list for 2000", "url": "https://dasl.datadescription.com/download/data/3595", "filename": "Companies-Quickstart", "name": "Companies Quickstart", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3129", "filename": "Consumer-spending", "name": "Consumer spending", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3130", "filename": "Consumer-spending-post-holiday", "name": "Consumer spending post holiday", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Numbeo.com lists the cost of living (COL) for 576 cities around the world. They report the typical cost of a number of staples. 
The cost of living is made up of many components. These data report a variety of everyday costs. How are they related? Can an overall cost of living be constructed from them?", "url": "https://dasl.datadescription.com/download/data/3132", "filename": "Cost-of-living-2016", "name": "Cost of living 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3135", "filename": "Cost-of-Living-2017", "name": "Cost of Living 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Cost of Living Index (Excl. Rent) is a relative indicator of consumer goods prices, including groceries, restaurants, transportation and utilities. Cost of Living Index doesn’t include accommodation expenses such as rent or mortgage. If a city has a Cost of Living Index of 120, it means Numbeo estimates it is 20% more expensive than New York (excluding rent).\nRent Index is an estimation of prices of renting apartments in the city compared to New York City. If Rent index is 80, Numbeo estimates that price of rents in that city is on an average 20% less than the price in New York.\nGroceries Index is an estimation of grocery prices in the city compared to New York City. 
To calculate this section, Numbeo uses weights of items in the “Markets” section for each city.\nRestaurants Index is a comparison of prices of meals and drinks in restaurants and bars compared to NYC.\nCost of Living Plus Rent Index is an estimation of consumer goods prices including rent comparing to New York City.\nLocal Purchasing Power shows relative purchasing power in buying goods and services in a given city for the average wage in that city. If domestic purchasing power is 40, this means that the inhabitants of that city with the average salary can afford to buy on an average 60% less goods and services than New York City residents with an average salary..", + "description": "Cost of Living Index (Excl. Rent) is a relative indicator of consumer goods prices, including groceries, restaurants, transportation and utilities. Cost of Living Index doesn't include accommodation expenses such as rent or mortgage. If a city has a Cost of Living Index of 120, it means Numbeo estimates it is 20% more expensive than New York (excluding rent).\nRent Index is an estimation of prices of renting apartments in the city compared to New York City. If Rent index is 80, Numbeo estimates that price of rents in that city is on an average 20% less than the price in New York.\nGroceries Index is an estimation of grocery prices in the city compared to New York City. To calculate this section, Numbeo uses weights of items in the \"Markets\" section for each city.\nRestaurants Index is a comparison of prices of meals and drinks in restaurants and bars compared to NYC.\nCost of Living Plus Rent Index is an estimation of consumer goods prices including rent comparing to New York City.\nLocal Purchasing Power shows relative purchasing power in buying goods and services in a given city for the average wage in that city. 
If domestic purchasing power is 40, this means that the inhabitants of that city with the average salary can afford to buy on an average 60% less goods and services than New York City residents with an average salary.", "url": "https://dasl.datadescription.com/download/data/3136", "filename": "Cost-of-living-2018", "name": "Cost of living 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Consumer Price Index (CPI) summarizes the cost of a representative market basket\nof goods that includes groceries, restaurants, transportation, utilities, and medical\ncare. Global companies often use the CPI to determine living allowances and salaries\nfor employees. Inflation is often measured by how much the CPI changes from year to\nyear. Relative CPIs can be found for different cities. We have data giving CPI components\nrelative to New York City. For New York City, each index is 100(%).", "url": "https://dasl.datadescription.com/download/data/3139", "filename": "CPI-Worldwide-2016", "name": "CPI Worldwide 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A credit card company wants to see how much customers in a particular segment of\ntheir market use their credit card. They have provided data on the amount\nspent by 500 selected customers during a 3-month period and have asked you to\nsummarize the expenditures. 
(Data are realistic, but disguised for confidentiality.)", "url": "https://dasl.datadescription.com/download/data/3146", "filename": "Credit-card-charges", "name": "Credit card charges", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Peninsula Creameries sells both cottage cheese and ice cream. The CEO recently noticed that in months when the company sells more cottage cheese, it seems to sell more ice cream as well.", "url": "https://dasl.datadescription.com/download/data/3152", "filename": "Dairy-sales", "name": "Dairy sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on raw diamonds from the internet. Price of a diamond depends on its Carat weight, color, clarity, and cut. The data are for 2690 diamonds of a variety of weights, colors, clarity, and cut. What predicts the price? Do the variables need to be reexpressed?", "url": "https://dasl.datadescription.com/download/data/3161", "filename": "Diamonds_", "name": "Diamonds", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Dow Jones stock index measures the performance of the stocks of America’s largest companies. 
A regression of the Dow prices on years 1972–2015 appears to be successful, but the residuals raise some questions.", + "description": "The Dow Jones stock index measures the performance of the stocks of America's largest companies. A regression of the Dow prices on years 1972-2015 appears to be successful, but the residuals raise some questions.", "url": "https://dasl.datadescription.com/download/data/3176", "filename": "Dow-Jones-2015", "name": "Dow Jones 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Quarterly e-commerce retail sales (in millions of dollars) in the United States from 1999 to 2008 ", "url": "https://dasl.datadescription.com/download/data/3180", "filename": "E-commerce", "name": "E-commerce", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "When implementing a packaged\nEnterprise Resource Planning (ERP) system, many companies\nreport that the module they first install is Financial\nAccounting. Among the measures used to gauge the\neffectiveness of their ERP system implementation is acceleration\nof the financial close process. 
The data hold a sample of\n8 companies that report their average time (in weeks) to\nfinancial close before and after the implementation of their\nERP system.", "url": "https://dasl.datadescription.com/download/data/3191", "filename": "ERP-Effectiveness", "name": "ERP Effectiveness", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Sales (in $) for one week were collected for 18 stores in a food store chain in the northeastern United States. The stores and the towns they are located in vary in size.", "url": "https://dasl.datadescription.com/download/data/3213", "filename": "Food-sales", "name": "Food sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The U.S. government provides fuel economy (in miles per gallon) and other information about late model cars sold in the US. How would you model the relationship between fuel economy and engine displacement (in liters)? Are there any cars that don’t fit the model? Can you explain why? ", + "description": "The U.S. government provides fuel economy (in miles per gallon) and other information about late model cars sold in the US. How would you model the relationship between fuel economy and engine displacement (in liters)? Are there any cars that don't fit the model? Can you explain why? 
", "url": "https://dasl.datadescription.com/download/data/3225", "filename": "Fueleconomy-2016", "name": "Fuel economy 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Weekly gas prices for regular gas in the United States as reported by the U.S. Energy Information Administration for 2009 through August 2016 ", "url": "https://dasl.datadescription.com/download/data/3232", "filename": "Gas-prices-2016", "name": "Gas prices 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3233", "filename": "Gas-Prices-2017", "name": "Gas Prices 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly gas prices for all grades and all formulations ($/gallon) in the United States as reported by the U.S. Energy Information Administration for 1993 through August 2018. Prices are available at the cite for all weeks. 
Data here are for the final week of each month.", "url": "https://dasl.datadescription.com/download/data/3234", "filename": "Gas-prices-2018", "name": "Gas prices 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Many drivers of cars that can run on regular gas actually buy premium in the belief that they will get better gas mileage. To test that belief, we use 10 cars from a company fleet in which all the cars run on regular gas. Each car is filled first with either regular or premium gasoline, decided by a coin toss, and the mileage for that tankful is recorded. Then the mileage is recorded again for the same cars for a tankful of the other kind of gaso-line. We don’t let the drivers know about this experiment.", + "description": "Many drivers of cars that can run on regular gas actually buy premium in the belief that they will get better gas mileage. To test that belief, we use 10 cars from a company fleet in which all the cars run on regular gas. Each car is filled first with either regular or premium gasoline, decided by a coin toss, and the mileage for that tankful is recorded. Then the mileage is recorded again for the same cars for a tankful of the other kind of gasoline. 
We don't let the drivers know about this experiment.", "url": "https://dasl.datadescription.com/download/data/3235", "filename": "Gas-prices-monthly", "name": "Gas prices monthly", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3238", "filename": "GDP-state", "name": "GDP by state", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3239", "filename": "GDP-growth-2017", "name": "GDP growth 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily opening and closing stock prices (adjusted for splits and dividends) for Google, Inc. from Aug 19, 2004 through June 21, 2013 ", "url": "https://dasl.datadescription.com/download/data/3247", "filename": "Google-stock-prices", "name": "Google stock prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A graphite manufacturer makes long\nrolls of flexible graphite to be used to seal components in\ncombustion engines. 
The specifications state that the mean\nstrength should be 21.2 ounces per square yard with a\nstandard deviation of 0.29. Further specifications state that\nno roll should have strength less than 20.2 or more than\n22.2 ounces per square yard. If there is a defect in terms\nof the strength of the graphite rolls, the seal will not hold.\nAfter the roll is created, a beta scanner takes readings of\nthe basis weight in ounces per square yard. The data is\nseparated into 10 lanes with 20 scans in each lane. A sample\nconsists of one roll from each lane. The results from 20\nsamples follow are in the data.", "url": "https://dasl.datadescription.com/download/data/3250", "filename": "Graphite-production", "name": "Graphite production", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "WinCo Foods, a large discount grocery\nretailer in the western United States, promotes itself as the lowest priced grocery retailer. In newspaper ads WinCo Foods published a price comparison for products between WinCo and several competing grocery retailers. One of the retailers compared against WinCo was Walmart, also known as a low price competitor. WinCo selected a variety of products, listed the price of the product charges at each retailer, and showed the sales receipt to prove the prices at WinCo were the lowest in the area. 
A sample of the products and their price comparison at both WinCo and Walmart are given.", "url": "https://dasl.datadescription.com/download/data/3251", "filename": "Grocery-prices", "name": "Grocery prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Health expenditures", "url": "https://dasl.datadescription.com/download/data/3260", "filename": "Health-expenditures", "name": "Health expenditures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The price (per barrel) of oil has fluctuated over time. Various attempts to model it are generally not successful. ", + "description": "The price (per barrel) of oil has fluctuated over time. 
Various attempts to model it are generally not successful.", "url": "https://dasl.datadescription.com/download/data/3266", "filename": "Historica-Oil-Prices-2016", "name": "Historical Oil Prices 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Holiday shopping", "url": "https://dasl.datadescription.com/download/data/3267", "filename": "Holiday-shopping", "name": "Holiday shopping", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Holiday spending", "url": "https://dasl.datadescription.com/download/data/3268", "filename": "Holiday-spending", "name": "Holiday spending", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Home depot sales", "url": "https://dasl.datadescription.com/download/data/3269", "filename": "Home-depot-sales", "name": "Home depot sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Home Price Index 2017", "url": "https://dasl.datadescription.com/download/data/3270", "filename": "Home-Price-Index-2017", "name": "Home Price Index 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - 
"simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "House prices and properties in New York. What properties of a house can predict its price? Can we use such a model to identify houses that are extraordinarily expensive or inexpensive? ", "url": "https://dasl.datadescription.com/download/data/3275", "filename": "Housing-prices", "name": "Housing prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "House prices and properties in New York. What properties of a house can predict its price? Can we use such a model to identify houses that are extraordinarily expensive or inexpensive? ", "url": "https://dasl.datadescription.com/download/data/3276", "filename": "Housing-prices-GE19", "name": "Housing prices GE19", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "How are housing costs related to median family income?", "url": "https://dasl.datadescription.com/download/data/3283", "filename": "Income-housing", "name": "Income and housing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Income vs Hours 2013", "url": "https://dasl.datadescription.com/download/data/3286", "filename": "Income-vs-Hours-2013", "name": 
"Income vs Hours 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The U.S. Consumer Price Index and year, every 5 years since 1916. These are the values for January of each year. What is the trend? Can we model it with a linear regression? ", "url": "https://dasl.datadescription.com/download/data/3291", "filename": "Inflation-2016", "name": "Inflation 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Average annual interest rates (banks prime lending) in the United States from 1966 through 2009 ", "url": "https://dasl.datadescription.com/download/data/3296", "filename": "Interest-rates-2009", "name": "Interest rates 2009", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "he amount charged for mortgages may be related to the total value of mortgage loans in the US. Can that relationship be modeled? Does it depend as well on the year? Consider a rotating plot of interest rate, mortgage total, and year. ", + "description": "The amount charged for mortgages may be related to the total value of mortgage loans in the US. Can that relationship be modeled? Does it depend as well on the year? 
Consider a rotating plot of interest rate, mortgage total, and year.", "url": "https://dasl.datadescription.com/download/data/3297", "filename": "Interest-mortgage", "name": "Interest rates and mortgages", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The amount charged for mortgages may be related to the total value of mortgage loans in the US. Can that relationship be modeled? Does it depend as well on the year? Consider a rotating plot of interest rate, mortgage total, and year. ", + "description": "The amount charged for mortgages may be related to the total value of mortgage loans in the US. Can that relationship be modeled? Does it depend as well on the year? Consider a rotating plot of interest rate, mortgage total, and year.", "url": "https://dasl.datadescription.com/download/data/3298", "filename": "Interest-mortgage-2015", "name": "Interest rates and mortgages 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This example is based on 1998 case study written by J. Hunt, E. Landry, and J. Rao as part of the Babson College case series. 
The data and setting used in this example are based on the actual case study, but the data have been modified and the conclusions are fictitious.", "url": "https://dasl.datadescription.com/download/data/3308", "filename": "Komtek-Technologies", "name": "Komtek Technologies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Real estate agents want to set correctly\nthe price of a house that’s about to go on the real estate\nmarket. They must choose a price that strikes a balance\nbetween one that is so high that the house takes too long\nto sell and one that’s so low that not enough value will go\nto the homeowner. One appraisal method is the “Comparative\nMarket Analysis” approach by which the market\nvalue of a house is based on recent sales of similar homes\nin the neighborhood. Because no two houses are exactly\nthe same, appraisers have to adjust comparable homes for\nsuch features as extra square footage, bedrooms, fireplaces,\nupgrading, parking facilities, swimming pool, lot size, location,\nand so on. The appraised market values and the selling\nprices of 45 homes from the same region are given.", + "description": "Real estate agents want to set correctly\nthe price of a house that's about to go on the real estate\nmarket. They must choose a price that strikes a balance\nbetween one that is so high that the house takes too long\nto sell and one that's so low that not enough value will go\nto the homeowner. One appraisal method is the \"Comparative\nMarket Analysis\" approach by which the market\nvalue of a house is based on recent sales of similar homes\nin the neighborhood. 
Because no two houses are exactly\nthe same, appraisers have to adjust comparable homes for\nsuch features as extra square footage, bedrooms, fireplaces,\nupgrading, parking facilities, swimming pool, lot size, location,\nand so on. The appraised market values and the selling\nprices of 45 homes from the same region are given.", "url": "https://dasl.datadescription.com/download/data/3328", "filename": "Market-value", "name": "Market value", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Marketing managers salaries", "url": "https://dasl.datadescription.com/download/data/3327", "filename": "Marketing-managers-salaries", "name": "Marketing managers salaries", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Quarterly median weekly earnings from the first quarter of 2003 through the first quarter of 2013 for men, 25 years of age or older, in the United States ", "url": "https://dasl.datadescription.com/download/data/3336", "filename": "Men-weekly-earnings-2013", - "name": "Men’s weekly earnings 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, + "name": "Men's weekly earnings 2013", "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Movie budgets", "url": "https://dasl.datadescription.com/download/data/3347", "filename": "Movie-budgets", "name": "Movie budgets", - "number_format": 31, - "remove_quotes": true, - 
"separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Does money purchase a good movie? Is the US Gross revenue related to either the budge or the Rotten Tomatoes score? The dataset holds data on 609 recent releases that includes the USGross (in $M), the Budget ($M), the Run Time (minutes), and the score given by the critics on the Rotten Tomatoes website. ", + "description": "Does money purchase a good movie? Is the US Gross revenue related to either the budget or the Rotten Tomatoes score? The dataset holds data on 609 recent releases that includes the USGross (in $M), the Budget ($M), the Run Time (minutes), and the score given by the critics on the Rotten Tomatoes website.", "url": "https://dasl.datadescription.com/download/data/3349", "filename": "Movie-profits", "name": "Movie profits", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Mutual fund flows", "url": "https://dasl.datadescription.com/download/data/3354", "filename": "Mutual-fund-flows", "name": "Mutual fund flows", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "On December 30, 2016, the Standard and Poor’s (S&P) 500 index hit an all-time high. During 2016, the S&P returned 12.25%. Here is a histogram of the 2016 net returns (total return – annual expenses) for Money Magazine’s 50 Best Mutual Funds and ETFs. 
The net returns are computed from the data given by Money Magazine.", + "description": "On December 30, 2016, the Standard and Poor's (S&P) 500 index hit an all-time high. During 2016, the S&P returned 12.25%. Here is a histogram of the 2016 net returns (total return - annual expenses) for Money Magazine's 50 Best Mutual Funds and ETFs. The net returns are computed from the data given by Money Magazine.", "url": "https://dasl.datadescription.com/download/data/3353", "filename": "Mutual-funds-2016", "name": "Mutual funds 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A study by the U.S. Small\nBusiness Administration used historical data to model the\nGDP per capita of 24 of the countries in the Organization\nfor Economic Cooperation and Development(OECD). The researchers hoped to show that more regulation leads to lower GDP/Capita. The multiple regression with all terms does have a significant P-value for Economic Regulation Index.\nHowever, Primary Education is not a significant predictor. If it is removed from the model, then OECD Regulation is no longer significant at .05. 
Was it added to the model just to judge the P-value of OECD regulation down to permit a publication that claimed an effect?\nCheck to see whether you think there is such an effect.", "url": "https://dasl.datadescription.com/download/data/3373", "filename": "OECD-economic-regulations", "name": "OECD economic regulations", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "OECD GDP", "url": "https://dasl.datadescription.com/download/data/3374", "filename": "OECD-GDP", "name": "OECD GDP", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "OECD GDP Growth", "url": "https://dasl.datadescription.com/download/data/3375", "filename": "OECD-GDP-Growth", "name": "OECD GDP Growth", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "OECD Unemployment", "url": "https://dasl.datadescription.com/download/data/3376", "filename": "OECD-Unemployment", "name": "OECD Unemployment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The price (per barrel) of oil has fluctuated over time. Various attempts to model it are generally not successful. 
The data include both the inflation-adjusted prices of a barrel of oil from 1968 to 2016 and two prediction models. ", + "description": "The price (per barrel) of oil has fluctuated over time. Various attempts to model it are generally not successful. The data include both the inflation-adjusted prices of a barrel of oil from 1968 to 2016 and two prediction models.", "url": "https://dasl.datadescription.com/download/data/3377", "filename": "Oil-prices-2016", "name": "Oil prices 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Online Shopping", "url": "https://dasl.datadescription.com/download/data/3384", "filename": "Online-Shopping", "name": "Online Shopping", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Sales volume and price of a slice of plain pizza ($) in Baltimore, Dallas, Chicago, and Denver for 156 weeks. How are prices and sales volumes related? Are patterns the same across cities? 
", "url": "https://dasl.datadescription.com/download/data/3395", "filename": "Pizza-prices", "name": "Pizza prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Poverty and Region 2015", "url": "https://dasl.datadescription.com/download/data/3403", "filename": "Poverty-and-Region-2015", "name": "Poverty and Region 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "UBS (one of the largest banks in the world) prepared\na report comparing prices, wages, and other economic conditions in cities around the world for it’s international clients. Some of the variables it measured in 73 cities are Cost of Living, Food Costs, Average Hourly Wage, average number of Working Hours per Year, average number of Vacation Days, hours of work (at the average wage) needed to buy an iPhone, minutes of work needed to buy a Big Mac, and Women’s Clothing Cost.", + "description": "UBS (one of the largest banks in the world) prepared\na report comparing prices, wages, and other economic conditions in cities around the world for its international clients. 
Some of the variables it measured in 73 cities are Cost of Living, Food Costs, Average Hourly Wage, average number of Working Hours per Year, average number of Vacation Days, hours of work (at the average wage) needed to buy an iPhone, minutes of work needed to buy a Big Mac, and Women's Clothing Cost.", "url": "https://dasl.datadescription.com/download/data/3405", "filename": "Prices-Earnings", "name": "Prices and Earnings", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The owner of a small organic food\nstore was concerned about her sales of a specialty yogurt\nmanufactured in Greece. As a result of increasing fuel\ncosts, she recently had to increase its price. To help boost\nsales, she decided to place the product on a different shelf\n(near eye level for most consumers) and in a location near\nother popular international products. She kept track of\nsales (number of containers sold per week) for six months\nafter she made the change.", "url": "https://dasl.datadescription.com/download/data/3410", "filename": "Product-placement", "name": "Product placement", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A company is producing and marketing\nnew reading activities for elementary school children that\nit believes will improve reading comprehension scores. A\nresearcher randomly assigns third graders to an eight-week\nprogram in which some will use these activities and others\nwill experience traditional teaching methods. 
At the end of\nthe experiment, both groups take a reading comprehension\nexam. Do these results suggest that the new activities\nare better?", "url": "https://dasl.datadescription.com/download/data/3411", "filename": "Product-testing", "name": "Product testing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Productivity 2016", "url": "https://dasl.datadescription.com/download/data/3409", "filename": "Productivity-2016", "name": "Productivity 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "As a class project, students in a large Statistics class collected publicly available information on recent home sales in their hometowns. There are 894 properties. These are not a random sample, but they may be representative of home sales during a short period of time, nationwide. 
Among the variables available is an indication of whether the home was in an urban, suburban, or rural setting.", "url": "https://dasl.datadescription.com/download/data/3423", "filename": "Real-Estate", "name": "Real Estate", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Real estate sample 1200", "url": "https://dasl.datadescription.com/download/data/3423", "filename": "Real-estate-sample-1200", "name": "Real estate sample 1200", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Regular gas 2017", "url": "https://dasl.datadescription.com/download/data/3426", "filename": "Regular-gas-2017", "name": "Regular gas 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Retail trade index", "url": "https://dasl.datadescription.com/download/data/3427", "filename": "Retail-trade-index", "name": "Retail trade index", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A sample from Fortune 500 companies", "url": "https://dasl.datadescription.com/download/data/3434", "filename": "Sales-profits", "name": "Sales and profits", - "number_format": 31, - 
"remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Prices of homes in Saratoga NY along with facts about them. Good basis for multiple regressions to predict the Price of the house. But several predictors are collinear. ", + "description": "Prices of homes in Saratoga NY along with facts about them. Good basis for multiple regressions to predict the Price of the house. But several predictors are collinear.", "url": "https://dasl.datadescription.com/download/data/3437", "filename": "Saratoga-house-prices", "name": "Saratoga house prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Prices of homes in Saratoga NY along with facts about them. Good basis for multiple regressions to predict the Price of the house. But several predictors are collinear. ", + "description": "Prices of homes in Saratoga NY along with facts about them. Good basis for multiple regressions to predict the Price of the house. But several predictors are collinear.", "url": "https://dasl.datadescription.com/download/data/3436", "filename": "Saratoga-houses", "name": "Saratoga houses", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A group of Statistics students cut ads out of magazines. They were careful to find two ads for each of 10 similar items, one with a sexual image and one without. 
They arranged the ads in random order and had 39 subjects look at them for one minute. Then they asked the subjects to list as many of the products as they could remember. Their data are shown in the table. Is there evidence that the sexual images mattered?", "url": "https://dasl.datadescription.com/download/data/3444", "filename": "Sex-sells", "name": "Sex sells", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Researchers studying how a car’s fuel efficiency (in Miles Per Gallon) varies with its Speed drove a compact car 200 miles at various speeds on a test track. Their data are shown in the table. ", + "description": "Researchers studying how a car's fuel efficiency (in Miles Per Gallon) varies with its Speed drove a compact car 200 miles at various speeds on a test track. Their data are shown in the table.", "url": "https://dasl.datadescription.com/download/data/3454", "filename": "Slower-is-cheaper", "name": "Slower is cheaper", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the federal rate on 3-month Treasury bills from 1950 to 1980 and Years Since 1950. 
", + "description": "The data give the federal rate on 3-month Treasury bills from 1950 to 1980 and Years Since 1950.", "url": "https://dasl.datadescription.com/download/data/3477", "filename": "TBill-rates-2016", "name": "TBill rates 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Tiffany was founded in 1837, when Charles Lewis Tiffany opened his first store in downtown Manhattan. Tiffany retails and distributes a selection of Tiffany & Co. brand jewelry at a range of prices. Today, more than 150 Tiffany & Co. stores sell to customers in U.S. and international markets.\nThe dataset holds quarterly sales data from 2005 through the middle of 2017. The data are suitable for time series modeling.", "url": "https://dasl.datadescription.com/download/data/3482", "filename": "Tiffany", "name": "Tiffany 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Time on market", "url": "https://dasl.datadescription.com/download/data/3483", "filename": "Time-on-market", "name": "Time on market", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Are people who use tobacco products more likely to consume alcohol? Here are data on household spending (in pounds) taken by the British government on 11 regions in Great Britain. Do tobacco and alcohol spending appear to be related? 
What questions do you have about these data? What conclusions can you draw? ", "url": "https://dasl.datadescription.com/download/data/3485", "filename": "Tobacco-and-alcohol", "name": "Tobacco and alcohol", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily closing stock prices for Toyota Motor Manufacturing from April 1, 2008, through June 21, 2013 ", "url": "https://dasl.datadescription.com/download/data/3491", "filename": "Toyota-stock-prices-2013", "name": "Toyota stock prices 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "US Unemployment rate from 1/1/2003 to 8/1/17. 
", + "description": "US Unemployment rate from 1/1/2003 to 8/1/17.", "url": "https://dasl.datadescription.com/download/data/3507", "filename": "Unemployment-2017", "name": "Unemployment 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Kelly's Blue Book: https://www.kbb.com/cars-for-sale/ accessed on 31 Aug 2017 using zip code 94305 200 mile radius BMW M5", "url": "https://dasl.datadescription.com/download/data/3508", "filename": "Used-BMW", "name": "Used BMW M5 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "How does the age of a used car influence its price? This is a small enough data set to find a model with a calculator. ", + "description": "How does the age of a used car influence its price? This is a small enough data set to find a model with a calculator.", "url": "https://dasl.datadescription.com/download/data/3509", "filename": "Used-cars", "name": "Used cars 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The web site www.autotrader.com lists cars for sale. On January 22 2017,\nit listed 55 used Honda Civics for sale by owner. 
From those listings, we extracted the asking price ($), the mileage, and the model year (from which we computed the age of the car at the time the data were collected\nQuestions include how to best predict the price from mileage and age and whether any of the cars is a particularly good buy.\nOne care is a particularly old (1989) car that has relatively low mileage for such an old car. The seller claims it hasn’t been driven for several years. \nIt looks like Price might benefit from re-expression by logs.", + "description": "The web site www.autotrader.com lists cars for sale. On January 22 2017,\nit listed 55 used Honda Civics for sale by owner. From those listings, we extracted the asking price ($), the mileage, and the model year (from which we computed the age of the car at the time the data were collected).\nQuestions include how to best predict the price from mileage and age and whether any of the cars is a particularly good buy.\nOne car is a particularly old (1989) car that has relatively low mileage for such an old car. The seller claims it hasn't been driven for several years. \nIt looks like Price might benefit from re-expression by logs.", "url": "https://dasl.datadescription.com/download/data/3510", "filename": "Used-Civics", "name": "Used Civics 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the gross domestic product (GDP) of the United States in trillions of 2009 dollars and time. 
", + "description": "The data give the gross domestic product (GDP) of the United States in trillions of 2009 dollars and time.", "url": "https://dasl.datadescription.com/download/data/3511", "filename": "USGDP-2016", "name": "USGDP 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Walmart revenue", "url": "https://dasl.datadescription.com/download/data/3514", "filename": "Walmart-revenue", "name": "Walmart revenue", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Gallup Poll of 1015 U.S. adults on April 9 – 12, 2015. Respondents were classified as high income (over $75,000), middle income ($30k–$75k), or low income (less than $30k). Those polled were asked for their views on redistributing U.S. wealth by heavily taxing the rich. Counts are reconstructed from percentages published by Gallup. ", + "description": "Gallup Poll of 1015 U.S. adults on April 9 - 12, 2015. Respondents were classified as high income (over $75,000), middle income ($30k-$75k), or low income (less than $30k). Those polled were asked for their views on redistributing U.S. wealth by heavily taxing the rich. 
Counts are reconstructed from percentages published by Gallup.", "url": "https://dasl.datadescription.com/download/data/3518", "filename": "Wealth-Redistribution", "name": "Wealth Redistribution", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly sales of Whole Foods Markets from 1995 through 2016. Whole Foods was purchased by Amazon in 2017, so 2016 is the final complete year prior to the merger. The data show a strong seasonal component even though food sales should not be seasonal. ", + "description": "Quarterly sales of Whole Foods Markets from 1995 through 2016. Whole Foods was purchased by Amazon in 2017, so 2016 is the final complete year prior to the merger. The data show a strong seasonal component even though food sales should not be seasonal.", "url": "https://dasl.datadescription.com/download/data/3522", "filename": "Whole-Foods", "name": "Whole Foods 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Wine production", "url": "https://dasl.datadescription.com/download/data/3529", "filename": "Wine-production", "name": "Wine production", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly median weekly earnings for U.S. women 25 years of age or older. Data are provided from the first quarter of 2003 through the first quarter of 2013. 
", + "description": "Quarterly median weekly earnings for U.S. women 25 years of age or older. Data are provided from the first quarter of 2003 through the first quarter of 2013.", "url": "https://dasl.datadescription.com/download/data/3535", "filename": "Women-earnings", - "name": "Women’s weekly earnings 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, + "name": "Women's weekly earnings 2013", "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Youth Unemployment 2016", "url": "https://dasl.datadescription.com/download/data/3546", "filename": "Youth-Unemployment-2016", "name": "Youth Unemployment 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Economics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": " Alex Rodriguez (known to fans as A-Rod)was the youngest player ever to hit 500 home runs. The file holds the number of home runs hit by A-Rod during the 1994–2016 seasons. Describe the distribution, mentioning its shape and any unusual features. ", + "description": " Alex Rodriguez (known to fans as A-Rod)was the youngest player ever to hit 500 home runs. The file holds the number of home runs hit by A-Rod during the 1994-2016 seasons. 
Describe the distribution, mentioning its shape and any unusual features.", "url": "https://dasl.datadescription.com/download/data/3038", "filename": "a-rod-2016", "name": "A-Rod 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In Olympic Archery both men and women start with a field of 64 qualifiers. Each archer shoots a round of 72 arrows (total possible score: 720) to establish a seeding position. Then they participate in a single-elimination contest. Thus, the seeding round is the only one that provides data for all archers (because some are […] ", + "description": "In Olympic Archery both men and women start with a field of 64 qualifiers. Each archer shoots a round of 72 arrows (total possible score: 720) to establish a seeding position. Then they participate in a single-elimination contest. Thus, the seeding round is the only one that provides data for all archers (because some are [\u2026] ", "url": "https://dasl.datadescription.com/download/data/3056", "filename": "Archery", "name": "Archery", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "American League baseball games are played under the designated hitter rule, meaning that pitchers, often weak hitters, do not come to bat. Baseball owners believe that the designated hitter rule means more runs scored, which in turn means higher attendance. Is there evidence that more fans attend games if the teams score more runs? 
The […] ", + "description": "American League baseball games are played under the designated hitter rule, meaning that pitchers, often weak hitters, do not come to bat. Baseball owners believe that the designated hitter rule means more runs scored, which in turn means higher attendance. Is there evidence that more fans attend games if the teams score more runs? The [\u2026] ", "url": "https://dasl.datadescription.com/download/data/3057", "filename": "Attendance-2016", "name": "Attendance 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "It has been suggested that children born in the summer have an advantage over their peers when it comes to sports, perhaps because they can be outdoors when they are young. The data report the number of professional ballplayers born in each month of the year for one season of professional baseball. ", + "description": "It has been suggested that children born in the summer have an advantage over their peers when it comes to sports, perhaps because they can be outdoors when they are young. 
The data report the number of professional ballplayers born in each month of the year for one season of professional baseball.", "url": "https://dasl.datadescription.com/download/data/3060", "filename": "Ballplayer-births", "name": "Ballplayer births", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3063", "filename": "Baseball-attendance", "name": "Baseball attendance", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3064", "filename": "Baseball-circumferences", "name": "Baseball circumferences", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Ballplayers have been signing ever larger contracts. The highest salaries (in millions of dollars per season) for each year since 1874 are in the data file. ", + "description": "Ballplayers have been signing ever larger contracts. 
The highest salaries (in millions of dollars per season) for each year since 1874 are in the data file.", "url": "https://dasl.datadescription.com/download/data/3065", "filename": "Baseball-salaries-2015", "name": "Baseball salaries 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3066", "filename": "Baseball-salaries-2016", "name": "Baseball salaries 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3067", "filename": "Baseball-weights", "name": "Baseball weights", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3069", "filename": "Basketball-shots", "name": "Basketball shots", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A company that makes basketballs has the motto: “Our basketballs are ready to play.” Therefore, it is important to the company that the basketballs are inflated with the proper amount of air when shipped. 
Most basketballs are inflated to 7 to 9 pounds per square inch. Recently the company selected a random basketball from its […] ", + "description": "A company that makes basketballs has the motto: \"Our basketballs are ready to play.\" Therefore, it is important to the company that the basketballs are inflated with the proper amount of air when shipped. Most basketballs are inflated to 7 to 9 pounds per square inch. Recently the company selected a random basketball from its [\u2026] ", "url": "https://dasl.datadescription.com/download/data/3068", "filename": "Basketballs", "name": "Basketballs", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Belmont Stakes is the last and longest of the three horse races that make up the Triple Crown. Curiously, in some of the Belmont races horses have run clockwise around the track, and in others they have run counterclockwise. Do the horses care? But note that the length of the race has also not […] ", + "description": "The Belmont Stakes is the last and longest of the three horse races that make up the Triple Crown. Curiously, in some of the Belmont races horses have run clockwise around the track, and in others they have run counterclockwise. Do the horses care? 
But note that the length of the race has also not [\u2026] ", "url": "https://dasl.datadescription.com/download/data/3072", "filename": "Belmont-stakes-2015", "name": "Belmont stakes 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3151", "filename": "Cyclists-2015", "name": "Cyclists 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3154", "filename": "Darts", "name": "Darts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In the National league all players take a turn at bat. But in the American league, a “designated hitter” usually bats for the pitcher, who is likely not to be a strong batter. The theory is that a designated hitter will lead to more hits, more runs, and a higher-scoring game. The data give the average runs per game and total home runs for major league baseball teams during the 2012 season. Is there a discernible difference between the leagues?", + "description": "In the National league all players take a turn at bat. But in the American league, a \"designated hitter\" usually bats for the pitcher, who is likely not to be a strong batter. 
The theory is that a designated hitter will lead to more hits, more runs, and a higher-scoring game. The data give the average runs per game and total home runs for major league baseball teams during the 2012 season. Is there a discernible difference between the leagues?", "url": "https://dasl.datadescription.com/download/data/3159", "filename": "Designated-hitter-2012", "name": "Designated hitter 2012", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Motorcycles designed to run off-road, often known as dirt bikes, are specialized\nvehicles. The dataset holds data on 114 many attributes of dirt bikes.\nSome cost as little as\n$1399, while others are substantially more expensive. One interest is in building a model to predict the price of a dirt bike from attributes of the bikes.", "url": "https://dasl.datadescription.com/download/data/3166", "filename": "Dirt-bikes", "name": "Dirt bikes 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A leading manufacturer of exercise\nequipment wanted to collect data on the effectiveness of\ntheir equipment. An August 2001 article in the journal\nMedicine and Science in Sports and Exercise compared how\nlong it would take men and women to burn 200 calories\nduring light or heavy workouts on various kinds of exercise\nequipment. 
The results summarized in the table are the average\ntimes for a group of physically active young men and\nwomen whose performances were measured on a representative\nsample of exercise equipment.", "url": "https://dasl.datadescription.com/download/data/3195", "filename": "Exercise-equipment", "name": "Exercise equipment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Football owners are constantly in competition for good players. The more wins, the more likely that the team will provide good business returns for the owners. The resources that each of the 32 teams has in the National Football League (NFL) vary, but the draft system is designed to counteract the advantages that wealthier teams may have.", "url": "https://dasl.datadescription.com/download/data/3214", "filename": "Football-salaries-2017", "name": "Football salaries 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A student performed an experiment with three different grips to see what effect it might have on the distance of a backhanded Frisbee throw. She tried it with her normal grip, with one finger out, and with the Frisbee inverted. She measured in paces how far her throws went. ", + "description": "A student performed an experiment with three different grips to see what effect it might have on the distance of a backhanded Frisbee throw. She tried it with her normal grip, with one finger out, and with the Frisbee inverted. 
She measured in paces how far her throws went.", "url": "https://dasl.datadescription.com/download/data/3221", "filename": "Frisbee-throws", "name": "Frisbee throws", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Golf courses", "url": "https://dasl.datadescription.com/download/data/3245", "filename": "Golf-courses", "name": "Golf courses", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The average drive distance (in yards) for 199 professional golfers during a week on the men’s PGA tour in 2015.", + "description": "The average drive distance (in yards) for 199 professional golfers during a week on the men's PGA tour in 2015.", "url": "https://dasl.datadescription.com/download/data/3246", "filename": "Golf-drives-2015", "name": "Golf drives 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Golfers 2017", "url": "https://dasl.datadescription.com/download/data/3244", "filename": "Golfers-2017", "name": "Golfers 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a 
race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", + "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he'd completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner's trophy, and Mulford's protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", "url": "https://dasl.datadescription.com/download/data/3288", "filename": "Indy-2016", "name": "Indy 500 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. 
In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", + "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he'd completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner's trophy, and Mulford's protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", "url": "https://dasl.datadescription.com/download/data/3289", "filename": "Indy-2017", "name": "Indy 500 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", + "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he'd completed 500 miles. 
When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner's trophy, and Mulford's protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", "url": "https://dasl.datadescription.com/download/data/3290", "filename": "Indy-2018", "name": "Indy 500 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. (It has been run in May every year but\none—1901—when it took place on April 29.)", + "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn't run such a long race that\nearly in the season. 
(It has been run in May every year but\none - 1901 - when it took place on April 29.)", "url": "https://dasl.datadescription.com/download/data/3305", "filename": "Kentucky-Derby-2016", "name": "Kentucky Derby 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. (It has been run in May every year but\none—1901—when it took place on April 29.)", + "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn't run such a long race that\nearly in the season. (It has been run in May every year but\none - 1901 - when it took place on April 29.)", "url": "https://dasl.datadescription.com/download/data/3306", "filename": "Kentucky-Derby-2017", "name": "Kentucky Derby 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. 
The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. (It has been run in May every year but\none—1901—when it took place on April 29.)", + "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn't run such a long race that\nearly in the season. (It has been run in May every year but\none - 1901 - when it took place on April 29.)", "url": "https://dasl.datadescription.com/download/data/3307", "filename": "Kentucky-Derby-2018", "name": "Kentucky Derby 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "NY Marathon 2016", "url": "https://dasl.datadescription.com/download/data/3370", "filename": "NY-Marathon-2016", "name": "NY Marathon 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "How are Olympic performances in various events related? 
The data gives winning long-jump and high-jump distances in meters, for the Summer Olympics from 1912 through 2016 ", "url": "https://dasl.datadescription.com/download/data/3382", "filename": "Olympic-jumps-2016", "name": "Olympic jumps 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "NFL data from the 2015 football season reported the number of yards gained by each of the league’s 488 receivers ", + "description": "NFL data from the 2015 football season reported the number of yards gained by each of the league's 488 receivers ", "url": "https://dasl.datadescription.com/download/data/3425", "filename": "Receivers-2015", "name": "Receivers 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Times (in minutes) for one runner to run 4 miles on various courses during a 10-year period. ", + "description": "Times (in minutes) for one runner to run 4 miles on various courses during a 10-year period.", "url": "https://dasl.datadescription.com/download/data/3433", "filename": "Run-times", "name": "Run times", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Hill races are races that climb generally steep hills, held throughout Scotland throughout the year. 
The file holds records for men and women in these races the last time those were posted in an accessible table along with facts about the races. In particular, we know the length(km) and total climb(m). These are two independent predictors of the record times. Sex of the runner can be an additional indicator variable.", "url": "https://dasl.datadescription.com/download/data/3440", "filename": "Scottish-Hill-Races", "name": "Scottish Hill Races", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A college hockey coach collected data from the 2016–2017 National Hockey League season. He hopes to convince his players that the number of shots taken has an effect on the number of goals scored. The data includes both offensive and defensive players. ", + "description": "A college hockey coach collected data from the 2016-2017 National Hockey League season. He hopes to convince his players that the number of shots taken has an effect on the number of goals scored. The data includes both offensive and defensive players.", "url": "https://dasl.datadescription.com/download/data/3448", "filename": "Shoot-to-Score-2016", "name": "Shoot to Score 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Bjork Larsen was trying to decide whether to use a\nnew racing wax for cross-country skis. He decided that the\nwax would be worth the price if he could average less than\n55 seconds on a course he knew well, so he planned to study\nthe wax by racing on the course 8 times. The data report his race times. 
\n", "url": "https://dasl.datadescription.com/download/data/3450", "filename": "Ski-wax", "name": "Ski wax", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Men’s Giant Slalom skiing event consists of two runs whose times are added together for a final score. The data give the giant slalom times in the 2014 Winter Olympics at Sochi. ", + "description": "The Men's Giant Slalom skiing event consists of two runs whose times are added together for a final score. The data give the giant slalom times in the 2014 Winter Olympics at Sochi.", "url": "https://dasl.datadescription.com/download/data/3451", "filename": "Slalom-times-2014", "name": "Slalom times 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Men’s Giant Slalom skiing event consists of two runs whose times are added together for a final score. The data give the giant slalom times in the 2018 Winter Olympics at PyeongChang. ", + "description": "The Men's Giant Slalom skiing event consists of two runs whose times are added together for a final score. 
The data give the giant slalom times in the 2018 Winter Olympics at PyeongChang.", "url": "https://dasl.datadescription.com/download/data/3452", "filename": "Slalom-times-2018", "name": "Slalom times 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Advertisements for an instructional video claim that the techniques will improve the ability of Little League pitchers to throw strikes and that, after undergoing the training, players will be able to throw strikes on at least 60% of their pitches. To test this claim, we have 20 Little Leaguers throw 50 pitches each, and we record the number of strikes. After the players participate in the training program, we repeat the test. The table shows the number of strikes each player threw before and after the training.", "url": "https://dasl.datadescription.com/download/data/3464", "filename": "Strikes", "name": "Strikes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Fifty nine countries won gold medals in the 2016 Summer Olympics. The dataset lists them, along with the total number of gold medals each won. It can be a challenge to find a good display for data like these. ", + "description": "Fifty nine countries won gold medals in the 2016 Summer Olympics. The dataset lists them, along with the total number of gold medals each won. 
It can be a challenge to find a good display for data like these.", "url": "https://dasl.datadescription.com/download/data/3468", "filename": "Summer-Olympics-2016", "name": "Summer Olympics 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Super Bowl 2016", "url": "https://dasl.datadescription.com/download/data/3470", "filename": "Super-Bowl-2016", "name": "Super Bowl 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Swim and Run", "url": "https://dasl.datadescription.com/download/data/3577", "filename": "Swim-Run", "name": "Swim and Run", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "People swim across Lake Ontario from Niagara on the Lake to Toronto–a distance of 52 km (32,3 miles). Because the lake is fresh water, this swim is considered more difficult than ocean swims of similar length because salt water provides more boyancy than fresh water. (For comparison, the English Channel is 21 miles across and, despite strong currents, generally takes less time to cross.)", + "description": "People swim across Lake Ontario from Niagara on the Lake to Toronto - a distance of 52 km (32.3 miles). Because the lake is fresh water, this swim is considered more difficult than ocean swims of similar length because salt water provides more buoyancy than fresh water. 
(For comparison, the English Channel is 21 miles across and, despite strong currents, generally takes less time to cross.)", "url": "https://dasl.datadescription.com/download/data/3473", "filename": "Swim-lake", "name": "Swim the lake 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Unlike track events, swimming heats are not determined at random. Instead, swimmers are seeded so that better swimmers are placed in later heats. Here are the times (in seconds) for the women’s 400-m freestyle for two heats in the 2016 Olympics. ", + "description": "Unlike track events, swimming heats are not determined at random. Instead, swimmers are seeded so that better swimmers are placed in later heats. Here are the times (in seconds) for the women's 400-m freestyle for two heats in the 2016 Olympics.", "url": "https://dasl.datadescription.com/download/data/3471", "filename": "Swimming-heats", "name": "Swimming heats 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Swimming heats London", "url": "https://dasl.datadescription.com/download/data/3472", "filename": "Swimming-heats-London", "name": "Swimming heats London", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Tour de France is the most famous bicycle race in the world. 
It has been run every year since 1903, except for a few during wars. The data report facts about the winners including age, time, distance, and average speed. Lance Armstrong’s 7 consecutive victories been disqualified due to the use of performance-enhancing drugs, but his statistics are still included here.", + "description": "The Tour de France is the most famous bicycle race in the world. It has been run every year since 1903, except for a few during wars. The data report facts about the winners including age, time, distance, and average speed. Lance Armstrong's 7 consecutive victories have been disqualified due to the use of performance-enhancing drugs, but his statistics are still included here.", "url": "https://dasl.datadescription.com/download/data/3489", "filename": "Tour-de-France-2016", "name": "Tour de France 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Tour de France is the most famous bicycle race in the world. It has been run every year since 1903, except for a few during wars. The data report facts about the winners including age, time, distance, and average speed. Lance Armstrong’s 7 consecutive victories been disqualified due to the use of performance-enhancing drugs, but his statistics are still included here.", + "description": "The Tour de France is the most famous bicycle race in the world. It has been run every year since 1903, except for a few during wars. The data report facts about the winners including age, time, distance, and average speed. 
Lance Armstrong's 7 consecutive victories have been disqualified due to the use of performance-enhancing drugs, but his statistics are still included here.", "url": "https://dasl.datadescription.com/download/data/3490", "filename": "Tour-de-France-2017", "name": "Tour de France 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Gallup poll asked 1008 Americans age 18 and over whether they planned to watch the upcoming Super Bowl. The pollster also asked those who planned to watch whether they were looking forward more to seeing the football game or the commercials. ", + "description": "The Gallup poll asked 1008 Americans age 18 and over whether they planned to watch the upcoming Super Bowl. The pollster also asked those who planned to watch whether they were looking forward more to seeing the football game or the commercials.", "url": "https://dasl.datadescription.com/download/data/3516", "filename": "Watch-Super-bowl", "name": "Watch the Super bowl", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The world men’s weightlifting records are categorized by weight class of the competitors. How does the weight class relate to the record? ", + "description": "The world men's weightlifting records are categorized by weight class of the competitors. How does the weight class relate to the record? 
", "url": "https://dasl.datadescription.com/download/data/3520", "filename": "Weightlifting-2016", "name": "Weightlifting 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Boston Marathon has had a wheelchair division since 1977.\nWho do you think\nis typically faster, the men’s marathon winner on foot\nor the women’s wheelchair marathon winner? Because\nthe conditions differ from year to year, and speeds have\nimproved over the years, it seems best to treat these as\npaired measurements. Here are summary statistics for\nthe pairwise differences in finishing time (in minutes):", + "description": "The Boston Marathon has had a wheelchair division since 1977.\nWho do you think\nis typically faster, the men's marathon winner on foot\nor the women's wheelchair marathon winner? Because\nthe conditions differ from year to year, and speeds have\nimproved over the years, it seems best to treat these as\npaired measurements. Here are summary statistics for\nthe pairwise differences in finishing time (in minutes):", "url": "https://dasl.datadescription.com/download/data/3521", "filename": "Wheelchair-Marathon", "name": "Wheelchair Marathon 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Sears Cup was established in 1993\nto honor institutions that maintain a broad-based athletic\nprogram, achieving success in many sports, both men’s and\nwomen’s. 
In the years following its Division III inception in\n1995, the cup was won by Williams College 15 of 17 years.\nWhy did the football team win so much? Was it because\nthey were heavier than their opponents? The data gives the\naverage team weights for selected years from 1973 to 1993.", + "description": "The Sears Cup was established in 1993\nto honor institutions that maintain a broad-based athletic\nprogram, achieving success in many sports, both men's and\nwomen's. In the years following its Division III inception in\n1995, the cup was won by Williams College 15 of 17 years.\nWhy did the football team win so much? Was it because\nthey were heavier than their opponents? The data gives the\naverage team weights for selected years from 1973 to 1993.", "url": "https://dasl.datadescription.com/download/data/3525", "filename": "Williams-football", "name": "Williams football", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The times from the first race of the women’s 2 X 500-m speed skating times at the 2010 Winter Olympics in Vancouver, B.C. are given. ", + "description": "The times from the first race of the women's 2 X 500-m speed skating times at the 2010 Winter Olympics in Vancouver, B.C. 
are given.", "url": "https://dasl.datadescription.com/download/data/3530", "filename": "speed-skating", "name": "Winter Olympics 2010 speed skating", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Source: https://www.olympic.org/sochi-2014/alpine-skiing/slalom-men", "url": "https://dasl.datadescription.com/download/data/3531", "filename": "Winter-Olympics-2014", "name": "Winter Olympics 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "53 men completed the men’s alpine downhill. The gold medal winner finished in 100.25 seconds. Here are the times (in seconds) for all competitors. ", + "description": "53 men completed the men's alpine downhill. The gold medal winner finished in 100.25 seconds. Here are the times (in seconds) for all competitors.", "url": "https://dasl.datadescription.com/download/data/3532", "filename": "olympics-downhill", "name": "Winter olympics 2018 downhill", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "he women’s heptathlon in the Olympics consists of seven track-and-field events: the 200 m and 800 m runs, 100 m high hurdles, shot put, javelin, high jump, and long jump. Each contestant is awarded points for each event based on her performance. So, which performance deserves more points? It’s not clear how to compare them. 
They aren’t measured in the same units, or even in the same direction (longer jumps are better but shorter times are better.)", + "description": "The women's heptathlon in the Olympics consists of seven track-and-field events: the 200 m and 800 m runs, 100 m high hurdles, shot put, javelin, high jump, and long jump. Each contestant is awarded points for each event based on her performance. So, which performance deserves more points? It's not clear how to compare them. They aren't measured in the same units, or even in the same direction (longer jumps are better but shorter times are better.)", "url": "https://dasl.datadescription.com/download/data/3536", "filename": "Womens-Heptathlon", "name": "Womens Heptathlon 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Women’s 500 metres in short track speed skating at the 2018 Winter Olympics took place from 10 to 13 February 2018 at the Gangneung Ice Arena in Gangneung, South Korea.The defending champion from 2014, Li Jianrou, had retired, but the 2014 silver medalist Arianna Fontana competed and eventually won the event. 
", + "description": "The Women's 500 metres in short track speed skating at the 2018 Winter Olympics took place from 10 to 13 February 2018 at the Gangneung Ice Arena in Gangneung, South Korea. The defending champion from 2014, Li Jianrou, had retired, but the 2014 silver medalist Arianna Fontana competed and eventually won the event.", "url": "https://dasl.datadescription.com/download/data/3537", "filename": "Womens-short-track", "name": "Womens short track 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The women’s 1500 metres speed skating competition for the 2006 Winter Olympics was held in Turin, Italy, on 22 February ", + "description": "The women's 1500 metres speed skating competition for the 2006 Winter Olympics was held in Turin, Italy, on 22 February ", "url": "https://dasl.datadescription.com/download/data/3538", "filename": "Womens-speed-skating", "name": "Womens speed skating 2006", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Sport" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Progressive Insurance asked customers who had been involved in auto accidents how far they were from home when the accident happened. 
", + "description": "Progressive Insurance asked customers who had been involved in auto accidents how far they were from home when the accident happened.", "url": "https://dasl.datadescription.com/download/data/3039", "filename": "accidents", "name": "Accidents", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "At a barbershop music singing competition, choruses are judged on three scales: Music (quality of the arrangement, etc.), Performance, and Singing. The scales are supposed to be independent of each other, and each is scored by a different judge, but a friend claims that he can predict a chorus’s singing score from the other two […] ", + "description": "At a barbershop music singing competition, choruses are judged on three scales: Music (quality of the arrangement, etc.), Performance, and Singing. The scales are supposed to be independent of each other, and each is scored by a different judge, but a friend claims that he can predict a chorus's singing score from the other two [\u2026] ", "url": "https://dasl.datadescription.com/download/data/3061", "filename": "Barbershop-music", "name": "Barbershop music", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 2016 13.27 million people attended a Broadway show, paying an average of more than $100 per ticket. The Broadway League, Inc. (https://www.broadwayleague.com/research/statistics-broadway-nyc/) provides some historical and current data. 
These variables are available for each year since the 1984-85 season: Season (The initial year of the season, so the 1984-85 season is 1984.) Gross ($M) […] ", + "description": "In 2016 13.27 million people attended a Broadway show, paying an average of more than $100 per ticket. The Broadway League, Inc. (https://www.broadwayleague.com/research/statistics-broadway-nyc/) provides some historical and current data. These variables are available for each year since the 1984-85 season: Season (The initial year of the season, so the 1984-85 season is 1984.) Gross ($M) [\u2026] ", "url": "https://dasl.datadescription.com/download/data/3087", "filename": "Broadway-shows", "name": "Broadway shows", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Fast food is often considered unhealthy because much of it is high in both fat and sodium. But are the two related? The data give the fat and sodium contents of several brands of burgers. ", + "description": "Fast food is often considered unhealthy because much of it is high in both fat and sodium. But are the two related? The data give the fat and sodium contents of several brands of burgers.", "url": "https://dasl.datadescription.com/download/data/3088", "filename": "Burgers", "name": "Burgers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset holds facts about candy bars read from their nutrition labels. The data are a good example for multiple regression (e.g. what contributes to the calories of a candy bar?). 
For such an analysis, the indicator variable for nuts appears to work well. Note that 5 sugar-free candy bars are marked as NA in […] ", + "description": "The dataset holds facts about candy bars read from their nutrition labels. The data are a good example for multiple regression (e.g. what contributes to the calories of a candy bar?). For such an analysis, the indicator variable for nuts appears to work well. Note that 5 sugar-free candy bars are marked as NA in [\u2026] ", "url": "https://dasl.datadescription.com/download/data/3092", "filename": "Candy-bars", "name": "Candy bars", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 1998, as an advertising campaign, the Nabisco Company announced a “1000 Chips Challenge,” claiming that every 18-ounce bag of their Chips Ahoy! cookies contained at least 1000 chocolate chips. Dedicated statistics students at the Air Force Academy randomly selected bags of cookies and counted the chocolate chips. The data report their counts. ", + "description": "In 1998, as an advertising campaign, the Nabisco Company announced a \"1000 Chips Challenge\", claiming that every 18-ounce bag of their Chips Ahoy! cookies contained at least 1000 chocolate chips. Dedicated statistics students at the Air Force Academy randomly selected bags of cookies and counted the chocolate chips. 
The data report their counts.", "url": "https://dasl.datadescription.com/download/data/3110", "filename": "Chips-Ahoy", "name": "Chips Ahoy!", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The website rcdb.com, the Roller Coaster Database, holds facts about every roller coaster in the world, current or past. (If you know of one that is missing, please let the site master know.) These data are for recently opened coasters, most of which are still in operation.", "url": "https://dasl.datadescription.com/download/data/3118", "filename": "Coasters-2015", "name": "Coasters 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data are drawn from the work of O. M. Latter in 1902 and were used in a fundamental textbook on statistical quality control by L. H. C. Tippett (1902–1985), one of the pioneers in that field. \n", + "description": "The data are drawn from the work of O. M. Latter in 1902 and were used in a fundamental textbook on statistical quality control by L. H. C. Tippett (1902-1985), one of the pioneers in that field. 
\n", "url": "https://dasl.datadescription.com/download/data/3149", "filename": "Cuckoos-and-quality-control", "name": "Cuckoos and quality control", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data are from a production process that makes 250 units each hour. The data were collected over a normal 12-hour shift one day.", "url": "https://dasl.datadescription.com/download/data/3155", "filename": "Defect-monitoring", "name": "Defect monitoring", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data are from a production process that makes 250 units each hour. The data were collected over a normal 12-hour shift one day. ", + "description": "The data are from a production process that makes 250 units each hour. The data were collected over a normal 12-hour shift one day.", "url": "https://dasl.datadescription.com/download/data/3156", "filename": "Defect-monitoring_", "name": "Defect monitoring second product", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Some students checked 6 bags of Doritos marked with a net weight of 28.3 grams. 
They carefully weighed the contents of each bag and recorded the weights in grams.", "url": "https://dasl.datadescription.com/download/data/3171", "filename": "Doritos", "name": "Doritos", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A student wants to investigate the effects of real vs.\nsubstitute eggs on his favorite brownie recipe. He enlists the\nhelp of 10 friends and asks them to rank each of 8 batches\non a scale from 1 to 10. Four of the batches were made with\nreal eggs, four with substitute eggs. The judges tasted the\nbrownies in random order.", "url": "https://dasl.datadescription.com/download/data/3185", "filename": "Eggs", "name": "Eggs", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Many people fear Friday the 13th as an unlucky day. Researchers looked into this to see whether there were differences in traffic or in admissions to hospitals for road accidents on Friday 13th when compared with the adjacent Friday 6th. ", + "description": "Many people fear Friday the 13th as an unlucky day. 
Researchers looked into this to see whether there were differences in traffic or in admissions to hospitals for road accidents on Friday 13th when compared with the adjacent Friday 6th.", "url": "https://dasl.datadescription.com/download/data/3219", "filename": "Friday-13-Accidents", "name": "Friday the 13th Accidents", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "As a project for an Introductory Statistics course, students checked 6 bags of Fritos marked with a net weight of 35.4 grams. They carefully weighed the contents of each bag, recording the weights (in grams):", "url": "https://dasl.datadescription.com/download/data/3222", "filename": "Fritos", "name": "Fritos", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The movie Harry Potter and the Sorcerer’s Stone opened as a great success. But every movie sees declining revenue over time. The dataset gives the daily revenues for the movie during its first 17 days. ", + "description": "The movie Harry Potter and the Sorcerer's Stone opened as a great success. But every movie sees declining revenue over time. 
The dataset gives the daily revenues for the movie during its first 17 days.", "url": "https://dasl.datadescription.com/download/data/3256", "filename": "Harry-Potter-revenue", "name": "Harry Potter revenue", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Is the Statue of Liberty’s nose too long? Her nose measures 4′6″, but she is a large statue, after all. Her arm is 42 feet long. That means her arm is 42/4.5 = 9.3 times as long as her nose. Is that a reasonable ratio? The data give arm and nose lengths of 18 girls ", + "description": "Is the Statue of Liberty's nose too long? Her nose measures 4\u20326\u2033, but she is a large statue, after all. Her arm is 42 feet long. That means her arm is 42/4.5 = 9.3 times as long as her nose. Is that a reasonable ratio? 
The data give arm and nose lengths of 18 girls ", "url": "https://dasl.datadescription.com/download/data/3311", "filename": "Libertys-nose", "name": "Libertys nose", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Lottery numbers", "url": "https://dasl.datadescription.com/download/data/3318", "filename": "Lottery-numbers", "name": "Lottery numbers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Loyalty program", "url": "https://dasl.datadescription.com/download/data/3319", "filename": "Loyalty-program", "name": "Loyalty program", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Movie lengths 2010", "url": "https://dasl.datadescription.com/download/data/3348", "filename": "Movie-lengths-2010", "name": "Movie lengths 2010", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Students in an introductory statistics course were asked how many songs they had in their digital music library.", "url": "https://dasl.datadescription.com/download/data/3352", "filename": "Music-library", "name": "Music library", - "number_format": 31, - "remove_quotes": 
true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "New York State inspectors assess all bridges in the state every two years including a bridge’s individual parts. Bridges are analyzed for their capacity to carry vehicular loads. Inspectors are required to evaluate, assign a condition score, and document the condition of up to 47 structural elements, including rating 25 components of each span of a bridge, in addition to general components common to all bridges. The NYSDOT condition rating scale ranges from 1 to 7, with 7 being in new condition and a rating of 5 or greater considered as good conditionBridges that cannot safely carry heavy vehicles, such as some tractor trailers, are posted with weight limits. Based upon inspection and load capacity analysis, any bridge deemed unsafe gets closed.\nHow does the condition of the bridge relate to its age? Are there any outliers? Can you account for them by identifying them?", + "description": "New York State inspectors assess all bridges in the state every two years including a bridge's individual parts. Bridges are analyzed for their capacity to carry vehicular loads. Inspectors are required to evaluate, assign a condition score, and document the condition of up to 47 structural elements, including rating 25 components of each span of a bridge, in addition to general components common to all bridges. The NYSDOT condition rating scale ranges from 1 to 7, with 7 being in new condition and a rating of 5 or greater considered as good condition. Bridges that cannot safely carry heavy vehicles, such as some tractor trailers, are posted with weight limits. Based upon inspection and load capacity analysis, any bridge deemed unsafe gets closed.\nHow does the condition of the bridge relate to its age? Are there any outliers?
Can you account for them by identifying them?", "url": "https://dasl.datadescription.com/download/data/3364", "filename": "New-York-bridges-2016", "name": "New York bridges 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The GfK Roper Reports® Worldwide Survey asked 30,000 consumers in 23 countries about their attitudes on health, beauty, and other personal values. One question participants were asked was how important their personal appearance is to them. The data are a contingency table of responses to this question by age decade. ", + "description": "The GfK Roper Reports\u00ae Worldwide Survey asked 30,000 consumers in 23 countries about their attitudes on health, beauty, and other personal values. One question participants were asked was how important their personal appearance is to them. 
The data are a contingency table of responses to this question by age decade.", "url": "https://dasl.datadescription.com/download/data/3392", "filename": "Personal-appearance", "name": "Personal appearance", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "BYU Human Performance Research Center http://www.byu.edu/chhp/intro.html#lrc Director: Mark Ricard 116A RB, (801) 378-8958", "url": "https://dasl.datadescription.com/download/data/3445", "filename": "Shirt-sizes", "name": "Shirt sizes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A last is a form, traditionally made of wood, in the\nshape of the human foot. Lasts of various sizes are used by\nshoemakers to make shoes. In the United States, shoe sizes are\ndefined differently for men and women:\nU.S. men’s shoe size = (last size in inches * 3) – 24\nU.S. women’s shoe size = (last size in inches * 3) – 22.5\nBut in Europe, they are both: Euro size = last size in cm * 3/2\nThe data give the European shoe sizes of 269 college\nstudents (converted from their reported U.S. shoe sizes.)", + "description": "A last is a form, traditionally made of wood, in the\nshape of the human foot. Lasts of various sizes are used by\nshoemakers to make shoes. In the United States, shoe sizes are\ndefined differently for men and women:\nU.S. men's shoe size = (last size in inches * 3) - 24\nU.S. 
women's shoe size = (last size in inches * 3) - 22.5\nBut in Europe, they are both: Euro size = last size in cm * 3/2\nThe data give the European shoe sizes of 269 college\nstudents (converted from their reported U.S. shoe sizes.)", "url": "https://dasl.datadescription.com/download/data/3447", "filename": "Shoe-Sizes", "name": "Shoe Sizes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset gives the heights (in inches) of 130 members of a choir and the part they sing. Note that Sopranos and Altos are typically women and Tenors and Basses are typically men. ", + "description": "The dataset gives the heights (in inches) of 130 members of a choir and the part they sing. Note that Sopranos and Altos are typically women and Tenors and Basses are typically men.", "url": "https://dasl.datadescription.com/download/data/3449", "filename": "Singers-by-parts", "name": "Singers by parts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Sugar is a major ingredient in many breakfast cereals. The data gives the sugar content as a percentage of weight for 49 brands of cereal. Data were collected from nutrition labels in a supermarket. ", + "description": "Sugar is a major ingredient in many breakfast cereals. The data gives the sugar content as a percentage of weight for 49 brands of cereal. 
Data were collected from nutrition labels in a supermarket.", "url": "https://dasl.datadescription.com/download/data/3467", "filename": "Sugar-cereal", "name": "Sugar in cereal", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give counts of 626 individuals categorized according to their “tattoo status” and their “hepatitis status.” Is there a relationship? ", + "description": "The data give counts of 626 individuals categorized according to their \"tattoo status\" and their \"hepatitis status\". Is there a relationship? ", "url": "https://dasl.datadescription.com/download/data/3476", "filename": "Tattoos", "name": "Tattoos", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A bank is studying the time that it takes 6 of its tellers to serve an average customer. Customers line up in the queue and then go to the next available teller. Is there a difference? Can we pick out the best or worst performing teller? ", "url": "https://dasl.datadescription.com/download/data/3478", "filename": "Tellers", "name": "Tellers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Since 1994, the Best Roller Coaster Poll (www. ushsho.com/bestrollercoasterpoll.htm) has been ranking the world’s best roller coasters. 
In 2013, Bizarro dropped to 4th after earning the top steel coaster rank for six straight years. Data on the top 14 steel coasters from this poll are given. ", + "description": "Since 1994, the Best Roller Coaster Poll (www.ushsho.com/bestrollercoasterpoll.htm) has been ranking the world's best roller coasters. In 2013, Bizarro dropped to 4th after earning the top steel coaster rank for six straight years. Data on the top 14 steel coasters from this poll are given.", "url": "https://dasl.datadescription.com/download/data/3481", "filename": "Thrills-2013", "name": "Thrills 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Minnesota Department of Transportation\nhoped that they could measure the weights of big trucks without\nactually stopping the vehicles by using a newly developed\n“weight-in-motion” scale. To see if the new device was accurate,\nthey conducted a calibration test. They weighed several stopped\ntrucks (Static Weight) and assumed that this weight was correct.\nThen they weighed the trucks again while they were moving to\nsee how well the new scale could estimate the actual weight.", + "description": "The Minnesota Department of Transportation\nhoped that they could measure the weights of big trucks without\nactually stopping the vehicles by using a newly developed\n\"weight-in-motion\" scale. To see if the new device was accurate,\nthey conducted a calibration test.
They weighed several stopped\ntrucks (Static Weight) and assumed that this weight was correct.\nThen they weighed the trucks again while they were moving to\nsee how well the new scale could estimate the actual weight.", "url": "https://dasl.datadescription.com/download/data/3512", "filename": "Vehicle-weights", "name": "Vehicle weights", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Washing", "url": "https://dasl.datadescription.com/download/data/3515", "filename": "Washing", "name": "Washing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Consumer Reports tested 11 brands of vanilla yogurt and found these numbers of calories per serving. 
", + "description": "Consumer Reports tested 11 brands of vanilla yogurt and found these numbers of calories per serving.", "url": "https://dasl.datadescription.com/download/data/3544", "filename": "Yogurt_", "name": "Yogurt", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Yogurt flavors", "url": "https://dasl.datadescription.com/download/data/3545", "filename": "Yogurt-flavors", "name": "Yogurt flavors", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Other" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the number of deaths in prison custody in Australia in each of the six years 1990 to 1995, given separately for Aboriginal and Torres Strait Islanders (indigenous) and others (non-indigenous). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1990 through 1995\n\nIndigenous\n\nYes = Aboriginal or Torres Strait Islander, No = Non-indigenous\n\nPrisoners\n\nTotal number in prison custody\n\nDeaths\n\nNumber of deaths in prison custody\n\nPopulation\n\nAdult population (15+ years)\n\n\n\n\nThe data were collected in response to the Royal Commission into Aboriginal Deaths in Custody, the final report of which was tabled in the Federal Parliament on the 9 May 1991. \nThe report of the Royal Commission has two streams. One is concerned with the ninety-nine Aboriginal and Torres Strait Islander deaths in custody which occurred throughout Australia during the period 1 January 1980 to 31 May 1989. 
Issues around the causes of death, culpability of custodians and their employers, and the prevention of future deaths were addressed in depth. The second stream concerned what the Royal Commission called the ‘underlying issues’: the social, cultural, and legal factors which, in the view of the Commissioners, had some bearing on the deaths. These underlying issues, as revealed from the chapter headings of the Royal Commission’s National Report, included the Legacy of History, Aboriginal Society Today, Relations With the Non-Aboriginal Community, The Harmful Use of Alcohol and Other Drugs, Schooling, Employment, Unemployment and Poverty, Housing and Infrastructure, Land Needs, and Self-determination. \nThe link between the Royal Commission’s discussion of the individual deaths investigated, the prevention of future deaths and the underlying issues, is its position on the over-representation of Indigenous people in custody in Australia. A central conclusion of the Royal Commission, illustrating this point, was as follows: \nThe work of the commission has established that Aboriginal people in custody do not die at a greater rate than non-Aboriginal people in custody. \nHowever, what is overwhelming different is the rate at which Aboriginal people come into custody, compared with the rate of the general community ... The ninety-nine who died in custody illustrate that over-representation and, in a sense, are the victims of it. \nThe conclusions are clear. Aboriginal people die in custody at a rate relevant to their proportion of the whole population which is totally unacceptable and which would not be tolerated if it occurred in the non-Aboriginal community. But this occurs not because Aboriginal people in custody are more likely to die than others in custody, but because the Aboriginal population is grossly over-represented in custody. 
Too many Aboriginal people are in custody too often (Johnston, 1991, Vol 1, p6).", + "description": "The data give the number of deaths in prison custody in Australia in each of the six years 1990 to 1995, given separately for Aboriginal and Torres Strait Islanders (indigenous) and others (non-indigenous). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1990 through 1995\n\nIndigenous\n\nYes = Aboriginal or Torres Strait Islander, No = Non-indigenous\n\nPrisoners\n\nTotal number in prison custody\n\nDeaths\n\nNumber of deaths in prison custody\n\nPopulation\n\nAdult population (15+ years)\n\n\n\n\nThe data were collected in response to the Royal Commission into Aboriginal Deaths in Custody, the final report of which was tabled in the Federal Parliament on the 9 May 1991. \nThe report of the Royal Commission has two streams. One is concerned with the ninety-nine Aboriginal and Torres Strait Islander deaths in custody which occurred throughout Australia during the period 1 January 1980 to 31 May 1989. Issues around the causes of death, culpability of custodians and their employers, and the prevention of future deaths were addressed in depth. The second stream concerned what the Royal Commission called the 'underlying issues': the social, cultural, and legal factors which, in the view of the Commissioners, had some bearing on the deaths. These underlying issues, as revealed from the chapter headings of the Royal Commission's National Report, included the Legacy of History, Aboriginal Society Today, Relations With the Non-Aboriginal Community, The Harmful Use of Alcohol and Other Drugs, Schooling, Employment, Unemployment and Poverty, Housing and Infrastructure, Land Needs, and Self-determination. \nThe link between the Royal Commission's discussion of the individual deaths investigated, the prevention of future deaths and the underlying issues, is its position on the over-representation of Indigenous people in custody in Australia. 
A central conclusion of the Royal Commission, illustrating this point, was as follows: \nThe work of the commission has established that Aboriginal people in custody do not die at a greater rate than non-Aboriginal people in custody. \nHowever, what is overwhelmingly different is the rate at which Aboriginal people come into custody, compared with the rate of the general community ... The ninety-nine who died in custody illustrate that over-representation and, in a sense, are the victims of it. \nThe conclusions are clear. Aboriginal people die in custody at a rate relevant to their proportion of the whole population which is totally unacceptable and which would not be tolerated if it occurred in the non-Aboriginal community. But this occurs not because Aboriginal people in custody are more likely to die than others in custody, but because the Aboriginal population is grossly over-represented in custody. Too many Aboriginal people are in custody too often (Johnston, 1991, Vol 1, p6).", "url": "http://www.statsci.org/data/oz/custody.txt", "filename": "custody", "name": "Aboriginal Deaths in Custody", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Facts on the countries of Asia. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nCountry \n\nName\n\nArea \n\nTotal area (sq km)\n\nPopulation \n\nPopulation July 1995 est.\n\nLife \n\nLife Expectancy 1995 est.
(years)\n\nGDP \n\nGDP 1994 (US$ billions)\n\nGDP/caput \n\nGDP per person 1994 est (US$)\n\n\n\n", "url": "http://www.statsci.org/data/oz/asia.txt", "filename": "asia", "name": "Countries of Asia", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The United States Census Bureau keeps track of the number of adoptions in each State (and Washington D.C.). The data includes the population of each state as well. How should adoptions be summarized and displayed? ", "url": "https://dasl.datadescription.com/download/data/3043", "filename": "Adoptions", "name": "Adoptions", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3177", "filename": "Drivers-Licenses-2014", "name": "Drivers Licenses 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Do flexible work schedules reduce the demand for resources? The Lake County, Illinois, Health Department experimented with a flexible four-day workweek. For a year, the department recorded the mileage driven by 11 field workers on an ordinary five-day workweek. Then it changed to a flexible four-day workweek and recorded mileage for another year. ", + "description": "Do flexible work schedules reduce the demand for resources? 
The Lake County, Illinois, Health Department experimented with a flexible four-day workweek. For a year, the department recorded the mileage driven by 11 field workers on an ordinary five-day workweek. Then it changed to a flexible four-day workweek and recorded mileage for another year.", "url": "https://dasl.datadescription.com/download/data/3540", "filename": "Work-week", "name": "Work week", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Administration" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Commercial airlines overbook flights, selling more tickets than they have seats, because a sizeable number of reservation holders don’t show up in time for their flights. But sometimes, there are more passengers wishing to board than there are seats. Most airlines try to entice travelers to voluntarily give up their seats in return for free […] ", + "description": "Commercial airlines overbook flights, selling more tickets than they have seats, because a sizeable number of reservation holders don't show up in time for their flights. But sometimes, there are more passengers wishing to board than there are seats. Most airlines try to entice travelers to voluntarily give up their seats in return for free [\u2026] ", "url": "https://dasl.datadescription.com/download/data/3048", "filename": "Airline-bumping", "name": "Airline bumping 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "https://www.albany.edu/sourcebook/csv/t3177.csv adapted from: U.S. 
Department of Transportation, Federal Aviation Administration, Semiannual Report to Congress on the Effectiveness of the Civil Aviation Security Program, July 1 to \"December 31, 1978, Exhibit 10; July 1 to December 31, 1982, Exhibit 10; July 1 to December 31, 1984, Exhibit 7; July 1 to December 31, 1989, p. 11 (Washington, DC: U.S. Department of Transportation); U.S. Department of Transportation, Federal Aviation Administration, Annual Report to Congress on Civil Aviation Security, January 1, 1993-December 31, 1993, p. 9; January 1, 1995-December 31, 1995, p. 11 (Washington, DC: U.S. Department of Transportation); and data provided by the U.S. Department of Transportation, Federal Aviation Administration and Bureau of Transportation Statistics [Online]. Available: http://www.bts.gov/publications/national_transportation_statistics/ 2003/html/table_02_16.html [May 24, 2004]. Table adapted by SOURCEBOOK staff.", "url": "https://dasl.datadescription.com/download/data/3049", "filename": "Airport-screening", "name": "Airport screening", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Bicycle Helmet Safety Institute website includes a report on the number of bicycle fatalities per year in the United States. The data gives the counts for the years 1994–2015. ", + "description": "The Bicycle Helmet Safety Institute website includes a report on the number of bicycle fatalities per year in the United States. 
The data gives the counts for the years 1994-2015.", "url": "https://dasl.datadescription.com/download/data/3073", "filename": "Bike-safety-2015", "name": "Bike safety 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset is the number of camp sites at each of the public parks in Vermont ", "url": "https://dasl.datadescription.com/download/data/3091", "filename": "Camp-sites", "name": "Camp sites", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the number of domestic U.S. flights flown in each year from 2000 to 2016 ", "url": "https://dasl.datadescription.com/download/data/3209", "filename": "Flights-2016", "name": "Flights 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Bureau of Transportation Statistics of the U.S. Department of Transportation publishes information about airline performance. The data report the percentage of flights departing on time each month from January 1994 through June 2016. ", + "description": "The Bureau of Transportation Statistics of the U.S. Department of Transportation publishes information about airline performance. 
The data report the percentage of flights departing on time each month from January 1994 through June 2016.", "url": "https://dasl.datadescription.com/download/data/3210", "filename": "Flights-on-time-2016", "name": "Flights on time 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Many people fear Friday the 13th as an unlucky day. Researchers looked into this to see whether there were differences in traffic or in admissions to hospitals for road accidents on Friday 13th when compared with the adjacent Friday 6th. ", + "description": "Many people fear Friday the 13th as an unlucky day. Researchers looked into this to see whether there were differences in traffic or in admissions to hospitals for road accidents on Friday 13th when compared with the adjacent Friday 6th.", "url": "https://dasl.datadescription.com/download/data/3220", "filename": "Friday-13-traffic", "name": "Friday the 13th traffic", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Many drivers of cars that can run on regular gas actually buy premium in the belief that they will get better gas mileage. To test that belief, we use 10 cars from a company fleet in which all the cars run on regular gas. Each car is filled first with either regular or premium gasoline, decided by a coin toss, and the mileage for that tankful is recorded. Then the mileage is recorded again for the same cars for a tankful of the other kind of gaso-line. 
We don’t let the drivers know about this experiment.", + "description": "Many drivers of cars that can run on regular gas actually buy premium in the belief that they will get better gas mileage. To test that belief, we use 10 cars from a company fleet in which all the cars run on regular gas. Each car is filled first with either regular or premium gasoline, decided by a coin toss, and the mileage for that tankful is recorded. Then the mileage is recorded again for the same cars for a tankful of the other kind of gasoline. We don't let the drivers know about this experiment.", "url": "https://dasl.datadescription.com/download/data/3230", "filename": "Gasoline__", "name": "Gasoline", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Much of the public and private industry in Hawaii depends on tourism. The following time series plot shows the number of domestic visitors to Hawaii by air from the rest of the United States per month from January 2002 through December 2006 before the financial crisis of 2008. ", + "description": "Much of the public and private industry in Hawaii depends on tourism.
The following time series plot shows the number of domestic visitors to Hawaii by air from the rest of the United States per month from January 2002 through December 2006 before the financial crisis of 2008.", "url": "https://dasl.datadescription.com/download/data/3257", "filename": "Hawaii-tourism", "name": "Hawaii tourism", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data report the percentage of flights that were late and the percentage that departed on time for each month from 1995 through early 2016 ", "url": "https://dasl.datadescription.com/download/data/3309", "filename": "Late-arrivals-2016", "name": "Late arrivals 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Research and Innovative Technology Administration of the Bureau of Transportation Statistics reports load factors (passenger-miles as a percentage of available seat miles) for commercial airlines for every month from October 2002 through 2017 for both domestic and international flights. 
", + "description": "The Research and Innovative Technology Administration of the Bureau of Transportation Statistics reports load factors (passenger-miles as a percentage of available seat miles) for commercial airlines for every month from October 2002 through 2017 for both domestic and international flights.", "url": "https://dasl.datadescription.com/download/data/3315", "filename": "Load-factors-2016", "name": "Load factors 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Research and Innovative Technology Administration of the Bureau of Transportation Statistics reports load factors (passenger-miles as a percentage of available seat miles) for commercial airlines for every month from October 2002 through 2017 for both domestic and international flights. ", + "description": "The Research and Innovative Technology Administration of the Bureau of Transportation Statistics reports load factors (passenger-miles as a percentage of available seat miles) for commercial airlines for every month from October 2002 through 2017 for both domestic and international flights.", "url": "https://dasl.datadescription.com/download/data/3316", "filename": "Load-factors-2017", "name": "Load factors 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the number of passengers at Oakland (CA) airport month by month since 1997. 
", + "description": "The data give the number of passengers at Oakland (CA) airport month by month since 1997.", "url": "https://dasl.datadescription.com/download/data/3371", "filename": "Oakland-passengers-2016", "name": "Oakland passengers 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The National Highway Traffic Safety Administration reports seat belt use and fatalities in car accidents by state. How do fatalities relate to seat belt use? ", "url": "https://dasl.datadescription.com/download/data/3442", "filename": "Seat-belts-2015", "name": "Seat belts 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data report the density (cars per mile) and average speed of traffic on city highways. The data were collected at the same location at 10 different times randomly selected within a span of 3 months. ", + "description": "The data report the density (cars per mile) and average speed of traffic on city highways. 
The data were collected at the same location at 10 different times randomly selected within a span of 3 months.", "url": "https://dasl.datadescription.com/download/data/3560", "filename": "Speed-density", "name": "Speed and density", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A tire manufacturer tested the braking performance of one of its tire models on a test track. The company tried the tires on 10 different cars, recording the stopping distance for each car on both wet and dry pavement. ", + "description": "A tire manufacturer tested the braking performance of one of its tire models on a test track. The company tried the tires on 10 different cars, recording the stopping distance for each car on both wet and dry pavement.", "url": "https://dasl.datadescription.com/download/data/3460", "filename": "Stopping-distance", "name": "Stopping distance", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A tire manufacturer tested the braking performance of one of its tire models on a test track. The company tried the tires on 10 different cars, recording the stopping distance for each car on both wet and dry pavement from 60 miles per hour. The test was run on both dry and wet pavement. (The actual braking distance takes into account the driver’s reaction time, which typically adds nearly 300 feet at 60 mph!)", + "description": "A tire manufacturer tested the braking performance of one of its tire models on a test track. 
The company tried the tires on 10 different cars, recording the stopping distance for each car on both wet and dry pavement from 60 miles per hour. The test was run on both dry and wet pavement. (The actual braking distance takes into account the driver's reaction time, which typically adds nearly 300 feet at 60 mph!)", "url": "https://dasl.datadescription.com/download/data/3461", "filename": "Stopping-distance-60", "name": "Stopping distance 60", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Traffic fatalities in a variety of vehicles and for a variety of situations for the years from 1975 to 2013. These are multiple time series, but can also be related to each other. ", + "description": "Traffic fatalities in a variety of vehicles and for a variety of situations for the years from 1975 to 2013. These are multiple time series, but can also be related to each other.", "url": "https://dasl.datadescription.com/download/data/3495", "filename": "Traffic-fatalities", "name": "Traffic fatalities 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The U.S. Energy Information Administration (EIA) collects data on the total energy used per capita in transportation for each state and the District of Columbia. The data show the per capita consumption in the year 2015 in millions of BTU per person. ", + "description": "The U.S. Energy Information Administration (EIA) collects data on the total energy used per capita in transportation for each state and the District of Columbia. 
The data show the per capita consumption in the year 2015 in millions of BTU per person.", "url": "https://dasl.datadescription.com/download/data/3496", "filename": "Transportation-Energy", "name": "Transportation Energy use", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "U.S. Department of Transportation reports records of border crossings into each state on the U.S. border. Here are the border crossings by trucks for Alaska, recorded each month from 1999 through 2017. ", + "description": "U.S. Department of Transportation reports records of border crossings into each state on the U.S. border. Here are the border crossings by trucks for Alaska, recorded each month from 1999 through 2017.", "url": "https://dasl.datadescription.com/download/data/3499", "filename": "Trucks_", "name": "Trucks", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Travel" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A survey was conducted in the United States and 10 countries of Western Europe to determine the percentage of teenagers who had used marijuana and other drugs. The data give percentages of drug use by country. ", + "description": "A survey was conducted in the United States and 10 countries of Western Europe to determine the percentage of teenagers who had used marijuana and other drugs. 
The data give percentages of drug use by country.", "url": "https://dasl.datadescription.com/download/data/3178", "filename": "Drug-abuse", "name": "Drug abuse", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The 2013 World Drug Report investigated the prevalence of drug use as a percentage of the population aged 15 to 64. Data from 32 European countries are shown.", "url": "https://dasl.datadescription.com/download/data/3179", "filename": "Drug-use-2013", "name": "Drug use 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Prisons 2014", "url": "https://dasl.datadescription.com/download/data/3406", "filename": "Prisons-2014", "name": "Prisons 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Crime" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data are a random sample from the data in Population commute times.", "url": "https://dasl.datadescription.com/download/data/3123", "filename": "Commute-times-sample100", "name": "Commute times sample100", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": 
"https://dasl.datadescription.com/download/data/3137", "filename": "Couples", "name": "Couples", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data give the mortality rate (deaths per 100,000 people) and the education level (average number of years in school) for 58 U.S. cities. ", + "description": "Data give the mortality rate (deaths per 100,000 people) and the education level (average number of years in school) for 58 U.S. cities.", "url": "https://dasl.datadescription.com/download/data/3183", "filename": "Education-and-mortality", "name": "Education and mortality", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Students in a large statistics class were asked to report the eye color and hair color. Is there an association? ", "url": "https://dasl.datadescription.com/download/data/3197", "filename": "Eye-and-Hair-color", "name": "Eye and Hair color", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Eurostat, an agency of the European Union (EU), conducts surveys on several aspects of daily life in EU countries. Recently, the agency asked samples of 1000 respondents in each of 14 European countries whether they read the newspaper on a daily basis. 
", + "description": "Eurostat, an agency of the European Union (EU), conducts surveys on several aspects of daily life in EU countries. Recently, the agency asked samples of 1000 respondents in each of 14 European countries whether they read the newspaper on a daily basis.", "url": "https://dasl.datadescription.com/download/data/3363", "filename": "Newspapers", "name": "Newspapers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Population Commute Times", "url": "https://dasl.datadescription.com/download/data/3401", "filename": "Population-Commute", "name": "Population Commute Times", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "the percentage change in population for the 50 states and the District of Columbia from the 2000 census to the 2010 census. ", + "description": "the percentage change in population for the 50 states and the District of Columbia from the 2000 census to the 2010 census.", "url": "https://dasl.datadescription.com/download/data/3402", "filename": "Population-growth-2010", "name": "Population growth 2010", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Crowd Management Strategies monitors accidents at rock concerts. 
In their database, they list the names and other variables of victims whose deaths were attributed to “crowd crush” at rock concerts. The data give the victims’ ages for data from a one-year period: ", + "description": "Crowd Management Strategies monitors accidents at rock concerts. In their database, they list the names and other variables of victims whose deaths were attributed to \"crowd crush\" at rock concerts. The data give the victims' ages for data from a one-year period: ", "url": "https://dasl.datadescription.com/download/data/3429", "filename": "Rock-concert-deaths", "name": "Rock concert deaths", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A study at a liberal arts college attempted to find out whether men and women watch the same amount of TV, on average and whether it mattered if students were varsity athletes or not. Student researchers asked 200 randomly selected students questions about their backgrounds and about their television-viewing habits and received 197 legitimate responses. The researchers found that men watch, on average, about 2.5 hours per week more TV than women, and that varsity athletes watch about 3.5 hours per week more than those who are not varsity athletes. But is this the whole story? 
To investigate further, they divided the students into four groups: male athletes (MA), male non-athletes (MNA), female\nathletes (FA), and female non-athletes (FNA).", "url": "https://dasl.datadescription.com/download/data/3504", "filename": "TV-watching", "name": "TV watching", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Insurance companies and other organizations use actuarial tables to estimate the remaining lifespans of their customers. The data file gives estimated life expectancy and additional years of life for black males in the United States, according to a 2016 National Vital Statistics Report, A regression model to predict Life expectancy from Age appears to fit well, but consider the residuals.", "url": "https://dasl.datadescription.com/download/data/3542", "filename": "Years-to-live", "name": "Years to live 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Fortune magazine collected the zodiac signs of 256 heads of the largest 400 companies. The data shows the number of births for each sign. ", + "description": "Fortune magazine collected the zodiac signs of 256 heads of the largest 400 companies. 
The data shows the number of births for each sign.", "url": "https://dasl.datadescription.com/download/data/3547", "filename": "Zodiac", "name": "Zodiac", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Population" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "https://en.wikipedia.org/wiki/List_of_U.S._states_by_electricity_production_from_renewable_sources", "url": "https://dasl.datadescription.com/download/data/3051", "filename": "Alternative-energy", "name": "Alternative energy 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In a statement to a Senate Public Works Committee, a senior executive of Texaco, Inc., cited a study on the effectiveness of auto filters on reducing noise. Because of concerns about performance, two types of filters were studied, a standard silencer and a new device developed by the Associated Octel Company. Noise is in decibels/10. […] ", + "description": "In a statement to a Senate Public Works Committee, a senior executive of Texaco, Inc., cited a study on the effectiveness of auto filters on reducing noise. Because of concerns about performance, two types of filters were studied, a standard silencer and a new device developed by the Associated Octel Company. Noise is in decibels/10. 
[\u2026] ", "url": "https://dasl.datadescription.com/download/data/3058", "filename": "Auto-noise-filters", "name": "Auto noise filters", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A student experiment was run to test the performance of 4 brands of batteries under 2 different Environments (room temperature and cold). For each of the 8 treatments, 2 batteries of a particular brand were put into a flashlight. The flashlight was then turned on and allowed to run until the light went out. The […] ", + "description": "A student experiment was run to test the performance of 4 brands of batteries under 2 different Environments (room temperature and cold). For each of the 8 treatments, 2 batteries of a particular brand were put into a flashlight. The flashlight was then turned on and allowed to run until the light went out. The [\u2026] ", "url": "https://dasl.datadescription.com/download/data/3070", "filename": "Batteries", "name": "Batteries", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Stopping distances in feet for a car tested 3 times at each of 5 speeds. We hope to create a model that predicts Stopping Distance from the Speed of the car. ", + "description": "Stopping distances in feet for a car tested 3 times at each of 5 speeds. 
We hope to create a model that predicts Stopping Distance from the Speed of the car.", "url": "https://dasl.datadescription.com/download/data/3086", "filename": "Brakes", "name": "Brakes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Measurements on 38 1978-79 model automobiles. Gas mileage in miles per gallon as measured by Consumers’ Union on a test track. Other values as reported by automobile manufacturer. Used to illustrate regression model building and diagnosis. Be sure to check the residuals when predicting MPG. ", + "description": "Measurements on 38 1978-79 model automobiles. Gas mileage in miles per gallon as measured by Consumers' Union on a test track. Other values as reported by automobile manufacturer. Used to illustrate regression model building and diagnosis. Be sure to check the residuals when predicting MPG.", "url": "https://dasl.datadescription.com/download/data/3096", "filename": "Cars", "name": "Cars", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A start-up company has developed an improved electronic chip for use in laboratory equipment. The company needs to project the manufacturing cost, so it develops a spreadsheet model that takes into account the purchase of production equipment, overhead, raw materials, depreciation, maintenance, and other business costs. The spreadsheet estimates the cost of producing 10,000 to […] ", + "description": "A start-up company has developed an improved electronic chip for use in laboratory equipment. 
The company needs to project the manufacturing cost, so it develops a spreadsheet model that takes into account the purchase of production equipment, overhead, raw materials, depreciation, maintenance, and other business costs. The spreadsheet estimates the cost of producing 10,000 to [\u2026] ", "url": "https://dasl.datadescription.com/download/data/3109", "filename": "Chips", "name": "Chips", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3126", "filename": "Computer-chip", "name": "Computer chip manufacturing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dalia collects data via smartphone from users worldwide. This survey asked (among many other questions) about access to cars and the use of ride-hailing apps ", "url": "https://dasl.datadescription.com/download/data/3153", "filename": "Dalia", "name": "Dalia", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Disk drive capacity is often given in terabytes (TB), where 1 TB = 1000 gigabytes, or about a trillion bytes. A search of prices for external disk drives on Amazon.com in mid-2016 found the data on capacity and price. ", + "description": "Disk drive capacity is often given in terabytes (TB), where 1 TB = 1000 gigabytes, or about a trillion bytes. 
A search of prices for external disk drives on Amazon.com in mid-2016 found the data on capacity and price.", "url": "https://dasl.datadescription.com/download/data/3167", "filename": "Disk-drives", "name": "Disk drives 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Most water tanks have a drain plug so that the tank may be emptied when it’s to be moved or repaired. How long it takes a certain size of tank to drain depends on the size of the plug, as shown in the table. ", + "description": "Most water tanks have a drain plug so that the tank may be emptied when it's to be moved or repaired. How long it takes a certain size of tank to drain depends on the size of the plug, as shown in the table.", "url": "https://dasl.datadescription.com/download/data/3175", "filename": "Down-the-Drain", "name": "Down the Drain", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A university teacher saved every e-mail received from students in a large introductory statistics class during one term. He then counted, for each student who had sent him at least one e-mail, how many e-mails each student had sent. What is the distribution of e-mail communications? 
", "url": "https://dasl.datadescription.com/download/data/3181", "filename": "E-mails", "name": "E-mails", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Fuel economy (mpg) and the number of cylinders in a sample of cars. Data extracted from a larger cars dataset. ", + "description": "Fuel economy (mpg) and the number of cylinders in a sample of cars. Data extracted from a larger cars dataset.", "url": "https://dasl.datadescription.com/download/data/3226", "filename": "Fuel-economy-and-cylinders", "name": "Fuel economy and cylinders", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An experiment to test a new gasoline additive, Gasplus, was performed on three different cars: a sports car, a minivan, and a hybrid. 
Each car was tested with both Gasplus and regular gas on 10 different occasions and their gas mileage was recorded.", "url": "https://dasl.datadescription.com/download/data/3231", "filename": "Gas-additives", "name": "Gas additives", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Internet users 2014", "url": "https://dasl.datadescription.com/download/data/3299", "filename": "Internet-users", "name": "Internet users 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "iPod failures", "url": "https://dasl.datadescription.com/download/data/3300", "filename": "iPod-failures", "name": "iPod failures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Richard DeVeaux owned a Nissan Maxima for 8 years. He\nrecorded the car’s fuel efficiency (in mpg) each time he filled the tank. He wanted to know what fuel efficiency to expect as “ordinary” for his car. Knowing this, he was able to predict when he’d need to fill the tank again and to notice if the fuel efficiency suddenly got worse, which could be a sign of trouble.", + "description": "Richard DeVeaux owned a Nissan Maxima for 8 years. He\nrecorded the car's fuel efficiency (in mpg) each time he filled the tank. He wanted to know what fuel efficiency to expect as \"ordinary\" for his car. 
Knowing this, he was able to predict when he'd need to fill the tank again and to notice if the fuel efficiency suddenly got worse, which could be a sign of trouble.", "url": "https://dasl.datadescription.com/download/data/3367", "filename": "Nissan", "name": "Nissan", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Costs of construction for 32 light water nuclear plants.", "url": "https://dasl.datadescription.com/download/data/3554", "filename": "Nuclear-plants", "name": "Nuclear plants", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Pew Research conducted a survey about social networking in several countries. They asked whether respondents had access to and used social networking. Responses were “yes” (use social networking), “no”, and “not available”.", + "description": "Pew Research conducted a survey about social networking in several countries. They asked whether respondents had access to and used social networking. Responses were \"yes\" (use social networking), \"no\", and \"not available\".", "url": "https://dasl.datadescription.com/download/data/3457", "filename": "Social-networking", "name": "Social networking", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Cnet.com tests tablet computers and continuously updates its list. 
As of January 2014, the list included the battery life (in hours) and luminous intensity (i.e., screen brightness, in cd/m^2). ", + "description": "Cnet.com tests tablet computers and continuously updates its list. As of January 2014, the list included the battery life (in hours) and luminous intensity (i.e., screen brightness, in cd/m^2).", "url": "https://dasl.datadescription.com/download/data/3474", "filename": "Tablet-computers-2014", "name": "Tablet computers 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Should you generate electricity with your own personal\nwind turbine? That depends on whether you have enough\nwind on your site. To produce enough energy, your site should\nhave an annual average wind speed above 8 miles per hour, according\nto the Wind Energy Association. One candidate site was\nmonitored for a year, with wind speeds recorded every 6 hours.\nA total of 1114 readings of wind speed averaged 8.019 mph with\na standard deviation of 3.813 mph. The data are provided.", "url": "https://dasl.datadescription.com/download/data/3527", "filename": "Wind-power", "name": "Wind power", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Technology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Pew Research Center conducted a representative telephone survey in October of 2016. Among the reported results was the following table concerning the preferred political party affiliation of respondents and their ages for white voters. 
Is there evidence of age-based differences in party affiliation in the United States for white voters? ", "url": "https://dasl.datadescription.com/download/data/3045", "filename": "Age-and-party", "name": "Age and party 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The outcome of the 2000 U.S. presidential election was determined in Florida amid much\ncontroversy. Even years later, historians continue to debate who really received the most\nvotes. The main race was between George W. Bush and Al Gore, but two minor candidates\nplayed a significant role. To the political right of the major party candidates was Pat\nBuchanan, while to the political left was Ralph Nader. Generally, Nader earned more votes\nthan Buchanan throughout the state. We would expect counties with larger vote totals to\ngive more votes to each candidate. The dataset gives Buchanan’s and Nader’s vote totals by\ncounty in the state of Florida. Plot to identify the outlier and consider what it means.", + "description": "The outcome of the 2000 U.S. presidential election was determined in Florida amid much\ncontroversy. Even years later, historians continue to debate who really received the most\nvotes. The main race was between George W. Bush and Al Gore, but two minor candidates\nplayed a significant role. To the political right of the major party candidates was Pat\nBuchanan, while to the political left was Ralph Nader. Generally, Nader earned more votes\nthan Buchanan throughout the state. We would expect counties with larger vote totals to\ngive more votes to each candidate. The dataset gives Buchanan's and Nader's vote totals by\ncounty in the state of Florida. 
Plot to identify the outlier and consider what it means.", "url": "https://dasl.datadescription.com/download/data/3187", "filename": "Election-2000", "name": "Election 2000", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3201", "filename": "Female-president", "name": "Female president", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "How accurate are pollsters in predicting the outcomes of Congressional elections? The table shows the actual number of Democrat seats in the House of Representatives and the number predicted by the Gallup organization for nonpresidential election years in the 4 decades following World War II. ", + "description": "How accurate are pollsters in predicting the outcomes of Congressional elections? 
The table shows the actual number of Democrat seats in the House of Representatives and the number predicted by the Gallup organization for nonpresidential election years in the 4 decades following World War II.", "url": "https://dasl.datadescription.com/download/data/3564", "filename": "Polling", "name": "Polling", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Politics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "1998 Baby data from http://www.nber.org/natality/ftp.cdc.gov/pub/Health_Statistics/NCHS/Dataset_Documentation/DVS/natality/", "url": "https://dasl.datadescription.com/download/data/3059", "filename": "Babysamp", "name": "Babysamp 98", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Births per 1000 population in the United States, starting in 1965. There has been concern that the birthrate may be declining. A good model for tends in birthrate may allow for some prediction. ", + "description": "Births per 1000 population in the United States, starting in 1965. There has been concern that the birthrate may be declining. 
A good model for trends in birthrate may allow for some prediction.", "url": "https://dasl.datadescription.com/download/data/3075", "filename": "Birthrates-2015", "name": "Birthrates 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In a Chance magazine article (Summer 2005), Danielle Vasilescu and Howard Wainer used data from the United Nations Center for Human Settlements to investigate aspects of living conditions for several countries. Among the variables they looked at were the country’s per capita gross domestic product (GDP, in $) and Crowdedness, defined as the average number of persons per room living in homes there.\nVasilescu and Wainer re-express GDP to -10000/GDP. Doing that reveals an outlier that may be due to an error in the data.", + "description": "In a Chance magazine article (Summer 2005), Danielle Vasilescu and Howard Wainer used data from the United Nations Center for Human Settlements to investigate aspects of living conditions for several countries. Among the variables they looked at were the country's per capita gross domestic product (GDP, in $) and Crowdedness, defined as the average number of persons per room living in homes there.\nVasilescu and Wainer re-express GDP to -10000/GDP. 
Doing that reveals an outlier that may be due to an error in the data.", "url": "https://dasl.datadescription.com/download/data/3148", "filename": "Crowdedness", "name": "Crowdedness", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3237", "filename": "GDP-DJIA", "name": "GDP and DJIA 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data for 800 respondents in each of five countries. The variables provide demographic information (sex, age, education, marital status) and responses to questions of interest to marketers on personal finance and purchasing. ", + "description": "Data for 800 respondents in each of five countries. The variables provide demographic information (sex, age, education, marital status) and responses to questions of interest to marketers on personal finance and purchasing.", "url": "https://dasl.datadescription.com/download/data/3242", "filename": "Global", "name": "Global", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset gives profits (in $M) for 30 of the 500 largest global corporations (as measured by revenue). 
", + "description": "The dataset gives profits (in $M) for 30 of the 500 largest global corporations (as measured by revenue).", "url": "https://dasl.datadescription.com/download/data/3243", "filename": "Global500-2014", "name": "Global500 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In an investigation of environmental causes of disease, data were collected on the annual mortality rate (deaths per 100,000) for males in 61 large towns in England and Wales. In addition, the water hardness was recorded as the calcium concentration (parts per million, ppm) in the drinking water.", "url": "https://dasl.datadescription.com/download/data/3255", "filename": "Hard-water", "name": "Hard water", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The United Nations Development Programme (UNDP) uses the Human Development Index (HDI) in an attempt to summarize in one number the progress in health, education, and economics of a country. In 2015, the HDI was as high as 0.94 for Norway and as low as 0.35 for Niger. ", + "description": "The United Nations Development Programme (UNDP) uses the Human Development Index (HDI) in an attempt to summarize in one number the progress in health, education, and economics of a country. 
In 2015, the HDI was as high as 0.94 for Norway and as low as 0.35 for Niger.", "url": "https://dasl.datadescription.com/download/data/3258", "filename": "HDI-2015", "name": "HDI 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The United Nations Development Programme (UNDP) uses the Human Development Index (HDI) in an attempt to summarize in one number the progress in health, education, and economics of a country. ", + "description": "The United Nations Development Programme (UNDP) uses the Human Development Index (HDI) in an attempt to summarize in one number the progress in health, education, and economics of a country.", "url": "https://dasl.datadescription.com/download/data/3259", "filename": "HDI-2016", "name": "HDI 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Life expectancy at birth in 195 countries. ", + "description": "Life expectancy at birth in 195 countries.", "url": "https://dasl.datadescription.com/download/data/3312", "filename": "Life-Expectancy", "name": "Life Expectancy", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Here is a table from the National Vital Statistics Report that gives the Life Expectancy for white males in the United States every decade during the 20th century (1 = 1900 to 1910, 2 = 1911 to 1920, etc.). 
Does a linear model relating life expectancy to decade fit? Would re-expressing either variable help?", "url": "https://dasl.datadescription.com/download/data/3313", "filename": "Life-expectancy-US", "name": "Life expectancy US", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Age at first marriage has changed over the course of the past century. In addition, the difference in the age of the husband and of the wife at first marriage has changed. Both the ages and the difference in ages can be interesting to analyze. ", + "description": "Age at first marriage has changed over the course of the past century. In addition, the difference in the age of the husband and of the wife at first marriage has changed. Both the ages and the difference in ages can be interesting to analyze.", "url": "https://dasl.datadescription.com/download/data/3329", "filename": "Marriage-age-2015", "name": "Marriage age 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Age at first marriage has changed over the course of the past century. In addition, the difference in the age of the husband and of the wife at first marriage has changed. Both the ages and the difference in ages can be interesting to analyze. ", + "description": "Age at first marriage has changed over the course of the past century. In addition, the difference in the age of the husband and of the wife at first marriage has changed. 
Both the ages and the difference in ages can be interesting to analyze.", "url": "https://dasl.datadescription.com/download/data/3330", "filename": "Marriage-age-2016", "name": "Marriage age 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The estimated median age at fist marriage by sex from 1890 to 2017 is provided by the U.S. Census bureau. Since 1960, marriage ages have been increasing steadily. Has the difference between men’s and women’s first marriage age changed? ", + "description": "The estimated median age at first marriage by sex from 1890 to 2017 is provided by the U.S. Census Bureau. Since 1960, marriage ages have been increasing steadily. Has the difference between men's and women's first marriage age changed? ", "url": "https://dasl.datadescription.com/download/data/3331", "filename": "Marriage-age-2017", "name": "Marriage age 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Source: JAMA 284 [2000]:335–341) \nNumber of Cases: 278", + "description": "Source: JAMA 284 [2000]:335-341) \nNumber of Cases: 278", "url": "https://dasl.datadescription.com/download/data/3506", "filename": "Twin-Births", "name": "Twin Births", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In January 2012, the New York Times\npublished a story called “Twin Births in 
the U.S., Like Never\nBefore,” in which they reported a 76 percent increase in the\nrate of twin births from 1980 to 2009. The dataset gives the number\nof twin births each year (per 1000 live births). Can you confirm the Times report?\nThe dataset also includes the atmospheric CO2 levels (ppm) for those years to offer an alternative predictor in case there appears to be an argument for causation.", + "description": "In January 2012, the New York Times\npublished a story called \"Twin Births in the U.S., Like Never\nBefore\", in which they reported a 76 percent increase in the\nrate of twin births from 1980 to 2009. The dataset gives the number\nof twin births each year (per 1000 live births). Can you confirm the Times report?\nThe dataset also includes the atmospheric CO2 levels (ppm) for those years to offer an alternative predictor in case there appears to be an argument for causation.", "url": "https://dasl.datadescription.com/download/data/3505", "filename": "Twins-by-Year", "name": "Twins by Year 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Working parents", "url": "https://dasl.datadescription.com/download/data/3539", "filename": "Working-parents", "name": "Working parents", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Demographics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A statistics professor at a large university polled his students to find out what their majors were and what position they held in the family birth order. 
The results are summarized in the table.", "url": "https://dasl.datadescription.com/download/data/3076", "filename": "Birth-order", "name": "Birth order", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The technology committee at a school has stated that the average time spent by students per lab visit has increased and the increase supports their argument that they need to increase lab fees.\nTo substantiate this claim, the committee randomly sampled 12 student lab visits and noted the amount of time spent using the computer. The times in minutes are given:", "url": "https://dasl.datadescription.com/download/data/3127", "filename": "Computer-lab", "name": "Computer lab fees", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Students in two basic Spanish classes were required to learn 50 new vocabulary words. One group of 45 students received the list on Monday and studied the words all week. Statistics summarizing this group’s scores on Friday’s quiz are given. The other group of 25 students did not get the vocabulary list until Thursday. They also took the quiz on Friday, after “cramming” Thursday night. Then, when they returned to class the following Monday, they were retested—without advance warning. Both sets of test scores for these students are given.", + "description": "Students in two basic Spanish classes were required to learn 50 new vocabulary words. One group of 45 students received the list on Monday and studied the words all week. 
Statistics summarizing this group's scores on Friday's quiz are given. The other group of 25 students did not get the vocabulary list until Thursday. They also took the quiz on Friday, after \"cramming\" Thursday night. Then, when they returned to class the following Monday, they were retested - without advance warning. Both sets of test scores for these students are given.", "url": "https://dasl.datadescription.com/download/data/3140", "filename": "Cramming", "name": "Cramming", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3184", "filename": "Education-by-age", "name": "Education by age", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Is college worth the expense? Which colleges have graduates who earn the most? And what is the best predictor of earnings 5-years out? The data provide several possible predictors and background information suitable for building regression models. ", + "description": "Is college worth the expense? Which colleges have graduates who earn the most? And what is the best predictor of earnings 5-years out? 
The data provide several possible predictors and background information suitable for building regression models.", "url": "https://dasl.datadescription.com/download/data/3249", "filename": "Graduate-Earnings", "name": "Graduate Earnings", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The National Center for Education Statistics reports average mathematics achievement scores for eighth graders in all 50 states.", "url": "https://dasl.datadescription.com/download/data/3332", "filename": "Math-scores-2013", "name": "Math scores 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Scores on SAT tests for 162 students at the same school. (The identity of the school is not provided for privacy.) How are Math and Verbal scores related? Would a regression model be appropriate? Is there a difference in male and female scores? How would that difference be modeled? ", "url": "https://dasl.datadescription.com/download/data/3438", "filename": "SAT-scores", "name": "SAT scores", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A school district superintendent wants to test a new method of teaching arithmetic in the fourth grade at his 15 schools. 
He plans to select 8 students from each school to take part in the experiment, but to make sure they are roughly of the same ability, he first gives a test to all 120 students. The data hold the scores of the test by school.", "url": "https://dasl.datadescription.com/download/data/3439", "filename": "School-system", "name": "School system", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset contains data from a class survey ", "url": "https://dasl.datadescription.com/download/data/3465", "filename": "Student-survey", "name": "Student survey", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Researchers randomly assigned subjects to take one of two tests (form A or form B) either electronically or with pencil and paper. Subjects then took the other test using the other method. The two forms had been designed to be equivalent in difficulty, but nevertheless, that equivalence was checked as part of the experiment. 
Our concern is whether subjects did equally well with each testing method.", "url": "https://dasl.datadescription.com/download/data/3466", "filename": "Student-testing", "name": "Student testing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Summer school", "url": "https://dasl.datadescription.com/download/data/3469", "filename": "Summer-school", "name": "Summer school", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Tuition 2016", "url": "https://dasl.datadescription.com/download/data/3502", "filename": "Tuition-2016", "name": "Tuition 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "https://collegescorecard.ed.gov/data/", "url": "https://dasl.datadescription.com/download/data/3503", "filename": "Tuition-All-Schools", "name": "Tuition All Schools 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the mean ACT composite scores for all 450 Wisconsin public schools in 2015 along with the type of school and number of students.", "url": "https://dasl.datadescription.com/download/data/3533", "filename": 
"Wisconsin-ACT-2015", "name": "Wisconsin ACT 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Wisconsin ACT math", "url": "https://dasl.datadescription.com/download/data/3534", "filename": "Wisconsin-ACT-math", "name": "Wisconsin ACT math", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Education" } ] }, { "name": "Physics", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "On August 24, 2006, the International Astronomical Union voted that Pluto is not a planet. Some members of the public have been reluctant to accept that decision. The data show the average distance of each of the traditional nine planets from the sun. Is there a pattern? Does Pluto fit with the other “official” planets? ", + "description": "On August 24, 2006, the International Astronomical Union voted that Pluto is not a planet. Some members of the public have been reluctant to accept that decision. The data show the average distance of each of the traditional nine planets from the sun. Is there a pattern? Does Pluto fit with the other \"official\" planets? 
", "url": "https://dasl.datadescription.com/download/data/3397", "filename": "Planets", "name": "Planets", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "On August 24, 2006, the International Astronomical Union voted that Pluto is not a planet. Some members of the public have been reluctant to accept that decision. The data show a variety of facts about the 8 planets and Pluto. Exercises consider two models for the planets. Does Pluto behave like a planet? ", "url": "https://dasl.datadescription.com/download/data/3398", "filename": "Planets-more", "name": "Planets more data", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Astronomy" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Scientist Robert Boyle examined the relationship between the volume in which a gas is contained and the pressure in its container. He used a cylindrical container with a moveable top that could be raised or lowered to change the volume. He measured the Height in inches by counting equally spaced marks on the cylinder, and", "url": "https://dasl.datadescription.com/download/data/3083", "filename": "Boyle", "name": "Boyle", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "We know from common sense and from Physics that heavier cars need more fuel, but exactly how does a car’s weight affect its fuel efficiency? 
The data set continues data on 38 cars including their fuel efficiency in miles per gallon measured on a track. ", + "description": "We know from common sense and from Physics that heavier cars need more fuel, but exactly how does a car's weight affect its fuel efficiency? The data set continues data on 38 cars including their fuel efficiency in miles per gallon measured on a track.", "url": "https://dasl.datadescription.com/download/data/3228", "filename": "Fuel-efficiency", "name": "Fuel efficiency", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A student experimenting with a pendulum counted the number of full swings the pendulum made in 20 seconds for various lengths of string. Her data are given. ", + "description": "A student experimenting with a pendulum counted the number of full swings the pendulum made in 20 seconds for various lengths of string. 
Her data are given.", "url": "https://dasl.datadescription.com/download/data/3390", "filename": "Pendulum", "name": "Pendulum", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Other" } ] }, { "name": "Chemistry", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "-", "url": "https://dasl.datadescription.com/download/data/3112", "filename": "Chromatography", "name": "Chromatography", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A student, preparing for a triathlon, suspected that the 45 minutes each day\nshe spent training in a chlorinated pool was damaging her nail polish. She\nwished to investigate whether the color of the nail polish might make a difference.\nShe mounted acrylic nails on sticks and polished them with two different color nail polishes. She soaked them together in a chlorine solution equivalent to a swimming pool’s chlorination and then tapped them 100 times on a computer keyboard to simulate daily stress. The response is the % of nail chipped off as measured by scanning images of the nails and using an image processing program.", + "description": "A student, preparing for a triathlon, suspected that the 45 minutes each day\nshe spent training in a chlorinated pool was damaging her nail polish. She\nwished to investigate whether the color of the nail polish might make a difference.\nShe mounted acrylic nails on sticks and polished them with two different color nail polishes. 
She soaked them together in a chlorine solution equivalent to a swimming pool's chlorination and then tapped them 100 times on a computer keyboard to simulate daily stress. The response is the % of nail chipped off as measured by scanning images of the nails and using an image processing program.", "url": "https://dasl.datadescription.com/download/data/3356", "filename": "Nail-polish", "name": "Nail polish", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "General" } ] } ] } diff --git a/data/datasets/JSEDataArchive.json b/data/datasets/JSEDataArchive.json index 03d1c9f74..b9a560102 100644 --- a/data/datasets/JSEDataArchive.json +++ b/data/datasets/JSEDataArchive.json @@ -1,757 +1,757 @@ { "name": "JSEDataArchive", "categories": [ { "name": "Medicine", "subcategories": [ { "name": "Common", "datasets": [ { "description": "Time of Birth, Sex, and Birth Weight of 44 Babies", "description_url": "http://jse.amstat.org/datasets/babyboom.txt", "url": "http://jse.amstat.org/datasets/babyboom.dat.txt", "filename": "babyboom", "name": "Time of Birth, Sex, and Birth Weight of 44 Babies", "separator": "TAB", "columns": ["Time of birth recorded on the 24-hour clock", "Sex of the child (1 = girl, 2 = boy)", "Birth weight in grams", "Number of minutes after midnight of each birth"] }, { "description": "This dataset contains 21 body dimension measurements as well as age, \nweight, height, and gender on 507 individuals. 
The 247 men and 260 \nwomen were primarily individuals in their twenties and thirties, with a \nscattering of older men and women, all exercising several hours a week.", "url": "http://jse.amstat.org/datasets/body.dat.txt", "filename": "Body", "name": "Exploring Relationships in Body Dimensions", "separator": "SPACE" }, { "description": "Each record contains the results of a laboratory analysis of calcium, \ninorganic phosphorous, and alkaline phosphatase. The variable cammol \nis measured as millimoles per liter. Phosmol is inorganic phosphorous \nin millimoles per liter. Alkphos is measuring alkaline phosphatase in \ninternational units per liter. The purpose of the study was to \ndetermine if significant gender differences exist in the mean values \nof calcium, inorganic phosphorus, and alkaline phosphatase in \nsubjects over age 65. A second purpose was to determine if analytical \nvariation between laboratories would affect the mean values of the study variables. \nCalcium.dat contains incorrect records that have transcription errors. Calciumgood.dat \ncontains the corrected values. ", "description_url": "http://jse.amstat.org/datasets/calcium.txt", "url": "http://jse.amstat.org/datasets/calcium.dat.txt", "filename": "Calcium", "name": " Calcium, inorganic phosphorus and alkaline phosphatase levels in elderly patients ", "separator": "SPACE", "columns": ["OBSNO - Patient Observation Number", "AGE - Years", "SEX - 1=Male, 2=Female", "ALKPHOS - Alkaline Phosphatase International Units/Liter", "Lab - 1=Metpath; 2=Deyor; 3=St. Elizabeth's; 4=CB Rouche; 5=YOH; 6=Horizon", "CAMMOL - Calcium mmol/L", "PHOSMMOL - Inorganic Phosphorus mmol/L", "AGEGROUP - Age group 1=65-69; 2=70-74; 3=75-79; 4=80-84; 5=85-89 Years"] }, { "description": "Each record contains the results of a laboratory analysis of calcium, \ninorganic phosphorous, and alkaline phosphatase. The variable cammol \nis measured as millimoles per liter. 
Phosmol is inorganic phosphorous \nin millimoles per liter. Alkphos is measuring alkaline phosphatase in \ninternational units per liter. The purpose of the study was to \ndetermine if significant gender differences exist in the mean values \nof calcium, inorganic phosphorus, and alkaline phosphatase in \nsubjects over age 65. A second purpose was to determine if analytical \nvariation between laboratories would affect the mean values of the study variables. \nCalcium.dat contains incorrect records that have transcription errors. Calciumgood.dat \ncontains the corrected values. ", "description_url": "http://jse.amstat.org/datasets/calcium.txt", "url": "http://jse.amstat.org/datasets/calciumgood.dat.txt", "filename": "Calciumgood", "name": " Calcium, inorganic phosphorus and alkaline phosphatase levels in elderly patients ", "separator": "SPACE", "columns": ["OBSNO - Patient Observation Number", "AGE - Years", "SEX - 1=Male, 2=Female", "ALKPHOS - Alkaline Phosphatase International Units/Liter", "Lab - 1=Metpath; 2=Deyor; 3=St. Elizabeth's; 4=CB Rouche; 5=YOH; 6=Horizon", "CAMMOL - Calcium mmol/L", "PHOSMMOL - Inorganic Phosphorus mmol/L", "AGEGROUP - Age group 1=65-69; 2=70-74; 3=75-79; 4=80-84; 5=85-89 Years"] }, { "description": "Percentage of body fat, age, weight, height, and ten body circumference\nmeasurements (e.g., abdomen) are recorded for 252 men. Body fat, a\nmeasure of health, is estimated through an underwater weighing\ntechnique. 
Fitting body fat to the other measurements using multiple\nregression provides a convenient way of estimating body fat for men\nusing only a scale and a measuring tape.", "description_url": "http://jse.amstat.org/datasets/fat.txt", "url": "http://jse.amstat.org/datasets/fat.dat.txt", "filename": "fat", "name": "Fitting Percentage of Body Fat to Simple Body Measurements", "separator": "SPACE", "columns": ["Case Number", "Percent body fat using Brozek's equation, 457/Density - 414.2", "Percent body fat using Siri's equation, 495/Density - 450", "Density (gm/cm^3)", "Age (yrs)", "Weight (lbs)", "Height (inches)", "Adiposity index = Weight/Height^2 (kg/m^2)", "Fat Free Weight = (1 - fraction of body fat) * Weight, using Brozek's formula (lbs)", "Neck circumference (cm)", "Chest circumference (cm)", "Abdomen circumference (cm) \"at the umbilicus and level with the iliac crest\"", "Hip circumference (cm)", "Thigh circumference (cm)", "Knee circumference (cm)", "Ankle circumference (cm)", "Extended biceps circumference (cm)", "Forearm circumference (cm)", "Wrist circumference (cm) \"distal to the styloid processes\""] }, { "description": "Sample of 654 youths, aged 3 to 19, in the area of East Boston\nduring middle to late 1970's. Interest concerns the relationship\nbetween smoking and FEV. 
Since the study is necessarily\nobservational, statistical adjustment via regression models\nclarifies the relationship.", "description_url": "http://jse.amstat.org/datasets/fev.txt", "url": "http://jse.amstat.org/datasets/fev.dat.txt", "filename": "fev", "name": "Forced Expiratory Volume (FEV) Data", "separator": "SPACE", "columns": ["age - discrete measure, positive integer (years)", "fev - continuous measure (liters)", "ht - continuous measure (inches)", "sex - discrete/nominal (Female coded 0, Male coded 1)", "smoke - discrete/nominal (Nonsmoker coded 0, Smoker coded 1)"] }, { "description": "The tab-delimited data set gives characteristics of young female patients between\nthe ages of 11 to 26 who came to clinics of Johns Hopkins Medical Institutions between\n2006 and 2008 to begin the three-shot regimen of vaccinations with the anti-human\npapillomavirus (HPV) medication Gardasil. ", "url": "http://jse.amstat.org/v19n1/gardasil.dat.txt", "filename": "gardasil", "name": "Retrospective Study (Potential Predictors for Completion or Non-Completion of the Three-Shot Anti-HPV Gardasil Vaccine Regimen)", "separator": "TAB", "use_first_row_for_vectorname": true }, { "description": "From a very young age, shoes for boys tend to be wider than shoes for \ngirls. Is this because boys have wider feet, or because it is assumed that \ngirls, even in elementary school, are willing to sacrifice comfort for fashion? \nTo assess the former, a statistician measures kids' feet. Methods for analysis include \nt-tests, ANCOVA, and least-squares model building. 
This data set is useful for \ndiscussion of covariates, confounding, and conclusions in the context of the problem.", "description_url": "http://jse.amstat.org/datasets/kidsfeet.txt", "url": "http://jse.amstat.org/datasets/kidsfeet.dat.txt", "filename": "kidsfeet", "name": "Foot measurements for fourth grade children", "separator": "SPACE", "columns": ["Birthdate: month and year (data were collected in October 1997)", "Length of longer foot (cm)", "Width of longer foot (cm), measured at widest part of foot", "Sex: boy or girl", "Foot measured (right or left)", "Right- or left-handedness"] }, { "description": "This article takes data from a paper in the _Journal of the American\nMedical Association_ that examined whether the true mean body\ntemperature is 98.6 degrees Fahrenheit. Because the dataset suggests\nthat the true mean is approximately 98.2, it helps students to grasp\nconcepts about true means, confidence intervals, and t-statistics.\nStudents can use a t-test to test for sex differences in body\ntemperature and regression to investigate the relationship between\ntemperature and heart rate.", "description_url": "http://jse.amstat.org/datasets/normtemp.txt", "url": "http://jse.amstat.org/datasets/normtemp.dat.txt", "filename": "normtemp", "name": "Normal Body Temperature, Gender, and Heart Rate ", "separator": "SPACE", "columns": ["Body temperature (degrees Fahrenheit)", "Gender (1 = male, 2 = female)", "Heart rate (beats per minute)"] } ] }, { "name": "Other", "datasets": [ { "description": "Drug interaction study of a new and a standard oral contraceptive \ntherapy.", - "description_url": "jse.amstat.org/datasets/ocdrug.txt", + "description_url": "http://jse.amstat.org/datasets/ocdrug.txt", "url": "http://jse.amstat.org/datasets/ocdrug.dat.txt", "filename": "ocdrug", "name": "Drug Interaction", "separator": "SPACE", "columns": ["Female Subject Number (1 to 22)", "Treatment Sequence (1 = Drug D, placebo; 2 = placebo, Drug D)", "Study Period (1, 2)", 
"Treatment (0 = placebo, 1 = Drug D)", "EE - AUC (pg*hr/ml)", "EE - Cmax (pg/ml)", "NET - AUC (pg*hr/ml)", "NET - Cmax (pg/ml)"] }, { "description": "Bacteria are cultured in medical laboratories to identify them so patients can be treated \ncorrectly. The tryptone dataset contains measurements of bacteria counts following the \nculturing of five strains of Staphylococcus aureus. There are many strains of \nStaphylococcus aureus; five were used by the experimenter. They are identified by numbers \nin the data because their names are too complicated to be useful as identifiers. The \ndataset also contains the time of incubation, temperature of incubation and concentration \nof tryptone, a nutrient. The protocols for culturing this bacteria, set the time at 24 \nhours, the temperature at 35 degrees and the tryptone concentration at 1.0%. The question \nis whether the conditions recommended in the protocols for the culturing of these strains \nare optimal. The task is to find the incubation time, temperature and tryptone concentration \nthat optimises the growth of this Bacterium.", "description_url": "http://jse.amstat.org/datasets/Tryptone.txt", "url": "http://jse.amstat.org/datasets/Tryptone.dat.txt", "filename": "Tryptone", "name": "The Tryptone Task", "separator": "SPACE", "use_first_row_for_vectorname": true }, { "description": "Examining Potential Predictors for Completion of the Gardasil Vaccine Sequence Based on Data Gathered at Clinics of Johns Hopkins Medical Institutions", "description_url": "http://jse.amstat.org/v19n1/gardasil.txt", "url": "http://jse.amstat.org/v19n1/gardasil.dat.txt", "filename": "gardasil", "name": "Gardasil Vaccine Data", "separator": "TAB", "use_first_row_for_vectorname": true }, { "description": "The readabilities of 30 pamphlets about cancer are compared to the reading comprehension levels of 63 patients with cancer. Both variables are measured in grade levels. 
The data are presented as frequencies of occurrence over grade levels for both the pamphlet readabilities and the reading levels of the patients.", "description_url": "http://jse.amstat.org/datasets/readability.txt", "url": "http://jse.amstat.org/datasets/readability.dat.txt", "filename": "readability", "name": "Readability of Educational Materials for Patients with Cancer", "separator": "SPACE", "columns": ["Grade level", "Frequency of occurrence for brochure readabilities", "Frequency of occurrence for patient reading levels"] } ] }, { "name": "Smoking", "datasets": [ { "description": "Measurements of weight and tar, nicotine, and carbon monoxide content\nare given for 25 brands of domestic cigarettes.", "description_url": "http://jse.amstat.org/datasets/cigarettes.txt", "url": "http://jse.amstat.org/datasets/cigarettes.dat.txt", "filename": "Cigarettes", "name": "Cigarette data for an introduction to multiple regression", "separator": "SPACE", "columns": ["Brand name", "Tar content (mg)", "Nicotine content (mg)", "Weight (g)", "Carbon monoxide content (mg)"] } ] } ] }, { "name": "Nature", "subcategories": [ { "name": "Animals", "datasets": [ { "description": "The dataset consists of a few variables that may influence the demand for Beef in the United States. It provides an example of the influence of inflation in monetary time series data as well as providing some interesting statistical features in building demand models in regression.", "description_url": "http://jse.amstat.org/v22n1/kopcso/BeefDemandDoc.txt", "url": "http://jse.amstat.org/v22n1/kopcso/BeefDemand.txt", "filename": "BeefDemand", "name": "Beef Demand", "separator": "TAB", "use_first_row_for_vectorname": true }, { "description": "159 fishes of 7 species are caught and measured. Altogether there are\n8 variables. 
All the fishes are caught from the same lake\n(Laengelmavesi) near Tampere in Finland.", "description_url": "http://jse.amstat.org/datasets/fishcatch.txt", "url": "http://jse.amstat.org/datasets/fishcatch.dat.txt", "filename": "fishcatch", "name": "fishcatch", "separator": "SPACE", "columns": ["Obs - Observation number ranges from 1 to 159", "Species - (Numeric)", "Weight - Weight of the fish (in grams)", "Length1 - Length from the nose to the beginning of the tail (in cm)", "Length2 - Length from the nose to the notch of the tail (in cm)", "Length3 - Length from the nose to the end of the tail (in cm)", "Height% - Maximal height as % of Length3", "Width% - Maximal width as % of Length3", "Sex - 1 = male 0 = female"] }, { "description": "A cost of increased reproduction in terms of reduced longevity has been\nshown for female fruitflies, but not for males. The flies used were an\noutbred stock. Sexual activity was manipulated by supplying individual\nmales with one or eight receptive virgin females per day. The\nlongevity of these males was compared with that of two control types.\nThe first control consisted of two sets of individual males kept with\none or eight newly inseminated females. Newly inseminated females will\nnot usually remate for at least two days, and thus served as a control\nfor any effect of competition with the male for food or space. The\nsecond control was a set of individual males kept with no females.\nThere were 25 males in each of the five groups, which were treated\nidentically in number of anaesthetizations (using CO2) and provision of\nfresh food medium.", "description_url": "http://jse.amstat.org/datasets/fruitfly.txt", "url": "http://jse.amstat.org/datasets/fruitfly.dat.txt", "filename": "fruitfly", "name": "Sexual activity and the lifespan of male fruitflies", "separator": "SPACE", "columns": ["ID - Serial No. 
(1-25) within each group of 25 (the order in which data points were abstracted)", "PARTNERS - Number of companions (0, 1 or 8)", "TYPE - Type of companion, 0: newly pregnant female, 1: virgin female, 9: not applicable (when PARTNERS=0)", "LONGEVITY - Lifespan, in days", "THORAX - Length of thorax, in mm (x.xx)", "SLEEP - Percentage of each day spent sleeping"] } ] } ] }, { "name": "Statistics", "subcategories": [ { "name": "Economics", "datasets": [ { "description": "Data are from the American Association of University Professors (AAUP) annual faculty salary survey of American colleges and universities. They include average salary and overall compensation, broken down by full, associate, and assistant professor ranks. The dataset is used for the 1995 Data Analysis Exposition, sponsored by the Statistical Graphics Section of the American Statistical Association. See the file colleges.txt for more information on the Exposition.", "description_url": "http://jse.amstat.org/datasets/aaup.txt", "url": "http://jse.amstat.org/datasets/aaup.dat.txt", "filename": "AAUP", "name": "AAUP Faculty Salary data", "separator": ",", "columns": ["FICE (Federal ID number)", "College name", "State (postal code)", "Type (I, IIA, or IIB)", "Average salary - full professors", "Average salary - associate professors", "Average salary - assistant professors", "Average salary - all ranks", "Average compensation - full professors", "Average compensation - associate professors", "Average compensation - assistant professors", "Average compensation - all ranks", "Number of full professors", "Number of associate professors", "Number of assistant professors", "Number of instructors", "Number of faculty - all ranks"] }, { "description": "The dataset bestbuy.day contains monthly data on computer usage \n(MIPS) and total number of stores from August 1996 to July 2000. \nAdditionally, information on the planned number of stores through \nDecember 2001 is available. 
These data can be used to compare \ntime-series forecasting with trend and seasonality components and \ncausal forecasting based on simple linear regression. The simple \nlinear regression model exhibits unequal error variances, suggesting \na transformation of Y.", "description_url": "http://jse.amstat.org/datasets/bestbuy.txt", "url": "http://jse.amstat.org/datasets/bestbuy.dat.txt", "filename": "BestBuy", "name": " BestBuy", "separator": "SPACE", "columns": ["Date dd-mm-yyyy, August 1996 - July 2000", "MIPS usage (MIPS are a measure of computing resources)", "Number of stores", "Date dd-mm-yyyy, July 2000 - December 2001", "Planned Number of Stores through December 2001"] }, { "description": "The dollar amount for a monthly (January 1991 through December 2000) \nhousehold electric bill is presented as a time series. In addition, \npotential explanatory variables are included. Twelve representative \nmonthly values are provided for the average temperature, for \nheating degree days, and for cooling degree days (not for each \nmonth for each year). Additional variables give the family size \neach month and indicate when a new electric meter and new heating \nand cooling equipment was installed. To convert the billing amount \nto estimated power consumption, a tiered rate function (supplied \nin the accompanying Instructor's Manual) and the costs of \nassociated riders (provided here) must be used. Consumption \nestimates resulting from this information are supplied.\t", "description_url": "http://jse.amstat.org/datasets/electricbill.txt", "url": "http://jse.amstat.org/datasets/electricbill.dat.txt", "filename": "electricbill", "name": " Electric Bill Data", "separator": "SPACE", "columns": ["Observation number", "Year", "Month", "Amount of bill (in dollars), includes 5% sales tax", "Average temperature (in degrees Fahrenheit)", "Heating Degree Days", "Cooling Degree Days", "Number of family members at home", "New meter? 
(indicator variable, 1 = yes)", "New heat pump 1? (indicator variable, 1= new)", "New heat pump 2? (indicator variable, 1= new)", "Total charge (per kwh) for all riders", "Calculated consumption (in kwh)"] }, { "description": "The data file contains information on 76 single-family homes in Eugene, Oregon during 2005. This dataset is suitable for a complete multiple linear regression analysis of home price data that covers many of the usual regression topics, including interaction and predictor transformations. Whereas realtors use experience and local knowledge to subjectively value a house based on its characteristics (size, amenities, location, etc.) and the prices of similar houses nearby, regression analysis can provide an alternative that more objectively models local house prices using these same data. SOURCES: The data were provided by Victoria Whitman, a realtor in Eugene, in 2005. The data were used in a case study in Pardoe (2006).", "description_url": "http://jse.amstat.org/datasets/homes76.txt", "url": "http://jse.amstat.org/datasets/homes76.dat.txt", "filename": "homes76", "name": " Modeling home prices using realtor data", "separator": "SPACE", "use_first_row_for_vectorname": true }, { "description": "For 97 countries in the world, data are given for birth rates, death\nrates, infant death rates, life expectancies for males and females, and\nGross National Product.", "description_url": "http://jse.amstat.org/datasets/poverty.txt", "url": "http://jse.amstat.org/datasets/poverty.dat.txt", "filename": "poverty", "name": "The Statistics of Poverty and Inequality ", "separator": "SPACE", "columns": ["Live birth rate per 1,000 of population", "Death rate per 1,000 of population", "Infant deaths per 1,000 of population under 1 year old", "Life expectancy at birth for males", "Life expectancy at birth for females", "Gross National Product per capita in U.S. 
dollars", "Country Group: 1 = Eastern Europe, 2 = South America and Mexico, 3 = Western Europe, North America, Japan, Australia, New Zealand, 4 = Middle East, 5 = Asia, 6 = Africa", "Country"] } ] }, { "name": "Sport", "datasets": [ { "description": "Each record contains the results of a test of a set of ball bearings. The quantities L10 and L50 are estimated percentiles of the fatigue failure distribution (obtained by fitting a Weibull distribution to the fatigue failure times, separately in each set). The objective is to analyse ln(L10) (and separately ln(L50)) by linear regression on the logarithms of P, Z and D, which are characteristics of the ball bearings. Differences between companies and between types of bearing can be tested.", "description_url": "http://jse.amstat.org/datasets/ballbearings.txt", "url": "http://jse.amstat.org/datasets/ballbearings.dat.txt", "filename": "ballbearings", "name": "Ball Bearing Reliability Data", "separator": "SPACE", "columns": ["Company", "Test number", "Year of test", "No. of bearings", "Load (P)", "No. of balls (Z)", "Diameter (D)", "L10", "L50", "Weibull slope", "Bearing type"] }, { "description": "We consider as our population of interest the set of Major League Baseball players who played at least one game in both the 1991 and 1992 seasons, excluding pitchers. This dataset contains the 1992 salaries for that population, along with performance measures for each player from 1991. 
Four categorical variables indicate how free each player was to move to other teams.", "description_url": "http://jse.amstat.org/datasets/baseball.txt", "url": "http://jse.amstat.org/datasets/baseball.dat.txt", "filename": "baseball", "name": "baseball", "separator": "SPACE", "columns": ["Salary (in thousands of dollars)", "Batting average", "On-base percentage (OBP)", "Number of runs", "Number of hits", "Number of doubles", "Number of triples", "Number of home runs", "Number of runs batted in (RBI)", "Number of walks", "Number of strike-outs", "Number of stolen bases", "Number of errors", "Indicator of \"free agency eligibility\"", "Indicator of \"free agent in 1991/2\"", "Indicator of \"arbitration eligibility\"", "Indicator of \"arbitration in 1991/2\"", "Player's name (in quotation marks)"] }, { "description": "This data set contains every NCAA Basketball Tournament game ever played. The tournament has been held every year since 1939.", "description_url": "http://jse.amstat.org/datasets/basketball.txt", "url": "http://jse.amstat.org/datasets/basketball.dat.txt", "filename": "Basketball", "name": "NCAA Basketball Tournament Data", "separator": "SPACE", "columns": ["Year", "Winning School Columns", "Winning Score", "Losing School", "Losing Score"] }, { "description": "Data are provided for Barry Bonds' plate appearances in the 2001\nbaseball season. 
Variables include characteristics of the innings\nbefore the first pitch to Bonds (e.g., the number of outs, the number\nof runners on each base, the score, the opposing pitcher's earned run\naverage) and after the first pitch to Bonds (e.g., the outcome of the\nappearance, how many runs scored in the inning after Bonds hits).", "description_url": "http://jse.amstat.org/datasets/bonds2001.txt", "url": "http://jse.amstat.org/datasets/bonds2001.dat.txt", "filename": "Bonds", "name": "Barry Bonds' 2001 Plate Appearances", "separator": "SPACE", "columns": ["Plate appearance number", "Number of the game in the season", "Number of the plate appearance within the game", "Equals one for games in San Francisco and equals zero otherwise", "1 - there is a runner on first base when Bonds appears, 0 - otherwise", "1- there is a runner on second base when Bonds appears, 0 - otherwise", "1 - there is a runner on third base when Bonds appears 0 - otherwise", "Number of outs in inning when Bonds appears", "Inning of plate appearance", "Number of runs scored by Giants in the inning after first pitch to Bonds", "1 - Bonds walks, 0 - otherwise", "1 - Bonds walks intentionally, 0 - otherwise", "0 - Bonds does not reach base, 1 - Bonds reaches first base on a single or error, 2 - Bonds reaches second base on a double or error, 3 - Bonds reaches third base on a triple or error, 4 - Bonds hits a home run, 5 - Bonds walks or is hit by a pitch", "Opposing pitchers' career earned run average as of the end of the 2000 season", "Giants score just before first pitch to Bonds", "Opposing team's score just before first pitch to Bonds"] }, { "description": "Data are provided for Barry Bonds' plate appearances in the 2002\nbaseball season. 
Variables include characteristics of the innings\nbefore the first pitch to Bonds (e.g., the number of outs, the number\nof runners on each base, the score, the opposing pitcher's earned run\naverage) and after the first pitch to Bonds (e.g., the outcome of the\nappearance, how many runs scored in the inning after Bonds hits).", "description_url": "http://jse.amstat.org/datasets/bonds2002.txt", "url": "http://jse.amstat.org/datasets/bonds2002.dat.txt", "filename": "Bonds", "name": "Barry Bonds' 2002 Plate Appearances", "separator": "SPACE", "columns": ["1 - there is a runner on first base when Bonds appears, 0 - otherwise", "1 - there is a runner on second base when Bonds appears and equals zero otherwise", "1 - there is a runner on third base when Bonds appears, 0 - otherwise", "Number of outs in inning when Bonds appears", "0 - Bonds does not reach base, 1 - Bonds reaches first base on a single or error, 2 - Bonds reaches second base on a double or error, 3 - Bonds reaches third base on a triple or error, 4 - Bonds hits a home run, 5 - Bonds walks or is hit by a pitch", "Number of runs scored by Giants in the inning after first pitch to Bonds", "Opposing pitchers' career earned run average as of the end of the 2001 season", "Initials of player batting immediately after Bonds: JK = Jeff Kent, BS = Benito Santiago, RS = Reggie Sanders, RA = Rich Aurelia, YT = Yorvit Torrealba, DB = David Bell, SD = Shawn Dunston, RM = Ramon Martinez, NA = missing", "Player batting immediately after Bonds (previous column numerically coded): 0 = missing, 1 = Jeff Kent, 2 = Benito Santiago, 3 = Reggie Sanders, 4 = Rich Aurelia, 5 = Yorvit Torrealba, 6 = David Bell, 7 = Shawn Dunston, 8 = Ramon Martinez"] }, { "description": "The dataset contains the scores, opponents, and sites of the 18 Big Ten\nmen's basketball games that involved the University of Iowa in 1997.", "description_url": "http://jse.amstat.org/datasets/hawks.txt", "url": "http://jse.amstat.org/datasets/hawks.dat.txt", 
"filename": "hawks", "name": " 1997 University of Iowa Big Ten Basketball Data", "separator": "SPACE" }, { "description": "The dataset consists of game-by-game information for the 1998 season\nfor Mark McGwire and the St. Louis Cardinals, and Sammy Sosa and the\nChicago Cubs. The dataset includes information on the home run hitting\nof these two players, as well as game results for the teams.", "description_url": "http://jse.amstat.org/datasets/homerun.txt", "url": "http://jse.amstat.org/datasets/homerun.dat.txt", "filename": "homerun", "name": "The 1998 Home Run Race Between Mark McGwire and Sammy Sosa", "separator": "SPACE", "columns": ["Game number", "Month of game (St. Louis)", "Date of game (St. Louis)", "Calendar date of game [days since beginning of season] (St. Louis)", "Game location (St. Louis) (0 = Away, 1 = Home)", "Runs scored (St. Louis)", "Runs scored by opposition (St. Louis)", "Game result (St. Louis) (-1 = Tie, 0 = Loss, 1 = Win)", "Number of home runs hit by McGwire", "Runs driven in by McGwire's home runs", "McGwire game status (0 = Played, 1 = Did not play)", "Month of game (Chicago)", "Date of game (Chicago)", "Calendar date of game [days since beginning of season] (Chicago)", "Game location (Chicago) (0 = Away, 1 = Home)", "Runs scored (Chicago)", "Runs scored by opposition (Chicago)", "Game result (Chicago) (0 = Loss, 1 = Win)", "Number of home runs hit by Sosa", "Runs driven in by Sosa's home runs", "Sosa game status (0 = Played, 1 = Did not play)"] }, { "description": "Data are from The Baseball Encyclopedia (1993) and Total Baseball (2001). \nThey include the location, league affiliation (National or American), \ndivision affiliation (East, Central, or West), season of play, home game \nattendance, runs scored, runs allowed, wins, losses, and number of games \nbehind the division leader for each major league franchise for the 1969 \nthrough 2000 seasons. 
Other data (including opening dates for new stadia, \nand dates of work stoppages) were collected from Ballparks by Munsey and \nSuppes (2001) and InfoPlease (2001).", "description_url": "http://jse.amstat.org/datasets/MLBattend.txt", "url": "http://jse.amstat.org/datasets/MLBattend.dat.txt", "filename": "MLBattend", "name": "1969-2000 Major League Baseball Attendance data", "separator": "SPACE", "columns": ["Major League Baseball franchise", "League affiliation (National or American)", "Division affiliation (East, Central, or West)", "Season", "Home game attendance", "Runs scored", "Runs allowed", "Wins", "Losses", "Number of games behind the division winner"] }, { "description": "Driver results for all NASCAR races between 1975 and 2003, inclusive. The dataset constitutes all \nparticipants in each of 898 races, and includes their start/finish positions, prize winnings, car \nmake and laps completed.", "description_url": "http://jse.amstat.org/datasets/nascard.txt", "url": "http://jse.amstat.org/datasets/nascard.dat.txt", "filename": "nascard", "name": "NASCAR Driver Results", "separator": "SPACE", "columns": ["Series Race", "Year", "Race/Year", "Finishing Position", "Starting Position", "Laps Completed", "Winnings", "Number of cars in race", "Car Make", "Driver"] }, { "description": "Race results for all NASCAR Winston Cup races between 1975 and 2003, inclusive. 
The dataset \nincludes the numbers of cars, total prize winnings, monthly consumer price index for the month \nof the race, track length, laps completed by the winner, spatial co-ordinates and name of track.", "description_url": "http://jse.amstat.org/datasets/nascarr.txt", "url": "http://jse.amstat.org/datasets/nascarr.dat.txt", "filename": "nascarr", "name": "NASCAR Race Results", "separator": "SPACE", "columns": ["Series Race", "Year", "Race/Year", "Number of cars in race", "Total race payout", "Monthly CPI-U", "Spearman’s", "Kendall’s", "Track Length", "Laps Completed by winner", "Road Indicator 1=Road Course, 0=Loop", "Caution Flags", "Lead Changes", "Winning Time", "Track Latitude", "Track Longitude", "Track Code", "Track Name"] }, { "description": "This data set contains performance statistics for National \nFootball League (NFL) teams for their 2000 regular season.", "description_url": "http://jse.amstat.org/datasets/nfl2000.txt", "url": "http://jse.amstat.org/datasets/nfl2000.dat.txt", "filename": "nfl2000", "name": "NFL Y2K PCA", "separator": "SPACE", "columns": ["team initials", "name and location of the team", "wins", "losses", "drives begun in opponents' territory", "drives begun within 20 yards of the goal", "opponents drives begun in team's territory", "opponents drives begun within 20 yards of goal", "punts blocked by team", "punts team had blocked", "touchdowns scored by team", "touchdowns scored against team", "total yardage gained by offense", "total yardage allowed by defense", "time of possession by offense (in minutes)", "time of possession by opponents' offense", "field goals made", "field goals allowed to opponents", "field goals attempted", "field goals attempted by opponents", "punts made by opponents", "average length of punts made by opponents", "average change in field position during opponents' punts", "opponents' punts taken for touchbacks", "opponents' punts that resulted in the team's offense beginning within 20 yards of their 
own (defensive) goal line", "longest opponents' punt", "punts made by team", "average length of punts made by team", "average change in field position during team's punts", "team's punts taken for touchbacks", "team's punts that resulted in the opponents' offense beginning within 20 yards of their own (defensive) goal line", "homepuntlong longest team punt", "first downs obtained by offense", "first downs allowed by defense", "sacks achieved by team's defense", "sacks allowed by team's offense", "kickoffs made by team", "kickoffs received by team", "yards gained during kickoff returns", "yards allowed to opposition during kickoff returns", "average yards gained during kickoff returns", "average yards allowed during kickoff returns", "longest kickoff return made by team", "longest kickoff return allowed by team", "kickoffs returned for a touchdown by team", "kickoffs returned for touchdown by opposition", "punts returned by team", "punts returned by opposition", "punts \"fair caught\" by team", "punts \"fair caught\" by opposition", "return yardage on punts by team", "return yardage on punts by opposition", "average length of punt returns by team", "average length of punt returns by opposition", "punts returned by team for a touchdown", "punts returned by opponents for a touchdown", "interceptions made by team's defense", "interceptions made against team's offense", "fumbles recovered by team's defense", "fumbles recovered by opposing defenses", "games played by team", "average number of yards gained per minute of possession by opponents", "average number of yards gained per minute of possession by team", "average number of punts per minute of possession by opponents", "average number of punts per minute of possession by team", "average number of touchdowns per minute of possession by opponents", "average number of touchdowns per minute of possession by team", "winning percentage", "turnovers obtained by team, per minute of possession by opponents", "turnovers 
allowed by team, per minute of possession", "first downs obtained by team, per minute of possession", "first downs allowed by team's defense, per minute of possession by opposition", "points scored by team", "points scored against team", "conference to which the team belongs (AFC or NFC)"] }, { "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", "url": "http://jse.amstat.org/datasets/nfl93.dat.txt", "filename": "nfl93", "name": "NFL Scores and Pointspreads", "separator": "SPACE" }, { "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", "description_url": "http://jse.amstat.org/datasets/nfl.txt", "url": "http://jse.amstat.org/datasets/nfl94.dat.txt", "filename": "nfl94", "name": "NFL Scores and Pointspreads", "separator": "SPACE", "columns": ["Date of game", "Visiting team name", "Visiting team score", "Home team name", "Home team score", "Indicator for overtime games (o or -)", "Pointspread", "Over/Under"] }, { "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. 
In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", "description_url": "http://jse.amstat.org/datasets/nfl.txt", "url": "http://jse.amstat.org/datasets/nfl95.dat.txt", "filename": "nfl95", "name": "NFL Scores and Pointspreads", "separator": "SPACE", "columns": ["Date of game", "Visiting team name", "Visiting team score", "Home team name", "Home team score", "Indicator for overtime games (o or -)", "Pointspread", "Over/Under"] }, { "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", "description_url": "http://jse.amstat.org/datasets/nfl.txt", "url": "http://jse.amstat.org/datasets/nfl96.dat.txt", "filename": "nfl96", "name": " NFL Scores and Pointspreads", "separator": "SPACE", "columns": ["Date of game", "Visiting team name", "Visiting team score", "Home team name", "Home team score", "Indicator for overtime games (o or -)", "Pointspread", "Over/Under"] }, { "description": "The dataset contains scores for all regular season National Football\nLeague games from the 1998, 1999 and 2000 seasons. In addition to \nthe points scored by the home and visiting teams in each game, the\ndataset contains a pointspread that handicaps each game.", "description_url": "http://jse.amstat.org/datasets/nfl98-00.txt", "url": "http://jse.amstat.org/datasets/nfl98-00.dat.txt", "filename": "nfl98-00", "name": " NFL Scores for 1998-2000", "separator": "SPACE", "columns": ["Year (1998, 1999, or 2000)", "Week of the season (1 to 17)", "Home team name", "Home team score", "Visiting team name", "Visiting team score", "Pointspread"] }, { "description": "The data set provides the weights (in lbs)\nof the 26 men on the 1996 US Olympic Rowing Team in Atlanta. The\ndata includes the names of the participants and which event they\nrowed in. 
The US team participated in 7 of the 8 possible events.\nThis data set is useful for discussing outliers,\nexplanations for outliers, and comparing the robustness of the\nmean and the median.", "description_url": "http://jse.amstat.org/datasets/rowing.txt", "url": "http://jse.amstat.org/datasets/rowing.dat.txt", "filename": "rowing", "name": " Weights of 1996 US Olympic Rowing Team", "separator": "SPACE", "columns": ["Name: The rowers last name", "Event: The actual event the team member participated in", "Weight: Weight of individual team member in lbs"] }, { "description": "Data consist of 500-yard freestyle swim times for male and female swimmers age 50-94 in a biennial national competition. Variables include year, gender, age, age group, swim time, seed time (qualifying time from state competition), and split times (in each 50-yard segment).", "url": "http://jse.amstat.org/v22n1/doane/SeniorSwimTimes-DataSet.txt", "filename": "SeniorSwimTimes", "name": "SeniorSwimTimes", "separator": "TAB", "use_first_row_for_vectorname": true }, { "description": "Data are provided for the 56 Tour De France bicycle races since World\nWar II. 
The year and dates of the event, the total number of stages,\nthe total distance, the winning total time and average speed, the name\nand country of the winner, the birth date of the winner, and the\nwinner's age at the time of victory are the variables in the dataset.", "description_url": "http://jse.amstat.org/datasets/tdf.txt", "url": "http://jse.amstat.org/datasets/tdf.dat.txt", "filename": "tdf", "name": "Tour De France Winners (Can Lance Win Six?)", "separator": "SPACE", "columns": ["Year", "Start-Town", "Start-Date", "End-Date", "Stages", "Distance in kilometers", "Speed: The average speed of the winner in kph", "Time: The total riding time of the winner in hours", "Winner", "Country", "Birth-Date", "Age-Year", "Age-Tenth"] } ] }, { "name": "Other", "datasets": [ { "description": "Full population of data (all software projects completed by the AT&T data center from 1986 through 1991).", "description_url": "http://jse.amstat.org/datasets/aptness.txt", "url": "http://jse.amstat.org/datasets/aptness.dat.txt", "filename": "aptness", "name": "Evaluating Aptness of a Regression Model", "columns": ["Function Point Count", "Work Hours", "Operating System: (0) Unix, (1) MVS", "Database Management System: (1) IDMS, (2) IMS, (3) INFORMIX, (4) INGRESS, (5) Other", "Language: (1) COBOL, (2) PLI, (3) C, (4) Other"] }, { "description": "The dataset contains information for the construction of a three-way table that illustrates Simpson's Paradox. Categorical variables observed for babies born in a metropolitan area of South Africa include race, whether or not the mother had medical aid (which is similar to health insurance), and whether or not the child was included in a follow-up study. The data are provided in two formats. The file birthtotena.dat contains the category labels and cell frequencies for the three-way table. 
The file birthtotenb.dat lists each case on a separate line with three variables that indicate whether or not the mother had medical aid, whether or not the mother was traced for the five-year interview, and race.", "description_url": "http://jse.amstat.org/datasets/birthtoten.txt", "url": "http://jse.amstat.org/datasets/birthtotena.dat.txt", "filename": "birthtotena", "name": "Simpson's Paradox: An Example From a Longitudinal Study in South Africa", "columns": ["Aid/NoAid", "Traced/NotTraced", "White/Black"] }, { "description": "The dataset contains information for the construction of a three-way table that illustrates Simpson's Paradox. Categorical variables observed for babies born in a metropolitan area of South Africa include race, whether or not the mother had medical aid (which is similar to health insurance), and whether or not the child was included in a follow-up study. The data are provided in two formats. The file birthtotena.dat contains the category labels and cell frequencies for the three-way table. The file birthtotenb.dat lists each case on a separate line with three variables that indicate whether or not the mother had medical aid, whether or not the mother was traced for the five-year interview, and race.", "description_url": "http://jse.amstat.org/datasets/birthtoten.txt", "url": "http://jse.amstat.org/datasets/birthtotenb.dat.txt", "filename": "birthtotenb", "name": "Simpson's Paradox: An Example From a Longitudinal Study in South Africa", "columns": ["Medical Aid? (0 = No, 1 = Yes)", "Traced? (0 = No, 1 = Five-Year Group)", "Race (1 = White, 2 = Black)"] }, { "description": "This dataset contains the prices of ladies' diamond rings and the carat size of their diamond stones. 
The rings are made with gold of 20 carats purity and are each mounted with a single diamond stone.", "description_url": "http://jse.amstat.org/datasets/diamond.txt", "url": "http://jse.amstat.org/datasets/diamond.dat.txt", "filename": "diamond", "name": "Diamond Ring Pricing Using Linear Regression", "columns": ["Size of diamond in carats (1 carat = .2 gram)", "Price of ring in Singapore dollars"] }, { "description": "In many statistical models the normal distribution of the response is an essential assumption.\nThis paper uses a dataset of 2000 euro coins with information (up to the milligram) about\nthe weight of each coin. As the physical coin production process is subject to a multitude\nof (very small) variability sources, it seems reasonable to expect that the empirical\ndistribution of the weight of euro coins does agree with the normal distribution. Goodness\nof fit tests however show that this is not the case. Moreover, some outliers complicate\nthe analysis. Mixtures of normal distributions and skew normal distributions are fitted\nto the data, revealing that the normality assumption might not hold for those weights.", "description_url": "http://jse.amstat.org/datasets/euroweight.txt", "url": "http://jse.amstat.org/datasets/euroweight.dat.txt", "filename": "euroweight", "name": "The Weight of Euro Coins", "separator": "TAB", "columns": ["ID", "weight in grams", "batch"] }, { "description": "Title, year of release, length in minutes, number of cast members listed, rating, and number of lines \nof description are recorded for a simple random sample of 100 movies. One can use the sample to obtain base-line information \non the movie guide from which the data were collected. The dataset also illustrates two paradoxes for associations between \nthree variables: non-transitivity of positive correlation and Simpson's paradox. 
SOURCE: The data were taken as a simple \nrandom sample of the approximately 19,000 movies (not including made-for-TV movies) in Leonard Maltin's Movie and Video \nGuide, 1996. ", "description_url": "http://jse.amstat.org/datasets/films.txt", "url": "http://jse.amstat.org/datasets/films.dat.txt", "filename": "films", "name": "films dataset", "separator": "SPACE", "use_first_row_for_vectorname": true }, { "description": "This dataset contains descriptive data of contestants on the game show \"Friend or Foe?\". Information on the contestant's \nrace, sex, age, prize money, and playing strategy are included.", "description_url": "http://jse.amstat.org/datasets/friend_or_foe.txt", "url": "http://jse.amstat.org/datasets/friend_or_foe.dat.txt", "filename": "friend_or_foe", "name": "Data from the Television Game Show \"Friend or Foe?\"", "separator": "TAB", "use_first_row_for_vectorname": true }, { "description": "The dataset contains hat size as well as circumference, length of major axis \nand length of minor axis of the inner hat band for 26 hats. The manufacturer \nand the country of manufacture are also included.", "description_url": "http://jse.amstat.org/datasets/hats.txt", "url": "http://jse.amstat.org/datasets/hats.dat.txt", "filename": "hats", "name": " Hat measurements, including hat size", "separator": "SPACE", "columns": ["Hat size", "Circumference (inches)", "Length of major axis (inches)", "Length of minor axis (inches)", "Where made? Italy = 1, U.S.A. = 2", "Manufacturer: Beaver = 1, Borsalino = 2, Dobbs = 3, Stetson = 4"] }, { "description": "The dataset consists of samples of size six taken without replacement\nfrom the integers {1, 2, 3, ..., 42}. There are actually three\ndatasets from three different sources, and in each case the six-tuples\nare (in theory) random selections or samples. 
The observations in each\nsample are given in the order in which they were obtained or selected.", "description_url": "http://jse.amstat.org/datasets/lotto.txt", "url": "http://jse.amstat.org/datasets/lotto.dat.txt", "filename": "lotto", "name": "Lotto 6/42 Selections from Individuals, Irish National Lottery, and S-Plus Simulation", "separator": "TAB", "columns": ["Code for source of sample (1, 2, or 3)", "First selection in sample", "Second selection in sample", "Third selection in sample", "Fourth selection in sample", "Fifth selection in sample", "Sixth selection in sample"] }, { "description": "This file contains daily per theater box office receipts for 49 \nmovies. This data is to accompany the article entitled Movie Data.", "description_url": "http://jse.amstat.org/datasets/moviedaily.txt", "url": "http://jse.amstat.org/datasets/moviedaily.dat.txt", "filename": "moviedaily", "name": "Data Documentation Template for Daily Movie Box Office Receipts", "separator": "TAB", "use_first_row_for_vectorname": true }, { "description": "This file contains total US gross box office receipts for 49 movies. This data is to accompany the article entitled Movie Data.", "description_url": "http://jse.amstat.org/datasets/movietotal.txt", "url": "http://jse.amstat.org/datasets/movietotal.dat.txt", "filename": "movietotal", "name": "Data Documentation Template for Total Movie Box Office Receipts", "separator": "TAB", "use_first_row_for_vectorname": true }, { "description": "This file contains weekend per theater box office receipts for 49 movies. 
This data is to accompany the article entitled Movie Data.", "description_url": "http://jse.amstat.org/datasets/movieweekend.txt", "url": "http://jse.amstat.org/datasets/movieweekend.dat.txt", "filename": "movieweekend", "name": "Data Documentation Template for Weekend Movie Box Office Receipts", "separator": "TAB", "use_first_row_for_vectorname": true }, { "description": "Every year actors and actresses are chosen to receive the Oscars awards for best actor and for best actress. This dataset \ncontains information about each of the winners for each of the 77 annual Oscar awards.\n\nAlthough there have been only 77 Oscars, there are 78 male winners and 78 female winners because ties happened on two \noccasions (1933 for the best actor and 1969 for the best actress).", "description_url": "http://jse.amstat.org/datasets/oscars.txt", "url": "http://jse.amstat.org/datasets/oscars.dat.txt", "filename": "oscars", "name": "Oscars: Best Actors and Actresses", "separator": "SPACE", "columns": ["Gender (m=male f=female)", "Oscar Year Number (1-77)", "Year the Oscar Took Place", "Winner’s first and last name", "Name of the Movie in which the winner acted", "Age of winner (at the beginning of the winning year)", "Birth place (State if born in USA, else Country)", "Month in which the winner was born", "Day of month on which winner was born", "Year the winner was born"] }, { "description": "This dataset contains information collected from rolling the pair of\npigs (found in the game \"Pass the Pigs\") 6000 times. 
A description of\nthe rules, scoring configurations, and data collection method are\nincluded in the accompanying paper.", "description_url": "http://jse.amstat.org/datasets/pig.txt", "url": "http://jse.amstat.org/datasets/pig.dat.txt", "filename": "pig", "name": "Data from the game \"Pass the Pigs\"", "separator": "SPACE", "use_first_row_for_vectorname": true }, { "description": "In a residential home, energy consumption is closely related to the\noutdoor temperature and size of the house. In a home of a given size,\ntemperature fluctuations and energy consumption vary fairly predictably\nover time. When homeowners add a room, other things being equal,\nutility usage should increase. This dataset permits students to\nestimate the energy demand and make forecasts for future months, as\nwell as explore other relationships.\n\nThe dataset contains natural gas and electricity usage data for a\ngas-heated single-family residence in the Boston area from September\n1990 through May 1997, accompanied by monthly climatological data. 
\nThe dataset is useful for illustrating the concepts and techniques of\ncentral tendency, dispersion, elementary time series analysis,\ncorrelation, simple and multiple regression, and variable\ntransformations.", "description_url": "http://jse.amstat.org/datasets/utility.txt", "url": "http://jse.amstat.org/datasets/utility.dat.txt", "filename": "utility", "name": "What Does It Take to Heat a New Room?", "separator": "SPACE", "columns": ["Observation month", "Number of days in the month", "Mean monthly temperature in Boston, in degrees Fahrenheit", "Mean natural gas usage per day for the month, in therms", "Total therms used for the month", "Days in the gas company billing cycle for the month", "Total kilowatt hours consumed in the month", "Mean kilowatt hours per day for the month", "Days in the electric company billing cycle for the month", "Dummy variable for method of determining kwh for the month (0 = actual month-end meter reading, 1 = estimated reading)", "Total heating degree days for the month", "Total cooling degree days for the month", "Dummy variable for the new room (0 = pre-addition, 1 = post-addition)"] }, { "description": "For each person on board the fatal maiden voyage of the ocean liner Titanic, this dataset records sex, age [adult/child], economic status [first/second/third class, or crew] and whether or not that person survived.", "description_url": "http://jse.amstat.org/datasets/titanic.txt", "url": "http://jse.amstat.org/datasets/titanic.dat.txt", "filename": "titanic", "name": "Population at Risk and Death Rates for an Unusual Episode", "separator": "SPACE", "columns": ["Class (0 = crew, 1 = first, 2 = second, 3 = third)", "Age (1 = adult, 0 = child)", "Sex (1 = male, 0 = female)", "Survived (1 = yes, 0 = no)"] } ] }, { "name": "Travel", "datasets": [ { "description": "This dataset consists of all 135 large and medium sized air hubs in the United States as defined by the Federal Aviation Administration.", "description_url": 
"http://jse.amstat.org/datasets/airport.txt", "url": "http://jse.amstat.org/datasets/airport.dat.txt", "filename": "airport", "name": " US Airport Statistics", "separator": "SPACE", "columns": ["Airport", "City", "Scheduled departures", "Performed departures", "Enplaned passengers", "Enplaned revenue tons of freight", "Enplaned revenue tons of mail"] }, { "description": "The data is a set of 50000 (1.3 MB ) observations containing roughly 2 minutes of traffic from the one hour, larger\ndec-pkt-1.tcp file used in the paper. The larger file can be accessed from the author's web page or from its source. With \nonly 50000 observations, the data set ", "description_url": "http://jse.amstat.org/datasets/packetdata.txt", "url": "http://jse.amstat.org/datasets/packetdata.dat.txt", "filename": "packetdata", "name": "packetdata", "separator": "SPACE", "use_first_row_for_vectorname": true } ] }, { "name": "Population", "datasets": [ { "description": "For each of the forty largest countries in the world (according to 1990\npopulation figures), data are given for the country's life expectancy\nat birth, number of people per television set, and number of people per\nphysician.", "description_url": "http://jse.amstat.org/datasets/televisions.txt", "url": "http://jse.amstat.org/datasets/televisions.dat.txt", "filename": "televisions", "name": "Televisions, Physicians, and Life Expectancy", "separator": "SPACE", "columns": ["Country", "Life expectancy", "People per television", "People per physician", "Female life expectancy", "Male life expectancy"] }, { "description": "This dataset contains 21 body dimension measurements as well as age, weight, height, and gender on 507 individuals. 
The 247 men and 260 women were primarily individuals in their twenties and thirties, with a scattering of older men and women, all exercising several hours a week.", "description_url": "http://jse.amstat.org/datasets/body.txt", "url": "http://jse.amstat.org/datasets/body.dat.txt", "filename": "body", "name": " Exploring Relationships in Body Dimensions", "separator": "SPACE", "columns": ["Biacromial diameter", "Biiliac diameter, or \"pelvic breadth\"", "Bitrochanteric diameter", "Chest depth between spine and sternum at nipple level, mid-expiration", "Chest diameter at nipple level, mid-expiration", "Elbow diameter, sum of two elbows", "Wrist diameter, sum of two wrists", "Knee diameter, sum of two knees", "Ankle diameter, sum of two ankles", "Shoulder girth over deltoid muscles", "Chest girth, nipple line in males and just above breast tissue in females, mid-expiration", "Waist girth, narrowest part of torso below the rib cage, average of contracted and relaxed position", "Navel (or \"Abdominal\") girth at umbilicus and iliac crest, iliac crest as a landmark", "Hip girth at level of bitrochanteric diameter", "Thigh girth below gluteal fold, average of right and left girths", "Bicep girth, flexed, average of right and left girths", "Forearm girth, extended, palm up, average of right and left girths", "Knee girth over patella, slightly flexed position, average of right and left girths", "Calf maximum girth, average of right and left girths", "Ankle minimum girth, average of right and left girths", "Wrist minimum girth, average of right and left girths", "Age (years)", "Weight (kg)", "Height (cm)", "Gender (1 - male, 0 - female)"] } ] }, { "name": "Technology", "datasets": [ { "description": "Specifications are given for 428 new vehicles for the 2004 year. 
The variables recorded include price, measurements relating to the size of the vehicle, and fuel efficiency.", "description_url": "http://jse.amstat.org/datasets/04cars.txt", "url": "http://jse.amstat.org/datasets/04cars.dat.txt", "filename": "04cars", "name": "2004 New Car and Truck Data", "separator": "SPACE", "columns": ["Vehicle Name", "Sports Car? (1=yes, 0=no)", "Sport Utility Vehicle? (1=yes, 0=no)", "Wagon? (1=yes, 0=no)", "Minivan?(1=yes, 0=no)", "Pickup? (1=yes, 0=no)", "All-Wheel Drive? (1=yes, 0=no)", "Rear-Wheel Drive? (1=yes, 0=no)", "Suggested Retail Price (U.S. Dollars)", "Dealer Cost (or 'invoice price') (U.S. Dollars)", "Engine Size (liters)", "Number of Cylinders (=-1 if rotary engine)", "Horsepower", "City Miles Per Gallon", "Highway Miles Per Gallon", "Weight (Pounds)", "Wheel Base (inches)", "Length (inches)", "Width (inches)"] }, { "description": "The data set contains the results of a calibration experiment designed to estimate volume of oysters and to compare two computer vision systems (2-D vs. 3-D) for classification of oysters based on their image size in number of pixels. ", "description_url": "http://jse.amstat.org/datasets/30oysters.dat.txt", "url": "http://jse.amstat.org/datasets/30oysters.dat.txt", "filename": "30oysters", "name": "Oyster Volume Estimation Data", "separator": "SPACE", "columns": ["Oyster ID", "Oyster weight (g)", "Oyster volume (cc)", "Oyster size information from the 3-D imaging system (in volume pixels)", "Oyster size information from the 2-D imaging system (in pixels)"] }, { "description": "The data set gives a random sample of the length of visits of users entering the msnbc.com web site during September 28, 1999.\nThe length of the visit is an estimate of the total number of clicks or pages seen by each user and is based on web server \nlogs, thus it counts only pages recorded by the server. Pages cached in the user's browser or in a cache proxy server are \nunknown. 
The data set used in the paper is much larger than the one made available here but that larger data set is also \navailable in a page cited in the references.", "description_url": "http://jse.amstat.org/datasets/msnbclength.txt", "url": "http://jse.amstat.org/datasets/msnbclength.dat.txt", "filename": "msnbclength", "name": "Internet Data Analysis for Undergrad Curriculum", "separator": "," }, { "description": "The video lottery terminal dataset contains observations on the three\nwindows of an electronic slot machine for 345 plays together with the\nprize paid out for each play. The prize payout distribution is so\nbadly skewed that confidence intervals for expected payout based on the\ncentral limit theorem are not accurate. The dataset can be used at the\ngraduate or upper undergraduate level to illustrate parametric\nbootstrapping. The dataset can also be used in a graduate course to\nillustrate tests of independence for two and three-way contingency\ntables involving random zeroes, or these tables may be collapsed and\nused as examples in an introductory course.", "description_url": "http://jse.amstat.org/datasets/vlt.txt", "url": "http://jse.amstat.org/datasets/vlt.dat.txt", "filename": "vlt", "name": "Video Lottery Terminal Data", "separator": "SPACE" } ] }, { "name": "Politics", "datasets": [ { "description": "For each U.S. Senator, his or her votes on whether to remove President\nClinton on each of the two articles of impeachment (plus a summary\nvariable representing each Senator's number of \"guilty\" votes) are\nprovided, as well as each Senator's values on several variables that\ncould be predictive of vote (e.g., Senator's degree of conservatism,\nhow well Clinton did in the Senator's state in the 1996 Presidential\nelection).", "description_url": "http://jse.amstat.org/datasets/impeach.txt", "url": "http://jse.amstat.org/datasets/impeach.dat.txt", "filename": "impeach", "name": " U.S. 
Senate Votes on Clinton Removal", "separator": "SPACE", "columns": [" Name of senator", "State (postal code)", "Vote on Article I, Perjury: 0 = Not Guilty, 1 = Guilty", "Vote on Article II, Obstruction of Justice: 0 = NG, 1 = G", "Number of votes for guilt", "Party: 0 = Democrat, 1 = Republican", "Senator's degree of ideological conservativism (0-100)", "Percent of the vote Clinton received in the 1996 Presidential election in each state", "The year each Senator's seat is up and he/she must run for re-election (or retire)", "First-term senator? 0 = no, 1 = yes"] }, { "description": "The data consist of the numbers of days served in office for the 43 \nPresidents of the United States as of 4 February 2004.", "description_url": "http://jse.amstat.org/datasets/outlier.txt", "url": "http://jse.amstat.org/datasets/outlier.dat.txt", "filename": "outlier", "name": "A Dataset That Is 44% Outliers", "separator": "SPACE", "columns": ["Last name of President (text, spelled with no embedded spaces)", "days in office (counts)"] } ] }, { "name": "Education", "datasets": [ { "description": "This dataset contains variables that address the relationship between \npublic school expenditures and academic performance, as measured by the SAT.", "description_url": "http://jse.amstat.org/datasets/sat.txt", "url": "http://jse.amstat.org/datasets/sat.dat.txt", "filename": "sat", "name": "Getting What You Pay For: The Debate Over Equity in Public School Expenditures ", "separator": "SPACE", "remove_quotes": true, "columns": ["Name of state (in quotation marks)", "Current expenditure per pupil in average daily attendance in public elementary and secondary schools, 1994-95 (in thousands of dollars)", "Average pupil/teacher ratio in public elementary and secondary schools, Fall 1994", "Estimated average annual salary of teachers in public elementary and secondary schools, 1994-95 (in thousands of dollars)", "Percentage of all eligible students taking the SAT, 1994-95", "Average verbal SAT 
score, 1994-95", "Average math SAT score, 1994-95", "Average total score on the SAT, 1994-95"] }, { "description": "Data are from the 1995 U.S. News report on American colleges and\nuniversities. They include demographic information on tuition,\nroom & board costs, SAT or ACT scores, application/acceptance\nrates, student/faculty ratio, graduation rate, and more.", "description_url": "http://jse.amstat.org/datasets/usnews.txt", "url": "http://jse.amstat.org/datasets/usnews.dat.txt", "filename": "usnews", "name": "U.S. News College data", "separator": ",", "columns": ["FICE (Federal ID number)", "College name", "State (postal code)", "Public/private indicator (public=1, private=2)", "Average Math SAT score", "Average Verbal SAT score", "Average Combined SAT score", "Average ACT score", "First quartile - Math SAT", "Third quartile - Math SAT", "First quartile - Verbal SAT", "Third quartile - Verbal SAT", "First quartile - ACT", "Third quartile - ACT", "Number of applications received", "Number of applicants accepted", "Number of new students enrolled", "Pct. new students from top 10% of H.S. class", "Pct. new students from top 25% of H.S. class", "Number of full-time undergraduates", "Number of part-time undergraduates", "In-state tuition", "Out-of-state tuition", "Room and board costs", "Room costs", "Board costs", "Additional fees", "Estimated book costs", "Estimated personal spending", "Pct. of faculty with Ph.D.'s", "Pct. of faculty with terminal degree", "Student/faculty ratio", "Pct. 
alumni who donate", "Instructional expenditure per student", "Graduation rate"] } ] } ] } ] } diff --git a/data/datasets/OzDASL.json b/data/datasets/OzDASL.json index 9a1bbb793..dbed5ecf7 100644 --- a/data/datasets/OzDASL.json +++ b/data/datasets/OzDASL.json @@ -1,2425 +1,1223 @@ { "name": "OzDASL", "categories": [ { "name": "Medicine", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "West of Tokyo lies a large alluvial plain, dotted by a network of farming villages. Matui (1968) analysed the position of the 911 houses making up one of those villages. The area studied was a rectangle, 3 km by 4 km. A grid was superimposed over a map of the village, dividing its 12 square kilometres into 1200 plots, each 100 metres on a side. The numbers of houses on each of those plots are recorded in a 30 by 40 matrix of data.", "url": "http://www.statsci.org/data/general/matui.txt", "filename": "matui", "name": "Position of Houses in a Japanese Farming Village", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Larsen and Marx (1986) write \nSince World War II, plutonium for use in atomic weapons has been produced at an Atomic Energy Commission facility in Hanford, Washington. One of the major safety problems encountered there has been the storage of radioactive wastes. Over the years, significant quantities of these substances - including strontium 90 and cesium 137 - have leaked from their open-pit storage areas into the nearby Columbia River, which flows along the Washington-Oregon border, and eventually empties into the Pacific Ocean.
\nTo measure the health consequences of this contamination, an index of exposure was calculated for each of the nine Oregon counties having frontage on either the Columbia River or the Pacific Ocean. This particular index was based on several factors, including the county's stream distance from Hanford and the average distance of its population from any water frontage. As a covariate, the cancer mortality rate was determined for each of these same counties. \nThe data give the index of exposure and the cancer mortality rate during 1959-1964 for the nine Oregon counties affected. Higher index values represent higher levels of contamination. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nCounty\n\nName of county\n\nExposure\n\nIndex of exposure\n\nMortality\n\nCancer mortality per 100,000 man-years\n\n\n\n", "url": "http://www.statsci.org/data/general/hanford.txt", "filename": "hanford", "name": "Cancer Mortality near Hanford Reactor", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data show the incidence of nonmelanoma skin cancer among women in Minneapolis-St Paul, Minnesota, and Dallas-Fort Worth, Texas. The towns are coded 0 for St Paul and 1 for Fort Worth.
\nOne would expect sun exposure to be greater in Texas than in Minnesota.", "url": "http://www.statsci.org/data/general/skin.txt", "filename": "skin", "name": "Skin Cancer in Texas and Minnesota", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data comes from an experiment to measure the mortality of cancer cells under radiation undertaken in the Department of Radiology, University of Cape Town. Four hundred cells were placed on a dish, and three dishes were irradiated at a time, or occasion. After the cells were irradiated, the surviving cells were counted. Since cells would also die naturally, dishes with cells were put into the radiation chamber without being irradiated, to establish the natural mortality. This data gives only these zero-dose data. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nOccasion\n\nIrradiation occasion (1-27)\n\nSurvived\n\nNumber of cells surviving out of 400 placed on dish\n", "url": "http://www.statsci.org/data/general/radiatio.txt", "filename": "radiatio", "name": "Mortality of Cancer Cells", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Oncology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data gives the time between 800 successive pulses along a nerve fibre. There are 799 observations rounded to the nearest half in units of 1/50 second.
", "url": "http://www.statsci.org/data/general/nerve.txt", "filename": "nerve", "name": "Time between Nerve Pulses", - "number_format": 31, - "remove_quotes": true, "separator": "auto", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": false } ], "name": "Neurology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Students in an introductory statistics class (MS212 taught by Professor John Eccleston and Dr Richard Wilson at The University of Queensland) participated in a simple experiment. The students took their own pulse rate. They were then asked to flip a coin. If the coin came up heads, they were to run in place for one minute. Otherwise they sat for one minute. Then everyone took their pulse again. The pulse rates and other physiological and lifestyle data are given in the data. \nFive class groups between 1993 and 1998 participated in the experiment. The lecturer, Richard Wilson, was concerned that some students would choose the less strenuous option of sitting rather than running even if their coin came up heads, so in the years 1995-1998 a different method of random assignment was used. In these years, data forms were handed out to the class before the experiment. The forms were pre-assigned to either running or non-running and there were an equal number of each. 
In 1995 and 1998 not all of the forms were returned so the numbers running and sitting was still not entirely controlled.", "url": "http://www.statsci.org/data/oz/ms212.txt", "filename": "ms212", "name": "Pulse Rates before and after Exercise", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A study was conducted at a major north eastern American medical centre regarding blood cholesterol levels and heart-attack incidents. A total of 28 heart-attack patients had their cholesterol levels measured two days, 4 days, and 14 days after the attack. In addition, cholesterol levels were recorded for a control group of 30 people who had not had a heart attack. The units of cholesterol measurement are not given in the original reference but are presumably mg/dL of blood.", "url": "http://www.statsci.org/data/general/cholestg.txt", "filename": "cholestg", "name": "Cholesterol Levels after Heart Attack", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This was a pilot study for the experiment described in Recovery of Patients from Stroke. The purpose of the study was to compare four evaluation tools for assessing the recovery of patients who had recently suffered a stroke. The four tools were (1) the Goteburg Assessment Form of Hemiplegia, (2) the Bobath Assessment Form, (3) the Barthel Index and (4) the Kenny Scoring System. The Goteburg Assessment was divided into seven components measuring motor function and balance, some sensation qualities, passive range of motion and occurrence of joint pain. 
The Bobath form evaluates three areas of motor performance, postural reactions, voluntary movement, and balance and automatic protective reactions. The Barthel index and the Kenny Scoring system evaluate ability to carry out activities of daily living such as dressing, feeding, toileting etc. \nTwenty subjects were selected from two large public hospitals in Brisbane. All subjects had recently suffered a cerebrovascular accident resulting in hemiplegia lasting at least 24 hours, had not previously been incapacitated from stroke or other disease and were currently receiving occupational therapy. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject ID (1-20)\n\nSex\n\nMale (M) or female (F)\n\nSide\n\nSide of brain affected, left (L) or right (R)\n\nAge\n\nAge of subject in years\n\nLapse\n\nTime since occurrence of stroke in weeks\n\nArms\n\nArm and shoulder motor function (max 36)\n\nLegs\n\nLower limb motor function (max 30)\n\nHands\n\nWrist and hand motor function (max 24)\n\nBalance\n\nBalance score (max 14)\n\nSensation\n\nSensation score (max 24)\n\nJointPain\n\nFreedom from joint pain (max 24)\n\nJointMotion\n\nPassive joint motion (max 24)\n\nBobath\n\nTotal of Bobath Assessment Form (max 266)\n\nBarthel\n\nBarthel Index (max 100)\n\nKenny\n\nKenny scoring system of daily living (max 24)\n\n\n\n\nThe researcher chose the Barthel Index and the first five components of the Goteburg Evaluation for use in the later experiment.", "url": "http://www.statsci.org/data/oz/strokeass.txt", "filename": "strokeass", "name": "Evaluation Tools for Stroke Rehabilitation", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This study compared three occupational therapy programs designed to help patients recover from the effects of a stroke.
Eight stroke patients were assigned to each of the three treatment groups. The first group (E) was given an experimental program developed by the investigator from a model of intervention for stroke rehabilitation. The second group (F) was given a pre-existing program. The third group (G) was a non-treatment program. Each program lasted for 8 weeks. All subjects were evaluated at the start of the program and at weekly intervals until the end of the program. \nGroup E and F patients were treated in the Occupational Therapy Department of a large Brisbane repatriation hospital. Group G patients were located in the wards of a large State Hospital in Brisbane. \nThe recovery status of each subject at each time was evaluated using the Goteburg Evaluation of Hemiplegia and the Barthel Index. The Goteburg evaluation form gave separate scores for three motor function variables (upper limbs, hand and wrist, lower limbs) and for balance while the Barthel Index gave a single overall score. Higher scores indicate better functional ability. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject ID\n\nGroup\n\nExperimental (E), pre-existing (F) or non-treatment (G)\n\nSex\n\nMale (M) or female (F)\n\nSide\n\nSide of brain affected, left (L) or right (R)\n\nAge\n\nAge of subject in years\n\nLapse\n\nTime lapse from stroke to start of program in weeks\n\nUE1\n\nUpper extremities score (out of 36) at week 1\n\nUE2\n\n... week 2\n\nUE3\n\n... week 3\n\nUE4\n\n... week 4\n\nUE5\n\n... week 5\n\nUE6\n\n... week 6\n\nUE7\n\n... week 7\n\nUE8\n\n... week 8\n\nHW1\n\nHand-wrist score (out of 24) at week 1\n\nHW2\n\n... week 2\n\nHW3\n\n... week 3\n\nHW4\n\n... week 4\n\nHW5\n\n... week 5\n\nHW6\n\n... week 6\n\nHW7\n\n... week 7\n\nHW8\n\n... week 8\n\nLE1\n\nLower extremities score (out of 30) at week 1\n\nLE2\n\n... week 2\n\nLE3\n\n... week 3\n\nLE4\n\n... week 4\n\nLE5\n\n... week 5\n\nLE6\n\n... week 6\n\nLE7\n\n... week 7\n\nLE8\n\n...
week 8\n\nBal1\n\nBalance score (out of 14) at week 1\n\nBal2\n\n... week 2\n\nBal3\n\n... week 3\n\nBal4\n\n... week 4\n\nBal5\n\n... week 5\n\nBal6\n\n... week 6\n\nBal7\n\n... week 7\n\nBal8\n\n... week 8\n\nBart1\n\nBarthel Index score (out of 100) at week 1\n\nBart2\n\n... week 2\n\nBart3\n\n... week 3\n\nBart4\n\n... week 4\n\nBart5\n\n... week 5\n\nBart6\n\n... week 6\n\nBart7\n\n... week 7\n\nBart8\n\n... week 8\n", "url": "http://www.statsci.org/data/oz/stroke.txt", "filename": "stroke", "name": "Recovery of Patients from Stroke", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Cardiology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "How difficult is it to maintain your balance while concentrating? It is more difficult when you are older? Nine elderly (6 men and 3 women) and eight young men were subjects in an experiment. Each subject stood barefoot on a \"force platform\" and was asked to maintain a stable upright position and to react as quickly as possible to an unpredictable noise by pressing a hand held button. The noise came randomly and the subject concentrated on reacting as quickly as possible. 
The platform automatically measured how much each subject swayed in millimetres in both the forward/backward and the side-to-side directions.", "url": "http://www.statsci.org/data/general/balaconc.txt", "filename": "balaconc", "name": "Maintaining Balance while Concentrating", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data comes from a small study in Western Australia of hypertension, alcohol, and obesity. This study was partly designed to mimic a previously reported U.S. study based on a larger sample. A log-linear interaction model is a convenient and effective way of investigating associations among the three variables. A prior-posterior analysis of this 3 x 2 x 4 contingency table using prior information from the previous study (Klatsky et al., 1977) may be appropriate. The previous study reported the general conclusion that alcohol intake and obesity were significantly and independently associated with hypertension (blood pressure). Although a few summary statistics were reported, the full data were not published. One difference between the two studies was in the definition of obesity categories.\nThe data is listed as follows: the first column (Obesity) contains a numerical value representing the level of obesity (1=low, 2=average, 3=high), the second column (BP) contains a numerical indicator of the presence of hypertension (0=no, 1 =yes). The next five columns are labelled with the levels of alcoholic intake of the subjects, in drinks per day. 
These columns contain the frequency of observations that have this level of intake, for each group of obesity level and hypertension presence.", "url": "http://www.statsci.org/data/oz/alchyp.txt", "filename": "alchyp", "name": "Alcohol, Hypertension and Obesity", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "FEV (forced expiratory volume) is an index of pulmonary function that measures the volume of air expelled after one second of constant effort. The data contains determinations of FEV on 654 children ages 6-22 who were seen in the Childhood Respiratory Disease Study in 1980 in East Boston, Massachusetts. The data are part of a larger study to follow the change in pulmonary function over time in children. \nID\n - \nID number\nAge\n - \nyears\nFEV\n - \nlitres\nHeight\n - \ninches\nSex\n - \nMale or Female\nSmoker\n - \nNon = nonsmoker, Current = current smoker\n", "url": "http://www.statsci.org/data/general/fev.txt", "filename": "fev", "name": "Childhood Respiratory Disease", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the results of a study aimed at reducing the risk of HIV infection among African-American adolescents. The subjects were 14-18 year old female and male adolescents in a Southern USA city. The study compared two interventions. The treatment intervention was an 8-week Behavioural Skills Training (BST) program. The control was a single 2-hour education session about HIV and AIDS.
The subjects completed sexual attitude and activity questionnaires before and after the intervention and at 6-month and 12-month follow-ups. The data here are for 10 subjects for each intervention although the original study was much larger. The data given here appear to have been created by Howell (1999) based on summary statistics from the original study. The dependent variable is the logarithm-transformed frequency of condom-protected sex ( log(Y+1) ). \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nBST\n\n1 = BST intervention, 0 = control\n\nPre\n\nLog-frequency of protected sex before the intervention\n\nPost\n\nLog-frequency of protected sex after the intervention\n\nFU6\n\nLog-frequency of protected sex reported at the 6 months follow-up\n\nFU12\n\nLog-frequency of protected sex reported at the 12 months follow-up\n", "url": "http://www.statsci.org/data/general/protsex.txt", "filename": "protsex", "name": "Behavioural Skills Training and Protected Sex", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "HARVEST (Hypertension and Ambulatory Recording Venetia Study) is a trial designed to assess whether ambulatory monitoring adds something to office (clinical) blood pressure in predicting the development of fixed hypertension and of cardiovascular complications in patients with borderline to mild hypertension. Ambulatory monitoring refers to the measuring of home blood pressure by an annotated device that the subject wears for 24 hours. The data give information on 1100 subjects compiled by Dr Paolo Palatini, Professor of Clinical Medicine at the University of Padua, Italy.
\nPatients were eligible for the study if they satisfied the following criteria: \ndiastolic blood pressure (BP) between 90 and 100 mm Hg or isolated systolic hypertension (systolic BP greater than or equal to 140 mm Hg and diastolic BP less than 90 mm Hg) \nnever been treated for hypertension \naged 18 to 45 years old \nfree from other important risk factors for atherosclerosis\nThe subjects were followed for 5 years. Baseline examinations, including ECG and echocardiography, were repeated at the end of the study or upon development of hypertension, defined as BP persistently 100 mm Hg or greater or a systolic BP of 160 mm Hg or greater. Ambulatory monitoring was repeated 3 months and 5 years after the baseline evaluation.\nThe symbol C or A after the name of a variable means:\nC = clinical examination; A = ambulatory (home monitoring)\nThe last symbol of a variable name may be B, 3, 5 or E:\nB = baseline examination\n3 = 3-month examination\n5 = 5-year examination\nE = endpoint examination \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSmoke\n\nSmoking status at baseline examination:\n0 = non-smoking,\n1 = 1-5 cigarettes per day,\n2 = 6-10 cigarettes per day,\n3 = 11-20 cigarettes per day.\n\nSport\n\nSport activity at baseline examination:\n0 = only sedentary,\n1 = light activity (walking),\n2 = sports non-competitive,\n3 = sports competitiv.\n\nSBP\n\nSystolic blood pressure\n\nDBP\n\nDiastolic blood pressure\n\nHR\n\nHeart rate\n\nAge\n\nAge in years\n\nBMI\n\nBody mass index: 100 * weight (kg) / height (m)2\n\nEndPoint\n\nEndpoint status at the time the file was created:\n1 = blood pressure level hypertensive\n0 = blood pressure level not hypertensive\n\nTime\n\nTime in months from baseline examination to the date of endpoint or to May 30, 1999, whichever was earlier\n\nMale\n\nGender:\n1 = male\n0 = female\n", "url": "http://www.statsci.org/data/general/harvest.txt", "filename": "harvest", "name": "HARVEST Trial", - "number_format": 31, - "remove_quotes": true, 
- "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data is a subset from the Six Cities study, a longitudinal study of the health effects of air pollution. The data contain repeated binary measures of the wheezing status (1 = yes, 0 = no) for each of 537 children from Steubenville, Ohio, at ages 7, 8, 9 and 10 years. Also measured is whether or not the mother was a smoker during the first year of the study.", "url": "http://www.statsci.org/data/general/wheeze.txt", "filename": "wheeze", "name": "Child's Wheeze and Mother's Smoking", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The National Trachoma and Eye Health Program (1980) reports on the prevalence of otitis media (an infection that produces pus within the middle ear) in both aboriginal and non-aboriginal communities in Australia. The Program surveyed all aboriginal communities in Australia and attempted to contact all aborigines. Simultaneously, contact was made with non-aborigines usually living in the same or adjacent locations. Because of the high prevalence of infection in the aboriginal community only severe cases were classified as infected, virtually all of them suffering bursting of the ear drum and consequent scarring. It was thought that scarring could be used to identify those people who previously had had severe infections, but were not currently infected. So it was possible to classify subjects as (a) either not currently infected and no scarring, (b) currently infected or (c) not currently infected but one or more drums scarred.
The data give the number of aborigines examined in various age intervals and the proportions classified as (a), (b) or (c). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nAge \n\nAge interval (years)\n\nExamined \n\nNumber of subjects examined\n\nNone \n\nProportion not currently infected and with no scarring\n\nCurrent \n\nProportion currently infected\n\nPast \n\nProportion not currently infected but with one or both drums scarred\n", "url": "http://www.statsci.org/data/oz/otitis.txt", "filename": "otitis", "name": "Prevalence of Otitis Media in Aboriginal Communities", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In a study of the effect of ticks on cattle in North Queensland, the disease status of animals exposed to the tick-borne parasite Anaplasma marginale is of some concern. A symptom of infection from this parasite, the number of red blood cells can be reduced by up to 80% at the point of peak anaemia. The problem to be considered here concerns a way of quantifying the change in red blood cell populations during the recovery stages of the disease. \nIn a laboratory trial, cows were inoculated with the parasite and their red blood cells monitored before and after inoculation. The data collected were in the form of red cell volume distributions obtained from a Coulter counter, truncated and sorted into groups. In work as yet unpublished, McLaren et al. have addressed the problem of fitting distributions to similar data from humans suffering myelodysplastic anaemia, and McLaren (private communication) has suggested the need to develop hypothesis testing procedures for this type of data. \nThe observed counts of red cell volume from one of the cows on days 21 (Freq1) and 23 (Freq2) after inoculation are listed.
The counts are grouped into 18 intervals of equal width of 7.2 fl. The first column (Group) lists the group number, the second (Vol) lists the truncated lower endpoint of the cell volume interval. The lower and upper truncation values for these red cell volume counts were 21.6 fl and 151.2 fl respectively. A cursory inspection of the two sets of observed frequency counts in histogram form on the logarithmic scale suggest that the red blood cell volume distribution is bimodal, at least at 21 days after inoculation. \n", "url": "http://www.statsci.org/data/oz/rbcmix.txt", "filename": "rbcmix", "name": "Red Blood Cell Volume Data for Cows", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Common" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The effect of a single 600 mg dose of absorbic acid versus a sugar placebo on the muscular endurance (as measured by repetitive grip strength trials) of fifteen male volunteers (19-23 years old) was evaluated. The study was conducted in a double-blind manner with crossover. \nThree initial maximal contractions were performed for each subject, with the greatest value indicating maximal grip strength. Muscular endurance was measured by having the subjects squeeze the dynamometer, hold the contraction for three seconds, and repeat continuously until a value of 50% maximum grip strength was achieved for three consecutive contractions. Endurance time was defined as the number of repetitions required to go from maximum grip strength to the initial 50% value. Subjects were given frequent positive verbal encouragement in an effort to have them complete as many repetitions as possible. 
", "url": "http://www.statsci.org/data/general/vitaminc.txt", "filename": "vitaminc", "name": "Effect of Vitamin C on Muscular Endurance", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Proponents of Reiki, a type of touch therapy, hypothesize that Reiki re-establishes the energy balance in areas of the body experiencing disease and discomfort, thus promoting healing, reducing pain and increasing quality of life. The main feature that distinguishes Reiki from other couch therapies, such as therapeutic touch, is that Reiki therapists have physical contact with the body. Participants in Reiki are fully-clothed and may be covered with a blanket if they wish. The treatment, delivered to 18 specific areas of the body, begins with the participant lying on his or her back. The hands are placed on 10 distinct locations on the head and torso. The participant is then asked to lie on his or her stomach (or side, if this is more comfortable), where the hands are placed on 8 additional distinct locations covering the back, hip area and feet. The treatment takes approximately 1.25 hours to complete. \nTreatment of cancer pain usually focuses on opioids. Since high doses of opioids frequently aggravate other common symptoms of cancer patients, it is of interest to explore non-drug treatments that may allow control of cancer pain with lower doses of opioids. This project studied whether Reiki is beneficial in the management of pain for people from the community experiencing general chronic pain, as a prelimary step in deciding whether Reiki is worth trying for cancer patients. 
\nThe Sample \nThe eligibility criteria were that subjects must be at least 18 years old, not receiving chemotherapy or radiotherapy, be experiencing moderate pain (at least 3 on a VAS (0-10) or 2 on a Likert scale (0-5)), have normal cognitive function, be able to speak, read and write English, and be willing to complete the study rating scales. The sample size necessary was calculated using the binomial distribution with the assumption that 50% of the study participants might be expected to benefit from treatment. The probability of a decrease in pain following treatment in 14 or more cases out of 20 by chance alone is 0.058. [VAS means \"Visual Analogue Scale\". A Likert-type item consists of a single statement, followed by a usually five or six-point choice with each choice described in words.] \nNotices were placed in retail establishments and community centres. Potential participants identified themselves by telephoning the research assistant at a number provided on the recruitment posters. Individuals who met the eligibility criteria and who signed a consent form were scheduled to receive a treatment by a Reiki therapist. \nTwenty people were recruited (18 women and 2 men) who ranged in age from 23 to 62 years (mean 44 years). These participants were currently experiencing pain at 55 sites. Ten participants had pain in their upper body and 4 in their lower body. The remaining 6 participants had pain in both the upper and lower parts of their body. Eight participants attributed their pain to bone and muscle problems and 5 participants to chronic illness. Three of the participants included in the chronic illness group had cancer. Six participants had been experiencing pain for 1 year or less, and 7 had been experiencing pain for more than 1 year, up through 7 years. The remaining seven had been in pain for more than 7 years, one for 48 years. 
\nEighteen participants had asked their physician for help with their pain, and 19 were currently using at least 1 of the following strategies to manage it: analgesic preparations, anti-inflammatory medications, exercise, massage, acupuncture, therapeutic touch, chiropractic, homeopathy, meditation, vitamins, steam, muscle relaxation techniques and Tai Chi. \nTreatment and Data Collection \nParticipants were given 1 treatment by the Reiki therapist in her office. They lay on a massage table fully clothed and, if desired, were also covered with a sheet or blanket. The lights were dimmed, and a candle was lit; soft music played in the background. The environment was consistent through all 20 treatments. A pain VAS ranging from 0 to 10 and a Likert scale ranging from 0 to 5 were completed immediately before and after the Reiki treatment. ", "url": "http://www.statsci.org/data/general/reiki.txt", "filename": "reiki", "name": "Using Reiki to Manage Pain", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data consist of measurements (x1, x2, Age in months) on 23 babies, collected in the Faculty of Medicine at the University of Hong Kong. It would be of great medical interest to find a relationship between x1 and x2. However, any correlation between them is likely spurious because both x1 and x2 tend to increase with age. 
See Chris Lloyd's original mailing to the ANZStat mailing list discussion.", "url": "http://www.statsci.org/data/general/babies.txt", "filename": "babies", "name": "Measurements on Babies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "When anthropologists analyze human skeletal remains, an important piece of information is living stature. Since skeletons are commonly incomplete, estimates of stature are based on statistical methods that utilize measurements on small bones. The following data was presented in a paper in the American Journal of Physical Anthropology to validate one such method. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nMetaCarp\n\nMetacarpal bone I length in cm\n\nStature\n\nStature in cm\n\n\n\n", "url": "http://www.statsci.org/data/general/stature.txt", "filename": "stature", "name": "Prediction of Height from Metacarpal Bone Length", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "CPK (creatine phosphokinase) is an enzyme contained within muscle cells which is necessary for the storage and release of energy. It can be released into the blood in response to vigorous exercise from damaged (leaky) muscle cells. This occurs often even in healthy athletes. \nThis study investigated the metabolic effect of cross-country skiing. Subjects were participants in a 24 hour cross-country relay. 
Age, weight (kg) and blood CPK concentration 12 hours into the relay were recorded.", "url": "http://www.statsci.org/data/general/bloodcpk.txt", "filename": "bloodcpk", "name": "Blood CPK in Cross-Country Skiers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Studies conducted at the University of Melbourne indicate that there may be a difference between the pain thresholds of blonds and brunettes. Men and women of various ages were divided into four categories according to hair colour: light blond, dark blond, light brunette, and dark brunette. The purpose of the experiment was to determine whether hair colour is related to the amount of pain produced by common types of mishaps and assorted types of trauma. Each person in the experiment was given a pain threshold score based on his or her performance in a pain sensitivity test (the higher the score, the higher the person’s pain tolerance). \n\nVariable\n\nValues\n\nHairColour\n\nLightBlond, DarkBlond, LightBrunette or DarkBrunette \n\nPain\n\nPain theshold score \n", + "description": "Studies conducted at the University of Melbourne indicate that there may be a difference between the pain thresholds of blonds and brunettes. Men and women of various ages were divided into four categories according to hair colour: light blond, dark blond, light brunette, and dark brunette. The purpose of the experiment was to determine whether hair colour is related to the amount of pain produced by common types of mishaps and assorted types of trauma. Each person in the experiment was given a pain threshold score based on his or her performance in a pain sensitivity test (the higher the score, the higher the person's pain tolerance). 
\n\nVariable\n\nValues\n\nHairColour\n\nLightBlond, DarkBlond, LightBrunette or DarkBrunette \n\nPain\n\nPain threshold score \n", "url": "http://www.statsci.org/data/oz/blonds.txt", "filename": "blonds", "name": "Pain Thresholds of Blonds and Brunettes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "For his MS305 data project, Michael Larner measured the weight and various physical measurements for 22 male subjects aged 16 - 30. Subjects were randomly chosen volunteers, all in reasonably good health. Subjects were requested to slightly tense each muscle being measured to ensure measurement consistency. Apart from Mass, all measurements are in cm. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMass\n\nWeight in kg\n\nFore\n\nMaximum circumference of forearm\n\nBicep\n\nMaximum circumference of bicep\n\nChest\n\nDistance around chest directly under the armpits\n\nNeck\n\nDistance around neck, approximately halfway up\n\nWaist\n\nDistance around waist, approximately trouser line\n\nThigh\n\nCircumference of thigh, measured halfway between the knee and the top of the leg\n\nCalf\n\nMaximum circumference of calf\n\nHeight\n\nHeight from top to toe\n\nShoulders\n\nDistance around shoulders, measured around the peak of the shoulder blades\n", "url": "http://www.statsci.org/data/oz/physical.txt", "filename": "physical", "name": "Mass and Physical Measurements for Male Subjects", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Larsen and Marx (1986) write \nIn folklore, the full moon is often portrayed as 
something sinister, a kind of evil force possessing the power to control our behaviour. Over the centuries, many prominent writers and philosophers have shared this belief. Milton, in Paradise Lost, refers to \nDemoniac frenzy, moping melancholy\nAnd moon-struck madness. \nAnd Othello, after the murder of Desdemona, laments \nIt is the very error of the moon\nShe comes more near the earth than she was wont\nAnd makes men mad. \nOn a more scholarly level, Sir William Blackstone, the renowned eighteenth century English barrister, defined a \"lunatic\" as \none who hath ... lost the use of his reason and who hath lucid intervals, sometimes enjoying his senses and sometimes not, and that frequently depending upon changes of the moon. \nThe data give the admission rates to the emergency room of a Virginia mental health clinic before, during and after the 12 full moons from August 1971 to July 1972. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMonth\n\nMonth of year: Aug, Sep, ... Jul\n\nMoon\n\nBefore, During or After the full moon\n\nAdmission\n\nAdmission rate (patients/day)\n\n\n\n", "url": "http://www.statsci.org/data/general/fullmoon.txt", "filename": "fullmoon", "name": "Mental Hospital Admissions During Full Moons", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Five types of electrodes were applied to the arms of 16 subjects and the resistance measured. The experiment was designed to see whether all five electrode types performed similarly. \nAfter obtaining the results, the experimenters decided that the reason for the two large readings on subject 15 was the excessive amount of hair on those parts of the subject's arm. They concluded that this subject's data should be deleted. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\nSubject number\n\nE1\n\nResistance measured by electrode type 1\n\nE2\n\nResistance measured by electrode type 2\n\nE3\n\nResistance measured by electrode type 3\n\nE4\n\nResistance measured by electrode type 4\n\nE5\n\nResistance measured by electrode type 5\n", "url": "http://www.statsci.org/data/general/resist.txt", "filename": "resist", "name": "Skin Resistance", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Osteoarthritis is a mechanical degeneration of joint surfaces causing pain, swelling and loss of joint function in one or more joints. Physiotherapists treat the affected joints to reduce pain (VAS = visual analogue scale) and to increase the range of movement (ROM). In this study there were 10 subjects, each of whom was treated with continuous TENS (electric nerve stimulation) and short wave diathermy. Measurements were taken also after no treatment. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nSubject\n\nSubject identifier\n\nNoROM\n\nROM after no treatment\n\nNoVAS\n\nVAS after no treatment\n\nTENSROM\n\nROM after continuous TENS\n\nTENSVAS\n\nVAS after continuous TENS\n\nSWDROM\n\nROM after short wave diathermy\n\nSWDVAS\n\nVAS after short wave diathermy\n", "url": "http://www.statsci.org/data/oz/oa.txt", "filename": "oa", "name": "Treatment for Osteoarthritis", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The balance of subjects were observed for two different surfaces and for restricted and unrestricted vision. 
Balance was assessed qualitatively on an ordinal 4-point scale based on observation by the experimenter. Subjects were expected to be better balanced (show less sway) when standing on the normal surface than on foam, and when their eyes were open rather than closed or when their vision was restricted by a dome. \nEqual numbers of male and female subjects were chosen. For both males and females, ten older (more than 24 years old) and ten younger subjects were selected. \nThe data is available in two formats. The first is in univariate or \"strung out form\" which is suitable for entry to Minitab or S-Plus and to most mixed model programs. The second is in repeated measures format which is suitable for SPSS and for most special purpose repeated measures programs. \nUnivariate format: \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1 to 40\n\nSex\n\nmale or female\n\nAge\n\nAge of subject in years\n\nHeight\n\nHeight in cm\n\nWeight\n\nWeight in kg\n\nSurface\n\nnormal or foam\n\nVision\n\neyes open, eyes closed, or closed dome\n\nCTSIB\n\nQualitative measure of balance, 1 (stable) - 4 (unstable) \n\n\n\n", "url": "http://www.statsci.org/data/oz/ctsibuni.txt", "filename": "ctsibuni", "name": "Effect of Surface and Vision on Balance", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data related to the transport of sulfite ions from blood cells suspended in a salt solution. The chloride concentration (%) was measured over a period of about 8 minutes as a continuous curve generated from electrical potentials. The data given here were digitized from the curve at 10 second intervals. 
\nThe theory of ion transport suggested that the concentration asymptote exponentially, i.e., \nChloride = q1{1 - q2exp(- q3Time)} \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nTime\n\nElapsed time in minutes\n\nChloride\n\nChloride concentration (%)\n", "url": "http://www.statsci.org/data/general/chloride.txt", "filename": "chloride", "name": "Transport of Sulfite Ions from Blood Cells", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data are the times, in days, that heroin addicts spend in a clinic. There are two clinics and the covariates are believed to affect the times spent in the clinic by addicts. \n \nVariable\n \nDescription\n\nClinic\n\n1 or 2\n\nStatus\n\n0 = still in clinic at end of study (censored) or 1 = departed from clinic\n\nTime\n\ndays spent in clinic\n\nPrison\n\n1 = prison record or 0 = no record\n\nDose\n\nmethadone dosage (mg/day)\n", "url": "http://www.statsci.org/data/oz/heroin.txt", "filename": "heroin", "name": "Methadone Treatment of Heroin Addicts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Other" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Why do older people often seem not to remember things as well as younger people? Do they not pay attention? Do they just not process the material as thoroughly? One theory regarding memory is that verbal material is remembered as a function of the degree to which is was processed when it was initially presented. Eysenck (1974) randomly assigned 50 younger subjects and 50 older (between 55 and 65 years old) to one of five learning groups. 
The Counting group was asked to read through a list of words and count the number of letters in each word. This involved the lowest level of processing. The Rhyming group was asked to read each word and think of a word that rhymed with it. The Adjective group was asked to give an adjective that could reasonably be used to modify each word in the list. The Imagery group was instructed to form vivid images of each word, and this was assumed to require the deepest level of processing. None of these four groups was told they would later be asked to recall the items. Finally, the Intentional group was asked to memorize the words for later recall. After the subjects had gone through the list of 27 items three times they were asked to write down all the words they could remember. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nYounger or Older\n\nProcess\n\nThe level of processing: Counting, Rhyming, Adjective, Imagery or Intentional\n\nWords\n\nNumber of words recalled\n", "url": "http://www.statsci.org/data/general/eysenck.txt", "filename": "eysenck", "name": "Age and Memory", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Nolen-Hoeksema and Morrow (1991) had the good fortune to have measured depression among college students 2 weeks before the Loma Prieta earthquake in California in 1989. Nolen-Hoeksema and Morrow collected repeat data to track the students’ adjustments to the earthquake. Measurements were taken every 3 weeks starting 2 weeks before the earthquake to 10 weeks after. The data were recreated by Howell (1999) based on the Nolen-Hoeksema and Morrow findings. 
Each row gives the depression scores for one student.\n\n\nVariable\n\nDescription\n\n\n\n\n\nWeek0\n\nDepression scores 2 weeks before the earthquake\n\nWeek3\n\nDepression scores one week the quake\n\nWeek6\n\nDepression scores 4 weeks after the quake\n\nWeek9\n\nDepression scores 7 weeks after the quake\n\nWeek12\n\nDepression scores 10 weeks after the quake\n", + "description": "Nolen-Hoeksema and Morrow (1991) had the good fortune to have measured depression among college students 2 weeks before the Loma Prieta earthquake in California in 1989. Nolen-Hoeksema and Morrow collected repeat data to track the students' adjustments to the earthquake. Measurements were taken every 3 weeks starting 2 weeks before the earthquake to 10 weeks after. The data were recreated by Howell (1999) based on the Nolen-Hoeksema and Morrow findings. Each row gives the depression scores for one student.\n\n\nVariable\n\nDescription\n\n\n\n\n\nWeek0\n\nDepression scores 2 weeks before the earthquake\n\nWeek3\n\nDepression scores one week after the quake\n\nWeek6\n\nDepression scores 4 weeks after the quake\n\nWeek9\n\nDepression scores 7 weeks after the quake\n\nWeek12\n\nDepression scores 10 weeks after the quake\n", "url": "http://www.statsci.org/data/general/lomaprie.txt", "filename": "lomaprie", "name": "Depression Before and After an Earthquake", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Psychology" } ] }, { "name": "Nature", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily rainfall (in millimetres) was recorded over a 47-year period in Turramurra, Sydney, Australia. For each year, the wettest day was identified (that having the greatest rainfall). 
The data show the rainfall recorded for the 47 annual maxima.", "url": "http://www.statsci.org/data/oz/sydrain.txt", "filename": "sydrain", "name": "Annual Maximums of Daily Rainfall in Sydney", - "number_format": 31, - "remove_quotes": true, "separator": "auto", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data were collected in a cloud-seeding experiment in Tasmania between mid-1964 and January 1971. The rainfalls are period rainfalls in inches. \nSeeded\n - \nS = seeded, U = unseeded\nSeason\n - \nAutumn, Winter, Spring or Summer\nTE\n - \nrainfall in east target area\nTW\n - \nrainfall in west target area\nNC\n - \nrainfall in north control area\nSC\n - \nrainfall in south control area\nNWC\n - \nrainfall in north-west control area\n\n", "url": "http://www.statsci.org/data/oz/cloudtas.txt", "filename": "cloudtas", "name": "Cloud Seeding in Tasmania", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data are monthly averaged atmospheric pressure differences between Easter Island and Darwin, Australia. This difference drives the trade winds in the southern hemisphere. An annual cycle may be expected, and also longer cycles corresponding to the El Nino and to the Southern Oscillations. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPressure\n\nMonthly average atmospheric pressure differences\n", "url": "http://www.statsci.org/data/oz/enso.txt", "filename": "enso", "name": "Pressure Difference between Easter Island and Darwin", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily rainfall for Melbourne, from 1 January 1981 to 31 December 1990. Note that this series is 3 observations longer than the temperature series.", "url": "http://www.statsci.org/data/oz/melbrain.txt", "filename": "melbrain", "name": "Melbourne Daily Rainfall", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily minimum and maximum temperatures for Melbourne, from 1 January 1981 to 31 December 1990. The two February 29 leap days are excluded, so there are 10 x 365 = 3650 observations.", "url": "http://www.statsci.org/data/oz/melbtemp.txt", "filename": "melbtemp", "name": "Melbourne Temperatures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rainfall for each 6-day period for Adelaide from 1839 to 1977 inclusive. December 31 of the previous year is included in the non-leap years to make 15 6-day periods for each year. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1839 - 1977\n\nPeriod\n\n1 - 61 for each year\n\nRainfall\n\nRainfall in \n", "url": "http://www.statsci.org/data/oz/adelrain.txt", "filename": "adelrain", "name": "Adelaide Rainfall", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily 6am and 3pm temperatures for Brisbane for the decade 1977 - 1986. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDay\n\nDay as YearMonthDay\n\nTemp06\n\n6am Temperature in degrees Celsius x 10\n\nTemp15\n\n3pm Temperature in degrees Celsius x 10\n", "url": "http://www.statsci.org/data/oz/bristemp.txt", "filename": "bristemp", "name": "Brisbane Temperatures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The columns in the data set represent the precipitation weighted mean concentrations of ions for the year 1986, for 47 sites in the United Kingdom. \n \nVariable\n \nDescription\n\nSite\n\nSite number \n\nRain\n\nRain (measured in mm) \n\nH\n\nH+ \n\nSO4\n\nSO4-2 \n\nNO3\n\nNO3- \n\nNH4\n\nNH4+ \n\nx\n\nx-coordinate (measured in cm) \n\ny\n\ny-coordinate (cm) \n\nThe measurement of NH4+ for site number 35 was not available and is represented by NA in the data set. The x- and y-coordinates were measured in cm from a map of the UK. 
", "url": "http://www.statsci.org/data/general/rainuk.txt", "filename": "rainuk", "name": "Acid Rain in the UK", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Weather" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ocean swell produces spectacular eruptions of water through a hole in the cliff at Kiama, about 120km south of Sydney, known as the Blowhole. The times at which 65 successive eruptions occurred from 1340 hours on 12 July 1998 were observed using a digital watch. \nJim Irish writes \nAnyone who has visited the Blowhole more than once knows that the rate and volume of eruptions varies. This variation occurs at several timescales. We might expect that part is explained by the tides, so that eruptions are more frequent and spectacular when the tide is very high, and eruptions obviously depend on the presence of a large ocean swell generated by prolonged strong winds over the ocean well offshore from Kiama. Hence, any stochastic model fitted to data observed over a short period of time is only applicable to that period, and perhaps a few hours either side of the observations. But we might infer from the model fitted to those data that a similar model applies more generally. ", "url": "http://www.statsci.org/data/oz/kiama.txt", "filename": "kiama", "name": "Kiama Blowhole Eruptions", - "number_format": 31, - "remove_quotes": true, "separator": "auto", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data records the length of rivers in the South Island of New Zealand. The lengths are given in kilometres. 
The second variable, FlowsInto, indicates whether the river flows into the Pacific Ocean (0) or the Tasman Sea (1). A map of the island's rivers is included here.", "url": "http://www.statsci.org/data/oz/nzrivers.txt", "filename": "nzrivers", "name": "Length of New Zealand Rivers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on the concentration of polychlorinated biphenyl (PCB) residues in a series of lake trout from Cayuga Lake, NY, were reported in Bache et al (1972). The ages of the fish were accurately known, because the fish were annually stocked as yearlings and distinctly marked as to year class. Each whole fish was mechanically chopped, ground, and thoroughly mixed, and 5-gram samples taken. The samples were treated and PCB residues in parts per million (ppm) were estimated using column chromatography. \nBates and Watts (1988) use a linear model \nlog(PCB) = b1 + b2 Age1/3 \nbut they remark that the nonlinear model \nlog(PCB) = b1 + b2 Ageq \nis slightly better. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of trout (years)\n\nPCB\n\nPCB concentration (ppm)\n", "url": "http://www.statsci.org/data/general/troutpcb.txt", "filename": "troutpcb", "name": "PCB Concentrations in Lake Trout", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Jaffe, Parker and Wilson have investigated the concentration of several hydrophobic organic substances (such as hexachlorobenzene, chlordane, heptachlor, aldrin, dieldrin, endrin) in the Wolf River in Tennessee. 
Measurements were taken downstream of an abandoned dump site that had previously been used by the pesticide industry to dispose of its waste products. \nIt was expected that these hydrophobic substances might have a nonhomogeneous vertical distribution in the river because of differences in density between these compounds and water and because of the adsorption of these compounds on sediments, which could lead to higher concentrations on the bottom. It is important to check this hypothesis because the standard procedure of sampling at six-tenths of the depth could miss the bulk of these pollutants if the distribution were not uniform. \nGrab samples were taken with a La Motte-Vandorn water sampler of 1 litre capacity at various depths of the river. This sampler consists of a horizontal plexiglas tube of 7 centimetres diameter and a plunger on each side which shuts the sampler when the sampler is at the desired depth. Ten surface, 10 mid-depth and 10 bottom samples were collected, all within a relatively short period. Until they were analysed the samples were stored in 1-quart mason jars at low temperature. \nIn the analysis of the samples, a 250-millilitre water sample was taken from each mason jar and was extracted with 1 millilitre of either hexanes or petroleum ether. A sample of the extract was then injected into a gas chromatograph and the output was compared against standards of known concentrations. The test procedure was repeated two more times, injecting different samples of the extract in the gas chromatograph. 
The average aldrin and hexachlorobenzene (HCB) concentrations (in nanograms per liter) in these 30 samples are given in the data.", "url": "http://www.statsci.org/data/general/wolfrive.txt", "filename": "wolfrive", "name": "Wolf River Pollution", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The following data from the Statistical Abstract of the United States give the number of accidental oil spills at sea and the amount of oil lost in these spills for the years 1973 - 1985. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nYear\n\nYear\n\nSpills\n\nNumber of spills\n\nOil\n\nAmount of oil lost (thousands of metric tonnes)\n\n\n\n", "url": "http://www.statsci.org/data/general/spills.txt", "filename": "spills", "name": "Accidental Oil Spills", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data refer to a survey of the fauna on the sea bed lying between the coast of northern Queensland and the Great Barrier Reef. The sampling region covered a zone which was closed to commercial fishing, as well as neighbouring zones where fishing was permitted. In view of the large numbers and types of species captured in the survey the catch was summarized as a score, on a log weight scale, which combines information across species. Two such scores are available. The details of the survey, and a full analysis of the data, are in Poiner et al (1997). 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nZone\n\nan indicator for the closed (1) and open (0) zones \n\nYear\n\nan indicator of 1992 (0) or 1993 (1) \n\nLatitude\n\nlatitude of the sampling position \n\nLongitude\n\nlongitude of the sampling position \n\nDepth\n\nbottom depth \n\nScore1\n\ncatch score 1 \n\nScore2\n\ncatch score 2 \n", "url": "http://www.statsci.org/data/oz/reef.txt", "filename": "reef", "name": "Prawn Trawling in the Great Barrier Reef", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Waters" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Groups of dolphins were observed off the coast of Iceland near Keflavik in 1998. The data here give the time of the day and the main activity of the group, whether travelling quickly, feeding or socializing. The dolphin groups varied in size - usually feeding or socializing groups were larger than travelling groups. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nActivity\n\nMain activity of group: travelling (Travel), feeding (Feed) or socializing (Social)\n\nPeriod\n\nTime of the day: Morning, Noon, Afternoon or Evening\n\nGroups\n\nNumber of groups observed\n\n\n\n", "url": "http://www.statsci.org/data/general/dolpacti.txt", "filename": "dolpacti", "name": "Activities of Dolphin Groups", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Cairns (1988) analysed the relation between population and foraging area for seabird colonies. The following table presents their data for 22 black-legged kittiwake (a northern gull) colonies of Scotland's Shetland and Orkney Islands. 
Area is km2 and Population is the number of breeding pairs. ", "url": "http://www.statsci.org/data/general/kittiwak.txt", "filename": "kittiwak", "name": "Kittiwake Colonies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Some handicapped people have access to trained monkey helpers that can perform household tasks like switching things on and off. This data set gives the number of tasks each of nine monkeys can perform along with the number of years the monkeys have been working with handicapped people. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nName \n\nName of monkey\n\nYears \n\nNumber of years the monkey has worked with handicapped people\n\nTasks \n\nNumber of tasks the monkey can perform\n", "url": "http://www.statsci.org/data/general/monkeys.txt", "filename": "monkeys", "name": "Trained Monkeys", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Includes brain and body weight, life span, gestation time, time sleeping, and predation and danger indices for 62 species of mammals. Of interest is to predict the time spent sleeping and the proportion of sleep time in dream sleep. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nBodyWt\n\nbody weight (kg)\n\nBrainWt\n\nbrain weight (g)\n\nNonDreaming\n\nslow wave (\"nondreaming\") sleep (hrs/day)\n\nDreaming\n\nparadoxical (\"dreaming\") sleep (hrs/day)\n\nTotalSleep\n\ntotal sleep, sum of slow wave and paradoxical sleep (hrs/day)\n\nLifeSpan\n\nmaximum life span (years)\n\nGestation\n\ngestation time (days)\n\nPredation\n\npredation index (1-5)\n1 = minimum (least likely to be preyed upon); 5 = maximum (most likely to be preyed upon)\n\nExposure\n\nsleep exposure index (1-5)\n1 = least exposed (e.g. animal sleeps in a well-protected den); 5 = most exposed\n\nDanger\n\noverall danger index (1-5) (based on the above two indices and other information)\n1 = least danger (from other animals); 5 = most danger (from other animals)\n\n\n\n", "url": "http://www.statsci.org/data/general/sleep.txt", "filename": "sleep_", "name": "Sleep in Mammals", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Results of horse races at Eagle Farm, Brisbane, on 31 August 1998. The data, collected by Donald Forbes for his MS305 Data Analysis Project, give results for each horse in a sequence of 8 races. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPosition\n\nFinishing position\n\nStarters\n\nNumber of horses in race\n\nLast\n\nFinishing position in last race\n\nSince\n\nDays since last race\n\nNumber\n\nIdentifying number of horse in race\n\nCarried\n\nWeight carried\n\nWeight\n\nHandicap weight\n\nBarrier\n\nBarrier position at start of race\n\nDistance\n\nLength of race\n\nLengths\n\nNumber of lengths that horse finished from winner\n\nOdds\n\nStarting odds\n\nStarts\n\nNumber of races previously started in\n\nAge\n\nAge of horse in years\n\nRatio\n\nProportion of wins in previous starts\n", "url": "http://www.statsci.org/data/oz/horses.txt", "filename": "horses", "name": "Horse Racing at Eagle Farm", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the survival times (in 10 hour units) in a 3 x 4 factorial experiment, the factors being (a) three poisons and (b) four treatments. Each combination of the two factors is used for four animals, the allocation to animals being completely randomized. \n", "url": "http://www.statsci.org/data/general/poison.txt", "filename": "poison", "name": "Poison Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data was collected by Peter Drew and Matt Seidemann, statistics students at the Queensland University of Technology, in a subject taught by Dr Margaret Mackisack. 
Here is their description of the data and its collection: \n\"As keen fishermen out and about on a fairly regular basis, the common arguments arise between anglers on the best rigging set up for various conditions. We decided that upon our next group outing that we would back up our opinions with hard statistical facts. Our interest led us to test the most obvious variables in the fishing rig. \n\"Of interest were firstly the rod length, as between fisherman there always tends to be a variety of rods of different sizes; secondly the type of line, in that the larger the line it would be logical that the weight would increase; thirdly the sinker weight and how it affected the casting distance. \n\"In deciding on the three variables a 2^3 factorial design seemed obvious and for our purposes seemed to be quite adequate. So the question was placed as to whether or not the above variables in any combination made any difference to the overall distance the line was cast. The rods used were 6ft and 7ft two piece boat rods, fitted with the same type of spinning reel. The variable sinkers were 8oz and 12oz round ball sinkers and the line used was either the 1kg or 2kg line of the same make. \n\"The experiment was carried out on a day that was close to windless thus lowering the relative influence of the wind. The series of casts was conducted by the same person as were the measurements thus giving uniformity to the total experiment. A break of five minutes was timed between casts so as to allow the caster to allocate the same amount of energy to each cast. The rods were not rigged by the caster; a rigger would set the rod up with a combination of sinker, line and rod, and an effort was made to keep the caster oblivious to the changes in the rig. 
\n\"The experiment was conducted on the rugby ovals on Oleria St, Brookside (a western suburb of Brisbane) adjacent to the RSL (Returned Serviceman League club), which for all intents and purposes would be classified as a level surface. A line was placed at one end of the field and from it the caster would cast the rod as he would given normal fishing conditions. A spotter who was also the measurer would mark the point of impact of the sinker and from it measure back to the line from which it was cast. The distance observed was subsequently rounded up to the nearest 0.5 of a metre. Two runs were made of each combination. \n\"Possible improvements: Because of the time the rigging took, both casts with each rig were done at the same time. If we did it again it would be better to use random numbers to decide the order of all sixteen casts.\" ", "url": "http://www.statsci.org/data/oz/fishing.txt", "filename": "fishing_", "name": "Fishing Rod Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Four male and four female turtles had their plasma protein measured while they were well fed and after ten and twenty days of fasting. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1-8\n\nSex\n\nMale or Female\n\nFed\n\nPlasma protein while well fed (mg/ml)\n\nFasted10\n\nPlasma protein after fasting 10 days\n\nFasted20\n\nPlasma protein after fasting 20 days\n", "url": "http://www.statsci.org/data/general/turtles.txt", "filename": "turtles", "name": "Plasma Protein of Fasting Turtles", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Frogs of four species had their oxygen consumption measured at two temperatures and two exercise levels. There were two frogs of each species at each temperature, and each of the two was measured both at rest and during forced exercise. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1-16\n\nSpecies\n\n1-4\n\nTemperature\n\nLow or High\n\nRest\n\nOxygen consumption (ml O2/g/hr) at rest\n\nExercise\n\nOxygen consumption during exercise\n\n\n\n\n", "url": "http://www.statsci.org/data/general/frogs.txt", "filename": "frogs_", "name": "Oxygen Consumption of Frogs", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the age and the length of dugongs Dugong dugon (M�ller) captured near Townsville in north Queensland, Australia. The lifespan of a dugong is 50-60 years.\nThese data were working estimates. In particular the method of determining the age of dugong has changed somewhat since the data were recorded. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge in years\n\nLength\n\nLength in metres\n\n\n\n", + "description": "The data give the age and the length of dugongs Dugong dugon (Müller) captured near Townsville in north Queensland, Australia. The lifespan of a dugong is 50-60 years.\nThese data were working estimates. In particular the method of determining the age of dugong has changed somewhat since the data were recorded. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge in years\n\nLength\n\nLength in metres\n\n\n\n", "url": "http://www.statsci.org/data/oz/dugongs.txt", "filename": "dugongs", "name": "Age and Length of Dugongs near Townsville", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the sound pressure of sonar signals (\"clicks\") from a dolphin at various ranges to target. The measurements were made off the coast of Iceland near Keflavik in 1998. The pressure measurement given is \nraw pressure + a Range \nwhere a is a known constant depending on the water density. Pressure is expected to increase with distance even after the adjustment. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nRange \n\nDistance to dolphin in metres\n\nSoundPressure \n\nWater sound pressure adjusted for water density\n", "url": "http://www.statsci.org/data/general/dolphin.txt", "filename": "dolphin", "name": "Sound Pressure of Dolphin Sonar", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The observed responses are Geiger counter counts (times 10-4) used to measure the amount of radioactively tagged sulfate drug in the blood of a baboon named Brunhilda after an injection of the drug. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nHours\n\nTime in hours since injection\n\nSulfate\n\nGeiger counter counts � 10-4\n", + "description": "The observed responses are Geiger counter counts (times 10^-4) used to measure the amount of radioactively tagged sulfate drug in the blood of a baboon named Brunhilda after an injection of the drug. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nHours\n\nTime in hours since injection\n\nSulfate\n\nGeiger counter counts × 10^-4\n", "url": "http://www.statsci.org/data/general/brunhild.txt", "filename": "brunhild", "name": "Blood Sulfate in a Baboon Named Brunhilda", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The European rabbit Oryctolagus cuniculus is a major pest in Australia. A reliable method of age determination for rabbits caught in the wild would be of importance in ecological studies. In this study, the dry weight of the eye lens was measured for 71 free-living wild rabbits of known age. 
Eye lens weight tends to vary much less with environmental conditions than does total body weight, and therefore may be a much better indicator of age \nThe rabbits were born and lived free in an experimental 1.7 acre enclosure at Gungahlin, ACT. The birth data and history of each individual were accurately known. Rabbits in the enclosure depended on the natural food supply. In this experiment, 18 of the eye lenses were collected from rabbits that died in the course of the study from various causes such as coccidiosis, bird predation or starvation. The remaining 53 rabbits were deliberately killed, immediately after being caught in the enclosure or after they had been kept for some time in cages. The lenses were preserved and their dry weight determined. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of rabbit in days\n\nLens\n\nDry weight of eye lens in milligrams\n", "url": "http://www.statsci.org/data/oz/rabbit.txt", "filename": "rabbit", "name": "Age and Eye Lens Weight for Rabbits in Australia", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Insects were exposed to gaseous carbon disulphide for a period of 5 hours. Eight experiments were run with different concentrations of carbon disulphide. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDose\n\nDose of carbon disulphide\n\nExposed\n\nNumber of beetles exposed\n\nMortality\n\nNumber of beetles killed\n", "url": "http://www.statsci.org/data/general/beetles.txt", "filename": "beetles", "name": "Beetle Mortality", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Bill Venables writes: \nGroups of 20 snails were held for periods of 1, 2, 3 or 4 weeks in carefully controlled conditions of temperature and relative humidity. There were two species of snail, A and B, and the experiment was designed as a 4 by 3 by 4 by 2 completely randomized design. At the end of the exposure time the snails were tested to see if they had survived; the process itself is fatal for the animals. The object of the exercise was to model the probability of survival in terms of the stimulus variables, and in particular to test for differences between species. The data are unusual in that in most cases fatalities during the experiment were fairly small. \nSpecies\n \nSnail species A or B \nExposure\n \nExposure in weeks (4 levels) \nHumidity\n \nRelative humidity (4 levels) \nTemp\n \nTemperature in degrees Celsius (3 levels) \nDeaths\n \nNumber of deaths \nN \n \nNumber of snails exposed \n", "url": "http://www.statsci.org/data/oz/snails.txt", "filename": "snails_", "name": "Snail Mortality", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Activity of individually caged fiddler crabs under constant conditions for 225 consecutive hours (225 = 9*25 = 9*24 + 8). 
The activity scale is log(y+1) where y is mean minutes per hour. Examination of the data suggests that the logarithm was base 10. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nActivity\n\nlog(Minutes per hour+1)\n", "url": "http://www.statsci.org/data/general/fiddler.txt", "filename": "fiddler", "name": "Activity of Fiddler Crabs", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Tidal shrimps from the Brisbane River move up and down the tidal area (harbour pylon for example) in accordance with the movement of the tides. In this experiment shrimps were removed from their natural environment and isolated from environmental stimuli which would allow them to measure time. Their vertical position on an inclined slope was recorded every half hour starting 20 hours after removal and continuing for one week. Also recorded is the actual tide height during the same period, and six other measures of the shrimps' activity. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime \n\nHours since isolation\n\nVertical \n\nVertical displacement from original position\n\nY2 - Y7 \n\nOther activity measurements\n\nTide \n\nActual tide height\n", "url": "http://www.statsci.org/data/oz/shrimp.txt", "filename": "shrimp_", "name": "Movement of Tidal Shrimps in Isolation", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly total number of pigs slaughtered in Victoria, from January 1980 to August 1995.", "url": "http://www.statsci.org/data/oz/pigs.txt", "filename": "pigs", "name": "Pigs Slaughtered in Victoria", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Seed predators and herbivores can operate as strong selective agents in the evolution of plant defence. In this context, Delpino (1886) posed the \"ant-guard\" hypothesis to explain the role of extrafloral nectaries on plants. Extrafloral nectaries (EFN), distributed on species in over 80 plant families, occur on vegetative organs and \"outer floral parts\" not directly associated with pollination. Basically, the hypothesis states that extrafloral nectar production attracts pugnacious \"bodyguards\" (usually ants) which by their foraging activities deter the activities of herbivorous insects and seed predators. \nSince its inception, the ant-guard hypothesis has remained controversial. 
A few careful studies have experimentally demonstrated that ants attending EFN protect plants (von Wettstein, 1889; Inouye and Taylor, 1979; Schemske, 1980) while several recent studies showed no effect (O’Dowd and Catchpole, 1983; Tempel, 1983; Boecklen, 1984). O’Dowd and Catchpole (1983), for example, found that attendance of ants at EFN deterred other insects from developing flowerheads but that their presence decreased neither the numbers of seed predators nor damage to developing flowerheads. The object of this paper is to describe the ant-insect interactions by means of a simple probability model. \nFull experimental detail is provided by O'’Dowd and Catchpole (1983) but an outline is as follows. The plants studied were helichrysum bracteatum. Three sites were chosen in clearings in the Tallaganda State forest, 40 km. southeast of Canberra, and at each site ten pairs of plants were studied. Plants within each pair were of similar initial size and less than 1 metre apart. Within each pair, ants were excluded from one plant, while the other served as a control. The plants were censused once a week for 17 weeks over the reproductive season (from initiation of flowerheads through the postflowering phase). The data recorded for each plant included the number of flowerheads (capitula), the number of capitula with ants, and the total number of other insects. Different species of ants (predominantly Iridomyrmex spp.) and other insects were observed, but in the data here are pooled within each general category. 
\nTo clarify: the first column (Week) lists the week the observation was made, the second (Index) lists the index given to the pair of plants observed, the third (AntCap) is the number of capitula on the plant with ant access, the fourth (ExcCap) is the number of capitula on the plant excluded from ant access, the fifth (Ants) is the number of capitula that have ants present on them, the sixth column (AntIns) is the number of insects on the plant with ant access, and the seventh (ExcIns) is the number of insects on the plant excluded from ant access. Index number 1-10 refer to Site 1, 11-20 to Site 2 and 21-30 to Site 3.", + "description": "Seed predators and herbivores can operate as strong selective agents in the evolution of plant defence. In this context, Delpino (1886) posed the \"ant-guard\" hypothesis to explain the role of extrafloral nectaries on plants. Extrafloral nectaries (EFN), distributed on species in over 80 plant families, occur on vegetative organs and \"outer floral parts\" not directly associated with pollination. Basically, the hypothesis states that extrafloral nectar production attracts pugnacious \"bodyguards\" (usually ants) which by their foraging activities deter the activities of herbivorous insects and seed predators. \nSince its inception, the ant-guard hypothesis has remained controversial. A few careful studies have experimentally demonstrated that ants attending EFN protect plants (von Wettstein, 1889; Inouye and Taylor, 1979; Schemske, 1980) while several recent studies showed no effect (O'Dowd and Catchpole, 1983; Tempel, 1983; Boecklen, 1984). O'Dowd and Catchpole (1983), for example, found that attendance of ants at EFN deterred other insects from developing flowerheads but that their presence decreased neither the numbers of seed predators nor damage to developing flowerheads. The object of this paper is to describe the ant-insect interactions by means of a simple probability model. 
\nFull experimental detail is provided by O'Dowd and Catchpole (1983) but an outline is as follows. The plants studied were helichrysum bracteatum. Three sites were chosen in clearings in the Tallaganda State forest, 40 km. southeast of Canberra, and at each site ten pairs of plants were studied. Plants within each pair were of similar initial size and less than 1 metre apart. Within each pair, ants were excluded from one plant, while the other served as a control. The plants were censused once a week for 17 weeks over the reproductive season (from initiation of flowerheads through the postflowering phase). The data recorded for each plant included the number of flowerheads (capitula), the number of capitula with ants, and the total number of other insects. Different species of ants (predominantly Iridomyrmex spp.) and other insects were observed, but in the data here are pooled within each general category. \nTo clarify: the first column (Week) lists the week the observation was made, the second (Index) lists the index given to the pair of plants observed, the third (AntCap) is the number of capitula on the plant with ant access, the fourth (ExcCap) is the number of capitula on the plant excluded from ant access, the fifth (Ants) is the number of capitula that have ants present on them, the sixth column (AntIns) is the number of insects on the plant with ant access, and the seventh (ExcIns) is the number of insects on the plant excluded from ant access. 
Index number 1-10 refer to Site 1, 11-20 to Site 2 and 21-30 to Site 3.", "url": "http://www.statsci.org/data/oz/ants.txt", "filename": "ants", "name": "Ant-Insect Interactions on Flowerheads", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A new type of heart valve has been developed and is implanted in 63 dogs that have been raised on various levels of exercise. The numbers of valve transplants that succeed are recorded. Is the proportion of successful implants the same for dogs on all exercise regimens? Is there a trend with amount of exercise in the proportion of successful implants? \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nExercise\n\nAmount of exercise: 1=None, 2=Slight, 3=Moderate, 4=Vigorous\n\nImplant\n\n1=Successful, 2=Unsuccessful\n\nFrequency\n\nNumber of dogs\n\n\n\n", "url": "http://www.statsci.org/data/general/exervalv.txt", "filename": "exervalv", "name": "Heart Valves in Dogs on Different Exercise Regimens", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give growth measurements on Tammar wallabies (Macropus eugenii). Each line is a set of measurements on an animal at a particular time. Most lengths are in tenths of millimetres. The data from some animals is very fragmentary. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nAnim\n\nAnimal number\n\nSex\n\n1=male, 2=female\n\nLoca\n\nLocation of animal\n\nLeng\n\nLength of animal (tenths of a millimetre)\n\nHead\n\nHead length\n\nEar\n\nEar Length\n\nArm\n\nArm length\n\nLeg\n\nLeg length\n\nPres\n\nPes (foot) length\n\nTail\n\nTail length\n\nWeight\n\nWeight (tenths of a gram)\n\nAge\n\nAge in days from birth\n", "url": "http://www.statsci.org/data/oz/wallaby.txt", "filename": "wallaby", "name": "Dryandra Tammar WallabyGrowth of Tammar Wallabies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Following the Second World War, D. L. Serventy carried out a detailed study of the lifecycle of the Tasmanian muttonbird (Puffinus tenuirostris, often called the short-tailed shearwater). The data here concerns the growth pattern of fledgling birds of this species. \nAfter the eggs hatch, the parent birds spend much time away from the nest, and with increasing time their returns become rarer and rarer. When they return the young birds feed copiously, and there is very rapid weight-gain; whilst they are absent, the offspring loses weight. The result is not a smooth growth curve such as one finds in most measurements in developing animals and birds, but a 'sawtooth' effect. The data were collected in 1954 as weighings each morning of two fledgling chicks on Fisher Island, Bass Strait, and each set terminates on the day the chick left the nest. \nMuch of the interest in these curves comes not from the description they give of the weight of the chick, but from the information they contain on the feeding patterns of the parents. 
There are three obvious features of the data; the timing of the feeds and the size of the feeds when they occur, both of which represent aspects of the feeding pattern of the parents; and the loss in weight of the chicks between feeds. Henstridge and Tweedie (1984) proposed a model, similar to those used in storage theory, which describes each of these phenomena separately.", "url": "http://www.statsci.org/data/oz/muttonbi.txt", "filename": "muttonbi", "name": "Growth of Tasmanian Muttonbirds", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dimensions in millimetres are given of two samples of jellyfish from Hawkesbury River in New South Wales, Australia. One of the samples came from Dangar Island and the other from Salamander Bay. The first column contains a \"D\" if the measurement came from Dangar Island and a \"S\" if it came from Salamander Bay. The dimensions measured were length and width. What can one learn from graphing the two principal components? Try graphing principal components of the logarithms of the measurements. Can the dimensions determine the location?", "url": "http://www.statsci.org/data/oz/jellfish.txt", "filename": "jellfish", "name": "Dimensions of Jellyfish", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A study was conducted concerning the counts of lesions produced on membranes of chick embryos by viruses of the pox group. The data give the numbers of lesions formed at a series of dilutions of the viral medium. 
\n\n\n\n\nVariable\n\nDescriptions\n\n\n\n\nDilution\n\nDilution of viral medium, from 1 to 32\n\nCount\n\nNumber of lesions\n", "url": "http://www.statsci.org/data/general/pocklesi.txt", "filename": "pocklesi", "name": "Pock Lesions on Chick Embryos", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data comes from an experiment on induction of flowering of cyclamen. Plants of 4 varieties of cyclamen were subject to a combination of 6 temperature regimens and 4 levels of fertilization. The temperature regimens are combinations of five temperatures during the day (14, 16, 18, 20 and 26 degrees C) and four temperatures during the night (14, 16, 18 and 20 C). Not all the combinations of temperatures are present. The response is the number of flowers, which vary from 4 to 26, with mode 8. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nVariety\n\nVariety of cyclamen\n\nRegimem\n\nTemperature regimen (combination of the temperature during the day and the temperature during the night)\n\nDay\n\nTemperature during the day (Centigrade)\n\nNight\n\nTemperature during the night\n\nFertilizer\n\nLevel of fertilization\n\nFlowers\n\nNumber of flowers\n", "url": "http://www.statsci.org/data/general/cyclamen.txt", "filename": "cyclamen", "name": "Number of Cyclamen Flowers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In an experiment where pregnant mice were exposed to the herbicide 2,4,5-T (the active component in Agent Orange), the number of fetal implants in utero were recorded. 
The data give the frequency distribution of implants at each of seven dose levels measured in mg/kg of body weight. \nOn days 6-14 after mating, pregnant dams were dosed by gavage with one of the doses of 2,4,5-T. Prior to giving birth, the dams were sacrificed and the number of viable, dead and reabsorbed foetuses in the uterus of the dam were determined. The data here gives the number of surviving viable implants. An outcome of zero implants cannot be distinguished from a non-pregnant outcome so any zero implant outcomes were excluded. \n\n\n\n\nVariable\n\nDescriptions\n\n\n\n\nDose\n\nDose of 2,4,5-T in mg/kg/day\n\nImplants\n\nNumber of surviving implants\n\nFrequency\n\nNumber of mice with that number of implants\n", "url": "http://www.statsci.org/data/general/fetaimpl.txt", "filename": "fetaimpl", "name": "Fetal Implants in Mice Utero", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Animals" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Risk and Sammarco (1991) found that the density of the Great Barrier Reef coral Porites lobata increases with distance from the Australian shore, due to differences between inshore and offshore environments. They made three measurements at each of nine reefs at various distances from the shore. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nReef\n\nName of reef\n\nDistance\n\nDistance to shore (km)\n\nDensity\n\nCoral head density (g/cm3)\n", "url": "http://www.statsci.org/data/oz/coralden.txt", "filename": "coralden", "name": "Density of Great Barrier Reef Coral Heads", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the volume (cubic feet), height (feet) and diameter (inches) (at 54 inches above ground) for a sample of 31 black cherry trees in the Allegheny National Forest, Pennsylvania. The data were collected in order to find an estimate for the volume of a tree (and therefore the timber yield), given its height and diameter. ", "url": "http://www.statsci.org/data/general/cherry.txt", "filename": "cherry", "name": "Volume of Black Cherry Trees", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data is from a dew-retting experiment in Ballarat 1942-43, in which flax was laid out under various climatic conditions and for various periods. Retting involves softening the flax stems by soaking in water, thus enabling the separation of the linen fibres from the wooden material by a process called scutching. The flax variety used was \"Liral Crown\". Two samples were taken from each trial and the ret loss, as a percentage, was calculated.
The other three variables are the mean daily rainfall (in points), the retting period (in days) and the mean daily temperature (in degrees Fahrenheit).", "url": "http://www.statsci.org/data/oz/retloss.txt", "filename": "retloss", "name": "Ret Loss in Flax", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A production plant cost-control engineer is responsible for cost reduction. One of the costly items in his plant is the amount of water used by the production facilities each month. He decided to investigate water usage by collecting seventeen observations on his plant's water usage and other variables. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nTemperature\n\nAverage monthly temperature (F)\n\nProduction\n\nAmount of production (M pounds)\n\nDays\n\nNumber of plant operating days in the month\n\nPersons\n\nNumber of persons on the monthly plant payroll\n\nWater\n\nMonthly water usage (gallons)\n", "url": "http://www.statsci.org/data/general/water.txt", "filename": "water_", "name": "Water Usage of Production Plant", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Ryan et al (1994) describe the data as follows: \nIn autumn, small winged fruit called samara fall off maple trees, spinning as they go. A forest scientist studied the relationship between how fast they fell and their \"disk loading\" (a quantity based on their size and weight). The samara disk loading is related to the aerodynamics of helicopters. \nThe data give the loadings and fall velocities for fruit from three trees.
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTree\n\n1 to 3\n\nLoading\n\nDisk loading\n\nVelocity\n\nFall velocity\n", "url": "http://www.statsci.org/data/general/samara.txt", "filename": "samara", "name": "Fall Velocities for Samara Fruit", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The yield of pasture regrowth was measured together with the number of days since last grazing. The measurements were done on different experimental units so it is reasonable to assume the errors independent. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDays\n\nDays since last grazing\n\nYield\n\nYield of pasture\n", "url": "http://www.statsci.org/data/general/regrowth.txt", "filename": "regrowth", "name": "Pasture Regrowth after Grazing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Herbicide bioassay is concerned with the reduction in plant growth as a function of the herbicide dose applied. This is of interest when developing new herbicides, assessing environmental effects on non-target species or estimating the residual herbicides in a treated soil before planting a new, herbicide susceptible crop. A typical experiment would comprise a series of doses ranging from ineffective to severely damaging to establish a dose-response relationship. In this experiment the callus area of a tissue culture of Brassica napus was measured corresponding to different doses of a sulfonylurea herbicide, metsulfuron methyl.
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nChlorsulfuron\n\nConcentration of herbicide in nmol/L\n\nCallus\n\nLogarithm of callus area\n", "url": "http://www.statsci.org/data/general/brassica.txt", "filename": "brassica", "name": "Response of Brassica napus to Chlorsulfuron", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data concerns the underground root system of eight separate apple trees. Three different root stocks are considered (Mark, MM106 and M26) and two plant spacings (4x2 meters and 5x3 meters). For each plant, soil core sampling units taken have been classified as belonging to an inner or outer zone. The response variable is the density of fine roots, also called the root length density, which can have zeros as well as continuous positive values. There are 511 observations, of which 193 or 38% have a zero response. \nThe design is not a full factorial design: plants 1 and 2 are tested only with the Mark root stock and at a spacing of 5x3; plants 3 and 4 are tested only with Mark root stock at a spacing of 4x2; plants 5 and 6 are tested only with root stock MM106 at a spacing of 5x3; and plants 7 and 8 are tested only with M26 root stock at a spacing of 4x2. The Mark root stock is tested at both plant spacings but the MM106 only at 5x3 and M26 only at 4x2. So there are four unique treatment combinations: Mark stock at 5x3 and 4x2, MM106 at 5x3, and M26 at 4x2. \nIt is of interest to (1) compare effects of spacing within Mark rootstock, (2) compare root stocks within same spacing and (3) to look for any difference in RLD between inner and outer zones.
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPlant \n\n1 to 8\n\nStock\n\nRoot stock: Mark, MM106 or M26\n\nSpacing\n\nPlant spacing: 5x3 or 4x2 meters\n\nZone\n\nZone relative to the plant the soil core is taken from: Inner or Outer\n\nRLD\n\nRoot length density in cm/cm3\n", "url": "http://www.statsci.org/data/oz/fineroot.txt", "filename": "fineroot", "name": "Root Length Density of Apple Trees", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Plants" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data were collected from a mine in Cobar, NSW, Australia. At each of 38 sampling points, several measurements were taken, one of which is the 'true-width' of an ore-bearing rock layer. Also given are the co-ordinates t1 and t2 of the data sites. Green and Silverman (1994) use this data set to illustrate thin-plate splines for fitting a smooth surface.", "url": "http://www.statsci.org/data/oz/ore.txt", "filename": "ore", "name": "Wide of Ore-Bearing Layer", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The proportions of sand, silt and clay in soil samples are given for 8 contiguous sites. The sites extended over the crest and flank of a low rise in a valley underlain by marl near Albudeite in the province of Murcia, Spain. The sites were small areas of ground surface of uniform shape internally and delimited by relative discontinuities externally. Soil samples were obtained for each site at 11 random points within a 10m by 10m area centred on the mid-point of the site. All samples were taken from the same depth.
The data give the sand, silt and clay content of each sample, expressed as a percentage of the total sand, silt and clay content. \nThe purpose of the study by Wright and Wilson (1979) was to determine whether the sites could be differentiated on the basis of their soil composition. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSite \n\n1-8\n\nSand \n\nPercent sand\n\nSilt\n\nPercent silt\n\nClay\n\nPercent clay\n", "url": "http://www.statsci.org/data/general/murcia.txt", "filename": "murcia", "name": "Composition of Soil from Murcia Province, Spain", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Geology" } ] }, { "name": "Statistics", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the year of founding for 40 New Zealand wineries.", "url": "http://www.statsci.org/data/oz/wineries.txt", "filename": "wineries", "name": "Founding Dates of NZ Wineries", - "number_format": 31, - "remove_quotes": true, "separator": "auto", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the selling price at auction of 32 antique grandfather clocks. Also recorded is the age of the clock and the number of people who made a bid. 
", "url": "http://www.statsci.org/data/general/auction.txt", "filename": "auction", "name": "Selling Price of Antique Grandfather Clocks", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The two columns of the data are the prices and year purchased for 124 Mazda cars, as taken from the classified section of the Melbourne Age during the course of 1991. Hence the age of the car at the time can be calculated and used to model car price. ", "url": "http://www.statsci.org/data/oz/mazdas.txt", "filename": "mazdas", "name": "Age and Price of Mazda Cars", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data show the capital value and annual rental value of 96 domestic properties in Auckland in 1991. The aim was to explore their relationship in the hope of being able to predict capital value from rental value, thus the latter is the explanatory variable in this case.", "url": "http://www.statsci.org/data/oz/rentcap.txt", "filename": "rentcap", "name": "Capital and Rental Values of Auckland Properties", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the selling price at auction of 32 antique grandfather clocks. Also recorded is the age of the clock and the number of people who made a bid. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of the clock (years)\n\nBidders\n\nNumber of individuals participating in the bidding\n\nPrice\n\nSelling price (pounds sterling)\n", "url": "http://www.statsci.org/data/general/auction.txt", "filename": "auction_", "name": "Selling Price of Antique Grandfather Clocks", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data were collected to study the variation in rent paid in 1977 for agricultural land planted to alfalfa in Minnesota. The data include: \n\n\nVariable\n\nDescription\n\n\nRent\n \naverage rent per acre planted to alfalfa\nAllRent\n \naverage rent paid for all tillable land\nCows\n \ndensity of dairy cows (number per square mile)\nPasture\n \nproportion of farmland used as pasture\nLiming\n \nYes if liming is required to grow alfalfa; No otherwise\n", "url": "http://www.statsci.org/data/general/landrent.txt", "filename": "landrent", "name": "Rent for Land Planted to Alfalfa", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly observations on various share price and financial variables were recorded from October 1991 to August 1997. Data collected by Francine Pritchard and Glen Dixon for their MS305 data analysis project in 1997.
\n\n\nVariable\n\nDescription\n\n\nBank\n\nShare Price Index\nAllOrds\n\n\nDevelop\n\n\nMining\n\n\nGold\n\n\nBuild\n\n\nProp\n\n\nIndust\n\n\nEnergy\n\n\nFinance\n\n\nResource\n\n\nTransport\n\n\nRetail\n\n\nUnemploy\n\nUnemployment Rate\nCPI\n\nConsumer Price Index\nBankBill\n\n90 Day Bank Bill Interest Rate\n", "url": "http://www.statsci.org/data/oz/bankbill.txt", "filename": "bankbill", "name": "90 Day Bank Bills", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The following data was collected in the 1960s at a house in south-east England. The weekly gas consumption (in 1000 cubic feet) and the average outside temperature (in degrees Celsius) was recorded for 26 weeks before and 30 weeks after cavity-wall insulation had been installed. The house thermostat was set at 20�C throughout. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nInsulate\n\nBefore or After\n\nTemp\n\nAverage outside temperature (C)\n\nGas\n\nGas consumption (1000's of cubic feet)\n", + "description": "The following data was collected in the 1960s at a house in south-east England. The weekly gas consumption (in 1000 cubic feet) and the average outside temperature (in degrees Celsius) was recorded for 26 weeks before and 30 weeks after cavity-wall insulation had been installed. The house thermostat was set at 20°C throughout. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nInsulate\n\nBefore or After\n\nTemp\n\nAverage outside temperature (C)\n\nGas\n\nGas consumption (1000's of cubic feet)\n", "url": "http://www.statsci.org/data/general/insulgas.txt", "filename": "insulgas", "name": "House Insulation and Gas Consumption", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Age specific term life premium rates for a sum insured of $50,000 are given in the table. The first column is the age of insured, the next two columns are the rates for male smokers and non-smokers, and the last two columns are the rates for female smokers and non-smokers. The four separate sets of points may be plotted and cubic spline regression used to fit them.", "url": "http://www.statsci.org/data/oz/insure.txt", "filename": "insure", "name": "Insurance Premiums", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the Canadian automobile insurance experience for policy years 1956 and 1957 as of June 30, 1959. The data includes virtually every insurance company operating in Canada and was collated by the Statistical Agency (Canadian Underwriters' Association - Statistical Department) acting under instructions from the Superintendent of Insurance. The data given here is for private passenger automobile liability for non-farmers for all of Canada excluding Saskatchewan. \nThe variable Merit measures the number of years since the last claim on the policy. The variable Class is a collation of age, sex, use and marital status. 
The variables Insured and Premium are two measures of the risk exposure of the insurance companies. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nMerit\n\nMerit Rating:\n3 - licensed and accident free 3 or more years\n2 - licensed and accident free 2 years\n1 - licensed and accident free 1 year\n0 - all others\n\nClass\n\n1 - pleasure, no male operator under 25\n2 - pleasure, non-principal male operator under 25\n3 - business use\n4 - unmarried owner or principal operator under 25\n5 - married owner or principal operator under 25\n\nInsured\n\nEarned car years\n\nPremium\n\nEarned premium in 1000's\n(adjusted to what the premium would have been had all cars been written at 01 rates)\n\nClaims\n\nNumber of claims\n\nCost\n\nTotal cost of the claim in 1000's of dollars\n", "url": "http://www.statsci.org/data/general/carinsca.txt", "filename": "carinsca", "name": "Canadian Automobile Insurance Claims for 1957-1958", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give details of third party motor insurance claims in Sweden for the year 1977. \n\"In Sweden all motor insurance companies apply identical risk arguments to classify customers, and thus their portfolios and their claims statistics can be combined. The data were compiled by a Swedish Committee on the Analysis of Risk Premium in Motor Insurance. 
The Committee was asked to look into the problem of analyzing the real influence on claims of the risk arguments and to compare this structure with the actual tariff.\" \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nKilometres \n\nKilometres travelled per year\n1: < 1000\n2: 1000-15000\n3: 15000-20000\n4: 20000-25000\n5: > 25000\n\nZone \n\nGeographical zone\n1: Stockholm, Göteborg, Malmö with surroundings\n2: Other large cities with surroundings\n3: Smaller cities with surroundings in southern Sweden\n4: Rural areas in southern Sweden\n5: Smaller cities with surroundings in northern Sweden\n6: Rural areas in northern Sweden\n7: Gotland \n\nBonus\n\nNo claims bonus. Equal to the number of years, plus one, since last claim\n\nMake\n\n1-8 represent eight different common car models. All other models are combined in class 9\n\nInsured\n\nNumber of insured in policy-years\n\nClaims\n\nNumber of claims\n\nPayment\n\nTotal value of payments in Skr\n\n\n\n", "url": "http://www.statsci.org/data/general/motorins.txt", "filename": "motorins", "name": "Third Party Motor Insurance in Sweden", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the average claims for damage to the owner's car for privately owned and comprehensively insured vehicles in Britain in 1975. Averages are given in pounds sterling adjusted for inflation. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nOwnerAge\n\nPolicy-holder's age in years, categorized into 8 levels\n\nModel\n\nType of car, in 4 groups\n\nCarAge\n\nVehicle age in years, categorized into 4 levels\n\nNClaims\n\nNumber of claims\n\nAveCost\n\nAverage cost of each claim in pounds\n", "url": "http://www.statsci.org/data/general/carinsuk.txt", "filename": "carinsuk", "name": "British Car Insurance Claims for 1975", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly data relating to hotels, motels and guesthouses in Victoria, from January 1980 to June 1995. First column: total number of room nights occupied; Second column: total takings from accommodation. ", "url": "http://www.statsci.org/data/oz/motel.txt", "filename": "motel", "name": "Hotels, Motels and Guesthouses in Victoria", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data list the CPI (Consumer Price Index) figures for five countries from 1985 to 1994. The countries are Australia, Canada, New Zealand, the United Kingdom and the United States. 
Each index is based on the December Quarter 1993 (1000).", "url": "http://www.statsci.org/data/oz/cpifive.txt", "filename": "cpifive", "name": "CPI for Five Countries", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Quarterly CPI indices for Brisbane for food, clothing, housing etc, from June 1972 to September 1997. \nThe groups are: Food, Clothing, Housing, Household equipment and operation, Transportation, Tobacco and Alcohol, Health and personal care, Recreation and education, and All groups. The CPI are standardized so that the year 1989-90 is 100.0. ", "url": "http://www.statsci.org/data/oz/cpibris.txt", "filename": "cpibris", "name": "Brisbane Consumer Price Indices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Quarterly price indices for established homes in Australian capital cities, from June 1986 to June 1997. The price indices are standardized so that the year 1989-1990 is 100.0 for each city. 
", "url": "http://www.statsci.org/data/oz/houses.txt", "filename": "houses", "name": "House Price Indexes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Economics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The following description is from Lee (1994): \nRugby football is a popular quasi-amateur sport widely played in the United Kingdom, France, Australia, New Zealand and South Africa. It is rapidly gaining popularity in the US, Canada, Japan and parts of Europe. Recently, some of the rules of the game have been changed, with the aim of making play more exciting. In a study to examine the effects of the rule changes, Hollings and Triggs (1993) collected data on some recent games. \nTypically, a game consists of bursts of activity which terminate when points are scored, if the ball is moved out of the field of play or if an infringement of the rules occurs. In 1992, the investigators gathered data on ten international matches which involved the New Zealand national team, the All Blacks. The first five games studied were the last international games played under the old rules, and the second set of five were the first internationals played under the new rules. \nFor each of the ten games, the data list the successive times (in seconds) of each passage of play in that game. One interest is to see whether the passages were on average longer or shorter under the new rules. (The intention when the rules were changed was almost certainly to make the play more continuous.) 
", "url": "http://www.statsci.org/data/oz/rugby.txt", "filename": "rugby", "name": "Time of Passages of Play in Rugby", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Mark Taylor was Captain of the Australian test cricket team from May 1994 until February 1999. By the middle of 1997, the Australian team has won its 7 consecutive international test series, making Taylor the most successful Australian Captain in history. However his poor batting form from mid 1996 to mid 1997 gave the Australian selectors a dilemma in deciding whether his excellent Captaincy made up for the run of poor scores off his own bat. \nThe data below gives Mark Taylor's test scores from the middle of 1989 to the middle of 1995, a period over which he was batting well. Scores were made in Australia's first or second innings of each match. Sometimes Australia was not required to bat twice, in which case the second innings is marked as missing. There are also a number of `not outs'.", "url": "http://www.statsci.org/data/oz/taylor.txt", "filename": "taylor_", "name": "Mark Taylor's Test Cricket Scores", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Sydney-Hobart yacht race starts from Sydney Harbour on Boxing day (December 26) and finishes several days later in Hobart. It is a 630 nautical mile ocean race. The data give the winning times from 1945 to 1993, as they appeared in the Sydney Morning Herald on 24 December, 1994, plus the winning times for 1994 to 1997. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYacht\n\nName of winning yacht\n\nYear\n\nYear\n\nDays\n\nDays unit of winning time\n\nHours\n\nHours unit of winning time\n\nMinutes\n\nMinutes unit of winning time\n\nTime\n\nWinning time in minutes (should match time in Days, Hours and Minutes)\n", "url": "http://www.statsci.org/data/oz/sydhob.txt", "filename": "sydhob", "name": "Sydney to Hobart Yacht Race Winning Times", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Winning heights or distances (inches) for the High Jump, Discus and Long Jump events at the Olympics up to 1996. ", "url": "http://www.statsci.org/data/general/olympic.txt", "filename": "olympic", "name": "Olympic Records for High Jump, Discus and Long Jump", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the number of medals won by each medal-winning country in the 1992 Summary Olympic Games in Barcelona, Spain, and the 1994 Winter Olympic Games in Lillehammer, Norway. Also given is the population and latitude of each country. Griffiths et al write: \n... the media spent a lot of time discussing the number of medals won by each country's athletes. The implication was that the comparison was of some importance. However, larger countries would be expected to win more medals than smaller countries, simply because of their larger populations. \n... 
some viewers, especially those from the smaller countries, felt that the number of medals should be standardised to account for the very wide range of populations, and that a per capita number of medals for a country was a fairer comparison. Others felt that this was unfair to the countries with larger populations - that having twice as many people did not lead to twice as many medals. If standardisation is performed adequately, there should be no systematic relationship between the adjusted medal count and population. \nAlso countries further from the equator might be expected to do better in the winter olympics. \nThe data is incomplete in that countries with no medals are not included. These would be mostly smaller population countries. ", "url": "http://www.statsci.org/data/oz/medals.txt", "filename": "medals", "name": "Olympic Medals", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set was assembled by Rowan Todd and Mark McNaughton, two students studying Statistics at QUT in a class taught by Dr Margaret Mackisack. For a class project they decided to investigate the effect on football game attendance of various covariates. They collected data involving Saturday Australian Football League (AFL) matches at the Melbourne Cricket Ground (MCG). They looked only at matches during the normal home and away season (i.e. not including finals). They used statistics from all such games in 1993 and 1994 (nineteen relevant matches in 1993 and twenty-two in 1994). The response variable measured was attendance at the MCG, and after consideration, they came up with the following covariates: \n\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMCG \n\nAttendance at the MCG in 1000's.\n\nTemp \n\nTemperature. 
The forecast maximum temperature on the day of the match, in whole degrees C, found in The Weekend Australian.\n\nOther\n\nAttendance at other matches in 1000's. The sum of the attendances at other AFL matches in Melbourne and Geelong on the same day as the match in question.\n\nMembers\n\nMembership. The sum of the memberships of the two clubs whose teams were playing the match in question in 1000's.\n\nTop50\n\nNumber of players from the top fifty. The number of players in the top 50 in the AFL who happened to be playing in the match in question.\n\nDate\n\nDate of the match in the format dd/mm/yy.\n\nHome\n\nAbbreviation for home team.\n\nAway\n\nAbbreviation for away team.\n", "url": "http://www.statsci.org/data/oz/afl.txt", "filename": "afl", "name": "AFL Crowd Attendance at the MCG", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The following description is from Lee (1994): \nRugby football is a popular quasi-amateur sport widely played in the United Kingdom, France, Australia, New Zealand and South Africa. It is rapidly gaining popularity in the US, Canada, Japan and parts of Europe. Recently, some of the rules of the game have been changed, with the aim of making play more exciting. In a study to examine the effects of the rule changes, Hollings and Triggs (1993) collected data on some recent games. \nTypically, a game consists of bursts of activity which terminate when points are scored, if the ball is moved out of the field of play or if an infringement of the rules occurs. In 1992, the investigators gathered data on ten international matches which involved the New Zealand national team, the All Blacks. 
The first five games studied were the last international games played under the old rules, and the second set of five were the first internationals played under the new rules. \nFor each of the ten games, the data list the successive times (in seconds) of each passage of play in that game. One interest is to see whether the passages were on average longer or shorter under the new rules. (The intention when the rules were changed was almost certainly to make the play more continuous.) \n", "url": "http://www.statsci.org/data/oz/rugby.txt", "filename": "rugby_", "name": "Time of Passages of Play in Rugby", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the record-winning times for 35 hill races in Scotland, as reported by Atkinson (1986). The distance travelled and the height climbed in each race is also given. The data contains a known error - Atkinson (1986) reports that the record for Knock Hill (observation 18) should actually be 18 minutes rather than 78 minutes. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nRace\n\nName of race\n\nDistance\n\nDistance covered in miles\n\nClimb\n\nElevation climbed during race in feet\n\nTime\n\nRecord time for race in minutes\n", "url": "http://www.statsci.org/data/general/hills.txt", "filename": "hills_", "name": "Scottish Hill Races", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Investigators studied physical characteristics and ability in 13 football punters. Each volunteer punted a football ten times. 
The investigators recorded the average distance for the ten punts, in feet. They also recorded the average hang time (time the ball is in the air before the receiver catches it) for the ten punts, in seconds. In addition, the investigators recorded five measures of strength and flexibility for each punter: right leg strength (pounds), left leg strength (pounds), right hamstring muscle flexibility (degrees), left hamstring muscle flexibility (degrees), and overall leg strength (foot-pounds). From the study \"The relationship between selected physical performance variables and football punting ability\" by the Department of Health, Physical Education and Recreation at the Virginia Polytechnic Institute and State University, 1983. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDistance\n\nDistance travelled in feet\n\nHang\n\nTime in air in seconds\n\nR_Strength\n\nRight leg strength in pounds\n\nL_Strength\n\nLeft leg strength in pounds\n\nR_Flexibility\n\nRight leg flexibility in degrees\n\nL_Flexibility\n\nLeft leg flexibility in degrees\n\nO_Strength\n\nOverall leg strength in pounds\n", "url": "http://www.statsci.org/data/general/punting.txt", "filename": "punting", "name": "American Football Punters", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on 102 male and 100 female athletes collected at the Australian Institute of Sport, courtesy of Richard Telford and Ross Cunningham. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSport\n\nSport\n\nSex\n\nmale or female\n\nHt\n\nHeight in cm\n\nWt\n\nWeight in kg\n\nLBM\n\nLean body mass\n\nRCC\n\nRed cell count\n\nWCC\n\nWhite cell count\n\nHc\n\nHematocrit\n\nHg\n\nHemoglobin\n\nFerr\n\nPlasma ferritin concentration\n\nBMI\n\nBody mass index = weight/height^2\n\nSSF\n\nSum of skin folds\n\n%Bfat\n\n% body fat\n\n\n\n", "url": "http://www.statsci.org/data/oz/ais.txt", "filename": "ais_", "name": "Australian Institute of Sport", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data was collected by Grant Elliott, a statistics student at the Queensland University of Technology in a subject taught by Dr Margaret Mackisack. Here is his description of the data and its collection: \nLiving at a squash court spurred on the idea of this experiment. Frustrated playing squash one night, I thought that the squash ball I was playing with seemed to bounce and react differently to what I was previously used to. So I conducted this experiment on the squash ball, looking at the type of ball, temperature of the ball and the age of the ball. \nBall type: In this experiment I used a 'yellow dot' squash ball and a 'double x' squash ball. A 'yellow dot' is super slow and a 'double x' is termed extra super slow. \nTemperature: When playing with a squash ball it tends to heat up. So I took it to extremes where I had 'room temperature' and 'playing temperature'. To duplicate 'playing temperature' the ball was placed in a cup of boiling water for 45 sec. \nAge: I expected age to be my most significant factor. Squash balls, being a sealed ball, shouldn't vary when they get older, so I used a new ball and compared it to an old ball. 
\nProcedure: I first thought of dropping the balls from a set height and seeing how far they bounced against a tape measure. This idea was scrapped as too much error came into it because you couldn't accurately measure when the maximum height of the bounce was. I then thought of a ball machine. I set the ball machine up and measured how far back did the ball come off the front wall when shot out of the ball machine. This eliminated a lot of varying in my figures as the ball machine shoots the balls out at roughly the same speed and trajectory. It doesn't take all the varying out as I wouldn't know whether the ball machine does shoot it out at exactly the same speed, but it keeps variation to a minimum. \nCriticism: Measuring the distance from the wall was done by my friend and I. We both would watch from different angles and would see where the ball landed. This means our figures are probably out by a couple of centimetres. When the balls were dropped into the water I forgot to take some of them out after 45 sec. Also with some I moved them around in the water to get the heat distributed evenly but others I forgot to move as I was collecting and organising the next ball. Another criticism is the temperature of the water. I put new boiling water into the cup after 4 balls had been in it. Therefore the last ball to go in wouldn't be the same temperature as the first ball.", "url": "http://www.statsci.org/data/oz/squash.txt", "filename": "squash", "name": "Squash Ball Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dynamic and repetitive nature of running means that runners are particularly prone to over-use injuries such as lower back pain. 
Chronic pain is often caused by muscle imbalances, which result in faulty alignment of and abnormal stresses applied to the spinal column. Muscle imbalances originate as adapations in motor control due to pain or external stimuli, and are then reinforced and preserved by repetition. \nThis study, conducted by Physiotherapy student Andrew Mooney, examined the flexibility of four major muscle groups associated with movement of the hip, with particular attention to imbalances between the left and right sides or between the dominant and non-dominant sides. \nA total of 33 male subjects were included in the study. The subjects were divided into three groups: 11 runners with low back pain, 11 runners without low back pain and 11 sedentary individuals without low back pain. (Runners were recruited from the Ashgrove and Toowong athletics clubs, non runners from the University of Queensland and the general community. Runners with lower back pain were recruited first. Once this subject group was tested, subjects for the two control groups were recruited to match the runners with low back pain according to age, height and weight.) \nThe muscle groups examined were \nthe iliopsoas, \nthe rectus femoris, \nthe tensor fascia lata/iliotibial band (ITB/TFL), and \nthe hamstrings \nFor each muscle group, two measures of flexibility were used. The first, relative flexibility, was related to the range of movement of the joint before postural compensations occurred, and the second was a measure of the maximal functional length of the muscle. Relative flexibility and functional length were measured for each muscle group on both the left and right sides of the body. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject number, 1 - 33\n\nGroup\n\nPain, NoPain or Sedentary\n\nMatch\n\n1 - 11, indicating matched triples\n\nAge\n\nAge of subject in years\n\nHeight\n\nHeight of subject in cm\n\nWeight\n\nWeight of subject in kg\n\nDistance\n\nType of running event: Sprint, middle distance (Mid) or long distance (Long)\n\nYears\n\nNumber of years running\n\nDominant\n\nDominant side, Left or Right\n\nDF.Iliopsoas\n\nRelative flexibility of iliopsoas muscle on dominant side in degrees. Positive numbers indicate above the horizontal, negative numbers below the horizontal.\n\nDF.Rectus\n\nRelative flexibility of rectus femoris muscle on dominant side in degrees\n\nDF.ITBTFL\n\nRelative flexibility of ITB/TFL muscle on dominant side in degrees\n\nDF.Hamstring\n\nRelative flexibility of hamstring muscles on dominant side in degrees\n\nDL.Iliopsoas\n\nFunctional length of iliopsoas muscle on dominant side\n\nDL.Rectus\n\nFunctional length of rectus femoris muscle on dominant side\n\nDL.ITBTFL\n\nFunctional length of ITB/TFL muscle on dominant side\n\nDL.Hamstring\n\nFunctional length of hamstring muscles on dominant side\n\nNF.Iliopsoas\n\nRelative flexibility of iliopsoas muscle on nondominant side in degrees\n\nNF.Rectus\n\nRelative flexibility of rectus femoris muscle on nondominant side in degrees\n\nNF.ITBTFL\n\nRelative flexibility of ITB/TFL muscle on nondominant side in degrees\n\nNF.Hamstring\n\nRelative flexibility of hamstring muscles on nondominant side in degrees\n\nNL.Iliopsoas\n\nFunctional length of iliopsoas muscle on nondominant side\n\nNL.Rectus\n\nFunctional length of rectus femoris muscle on nondominant side\n\nNL.ITBTFL\n\nFunctional length of ITB/TFL muscle on nondominant side\n\nNL.Hamstring\n\nFunctional length of hamstring muscles on nondominant side\n", "url": "http://www.statsci.org/data/oz/backpain.txt", "filename": "backpain", "name": "Runners with Low Back Pain", - "number_format": 31, - 
"remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data come from the 1990 Pilot Surf/Health Study of NSW Water Board. The first column takes values 1 or 2 according to the recruit's perception of whether (s)he is a Frequent OCean Swimmer, the second column has values 1 or 4 according to recruit's usually chosen swimming location (1 for non-beach, 4 for beach), the third column has values 2 (aged 15-19), 3 (aged 20-25), or 4 (aged 25-29), the fourth column has values 1 (male) or 2 (female) and finally, the fifth column has the number of self-diagnosed ear infections that were reported by the recruit.", "url": "http://www.statsci.org/data/oz/earinf.txt", "filename": "earinf", "name": "Ear Infections in Swimmers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Sport" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "After purchasing a batch of flight helmets that did not fit the heads of many pilots, the NZ Airforce decided to mesure the headsizes of all recruits. Before this was carried out, information was collected to determine the feasibility of using cheap cardboard callipers to make the measurements, instead of metal ones which were expensive and uncomfortable. The data lists the head diameters of 18 recruits measured once using cardboard callipers and again using metal callipers. One question is whether there is any systematic difference between the two sets of callipers. One might also ask whether there is more variability in the cardboard callipers measurement than that of the metal callipers. 
", "url": "http://www.statsci.org/data/oz/nzhelmet.txt", "filename": "nzhelmet", "name": "Helmet Sizes for New Zealand Airforce", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These data were collected as part of a project for the Federal Office for Road Safety conducted by the Research Institute of Gender and Health at the University of Newcastle. There is evidence that women drivers who are involved in motor vehicle accidents are more likely than men to be injured. A possible reason is that women often drive smaller cars that provide less protection in a collision. One of the aims of the project was to examine preferences for cars among men and women and investigate the extent to which safety was a factor in determining preferences. \nThe survey was conducted by research assistants who asked people in car parks to participate and administered a structured questionnaire. They were instructed to obtain data from men and women with small, medium and large cars, with 50 people per group for a total of 300 respondents. (The sample size was based on power requirements for another part of the survey that involved anthropometric measurements.) The research assistants approached people in car parks of the University of Newcastle and nearby shopping centres during December 1997 and January 1998. \nThe data consist of 300 records each with 22 variables. 
The variables are: \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nID\n\nIdentification number of respondent\n\nAge\n\nAge of respondent (years)\n\nSex\n\n1=female, 2=male\n\nLicYr\n\nTime they have held a full driving licence, in years and months (years)\n\nLicMth\n\nTime they have held a full driving licence, in years and months (months)\n\nActCar\n\nMake, model and year of car most often driven, coded to size of car 1=small, 2=medium, 3=large\n\nKids5\n\nChildren under five, 1=yes, 2=no\n\nKids6\n\nChildren 6 to 16, 1=yes, 2=no\n\nPrefCar\n\nPreferred car, coded to size of car 1=small, 2=medium, 3=large\n\nCar15k\n\nPreferred type of car if cost $15000, 1=small new car; 2=large second-hand car\n\nReason\n\n1=safety, 2=reliability, 3=cost, 4=performance, 5=comfort, 6=looks\n\nCost\n\nHow important is cost when buying a car? 1=not important, 2=little importance, 3=important, 4=very important\n\nReliable\n\nHow important is reliability ...?\n\nPerform\n\nHow important is performance ...?\n\nFuel\n\nHow important is fuel consumption ...?\n\nSafety\n\nHow important is safety ...?\n\nAC/PS\n\nHow important is air conditioning/power steering ...?\n\nPark\n\nHow important is ease of parking ...?\n \nRoom\n \nHow important is space/roominess ...?\n \nDoors\n \nHow important is the number of doors ...?\n \nPrestige\n \nHow important is prestige/style ...?\n \nColour\n \nHow important is colour ...?\n", + "description": "These data were collected as part of a project for the Federal Office for Road Safety conducted by the Research Institute of Gender and Health at the University of Newcastle. There is evidence that women drivers who are involved in motor vehicle accidents are more likely than men to be injured. A possible reason is that women often drive smaller cars that provide less protection in a collision. 
One of the aims of the project was to examine preferences for cars among men and women and investigate the extent to which safety was a factor in determining preferences. \nThe survey was conducted by research assistants who asked people in car parks to participate and administered a structured questionnaire. They were instructed to obtain data from men and women with small, medium and large cars, with 50 people per group for a total of 300 respondents. (The sample size was based on power requirements for another part of the survey that involved anthropometric measurements.) The research assistants approached people in car parks of the University of Newcastle and nearby shopping centres during December 1997 and January 1998. \nThe data consist of 300 records each with 22 variables. The variables are: \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nID\n\nIdentification number of respondent\n\nAge\n\nAge of respondent (years)\n\nSex\n\n1=female, 2=male\n\nLicYr\n\nTime they have held a full driving licence, in years and months (years)\n\nLicMth\n\nTime they have held a full driving licence, in years and months (months)\n\nActCar\n\nMake, model and year of car most often driven, coded to size of car 1=small, 2=medium, 3=large\n\nKids5\n\nChildren under five, 1=yes, 2=no\n\nKids6\n\nChildren 6 to 16, 1=yes, 2=no\n\nPrefCar\n\nPreferred car, coded to size of car 1=small, 2=medium, 3=large\n\nCar15k\n\nPreferred type of car if cost $15000, 1=small new car; 2=large second-hand car\n\nReason\n\n1=safety, 2=reliability, 3=cost, 4=performance, 5=comfort, 6=looks\n\nCost\n\nHow important is cost when buying a car? 
1=not important, 2=little importance, 3=important, 4=very important\n\nReliable\n\nHow important is reliability ...?\n\nPerform\n\nHow important is performance ...?\n\nFuel\n\nHow important is fuel consumption ...?\n\nSafety\n\nHow important is safety ...?\n\nAC/PS\n\nHow important is air conditioning/power steering ...?\n\nPark\n\nHow important is ease of parking ...?\n \nRoom\n \nHow important is space/roominess ...?\n \nDoors\n \nHow important is the number of doors ...?\n \nPrestige\n \nHow important is prestige/style ...?\n \nColour\n \nHow important is colour ...?\n", "url": "http://www.statsci.org/data/oz/carprefs.txt", "filename": "carprefs", "name": "Car Preferences", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Do you use up the same amount of the soap in the shower each morning, or does it depend on the size of the bar of soap? This data was collected by Rex Boggs of Glenmore State High School in Rockhampton, Queensland. Rex writes: \nI had a hypothesis that the daily weight of my bar of soap in my shower wasn't a linear function, the reason being that the tiny little bar of soap at the end of its life seemed to hang around for just about ever. I wanted to throw it out, but I felt I shouldn't do so until it became unusable. And that seemed to take weeks. \nAlso I had recently bought some digital kitchen scales and felt I needed to use them to justify the cost. I hypothesised that the daily weight of a bar of soap might be dependent upon surface area, and hence would be a quadratic function. \nI kept records for three weeks (the life of the bar), and was amazed to find that the data was linear with a very high R2 value, until the last few days of its life. \nThe data ends at day 22. 
On day 23 the soap broke into two pieces and one piece went down the plughole ... \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDate \n\nDate of observation\n\nDay \n\nNumber of days since beginning of experiment\n\nWeight \n\nWeight of soap bar (grams)\n\n\n\n", "url": "http://www.statsci.org/data/oz/soap.txt", "filename": "soap", "name": "Bar of Soap", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data are for specimens of 50 varieties of timber, for modulus of rigidity, modulus of elasticity and air dried density, arranged in increasing order of magnitude of the density. ", "url": "http://www.statsci.org/data/oz/timber.txt", "filename": "timber", "name": "Timber Data", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A soft drink bottler is analyzing vending machine service routes in his distribution system. He is interested in predicting the amount of time required by the route driver to service the vending machines in an outlet. This service activity including stocking the machine with beverage products and minor maintenance or housekeeping. The industrial engineer responsible for the study has suggested that the two most important variables affecting the delivery time are the number of cases of product stocked and the distance walked by the route driver. 
The engineer has collected 25 observations on delivery time (minutes), number of cases and distance walked (feet).", "url": "http://www.statsci.org/data/general/softdrin.txt", "filename": "softdrin", "name": "Soft Drink Delivery Times", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Impact strength of insulation cuts in foot-pounds. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nLot\n\nLot of insulating material\n\nCut\n\nLengthwise (Length) or crosswise (Cross)\n\nStrength\n\nImpact strength in foot-pounds\n\n\n\n", "url": "http://www.statsci.org/data/general/insulate.txt", "filename": "insulate", "name": "Impact Strength Of Insulation Cuts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data was collected by Stewart Fischer and David Tippetts, statistics students at the Queensland University of Technology in a subject taught by Dr Margaret Mackisack. Here is their description of the data and its collection: \nThe experiment decided upon was to see if by using two different designs of paper aeroplane, how far the plane would travel. In considering this, the question arose, whether different types of paper and different angles of release would have any effect on the distance travelled. Knowing that paper aeroplanes are greatly influenced by wind, we had to find a way to eliminate this factor. We decided to perform the experiment in a hallway of the University, where the effects of wind can be controlled to some extent by closing doors. 
\nIn order to make the experimental units as homogeneous as possible we allocated one person to a task, so person 1 folded and threw all planes, person 2 calculated the random order assignment, measured all the distances, checked that the angles of flight were right, and checked that the plane release was the same each time. \nThe factors that we considered each had two levels as follows: \nPaper: A4 size, 80gms and 50gms\nDesign: High Performance Dual Glider, and Incredibly Simple Glider (patterns attached to original report)\nAngle of release: Horizontal, or 45 degrees upward. \nThe random order assignment was calculated using the random number function of a calculator. Each combination of factors was assigned a number from one to eight, the random numbers were generated and accordingly the order of the experiment was found. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nDistance\n\nDistance travelled in mm\n\nPaper\n\n80gms = 1, 50gms = 2\n\nAngle\n\nHorizontal = 1, 45 degrees = 2\n\nDesign\n\nHigh-performance = 1, Incredibly simple = 2\n\nOrder\n\nOrder in which the runs were conducted\n", "url": "http://www.statsci.org/data/oz/planes.txt", "filename": "planes", "name": "Paper Plane Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "\"Discovery Day\" is a day set aside by the United States Naval Postgraduate School in Monterey, California, to invite the general public into its laboratories. On Discovery Day, 21 October 1995, data on reaction time and hand-eye coordination were collected on 118 members of the public who visited the Human Systems Integration Laboratory. The age and sex of each subject were also recorded. Visitors were mostly in family groups. 
\nOne experiment which demonstrates motor learning and hand-eye coordination, is rotary pursuit tracking. The equipment used has a rotating disk with a 3/4\" target spot. The subject’s task is to maintain contact with the target spot with a metal wand. Trials were conducted for 15 seconds at a time, and the total contact time during the 15 seconds was recorded. Four trials were recorded for each of 108 subjects. \nThe target spot on the Circle tracker keeps constant speed in a circular path. The target spot on the Box tracker has varying speeds as it traverses the box, making the task potentially more difficult. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSex\n\nMale (M) or female (F)\n\nAge\n\nAge of subject in years\n\nShape\n\nBox or Circle\n\nTrial1\n\nContact time for 1st trial\n\nTrial2\n\nContact time for 2nd trial\n\nTrial3\n\nContact time for 3rd trial\n\nTrial4\n\nContact time for 4th trial\n\n\n\n", + "description": "\"Discovery Day\" is a day set aside by the United States Naval Postgraduate School in Monterey, California, to invite the general public into its laboratories. On Discovery Day, 21 October 1995, data on reaction time and hand-eye coordination were collected on 118 members of the public who visited the Human Systems Integration Laboratory. The age and sex of each subject were also recorded. Visitors were mostly in family groups. \nOne experiment which demonstrates motor learning and hand-eye coordination, is rotary pursuit tracking. The equipment used has a rotating disk with a 3/4\" target spot. The subject's task is to maintain contact with the target spot with a metal wand. Trials were conducted for 15 seconds at a time, and the total contact time during the 15 seconds was recorded. Four trials were recorded for each of 108 subjects. \nThe target spot on the Circle tracker keeps constant speed in a circular path. The target spot on the Box tracker has varying speeds as it traverses the box, making the task potentially more difficult. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSex\n\nMale (M) or female (F)\n\nAge\n\nAge of subject in years\n\nShape\n\nBox or Circle\n\nTrial1\n\nContact time for 1st trial\n\nTrial2\n\nContact time for 2nd trial\n\nTrial3\n\nContact time for 3rd trial\n\nTrial4\n\nContact time for 4th trial\n\n\n\n", "url": "http://www.statsci.org/data/general/tracking.txt", "filename": "tracking", "name": "Rotary Pursuit Tracking", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Experiment conducted by Bill Afantenou, second year statistics student at QUT. Here is his description of the experiment: \n``As I am a big pizza lover, I had much pleasure in involving pizza in my experiment. I became curious to find out the time it took for a pizza to be delivered to the front door of my house. I was interested to see how, by varying whether I ordered thick or thin crust, whether Coke was ordered with the pizza and whether garlic bread was ordered with the pizza, the response would be affected. \n``Because of my current financial status and limitation of time, I decided to have only two replicates, just to get a reasonable estimate of the variance. To decrease my financial burden I managed a deal with the manager of the pizza shop. I managed to get the pickup special, delivered to my house, which was the cheapest and smallest pizza made. I tried to repeat the experiment in as nearly as possible identical conditions to reduce `noise'. \n``I ordered the pizza from the same shop, being Domino's Pizza. To be consistent I ordered a Supreme pizza each time at approximately the same time of day. The response was measured from the time I closed the telephone to the time the pizza was delivered to the front door of my house. 
\n``I wrote each of the eight treatments on a piece of paper twice, put them all into a hat, mixed them up, and took them out one at a time to allocate the order in which each treatment was done. \n``As well as the response and treatment for each pizza delivery the actual hour of delivery was recorded, also the order in which the treatments were done and whether the driver was male or female.'' \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nCrust\n\nThin=0, Thick=1\n\nCoke\n\nNo=0, Yes=1\n\nBread\n\nGarlic bread. No=0, Yes=1\n\nDriver\n\nMale=M, Female=F\n\nHour\n\nTime of order in hours since midnight\n\nDelivery\n\nDelivery time in minutes\n", "url": "http://www.statsci.org/data/oz/pizza.txt", "filename": "pizza", "name": "Pizza Delivery Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An experiment is conducted to compare the energy requirements of three physical activities: running, walking and bicycle riding. Eight subjects are asked to run, walk and bicycle a measured distance, and the number of kilocalories expended per kilometre is determined for each subject during each activity. The activities are run in random order with time for recovery between activities. Each activity was monitored exactly once for each individual. 
", "url": "http://www.statsci.org/data/general/energy.txt", "filename": "energy", "name": "Energy Requirements Running, Walking and Cycling", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data set comprises the results of a saturated 215-11 fractional factorial with 4 observations per run. There were 15 controllable factors. The responses are the proportional shrinkage of four samples taken from 3000-foot lengths of speedometer cable manufactured at each set of conditions. The objective was to reduce the post-extrusion shrinkage of the speedometer casing. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nA\n\nline OD\n\nB\n\nliner die\n\nC\n\nliner material\n\nD\n\nliner line speed\n\nE\n\nwire braid type\n\nF\n\nbraiding tension\n\nG\n\nwire diameter\n\nH\n\nliner tension\n\nI\n\nliner temperature\n\nJ\n\ncosting material\n\nK\n\ncoating die type\n\nL\n\nmelt temperature\n\nM\n\nscreen pack\n\nN\n\ncooling method\n\nO\n\nline speed\n\ny1\n\nshrinkage value of first sample\n\ny2\n\nshrinkage value of second sample\n\ny3\n\nshrinkage value of third sample\n\ny4\n\nshrinkage value of fourth sample\n", "url": "http://www.statsci.org/data/general/speedome.txt", "filename": "speedome", "name": "Speedometer-Cable Shrinkage", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data are from a Proctor and Gamble study reported by Smith and Dubey (1964) on the amount of available chlorine in a product as a function of time since manufacture. 
Theoretical considerations lead to the model \nChlorine = a + (0.49 - a) exp{ -b (Weeks - 8) } \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nWeeks\n\nTime in weeks since manufacture\n\nChlorine\n\nAvailable chlorine\n", "url": "http://www.statsci.org/data/general/chlorine.txt", "filename": "chlorine", "name": "Available Chlorine", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the normalized magnitudes of the voice data when the vowel 'ooh' was sung at a pitch of 290 Hz. A Kurzweil K2500 Sampler/Synthesizer was used to capture and to store the data. \nThe frequencies found in the signal can be used to identify the phonetical vowel, and are of interest in voice synthesis, therapy and training. Further details are given in Oliver (1997). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMagnitude\n\nNormalized Magnitudes at equi-spaced time intervals\n\n\n\n", "url": "http://www.statsci.org/data/general/ooh.txt", "filename": "ooh", "name": "Voice Data from Singing the Vowel 'ooh'", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In studies aimed at characterising an author's style, samples of n words are taken and the number of function words in each sample counted. Often binomial or Poisson distributions are assumed to hold for the proportions of function words. The table shows the combined frequencies (x) of the articles \"the\", \"a\" and \"an\" in samples from Macauley's \"Essay on Milton\", taken from the Oxford edition of Macualey's (1923) literary essays. 
Non-overlapping samples were drawn from opening words of two randomly chosen lines from each of 50 pages of printed text, 10 word samples being simply extensions of 5 word samples. The data show clear evidence of underdispersion.", "url": "http://www.statsci.org/data/oz/wdcount.txt", "filename": "wdcount", "name": "Underdispersed Word Counts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Other" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the number of deaths in prison custody in Australia in each of the six years 1990 to 1995, given separately for Aboriginal and Torres Strait Islanders (indigenous) and others (non-indigenous). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1990 through 1995\n\nIndigenous\n\nYes = Aboriginal or Torres Strait Islander, No = Non-indigenous\n\nPrisoners\n\nTotal number in prison custody\n\nDeaths\n\nNumber of deaths in prison custody\n\nPopulation\n\nAdult population (15+ years)\n\n\n\n\nThe data were collected in response to the Royal Commission into Aboriginal Deaths in Custody, the final report of which was tabled in the Federal Parliament on the 9 May 1991. \nThe report of the Royal Commission has two streams. One is concerned with the ninety-nine Aboriginal and Torres Strait Islander deaths in custody which occurred throughout Australia during the period 1 January 1980 to 31 May 1989. Issues around the causes of death, culpability of custodians and their employers, and the prevention of future deaths were addressed in depth. The second stream concerned what the Royal Commission called the ‘underlying issues’: the social, cultural, and legal factors which, in the view of the Commissioners, had some bearing on the deaths. 
These underlying issues, as revealed from the chapter headings of the Royal Commission’s National Report, included the Legacy of History, Aboriginal Society Today, Relations With the Non-Aboriginal Community, The Harmful Use of Alcohol and Other Drugs, Schooling, Employment, Unemployment and Poverty, Housing and Infrastructure, Land Needs, and Self-determination. \nThe link between the Royal Commission’s discussion of the individual deaths investigated, the prevention of future deaths and the underlying issues, is its position on the over-representation of Indigenous people in custody in Australia. A central conclusion of the Royal Commission, illustrating this point, was as follows: \nThe work of the commission has established that Aboriginal people in custody do not die at a greater rate than non-Aboriginal people in custody. \nHowever, what is overwhelming different is the rate at which Aboriginal people come into custody, compared with the rate of the general community ... The ninety-nine who died in custody illustrate that over-representation and, in a sense, are the victims of it. \nThe conclusions are clear. Aboriginal people die in custody at a rate relevant to their proportion of the whole population which is totally unacceptable and which would not be tolerated if it occurred in the non-Aboriginal community. But this occurs not because Aboriginal people in custody are more likely to die than others in custody, but because the Aboriginal population is grossly over-represented in custody. Too many Aboriginal people are in custody too often (Johnston, 1991, Vol 1, p6).", + "description": "The data give the number of deaths in prison custody in Australia in each of the six years 1990 to 1995, given separately for Aboriginal and Torres Strait Islanders (indigenous) and others (non-indigenous). 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1990 through 1995\n\nIndigenous\n\nYes = Aboriginal or Torres Strait Islander, No = Non-indigenous\n\nPrisoners\n\nTotal number in prison custody\n\nDeaths\n\nNumber of deaths in prison custody\n\nPopulation\n\nAdult population (15+ years)\n\n\n\n\nThe data were collected in response to the Royal Commission into Aboriginal Deaths in Custody, the final report of which was tabled in the Federal Parliament on the 9 May 1991. \nThe report of the Royal Commission has two streams. One is concerned with the ninety-nine Aboriginal and Torres Strait Islander deaths in custody which occurred throughout Australia during the period 1 January 1980 to 31 May 1989. Issues around the causes of death, culpability of custodians and their employers, and the prevention of future deaths were addressed in depth. The second stream concerned what the Royal Commission called the 'underlying issues': the social, cultural, and legal factors which, in the view of the Commissioners, had some bearing on the deaths. These underlying issues, as revealed from the chapter headings of the Royal Commission's National Report, included the Legacy of History, Aboriginal Society Today, Relations With the Non-Aboriginal Community, The Harmful Use of Alcohol and Other Drugs, Schooling, Employment, Unemployment and Poverty, Housing and Infrastructure, Land Needs, and Self-determination. \nThe link between the Royal Commission's discussion of the individual deaths investigated, the prevention of future deaths and the underlying issues, is its position on the over-representation of Indigenous people in custody in Australia. A central conclusion of the Royal Commission, illustrating this point, was as follows: \nThe work of the commission has established that Aboriginal people in custody do not die at a greater rate than non-Aboriginal people in custody. 
\nHowever, what is overwhelmingly different is the rate at which Aboriginal people come into custody, compared with the rate of the general community ... The ninety-nine who died in custody illustrate that over-representation and, in a sense, are the victims of it. \nThe conclusions are clear. Aboriginal people die in custody at a rate relevant to their proportion of the whole population which is totally unacceptable and which would not be tolerated if it occurred in the non-Aboriginal community. But this occurs not because Aboriginal people in custody are more likely to die than others in custody, but because the Aboriginal population is grossly over-represented in custody. Too many Aboriginal people are in custody too often (Johnston, 1991, Vol 1, p6).", "url": "http://www.statsci.org/data/oz/custody.txt", "filename": "custody", "name": "Aboriginal Deaths in Custody", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Facts on the countries of Asia. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nCountry \n\nName\n\nArea \n\nTotal area (sq km)\n\nPopulation \n\nPopulation July 1995 est.\n\nLife \n\nLife Expectancy 1995 est. 
(years)\n\nGDP \n\nGDP 1994 (US$ billions)\n\nGDP/caput \n\nGDP per person 1994 est (US$)\n\n\n\n", "url": "http://www.statsci.org/data/oz/asia.txt", "filename": "asia", "name": "Countries of Asia", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Administration" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the survival status of passengers on the Titanic, together with their names, age, sex and passenger class. \nAbout half of the ages for the 3rd Class passengers are missing, although a good many of these could be filled in from the original source below. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nName\n\nRecorded name of passenger\n\nPClass\n\nPassenger class: 1st, 2nd or 3rd\n\nAge\n\nAge in years\n\nSex\n\nmale or female\n\nSurvived\n\n1 = Yes, 0 = No\n\n\n\n", "url": "http://www.statsci.org/data/general/titanic.txt", "filename": "titanic_", "name": "Passengers on the Titanic", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "For each of ten streets with bike lanes, investigators measured the distance between the centre line and a cylist in the bike lane. They used photography to determine the distance between the cyclist and a passing car on those same ten streets, recording all distances in feet. 
\n", "url": "http://www.statsci.org/data/general/cyclist.txt", "filename": "cyclist", "name": "Distance of Cars from Cyclists", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Hourly carbon monoxide (CO) averages were recorded on summer weekdays at a measurement station in Los Angeles. The station was established by the Environmental Protection Agency as part of a larger study to assess the effectiveness of the catalytic converter. It was located about 25 feet from the San Diego Freeway, which in this particular area is located at 145 degrees north. It was located such that winds from 145 to 325 degress (which in the summer are the prevalent wind directions during the daylight hours) transport the CO emissions from the highway toward the measurement station. Aggregate measurements were recored for each hour of the day 1 to 24. \nHour \n- \nhour of the day, from midnight to midnight \nCO \n- \naverage summer weekday CO concentration (parts per million) \nTD \n- \naverage weekday traffic density (traffic count/traffic speed) \nWS \n- \naverage perpendicular wind-speed component,\nwind speed x cos(wind direction - 235 degrees) \n\nIt would be interesting to have wind speed and direction recorded separately. 
", "url": "http://www.statsci.org/data/general/cofreewy.txt", "filename": "cofreewy", "name": "Carbon Monoxide from a Freeway", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is a highly fractionated two-level factorial design employed as a screening design in an off-line welding experiment performed by the National Railway Corporation of Japan. There were 16 runs and 9 experimental factors. The response variable is the observed tensile strength of the weld, one of several quality characteristics measured. All other variables are at plus and minus levels. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nRods\n\nKind of welding rods\n\nDrying\n\nPeriod of drying\n\nMaterial\n\nWelded material\n\nThickness\n\nThickness\n\nAngle\n\nAngle\n\nOpening\n\nOpening\n\nCurrent\n\nCurrent\n\nMethod\n\nWelding method\n\nPreheating\n\nPreheating\n\nStrength\n\nTensile strength of the weld in kg/mm\n", "url": "http://www.statsci.org/data/general/welding.txt", "filename": "welding", "name": "Tensile Strength of Welds", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Military pilots sometimes black out when their brains are deprived of oxygen due to G-forces during violent maneuvers. Glaister and Miller (1990) produced similar symptoms by exposing volunteers’ lower bodies to negative air pressure, likewise decreasing oxygen to the brain. The data lists the subjects' ages and whether they showed syncopal blackout related signs (pallor, sweating, slow heartbeat, unconsciousness) during an 18 minute period. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\nInitials of the subject's name\n\nAge\n\nSubject's age in years\n\nSigns\n\nWhether subject showed blackout-related signs (0=No, 1=Yes)\n", + "description": "Military pilots sometimes black out when their brains are deprived of oxygen due to G-forces during violent maneuvers. Glaister and Miller (1990) produced similar symptoms by exposing volunteers' lower bodies to negative air pressure, likewise decreasing oxygen to the brain. The data lists the subjects' ages and whether they showed syncopal blackout related signs (pallor, sweating, slow heartbeat, unconsciousness) during an 18 minute period. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\nInitials of the subject's name\n\nAge\n\nSubject's age in years\n\nSigns\n\nWhether subject showed blackout-related signs (0=No, 1=Yes)\n", "url": "http://www.statsci.org/data/general/gforces.txt", "filename": "gforces", "name": "G-Induced Loss of Consciousness", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Travel" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the number of deaths cuased by firearms in Australia from 1983 to 1997, expressed as a rate per 100,000 of population. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear \n\nYear\n\nRate \n\nNumber of deaths caused by firearms per 100,000 population\n", "url": "http://www.statsci.org/data/oz/firearms.txt", "filename": "firearms", "name": "Deaths Caused by Firearms", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Criminologists are interested in the effect of punishment regimes on crime rates. This has been studied using aggregate data on 47 states of the USA for 1960. The data set contains the following columns: \n \nVariable\n \nDescription\n\nM\n\npercentage of males aged 14–24 in total state population\n\nSo\n\nindicator variable for a southern state \n\nEd\n\nmean years of schooling of the population aged 25 years or over\n\nPo1\n\nper capita expenditure on police protection in 1960 \n\nPo2\n\nper capita expenditure on police protection in 1959 \n\nLF\n\nlabour force participation rate of civilian urban males in the age-group 14-24\n\nM.F\n\nnumber of males per 100 females \n\nPop\n\nstate population in 1960 in hundred thousands\n\nNW\n\npercentage of nonwhites in the population \n\nU1\n\nunemployment rate of urban males 14–24 \n\nU2\n\nunemployment rate of urban males 35–39 \n\nWealth\n\nwealth: median value of transferable assets or family income\n\nIneq\n\nincome inequality: percentage of families earning below half the median income\n\nProb\n\nprobability of imprisonment: ratio of number of commitments to number of offenses\n\nTime\n\naverage time in months served by offenders in state prisons before their first release\n\nCrime\n\ncrime rate: number of offenses per 100,000 population in 1960\n", + "description": "Criminologists are interested in the effect of punishment regimes on crime rates. 
This has been studied using aggregate data on 47 states of the USA for 1960. The data set contains the following columns: \n \nVariable\n \nDescription\n\nM\n\npercentage of males aged 14\u201324 in total state population\n\nSo\n\nindicator variable for a southern state \n\nEd\n\nmean years of schooling of the population aged 25 years or over\n\nPo1\n\nper capita expenditure on police protection in 1960 \n\nPo2\n\nper capita expenditure on police protection in 1959 \n\nLF\n\nlabour force participation rate of civilian urban males in the age-group 14-24\n\nM.F\n\nnumber of males per 100 females \n\nPop\n\nstate population in 1960 in hundred thousands\n\nNW\n\npercentage of nonwhites in the population \n\nU1\n\nunemployment rate of urban males 14\u201324 \n\nU2\n\nunemployment rate of urban males 35\u201339 \n\nWealth\n\nwealth: median value of transferable assets or family income\n\nIneq\n\nincome inequality: percentage of families earning below half the median income\n\nProb\n\nprobability of imprisonment: ratio of number of commitments to number of offenses\n\nTime\n\naverage time in months served by offenders in state prisons before their first release\n\nCrime\n\ncrime rate: number of offenses per 100,000 population in 1960\n", "url": "http://www.statsci.org/data/general/uscrime.txt", "filename": "uscrime", "name": "Effect of Punishment Regimes on Crime Rates", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A number of homicide incidents in Australia have involved multiple killings. A multiple killing is defined as any incident where two or more persons are murdered. According to available literature, there have been 24 multiple killings by firearm between 1987 and 1996. These resulted in 128 deaths. 
The data give the number of multiple killings which have been recorded for the period 1987 to 28 April 1996. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear \n\n1987 - 1996.\n\nIncidents \n\nNumber of multiple killings\n\nDeaths \n\nTotal number of deaths\n\n\n\n\nThe data for the year 1996 include killings only up to and including 28 April.", "url": "http://www.statsci.org/data/oz/multkill.txt", "filename": "multkill", "name": "Multiple Killings Committed with a Firearm", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Crime" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An individual's critical flicker frequency is the highest frequency at which the flicker in a flickering light source can be detected. At frequencies above the critical frequency, the light source appears to be continuous even though it is actually flickering. This investigation recorded critical flicker frequency and iris colour of the eye for 19 subjects. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nColour\n\nEye colour: Brown, Green or Blue\n\nFlicker\n\nCritical flicker frequency in cycles/sec\n", "url": "http://www.statsci.org/data/general/flicker.txt", "filename": "flicker", "name": "Eye Colour and Flicker Frequency", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Population" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data were collected as part of a time study for Telecom, now known as Telstra. The purpose if the study was to model the total hours worked in a section of Telecom in terms of the counts of various tasks. 
It was hoped that such a model could be used to predict hours worked and hence staffing requirements in changing circumstances. The number of hours worked by employees in a fault reporting centre were recorded, together with the number of faults of each type which were recorded. \nEmployees often work on a flexitime system which allows them to build up time and to leave early every second Friday. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nHours\n\nNumber of hours worked\n\nByDa\n\nNumber of talks of a certain type\n\nPR\n\n\n\nRWT\n\nA type of fault variable \n\nFault\n\n\n\nSOA\n\nNumber of service orders of type A \n\nSOB\n\nNumber of service orders of type B \n\nSOC\n\nNumber of service orders of type C \n\nCable\n\n\n\nField\n\nField call \n\nHot\n\nHotline \n\nREST\n\n\n\nSpec\n\n\n\nApp\n\n\n\nProb\n\n\n\nSC\n\n\n\nHO\n\n\n\nMO\n\n\n\nDay\n\nDay of the week: 1-Monday, 2-Tuesday, 3-Wednesday, 4-Thursday, 5-Friday \n", "url": "http://www.statsci.org/data/oz/telecom.txt", "filename": "telecom", "name": "Telecom Work Measurement Study", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An industrial Taguchi experiment was performed to study the influence of several controllable factors on the mean value and the variation in the percentage of shrinkage of products made by injection moulding. For studying the variation, three noise factors were also included in the design. All factors were set at two levels. \nThe problem is a `nominal-is-best' problem where the aim is to reach a certain tartet for the percentage shrinkage, at the same time having as small as variation as possible about the target value. The design that was applied is a so-called Taguchi L8(27)-design with seven controllable factors. 
At each setting of the controllable factors, the noise factors were varied according to a Taguchi L4(23)-design. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nControllable Factors:\n\nCycle\n\nCycle time\n\nMould\n\nMould temperature\n\nCavity\n\nCavity thickness\n\nPressure\n\nHolding pressure\n\nSpeed\n\nInjection speed\n\nTime\n\nHolding time\n\nGate\n\nGate size\n\nNoise Factors:\n\nRegrind\n\nPercentage regrind\n\nMoisture\n\nMoisture content\n\nTemperature\n\nAmbient temperature\n\nResponse:\n\nShrinkage\n\nPercentage shrinkage\n", "url": "http://www.statsci.org/data/general/injmould.txt", "filename": "injmould", "name": "Injection Moulding Shrinkage", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the ambient temperature and the number of primary O-rings damaged for 23 of the 24 space shuttle launches before the launch of the space shuttle Challenger on January 20, 1986. (Challenger was the 25th shuttle. One engine was lost at sea and could not be examined.) Each space shuttle contains 6 primary O-rings. \nThe forecast temperate of the launching day of the Challenger was 31 degrees F. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTemp\n\nAmbient temperature\n\nDamaged\n\nNumber of O-rings damaged\n", "url": "http://www.statsci.org/data/general/challenger.txt", "filename": "challenger", "name": "Space Shuttle Challenger", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data consist of failures of a piece of electronic equipment operating in two modes. 
For each operating period, Mode1 is the time spent operating in one mode and Mode2 is the time spent operating in the other. The total number of failures recorded in each period is recorded. \n \n\n\n \nVariable \n \nDescription\n \n\n\n \nMode1 \n \nTime in operating mode 1\n \nMode2 \n \nTime in operating mode 2\n \nFailures\n \nNumber of failures\n \n\n\n", "url": "http://www.statsci.org/data/general/twomodes.txt", "filename": "twomodes", "name": "Failures of Electronic Equipment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Technology" } ] }, { "name": "Physics", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Why does the moon appear to be so much larger when it is near the horizon than when it is directly overhead? This question has produced a wide variety of theories from psychologists. An important early hypothesis was put forth by Holway and Boring (1940) who suggested that the illusion was due to the fact that when the moon was on the horizon, the observer looked straight at it with eyes level, whereas when it was at its zenith, the observer had to elevate his or her eyes as well as his or her head to see it. To test this hypothesis, Kaufman and Rock (1962) devised an apparatus that allowed them to present two artificial moons, one at the horizon and one at the zenith, and to control whether the subjects elevated their eyes or kept them level to see the zenith moon. The horizon, or comparison, moon was always viewed with eyes level. Subjects were asked to adjust the variable horizon moon to match the size of the zenith moon or vice versa. For each subject the ratio of the perceived size of the horizon moon to the perceived size of the zenith moon was recorded with eyes elevated and with eyes level. 
A ratio of 1.00 would represent no illusion. If Holway and Boring were correct, there should be a greater illusion in the eyes-elevated condition than in the eyes-level condition.", "url": "http://www.statsci.org/data/general/moon.txt", "filename": "moon", "name": "The Moon Illusion", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the magnitudes of a variable star at midnight on 600 consecutive nights. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nStar\n\nMagnitude on each night\n", "url": "http://www.statsci.org/data/general/star.txt", "filename": "star", "name": "Magnitudes of a Variable Star", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data gives a sequence of observations on the magnitude of a variable Cepheid star made from the Mount Stromlo Observatory near Canberra in Australia. The observations were made as part of the MACHO project. \nThe MACHO project monitors millions of stars every night with a dedicated telescope at Mount Stromlo Observatory. The collaboration is probing the halo of our galaxy in order to detect dark matter in the form of Massive Compact Halo Objects -- MACHOs. These are astronomical bodies that emit negligible visible light, such as dwarf or neutron stars, large planets, and black holes. Detection of a MACHO is achieved by observing its gravitational lensing effect on a chance background star as the MACHO crosses near the line of sight between the observer and this star. 
In order to detect a sufficiently large number of MACHOs, the project collects observations on a large number of distant stars over an extended period of time. Data are being collected daily over a 4-year period (weather permitting), on approximately 8 million stars in the Large Magellanic Cloud (LMC) and the bulge of the Milky Way. \nThis database is a valuable resource for many other types of astronomical research. It is the most comprehensive catalog of stars in the LMC and contains stars much dimmer than those covered by previous surveys. Temporal coverage is unusually long compared to most star surveys, which permits a comprehensive study of star variability, including long periods and transient phenomena. About 40,000 variable stars have been observed in the LMC and a similar number in the galactic bulge. \nVariable stars are stars for which the intensity of the emitted energy changes over time; for periodic variable stars the change of intensity is periodic over time. Common types of periodic variable stars include eclipsing binaries, RR Lyraes, and Cepheids. Cepheids are very bright stars with periods of 1-70 days. The light curve has an asymmetric shape, and rises more rapidly than it falls. Cepheids with periods of about 1 week tend to have a bump in the descending part of the curve. For periods of about 10 days, the bump is at the peak of the curve, and for longer periods it is on the rising part of the curve. The brightness changes are caused by periodic pulsation (contraction and expansion) of the stars and their outer layers. \nThere are numerous additional types of variable stars, and each of the categories above contains subcategories. For example, Beat Cepheids and Beat RR Lyrae oscillate at more than one frequency. Different classes of variable stars can be located in different regions of a plot of magnitude versus temperature or spectral type. 
For example, RR Lyrae and Cepheids lie on a strip called the \"instability strip.\" Different types of variable curves are classified also on the basis of the shapes of their light curves and the relationships of shapes to period, for example. As well as being important for studies of stellar structure and evolution, these classes are used to determine distances on a cosmic scale by means of the relationship of their periods to their magnitudes. \nObservations of these stars are typically made at rather irregular times, depending on observation schedules and sky conditions. Different observations have differing errors. All this makes determination of the periods and the shapes of the corresponding phased light curves an interesting statistical problem. \nThis particular star is a category 1 Cepheid (magnitude -9.166) at coordinates (1541.5,1395.1). Magnitudes were recorded in the blue band from 4500 to 6300 Angstroms. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nMagnitude\n\nDifferential Magnitude\n\nSD\n\nStandard deviation of the magnitude observation. A value of -99 indicates missing and negative value indicate unreliable observations.\n", "url": "http://www.statsci.org/data/oz/ceph1.txt", "filename": "ceph1", "name": "Magnitudes of Variable Star Cepheid 1", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data gives observations on the magnitude of a Cepheid variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. 
This particular star is Cepheid star number 2. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Angstroms\n\nRedSD\n\nStandard deviation of the red-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\n\n\n", "url": "http://www.statsci.org/data/oz/ceph2.txt", "filename": "ceph2", "name": "Magnitudes of Variable Star Cepheid 2", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data gives a sequence of observations on the magnitude of an eclipsing binary variable star made from the Mount Stromlo Observatory near Canberra in Australia. The observations were made as part of the MACHO project. \nThe MACHO project monitors millions of stars every night with a dedicated telescope at Mount Stromlo Observatory. The collaboration is probing the halo of our galaxy in order to detect dark matter in the form of Massive Compact Halo Objects -- MACHOs. These are astronomical bodies that emit negligible visible light, such as dwarf or neutron stars, large planets, and black holes. Detection of a MACHO is achieved by observing its gravitational lensing effect on a chance background star as the MACHO crosses near the line of sight between the observer and this star. In order to detect a sufficiently large number of MACHOs, the project collects observations on a large number of distant stars over an extended period of time. 
Data are being collected daily over a 4-year period (weather permitting), on approximately 8 million stars in the Large Magellanic Cloud (LMC) and the bulge of the Milky Way. \nThis database is a valuable resource for many other types of astronomical research. It is the most comprehensive catalog of stars in the LMC and contains stars much dimmer than those covered by previous surveys. Temporal coverage is unusually long compared to most star surveys, which permits a comprehensive study of star variability, including long periods and transient phenomena. About 40,000 variable stars have been observed in the LMC and a similar number in the galactic bulge. \nVariable stars are stars for which the intensity of the emitted energy changes over time; for periodic variable stars the change of intensity is periodic over time. Common types of periodic variable stars include eclipsing binaries, RR Lyraes, and Cepheids. Eclipsing binaries consist of two stars orbiting each other in a conformation relative to the observer such that brightness variability occurs as one star passes in front of the other in turn; as the stars may be of different brightness, the drop in light flux depends on which star is in the front. These stars have periods of between 3 hours and 24 years, although 0.5 to 10 days is the most common range. \nThere are numerous additional types of variable stars, and each of the categories above contains subcategories. For example, Beat Cepheids and Beat RR Lyrae oscillate at more than one frequency. Different classes of variable stars can be located in different regions of a plot of magnitude versus temperature or spectral type. For example, RR Lyrae and Cepheids lie on a strip called the \"instability strip.\" Different types of variable curves are classified also on the basis of the shapes of their light curves and the relationships of shapes to period, for example. 
As well as being important for studies of stellar structure and evolution, these classes are used to determine distances on a cosmic scale by means of the relationship of their periods to their magnitudes. \nObservations of these stars are typically made at rather irregular times, depending on observation schedules and sky conditions. Different observations have differing errors. All this makes determination of the periods and the shapes of the corresponding phased light curves an interesting statistical problem. \nThis particular star is a category 1 eclipsing binary (magnitude -10.26) at coordinates (1617.8, 669.35). Magnitudes were recorded in the blue band from 4500 to 6300 Angstroms. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nMagnitude\n\nDifferential Magnitude\n\nSD\n\nStandard deviation of the magnitude observation. A value of -99 indicates missing and negative value indicate unreliable observations.\n", "url": "http://www.statsci.org/data/oz/ecbi1041.txt", "filename": "ecbi1041", "name": "Magnitudes of an Eclipsing Binary Variable Star", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is RRab Lyrae star number 1061, a category 1 star with an asymmetric signal. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Angstroms\n\nRedSD\n\nStandard deviation of the red-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n", "url": "http://www.statsci.org/data/oz/rrl1061.txt", "filename": "rrl1061", "name": "Magnitudes of Variable Star RR Lyrae 1061", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is RRc Lyrae star number 1198, a category 1 star with a symmetric signal. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Angstroms\n\nRedSD\n\nStandard deviation of the red-band observation. 
A value of -99 indicates missing and negative values indicate unreliable observations.\n", "url": "http://www.statsci.org/data/oz/rrl1198.txt", "filename": "rrl1198", "name": "Magnitudes of Variable Star RR Lyrae 1198", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is RRc Lyrae star number 1263, a category 1 star with a symmetric signal. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Angstroms\n\nRedSD\n\nStandard deviation of the red-band observation. 
A value of -99 indicates missing and negative values indicate unreliable observations.\n\n\n\n", "url": "http://www.statsci.org/data/oz/rrl1263.txt", "filename": "rrl1263", "name": "Magnitudes of Variable Star RR Lyrae 1263", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Astronomy" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data are the result of a study involving the analysis of performance degradation data from accelerated tests. The response variable is dielectric breakdown strength in kilo-volts, and the predictor variables are time in weeks and temperature in degrees Celsius. The study can be viewed as an 8 by 4 factorial experiment. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nStrength\n\nDielectric breakdown strength in kilovolts\n\nTime\n\nDuration of testing in weeks (8 levels)\n\nTemperature\n\nTemperature in degrees Celsius (4 levels)\n", "url": "http://www.statsci.org/data/general/dialectr.txt", "filename": "dialectr", "name": "Dialectric Breakdown Strength", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Electronics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give time series measurements on waves emanating from a cylinder suspended in a tank of water. The waves are believed to show a high frequency vibration, which is an artifact of the experiment equipment, as well as lower frequency vibration which reflects forces acting on the cylinder. It is of interest to identify and to filter out the high frequency vibration. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nWaves\n\nRelative vertical displacement at equi-spaced times\n", "url": "http://www.statsci.org/data/general/waves.txt", "filename": "waves_", "name": "Forces on a Cylinder Suspended in Water", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Other" } ] }, { "name": "Chemistry", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the concentrations at equi-spaced times of an intermediate compound during a chemical experiment involving a catalyst. The experiment was conducted in the Department of Chemistry at the Australian National University. The compound is produced exponentially during the first stage of the experiment and then is consumed exponentially during the second stage. Theoretically the process can be described by a compartment model, and the expected concentration of the compound over time is described by a second order differential equation. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nConcentration\n\nConcentration of intermediate compound\n", "url": "http://www.statsci.org/data/oz/sargeson.txt", "filename": "sargeson", "name": "Chemical Experiment with Catalyst", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset is distributed with S-Plus as the dataframe ethanol. \nThe engine exhaust was analysed in an experiment in which ethanol was burned in a single cylinder automobile test engine. 
The response variable is NOx, the concentration of nitric oxide (NO) and nitrogen dioxide (NO2) in the engine exhaust, normalized by the work done by the engine. The explanatory variables are the compression ratio of the engine and the equivalence ratio at which the engine was run - a measure of the richness of the air/ethanol mix. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nNOx\n\nConcentration of nitric oxide (NO) and nitrogen dioxide (NO2)\n\nCompression\n\nCompression ratio\n\nEquivalence\n\nEquivalence ratio\n", "url": "http://www.statsci.org/data/general/ethanol.txt", "filename": "ethanol", "name": "Exhaust from Burning Ethanol", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "General" } ] } ] } diff --git a/data/datasets/Rdatasets.json b/data/datasets/Rdatasets.json index 28a9f7a37..2ccba5626 100644 --- a/data/datasets/Rdatasets.json +++ b/data/datasets/Rdatasets.json @@ -1,19349 +1,12897 @@ { "name": "Rdatasets", "categories": [ { "name": "Medicine", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A clinical trial to evaluate the efficacy of maintenance chemotherapy for acute myelogenous leukaemia was conducted by Embury et al. (1977) at Stanford University. After reaching a stage of remission through treatment by chemotherapy, patients were randomized into two groups. The first group received maintenance chemotherapy and the second group did not. The aim of the study was to see if maintenance chemotherapy increased the length of the remission. 
The data here formed a preliminary analysis which was conducted in October 1974.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/aml.csv", "filename": "aml", "name": "Remission Times for Acute Myelogenous Leukaemia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data consist of measurements made on patients with malignant melanoma. Each patient had their tumour removed by surgery at the Department of Plastic Surgery, University Hospital of Odense, Denmark during the period 1962 to 1977. The surgery consisted of complete removal of the tumour together with about 2.5cm of the surrounding skin. Among the measurements taken were the thickness of the tumour and whether it was ulcerated or not. These are thought to be important prognostic variables in that patients with a thick and/or ulcerated tumour have an increased chance of death from melanoma. Patients were followed until the end of 1977. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/melanoma.csv", "filename": "melanoma", "name": " Survival from Malignant Melanoma ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The treatment strategy for a patient diagnosed with cancer of the prostate depend highly on whether the cancer has spread to the surrounding lymph nodes. It is common to operate on the patient to get samples from the nodes which can then be analysed under a microscope but clearly it would be preferable if an accurate assessment of nodal involvement could be made without surgery. 
\nFor a sample of 53 prostate cancer patients, a number of possible predictor variables were measured before surgery. The patients then had surgery to determine nodal involvement. It was required to see if nodal involvement could be accurately predicted from the predictor variables and which ones were most important. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/nodal.csv", "filename": "nodal", "name": "Nodal Involvement in Prostate Cancer ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The remission data frame has 27 rows and 3 columns.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/remission.csv", "filename": "remission", "name": "Cancer Remission and Cell Activity ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A portion of an experiment to determine the limit of blank/limit of detection in a biochemical assay. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/LoBD.csv", "filename": "LoBD", "name": " Cancer drug data use to provide an example of the use of the skew power distributions. ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a case-control study of (o)esophageal cancer in Ille-et-Vilaine, France. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/esoph.csv", "filename": "esoph", "name": "Smoking, Alcohol and (O)esophageal Cancer", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains a case-control indicator and 23 SNPs. \nThe inter-marker distances (Morgan) are as follows \n0.000090, 0.000158, 0.005000, 0.000100, 0.000200, 0.000150, 0.000250, 0.000200, 0.000050, 0.000350, 0.000300, 0.000250, 0.000350, 0.000350, 0.000800, 0.000100, 0.000200, 0.000150, 0.000550, 0.006000, 0.000700, 0.001000 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/cf.csv", "filename": "cf", "name": "Cystic fibrosis data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data arise from 31 male patients who have been treated for superficial bladder cancer, and give the number of recurrent tumours during a particular time after the removal of the primary tumour, along with the size of the original tumour. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/bladdercancer.csv", "filename": "bladdercancer", "name": " Bladder Cancer Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Survival times in months after mastectomy of women with breast cancer. The cancers are classified as having metastized or not based on a histochemical marker. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/mastectomy.csv", "filename": "mastectomy", "name": " Survival Times after Mastectomy of Breast Cancer Patients ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "NCI microarray data. The data contains expression levels on 6830 genes from 64 cancer cell lines. Cancer type is also recorded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/NCI60.csv", "filename": "NCI60", "name": "NCI 60 Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The alloauto data frame has 90 rows and 5 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/KMsurv/alloauto.csv", "filename": "alloauto", "name": "data from Section 1.9", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The bcdeter data frame has 92 rows and 3 columns. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/KMsurv/bcdeter.csv", "filename": "bcdeter", "name": "data from Section 1.18", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data from the Connecticut Tumor Registry present age-adjusted numbers of melanoma skin-cancer incidences per 100,000 people in Connectict for the years from 1936 to 1972. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lattice/melanoma.csv", "filename": "H_melanoma", "name": " Melanoma skin cancer incidence ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame from a trial of 42 leukaemia patients. Some were treated with the drug 6-mercaptopurine and the rest are controls. The trial was designed as matched pairs, both withdrawn from the trial when either came out of remission. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/gehan.csv", "filename": "gehan", "name": " Remission Times of Leukaemia Patients ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame of data from 33 leukaemia patients. \nSurvival times are given for 33 patients who died from acute myelogenous leukaemia. Also measured was the patient's white blood cell count at the time of diagnosis. 
The patients were also factored into 2 groups according to the presence or absence of a morphologic characteristic of white blood cells. Patients termed AG positive were identified by the presence of Auer rods and/or significant granulation of the leukaemic cells in the bone marrow at the time of diagnosis. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/leuk.csv", "filename": "leuk", "name": " Survival Times and White Blood Counts for Leukaemia Patients ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Melanoma data frame has data on 205 patients in Denmark with malignant melanoma. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Melanoma.csv", "filename": "Melanoma", "name": " Survival from Malignant Melanoma ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Veteran's Administration lung cancer trial from Kalbfleisch & Prentice. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/VA.csv", "filename": "VA", "name": " Veteran's Administration Lung Cancer Trial ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A set of 146 patients with stage C prostate cancer, from a study exploring the prognostic value of flow cytometry.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/rpart/stagec.csv", "filename": "stagec", "name": "Stage C Prostate Cancer", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Cancer survival with ascorbate supplement ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CancerSurvival.csv", "filename": "CancerSurvival", "name": "Survival Times for Different Cancers", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Treatment results for leukemia patients \nA study involved 51 untreated adult patients with acute myeloblastic leukemia who were given a course of treatment, after which they were assessed as to their response. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Leukemia.csv", "filename": "Leukemia", "name": "Responses to Treatment for Leukemia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on recurrences of bladder cancer, used by many people to demonstrate methodology for recurrent event modelling. \nBladder1 is the full data set from the study. It contains all three treatment arms and all recurrences for 118 subjects; the maximum observed number of recurrences is 9. \nBladder is the data set that appears most commonly in the literature. It uses only the 85 subjects with nonzero follow-up who were assigned to either thiotepa or placebo, and only the first four recurrences for any patient. The status variable is 1 for recurrence and 0 for everything else (including death for any reason). The data set is laid out in the competing risks format of the paper by Wei, Lin, and Weissfeld. \nBladder2 uses the same subset of subjects as bladder, but formatted in the (start, stop] or Anderson-Gill style. Note that in transforming from the WLW to the AG style data set there is a quite common programming mistake that leads to extra follow-up time for 12 subjects: all those with follow-up beyond their 4th recurrence. This \"follow-up\" is a side effect of throwing away all events after the fourth while retaining the last follow-up time variable from the original data. The bladder2 data set found here does not make this mistake, but some analyses in the literature have done so; it results in the addition of a small amount of immortal time bias and shrinks the fitted coefficients towards zero. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/bladder.csv", "filename": "bladder", "name": "Bladder Cancer Recurrences", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Survival in patients with advanced lung cancer from the North Central Cancer Treatment Group. Performance scores rate how well the patient can perform usual daily activities. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/cancer.csv", "filename": "cancer", "name": "NCCTG Lung Cancer Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These are data from one of the first successful trials of adjuvant chemotherapy for colon cancer. Levamisole is a low-toxicity compound previously used to treat worm infestations in animals; 5-FU is a moderately toxic (as these things go) chemotherapy agent. There are two records per person, one for recurrence and one for death", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/colon.csv", "filename": "colon", "name": "Chemotherapy for Stage B/C colon cancer", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Survival in patients with Acute Myelogenous Leukemia. 
The question at the time was whether the standard course of chemotherapy should be extended ('maintenance') for additional cycles.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/leukemia.csv", "filename": "leukemia", "name": "Acute Myelogenous Leukemia survival data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Survival in patients with advanced lung cancer from the North Central Cancer Treatment Group. Performance scores rate how well the patient can perform usual daily activities. \nThe use of 1/2 for alive/dead instead of the usual 0/1 is a historical footnote. For data contained on punch cards, IBM 360 Fortran treated blank as a zero, which led to a policy within the section of Biostatistics to never use \"0\" as a data value since one could not distinguish it from a missing value. The policy became a habit, as is often the case; and the 1/2 coding endured long beyond the demise of punch cards and Fortran. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/lung.csv", "filename": "lung_", "name": "NCCTG Lung Cancer Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This simulated data set is based on a trial in acute myeloid leukemia. \nThis data set is used to illustrate multi-state survival curves. The correlation between within-subject event times strongly resembles that from an actual trial, but none of the actual data values are from that source. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/myeloid.csv", "filename": "myeloid", "name": "Acute myeloid leukemia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurement error example. Tumor histology predicts survival, but prediction is stronger with central lab histology than with the local institution determination. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/nwtco.csv", "filename": "nwtco", "name": "Data from the National Wilm's Tumor Study", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Survival in a randomised trial comparing two treatments for ovarian cancer", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/ovarian.csv", "filename": "ovarian", "name": "Ovarian Cancer Survival Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Randomised trial of two treatment regimens for lung cancer. 
This is a standard survival analysis data set.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/veteran.csv", "filename": "veteran", "name": "Veterans' Administration Lung Cancer study", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Obel (1975) about a retrospective study of ovary cancer carried out in 1973. Information was obtained from 299 women, who were operated for ovary cancer 10 years before. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/OvaryCancer.csv", "filename": "OvaryCancer", "name": "Ovary Cancer Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Oncology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Although all cases of AIDS in England and Wales must be reported to the Communicable Disease Surveillance Centre, there is often a considerable delay between the time of diagnosis and the time that it is reported. In estimating the prevalence of AIDS, account must be taken of the unknown number of cases which have been diagnosed but not reported. The data set here records the reported cases of AIDS diagnosed from July 1983 and until the end of 1992. The data are cross-classified by the date of diagnosis and the time delay in the reporting of the cases. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/aids.csv", "filename": "aids", "name": "Delay in AIDS Reporting in England and Wales", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "CD4 cells are carried in the blood as part of the human immune system. One of the effects of the HIV virus is that these cells die. The count of CD4 cells is used in determining the onset of full-blown AIDS in a patient. In this study of the effectiveness of a new anti-viral drug on HIV, 20 HIV-positive patients had their CD4 counts recorded and then were put on a course of treatment with this drug. After using the drug for one year, their CD4 counts were again recorded. The aim of the experiment was to show that patients taking the drug had increased CD4 counts which is not generally seen in HIV-positive patients.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/cd4.csv", "filename": "CD4", "name": "CD4 Counts for HIV-Positive Patients", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The aids data frame has 295 rows and 3 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/KMsurv/aids.csv", "filename": "aids_", "name": "data from Section 1.19", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The azt data frame has 45 rows and 4 columns. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/KMsurv/azt.csv", "filename": "azt", "name": "data from Exercise 4.7, p122", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data set from a study of Unstructured Treatment Interruption in HIV-infected adolescents in four institutions in the US. The main outcome is the HIV-1 RNA viral load, which is subject to censoring below the lower limit of detection of the assay (50 copies/mL). The censored observations are indicated by the variable RNAcens", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lmec/UTIdata.csv", "filename": "UTIdata", "name": "Data set for Unstructured Treatment Interruption Study", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on patients diagnosed with AIDS in Australia before 1 July 1991. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Aids2.csv", "filename": "Aids2", "name": " Australian AIDS Survival Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The CHAIN project was a longitudinal cohort study of people living with HIV in New York City, which was recruited in 1994 from a large number of medical care and social service agencies serving HIV in New York City. This subset of data pertain to the sixth round of interviews. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mi/CHAIN.csv", "filename": "CHAIN", "name": " Subset of variables from the CHAIN project ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "AIDS" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In 1961 Doll and Hill sent out a questionnaire to all men on the British Medical Register enquiring about their smoking habits. Almost 70% of such men replied. Death certificates were obtained for medical practitioners and causes of death were assigned on the basis of these certificates. The breslow data set contains the person-years of observations and deaths from coronary artery disease accumulated during the first ten years of the study. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/breslow.csv", "filename": "breslow", "name": "Smoking Deaths Among Doctors ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The co.transfer data frame has 7 rows and 2 columns. Seven smokers with chickenpox had their levels of carbon monoxide transfer measured on entry to hospital and then again after 1 week. The main question being whether one week of hospitalization has changed the carbon monoxide transfer factor. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/co.transfer.csv", "filename": "co_transfer", "name": "Carbon Monoxide Transfer ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A simple data set with only 6 observations. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/smoking.csv", "filename": "smoking", "name": "smoking", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 46 observations from 1963 to 1992 \nnumber of observations : 1380 \nobservation : regional \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Cigar.csv", "filename": "Cigar", "name": "Cigarette Consumption ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 48 observations from 1985 to 1995 \nnumber of observations : 528 \nobservation : regional \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Cigarette.csv", "filename": "Cigarette", "name": "The Cigarette Consumption Panel Data Set ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a meta-analysis on 
nicotine gum and smoking cessation ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/smoking.csv", "filename": "smoking2", "name": " Nicotine Gum and Smoking Cessation ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on age, smoking, and mortality from a one-in-six survey of the electoral roll in Whickham, a mixed urban and rural district near Newcastle upon Tyne, in the UK. The survey was conducted in 1972-1974 to study heart disease and thyroid disease. A follow-up on those in the survey was conducted twenty years later. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Whickham.csv", "filename": "Whickham", "name": "Data from the Whickham survey", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Mortality data over 20 years for 1314 women from Whickham, England \nTwenty-year mortality, smoking status, and age for 1314 women in Whickham, England. We have named this Whickham2 to distinguish it from Whickham, which is a file in the mosaicData package. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Whickham2.csv", "filename": "Whickham2", "name": "Whickham Health Study", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Cigarette Consumption Panel Data ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/CigarettesSW.csv", "filename": "CigarettesSW", "name": "Cigarette Consumption Panel Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Smoking" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Howard Grimes from the Botany Department, North Carolina State University, conducted an experiment for biochemical analysis of intracellular storage and transport of calcium across plasma membrane. Cells were suspended in a solution of radioactive calcium for a certain length of time and then the amount of radioactive calcium that was absorbed by the cells was measured. The experiment was repeated independently with 9 different times of suspension each replicated 3 times. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/calcium.csv", "filename": "calcium", "name": "Calcium Uptake Data ", "number_format": 0, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data are from an experiment which was designed to look for a relationship between a certain genetic characteristic and handedness. 
The 37 subjects were women who had a son with mental retardation due to inheriting a defective X-chromosome. For each such mother a genetic measurement of their DNA was made. Larger values of this measurement are known to be linked to the defective gene and it was hypothesized that larger values might also be linked to a progressive shift away from right-handedness. Each woman also filled in a questionnaire regarding which hand they used for various tasks. From these questionnaires a measure of hand preference was found for each mother. The scale of this measure goes from 1, indicating someone who always favours their right hand, to 8, indicating someone who always favours their left hand. Between these two extremes are people who favour one hand for some tasks and the other for other tasks. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/claridge.csv", "filename": "claridge", "name": "Genetic Links to Left-handedness ", "number_format": 0, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data consist of measurements of the length and breadth of the heads of pairs of adult brothers in 25 randomly sampled families. All measurements are expressed in millimetres. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/frets.csv", "filename": "frets", "name": "Head Dimensions in Brothers ", "number_format": 0, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data gives the positions of the individual caveolae in a square region with sides of length 500 units. This grid was originally on a 2.65mum square of muscle fibre. 
The data are those points falling in the lower left hand quarter of the region used for the dataset caveolae.dat in the spatial package by B.D. Ripley (1994). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/cav.csv", "filename": "cav", "name": "Position of Muscle Caveolae ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "79 urine specimens were analyzed in an effort to determine if certain physical characteristics of the urine might be related to the formation of calcium oxalate crystals. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/urine.csv", "filename": "urine", "name": "Urine Analysis Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Blackmore data frame has 945 rows and 4 columns. Blackmore and Davis's data on exercise histories of 138 teenaged girls hospitalized for eating disorders and 98 control subjects. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Blackmore.csv", "filename": "Blackmore", "name": "Exercise Histories of Eating-Disordered and Control Subjects", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Davis data frame has 200 rows and 5 columns. The subjects were men and women engaged in regular exercise. There are some missing data. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Davis.csv", "filename": "Davis", "name": "Self-Reports of Height and Weight", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The DavisThin data frame has 191 rows and 7 columns. This is part of a larger dataset for a study of eating disorders. The seven variables in the data frame comprise a \"drive for thinness\" scale, to be formed by summing the items. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/DavisThin.csv", "filename": "DavisThin", "name": "Davis's Data on Drive for Thinness", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Subset of data on migraine treatments collected by Tammy Kostecki-Dillon. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/KosteckiDillon.csv", "filename": "KosteckiDillon", "name": "Treatment of Migraine Headaches", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These contrived repeated-measures data are taken from O'Brien and Kaiser (1985). The data are from an imaginary study in which 16 female and male subjects, who are divided into three treatments, are measured at a pretest, postest, and a follow-up session; during each session, they are measured at five occasions at intervals of one hour. 
The design, therefore, has two between-subject and two within-subject factors. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/OBrienKaiser.csv", "filename": "OBrienKaiser", "name": "O'Brien and Kaiser's Repeated-Measures Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Robey data frame has 50 rows and 3 columns. The observations are developing nations around 1990. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Robey.csv", "filename": "Robey", "name": "Fertility and Contraception", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Contrived data on weight loss and self esteem over three months, for three groups of individuals: Control, Diet and Diet + Exercise. The data constitute a double-multivariate design. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/WeightLoss.csv", "filename": "WeightLoss", "name": " Weight Loss Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Random subset of the 1991 Arizona Medicare data for patients hospitalized subsequent to undergoing a CABG (DRGs 106, 107) or PTCA (DRG 112) cardiovascular procedure. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/azcabgptca.csv", "filename": "azcabgptca", "name": "azcabgptca", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data set relates to the hospital length of stay for patients having a CABG or PTCA (typel) heart procedure. The data comes from the 1995 Arizona Medicare data for DRG (Diagnostic Related Group) 112. Other predictors include gender(1=female) and age75 (1-age 75+). Type is labeled as 1=emergency or urgent admission; 0= elective. Length of stay (los) ranges from 1 to 53 days. \n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/azdrg112.csv", "filename": "azdrg112", "name": "azdrg112", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data come from the 1991 Arizona cardiovascular patient files. A subset of the fields was selected to model the differential length of stay for patients entering the hospital to receive one of two standard cardiovascular procedures: CABG and PTCA. CABG is the standard acronym for Coronary Artery Bypass Graft, where the flow of blood in a diseased or blocked coronary artery or vein has been grafted to bypass the diseased sections. PTCA, or Percutaneous Transluminal Coronary Angioplasty, is a method of placing a balloon in a blocked coronary artery to open it to blood flow. It is a much less severe method of treatment for those having coronary blockage, with a corresponding reduction in risk. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/azpro.csv", "filename": "azpro", "name": "azpro", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data come from the 1991 Arizona cardiovascular patient files. A subset of the fields was selected to model the differential length of stay for patients entering the hospital to receive one of two standard cardiovascular procedures: CABG and PTCA. CABG is the standard acronym for Coronary Artery Bypass Graft, where the flow of blood in a diseased or blocked coronary artery or vein has been grafted to bypass the diseased sections. PTCA, or Percutaneous Transluminal Coronary Angioplasty, is a method of placing a balloon in a blocked coronary artery to open it to blood flow. It is a much less severe method of treatment for those having coronary blockage, with a corresponding reduction in risk. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/azprocedure.csv", "filename": "azprocedure", "name": "azprocedure", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "From German health survey data for the year 1998 only. badhealth is saved as a data frame. Count models use numvisit as the response variable, 0 counts are included. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/badhealth.csv", "filename": "badhealth", "name": "badhealth", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are from the Canadian National Cardiovascular Disease registry called, FASTRAK. years covered at 1996-1998. They have been grouped by covariate patterns from individual observations. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/fasttrakg.csv", "filename": "fasttrakg", "name": "fasttrakg", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "grouped format of the lbw data. The observation level data come to us form Hosmer and Lemeshow (2000). Grouping is such that lowbw is the numerator, and cases the denominator of a binomial model, or cases may be an offset to the count variable, lowbw. Birthweights under 2500g classifies a low birthweight baby. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/lbwgrp.csv", "filename": "lbwgrp", "name": "lbwgrp", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The US national Medicare inpatient hospital database is referred to as the Medpar data, which is prepared yearly from hospital filing records. Medpar files for each state are also prepared. The full Medpar data consists of 115 variables. 
The national Medpar has some 14 million records, with one record for each hospitalization. The data in the medpar file comes from 1991 Medicare files for the state of Arizona. The data are limited to only one diagnostic group (DRG 112). Patient data have been randomly selected from the original data. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/medpar.csv", "filename": "medpar", "name": "medpar", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "German health registry for the years 1984-1988. Health information for years prior to health reform. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/rwm.csv", "filename": "rwm", "name": "rwm", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "German health registry for the year 1984. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/rwm1984.csv", "filename": "rwm1984", "name": "rwm1984", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "German health registry for the years 1984-1988. Health information for years immediately prior to health reform. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/rwm5yr.csv", "filename": "rwm5yr", "name": "rwm5yr", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Thirty patients were given an anesthetic agent maintained at a predetermined level (conc) for 15 minutes before making an incision. It was then noted whether the patient moved, i.e. jerked or twisted. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/anesthetic.csv", "filename": "anesthetic", "name": "Anesthetic Effectiveness", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The table shows, separately for males and females, the effect of pentazocine on post-operative pain profiles (average VAS scores), with (mbac and fbac) and without (mpl and fpl) preoperatively administered baclofen. Pain scores are recorded every 20 minutes, from 10 minutes to 170 minutes. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/gaba.csv", "filename": "gaba", "name": "Effect of pentazocine on post-operative pain (average VAS scores)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data set from Daedalus project. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/humanpower1.csv", "filename": "humanpower", "name": "Oxygen uptake versus mechanical power, for humans", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data in humanpower1 are from investigations (Bussolari 1987) designed to assess the feasibility of a proposed 119 kilometer human powered flight from the island of Crete – in the initial phase of the Daedalus project. Data are for five athletes – a female hockey player, a male amateur tri-athlete, a female amateur triathlete, a male wrestler and a male cyclist – who were selected from volunteers who were recruited through the news media, Data in humanpower2) are for four out of the 25 applicants who were selected for further testing, in the lead-up to the eventual selection of a pilot for the Daedalus project (Nadel and Bussolari 1988). ", + "description": "Data in humanpower1 are from investigations (Bussolari 1987) designed to assess the feasibility of a proposed 119 kilometer human powered flight from the island of Crete - in the initial phase of the Daedalus project. Data are for five athletes - a female hockey player, a male amateur tri-athlete, a female amateur triathlete, a male wrestler and a male cyclist - who were selected from volunteers who were recruited through the news media, Data in humanpower2) are for four out of the 25 applicants who were selected for further testing, in the lead-up to the eventual selection of a pilot for the Daedalus project (Nadel and Bussolari 1988). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/humanpower2.csv", "filename": "humanpower2", "name": "Oxygen uptake versus mechanical power, for humans", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Median blood pressure, as a fuction of salt intake, for each of 52 human populations. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/intersalt.csv", "filename": "intersalt", "name": "Blood pressure versus Salt; inter-population data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Deaths in London from measles: 1629 – 1939, with gaps. ", + "description": "Deaths in London from measles: 1629 - 1939, with gaps. ", "url": "http://vincentarelbundock.github.io/Rdatasets/doc/DAAG/measles.csv", "filename": "measles", "name": "Deaths in London from measles", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The mifem data frame has 1295 rows and 10 columns. 
This is the female subset of the 'monica' data frame ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/mifem.csv", "filename": "mifem", "name": "Mortality Outcomes for Females Suffering Myocardial Infarction", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The monica data frame has 6357 rows and 12 columns. Note that mifem is the female subset of this data frame. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/monica.csv", "filename": "monica", "name": "WHO Monica Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Deaths from \"flux\" or smallpox, measles, all causes, and ratios of the first two categories to total deaths. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/poxetc.csv", "filename": "poxetc", "name": "Deaths from various causes, in London from 1629 till 1881, with gaps", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Deaths from whooping cough, in London from 1740 to 1881. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/whoops.csv", "filename": "whoops", "name": "Deaths from whooping cough, in London", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is a matched case-control study dating from before the availability of conditional logistic regression. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/infert.csv", "filename": "infert", "name": "Infertility after Spontaneous and Induced Abortion", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A regular time series giving the luteinizing hormone in blood samples at 10 mins intervals from a human female, 48 samples. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/lh.csv", "filename": "lh", "name": " Luteinizing Hormone in Blood Samples ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data which show the effect of two soporific drugs (increase in hours of sleep compared to control) on 10 patients. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/sleep.csv", "filename": "sleep", "name": "Student's Sleep Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Standardized fertility measure and socio-economic indicators for each of 47 French-speaking provinces of Switzerland at about 1888. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/swiss.csv", "filename": "swiss", "name": "Swiss Fertility and Socioeconomic Indicators (1888) Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This data set gives the average heights and weights for American women aged 30–39. ", + "description": "This data set gives the average heights and weights for American women aged 30-39. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/women.csv", "filename": "women", "name": "Average Heights and Weights for American Women", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1986 \nnumber of observations : 485 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Doctor.csv", "filename": "Doctor", "name": "Number of Doctor Visits ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "a cross-section from 1977–1978 \nnumber of observations : 5190 \nobservation : individuals \ncountry : Australia ", + "description": "a cross-section from 1977-1978 \nnumber of observations : 5190 \nobservation : individuals \ncountry : Australia ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/DoctorAUS.csv", "filename": "DoctorAUS", "name": "Doctor Visits in Australia ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "a cross-section from 1977–1978 \nnumber of observations : 20186 ", + "description": "a cross-section from 1977-1978 \nnumber of observations : 20186 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/DoctorContacts.csv", "filename": "DoctorContacts", "name": "Contacts With Medical Doctor ", - "number_format": 31, - "remove_quotes": true, 
"separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Journal of Applied Econometrics data archive : http://qed.econ.queensu.ca/jae/ \nnumber of observations : 5574 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/MedExp.csv", "filename": "MedExp", "name": "Structure of Demand for Medical Care ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 4406 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/OFP.csv", "filename": "OFP", "name": "Visits to Physician Office ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1997 \nnumber of observations : 5999 \nobservation : households \ncountry : Vietnam ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/VietNamH.csv", "filename": "VietNamH", - "name": "Medical Expenses in Viet–nam (household Level) ", - "number_format": 31, - "remove_quotes": true, + "name": "Medical Expenses in Viet-nam (household Level) ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1997 \nnumber of observations : 27765 \nobservation : individuals \ncountry : Vietnam ", 
"url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/VietNamI.csv", "filename": "VietNamI", - "name": "Medical Expenses in Viet–nam (individual Level) ", - "number_format": 31, - "remove_quotes": true, + "name": "Medical Expenses in Viet-nam (individual Level) ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total monthly scripts for pharmaceutical products falling under ATC code A10, as recorded by the Australian Health Insurance Commission. July 1991 - June 2008. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/a10.csv", "filename": "a10", "name": "Monthly anti-diabetic drug sales in Australia from 1991 to 2008.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total monthly scripts for pharmaceutical products falling under ATC code H02, as recorded by the Australian Health Insurance Commission. Measured in millions of scripts. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/h02.csv", "filename": "h02", "name": "Monthly corticosteroid drug sales in Australia from July 1991 to June 2008.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data consist of observations on three variables for each of 212 men in a sample of Cardiff enumeration districts. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gamclass/bronchitis.csv", "filename": "bronchitis", "name": "Chronic bronchitis in a sample of men in Cardiff", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains eight ALDH2 markers and Japanese alcohlic patients (y=1) and controls (y=0). There are genotypes for 8 loci, with a prefix name (e.g., \"EXON12\") and a suffix for each of two alleles (\".a1\" and \".a2\"). \nThe eight markers loci follows the following map (base pairs) \nD12S2070 \n(> 450 000),\nD12S839 \n(> 450 000),\nD12S821 \n(~ 400 000),\nD12S1344 \n( 83 853),\nEXON12 \n( 0),\nEXON1 \n( 37 335),\nD12S2263 \n( 38 927),\nD12S1341 \n(> 450 000) \n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/aldh2.csv", "filename": "aldh2", "name": "ALDH2 markers and Alcoholism", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data set consist of 103 common (>5% minor allele frequency) SNPs genotyped in 129 trios from an European-derived population. These SNPs are in a 500-kb region on human chromosome 5q31 implicated as containing a genetic risk factor for Crohn disease. \n\nHowever it has been updated after the paper was published (posted on http://www.broad.mit.edu/humgen/IBD5/haplodata.html) \nAn example use of the data is with the following paper, Kelly M. Burkett, Celia M. T. Greenwood, BradMcNeney, Jinko Graham. Gene genealogies for genetic association mapping, with application to Crohn's disease. 
Front Genet 2013, 4(260) doi: 10.3389/fgene.2013.00260 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/crohn.csv", "filename": "crohn", "name": "Crohn's disease data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ohio data frame has 2148 rows and 4 columns. The dataset is a subset of the six-city study, a longitudinal study of the health effects of air pollution. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/geepack/ohio.csv", "filename": "ohio", "name": "Ohio Children Wheeze Status", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The respdis data frame has 111 rows and 3 columns. The study described in Miller et al. (1993) is a randomized clinical trial of a new treatment of respiratory disorder. The study was conducted in 111 patients who were randomly assigned to one of two treatments (active, placebo). At each of four visits during the follow-up period, the response status of each patient was classified on an ordinal scale. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/geepack/respdis.csv", "filename": "respdis", "name": "Clustered Ordinal Respiratory Disorder", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data are from a clinical trial of patients with respiratory illness, where 111 patients from two different clinics were randomized to receive either placebo or an active treatment. Patients were examined at baseline and at four visits during treatment. At each examination, respiratory status (categorized as 1 = good, 0 = poor) was determined. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/geepack/respiratory.csv", "filename": "respiratory", "name": "Data from a clinical trial comparing two treatments for a respiratory illness", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "John Arbuthnot (1710) used these time series data on the ratios of male to female christenings in London from 1629-1710 to carry out the first known significance test, comparing observed data to a null hypothesis. The data for these 81 years showed that in every year there were more male than female christenings. \nOn the assumption that male and female births were equally likely, he showed that the probability of observing 82 years with more males than females was vanishingly small (~ 4.14 x 10^{-25}). He used this to argue that a nearly constant birth ratio > 1 could be interpreted to show the guiding hand of a devine being. 
The data set adds variables of deaths from the plague and total mortality obtained by Campbell and from Creighton (1965). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Arbuthnot.csv", "filename": "Arbuthnot", "name": " Arbuthnot's data on male and female birth ratios in London from 1629-1710. ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Quetelet's data on chest measurements of 5738 Scottish Militiamen. Quetelet (1846) used this data as a demonstration of the normal distribution of physical characteristics. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/ChestSizes.csv", "filename": "ChestSizes", "name": " Chest measurements of 5738 Scottish Militiamen ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In 1852, William Farr, published a report of the Registrar-General on mortality due to cholera in England in the years 1848-1849, during which there was a large epidemic throughout the country. Farr initially believed that cholera arose from bad air (\"miasma\") associated with low elevation above the River Thames. John Snow (1855) later showed that the disease was principally spread by contaminated water. \nThis data set comes from a paper by Brigham et al. (2003) that analyses some tables from Farr's report to examine the prevalence of death from cholera in the districts of London in relation to the available predictors from Farr's table. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Cholera.csv", "filename": "Cholera", "name": " William Farr's Data on Cholera in London, 1849 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In the second issue of Biometrika, W. R. Macdonell (1902) published an extensive paper, On Criminal Anthropometry and the Identification of Criminals in which he included numerous tables of physical characteristics 3000 non-habitual male criminals serving their sentences in England and Wales. His Table III (p. 216) recorded a bivariate frequency distribution of height by finger length. His main purpose was to show that Scotland Yard could have indexed their material more efficiently, and find a given profile more quickly. \nW. S. Gosset (aka \"Student\") used these data in two classic papers in 1908, in which he derived various characteristics of the sampling distributions of the mean, standard deviation and Pearson's r. 
He said, \"Before I had succeeded in solving my problem analytically, I had endeavoured to do so empirically.\" Among his experiments, he randomly shuffled the 3000 observations from Macdonell's table, and then grouped them into samples of size 4, 8, ..., calculating the sample means, standard deviations and correlations for each sample", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Macdonell.csv", "filename": "Macdonell", "name": " Macdonell's Data on Height and Finger Length of Criminals, used by Gosset (1908) ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In the history of data visualization, Florence Nightingale is best remembered for her role as a social activist and her view that statistical data, presented in charts and diagrams, could be used as powerful arguments for medical reform. \nAfter witnessing deplorable sanitary conditions in the Crimea, she wrote several influential texts (Nightingale, 1858, 1859), including polar-area graphs (sometimes called \"Coxcombs\" or rose diagrams), showing the number of deaths in the Crimean from battle compared to disease or preventable causes that could be reduced by better battlefield nursing care. \nHer Diagram of the Causes of Mortality in the Army in the East showed that most of the British soldiers who died during the Crimean War died of sickness rather than of wounds or other causes. 
It also showed that the death rate was higher in the first year of the war, before a Sanitary Commissioners arrived in March 1855 to improve hygiene in the camps and hospitals.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Nightingale.csv", "filename": "Nightingale", "name": " Florence Nightingale's data on deaths from various causes in the Crimean War ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data frame PolioTrials gives the results of the 1954 field trials to test the Salk polio vaccine (named for the developer, Jonas Salk), conducted by the National Foundation for Infantile Paralysis (NFIP). It is adapted from data in the article by Francis et al. (1955). There were actually two clinical trials, corresponding to two statistical designs (Experiment), discussed by Brownlee (1955). The comparison of designs and results represented a milestone in the development of randomized clinical trials. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/PolioTrials.csv", "filename": "PolioTrials", "name": " Polio Field Trials Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Snow data consists of the relevant 1854 London streets, the location of 578 deaths from cholera, and the position of 13 water pumps (wells) that can be used to re-create John Snow's map showing deaths from cholera in the area surrounding Broad Street, London in the 1854 outbreak. 
Another data frame provides boundaries of a tesselation of the map into Thiessen (Voronoi) regions which include all cholera deaths nearer to a given pump than to any other. \nThe apocryphal story of the significance of Snow's map is that, by closing the Broad Street pump (by removing its handle), Dr. Snow stopped the epidemic, and demonstrated that cholera is a water borne disease. The method of contagion of cholera was not previously understood. Snow's map is the most famous and classical example in the field of medical cartography, even if it didn't happen exactly this way. (the apocryphal part is that the epidemic ended when the pump handle was removed.) At any rate, the map, together with various statistical annotations, is compelling because it points to the Broad Street pump as the source of the outbreak. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Snow.dates.csv", "filename": "Snow", "name": " John Snow's Map and Data on the 1854 London Cholera Outbreak ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Snow data consists of the relevant 1854 London streets, the location of 578 deaths from cholera, and the position of 13 water pumps (wells) that can be used to re-create John Snow's map showing deaths from cholera in the area surrounding Broad Street, London in the 1854 outbreak. Another data frame provides boundaries of a tesselation of the map into Thiessen (Voronoi) regions which include all cholera deaths nearer to a given pump than to any other. \nThe apocryphal story of the significance of Snow's map is that, by closing the Broad Street pump (by removing its handle), Dr. Snow stopped the epidemic, and demonstrated that cholera is a water borne disease. 
The method of contagion of cholera was not previously understood. Snow's map is the most famous and classical example in the field of medical cartography, even if it didn't happen exactly this way. (the apocryphal part is that the epidemic ended when the pump handle was removed.) At any rate, the map, together with various statistical annotations, is compelling because it points to the Broad Street pump as the source of the outbreak. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Snow.deaths.csv", "filename": "Snow_deaths", "name": " John Snow's Map and Data on the 1854 London Cholera Outbreak ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Age and body fat percentage of 25 normal adults. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/agefat.csv", "filename": "agefat", "name": " Total Body Composision Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Efficacy of Aspirin in preventing death after a myocardial infarct. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/aspirin.csv", "filename": "aspirin", "name": " Aspirin Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A meta-analysis on the efficacy of BCG vaccination against tuberculosis (TB). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/BCG.csv", "filename": "BCG", "name": " BCG Vaccine Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data from a clinical trial of an interactive multimedia program called ‘Beat the Blues’. ", + "description": "Data from a clinical trial of an interactive multimedia program called \"Beat the Blues\". ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/BtheB.csv", "filename": "BtheB", "name": " Beat the Blues Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from four randomised clinical trials on the prevention of gastointestinal damages by Misoprostol reported by Lanza et al. (1987, 1988a,b, 1989). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/Lanza.csv", "filename": "Lanza", "name": " Prevention of Gastointestinal Damages ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The distribution of the oral lesion site found in house-to-house surveys in three geographic regions of rural India. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/orallesions.csv", "filename": "orallesions", "name": " Oral Lesions in Rural India ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The erythrocyte sedimentation rate and measurements of two plasma proteins (fibrinogen and globulin). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/plasma.csv", "filename": "plasma", "name": " Blood Screening Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a placebo-controlled trial of a non-steroidal anti-inflammatory drug in the treatment of familial andenomatous polyposis (FAP). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/polyps.csv", "filename": "polyps", "name": " Familial Andenomatous Polyposis ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a placebo-controlled trial of a non-steroidal anti-inflammatory drug in the treatment of familial andenomatous polyposis (FAP). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/polyps3.csv", "filename": "polyps3", "name": " Familial Andenomatous Polyposis ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements made on Egyptian skulls from five epochs. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/skulls.csv", "filename": "skulls", "name": " Egyptian Skulls ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The mortality and drinking water hardness for 61 cities in England and Wales. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/water.csv", "filename": "water", "name": " Mortality and Water Hardness ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The allograft data frame has 34 rows and 4 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/KMsurv/allograft.csv", "filename": "allograft", "name": "data from Exercise 13.1, p418", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The bfeed data frame has 927 rows and 10 columns. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/KMsurv/bfeed.csv", "filename": "bfeed", "name": "data from Section 1.14", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The bmt data frame has 137 rows and 22 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/KMsurv/bmt.csv", "filename": "bmt", "name": "data from Section 1.3", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The bnct data frame has 34 rows and 3 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/KMsurv/bnct.csv", "filename": "bnct", "name": "data from Exercise 7.7, p223", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The btrial data frame has 45 rows and 3 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/KMsurv/btrial.csv", "filename": "btrial", "name": "data from Section 1.5", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Six samples of penicillin were tested using the B. subtilis plate method on each of 24 plates. The response is the diameter (mm) of the zone of inhibition of growth of the organism. 
\nThe data are described in Davies and Goldsmith (1972) as coming from an investigation to “assess the variability between samples of penicillin by the B. subtilis method. In this test method a bulk-inoculated nutrient agar medium is poured into a Petri dish of approximately 90 mm. diameter, known as a plate. When the medium has set, six small hollow cylinders or pots (about 4 mm. in diameter) are cemented onto the surface at equally spaced intervals. A few drops of the penicillin solutions to be compared are placed in the respective cylinders, and the whole plate is placed in an incubator for a given time. Penicillin diffuses from the pots into the agar, and this produces a clear circular zone of inhibition of growth of the organisms, which can be readily measured. The diameter of the zone is related in a known way to the concentration of penicillin in the solution.” ", + "description": "Six samples of penicillin were tested using the B. subtilis plate method on each of 24 plates. The response is the diameter (mm) of the zone of inhibition of growth of the organism. \nThe data are described in Davies and Goldsmith (1972) as coming from an investigation to \u201cassess the variability between samples of penicillin by the B. subtilis method. In this test method a bulk-inoculated nutrient agar medium is poured into a Petri dish of approximately 90 mm. diameter, known as a plate. When the medium has set, six small hollow cylinders or pots (about 4 mm. in diameter) are cemented onto the surface at equally spaced intervals. A few drops of the penicillin solutions to be compared are placed in the respective cylinders, and the whole plate is placed in an incubator for a given time. Penicillin diffuses from the pots into the agar, and this produces a clear circular zone of inhibition of growth of the organisms, which can be readily measured. 
The diameter of the zone is related in a known way to the concentration of penicillin in the solution.\u201d ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lme4/Penicillin.csv", "filename": "Penicillin", "name": "Variation in penicillin testing", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The average reaction time per day for subjects in a sleep deprivation study. On day 0 the subjects had their normal amount of sleep. Starting that night they were restricted to 3 hours of sleep per night. The observations represent the average reaction time on a series of tests given each day to each subject. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lme4/sleepstudy.csv", "filename": "sleepstudy", "name": "Reaction times in a sleep deprivation study", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The anorexia data frame has 72 rows and 3 columns. Weight change data for young female anorexia patients. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/anorexia.csv", "filename": "anorexia", "name": " Anorexia Data on Weight Change ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Tests of the presence of the bacteria H. influenzae in children with otitis media in the Northern Territory of Australia. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/bacteria.csv", "filename": "bacteria", "name": " Presence of Bacteria after Drug Treatments ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Cushing's syndrome is a hypertensive disorder associated with over-secretion of cortisol by the adrenal gland. The observations are urinary excretion rates of two steroid metabolites. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Cushings.csv", "filename": "Cushings", "name": " Diagnostic Tests on Patients with Cushing's Syndrome ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A time series giving the monthly deaths from bronchitis, emphysema and asthma in the UK, 1974-1979, both sexes (deaths), ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/deaths.csv", "filename": "deaths", "name": " Monthly Deaths from Lung Diseases in the UK ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data were collected on the concentration of a chemical GAG in the urine of 314 children aged from zero to seventeen years. The aim of the study was to produce a chart to help a paediatrican to assess if a child's GAG concentration is ‘normal’. 
", + "description": "Data were collected on the concentration of a chemical GAG in the urine of 314 children aged from zero to seventeen years. The aim of the study was to produce a chart to help a paediatrican to assess if a child's GAG concentration is \"normal\". ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/GAGurine.csv", "filename": "GAGurine", "name": " Level of GAG in Urine of Children ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Proportions of female children at various ages during adolescence who have reached menarche. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/menarche.csv", "filename": "menarche", "name": " Age of Menarche in Warsaw ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The purpose of this experiment was to assess the influence of calcium in solution on the contraction of heart muscle in rats. The left auricle of 21 rat hearts was isolated and on several occasions a constant-length strip of tissue was electrically stimulated and dipped into various concentrations of calcium chloride solution, after which the shortening of the strip was accurately measured as the response. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/muscle.csv", "filename": "muscle", "name": " Effect of Calcium Chloride on Muscle Contraction in Rat Hearts ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Experiments were performed on children on their ability to differentiate a signal in broad-band noise. The noise was played from a pair of speakers and a signal was added to just one channel; the subject had to turn his/her head to the channel with the added signal. The signal was either coherent (the amplitude of the noise was increased for a period) or incoherent (independent noise was added for the same period to form the same increase in power). \nThe threshold used in the original analysis was the stimulus loudness needs to get 75% correct responses. Some of the children had suffered from otitis media with effusion (OME). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/OME.csv", "filename": "OME", "name": " Tests of Auditory Perception in Children with OME ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A population of women who were at least 21 years old, of Pima Indian heritage and living near Phoenix, Arizona, was tested for diabetes according to World Health Organization criteria. The data were collected by the US National Institute of Diabetes and Digestive and Kidney Diseases. We used the 532 complete records after dropping the (mainly missing) data on serum insulin. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Pima.te.csv", "filename": "Pima_tr", "name": " Diabetes in Pima Indian Women ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A population of women who were at least 21 years old, of Pima Indian heritage and living near Phoenix, Arizona, was tested for diabetes according to World Health Organization criteria. The data were collected by the US National Institute of Diabetes and Digestive and Kidney Diseases. We used the 532 complete records after dropping the (mainly missing) data on serum insulin. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Pima.tr2.csv", "filename": "Pima_tr2", "name": " Diabetes in Pima Indian Women ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data frame gives the weight, in kilograms, of an obese patient at 52 time points over an 8 month period of a weight rehabilitation programme. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/wtloss.csv", "filename": "wtloss", "name": " Weight Loss Data from an Obese Patient ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Birth weight, date, and gestational period collected as part of the Child Health and Development Studies in 1961 and 1962. 
Information about the baby's parents — age, education, height, weight, and whether the mother smoked is also recorded. ", + "description": "Birth weight, date, and gestational period collected as part of the Child Health and Development Studies in 1961 and 1962. Information about the baby's parents \u2014 age, education, height, weight, and whether the mother smoked is also recorded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Gestation.csv", "filename": "Gestation", "name": "Data from the Child Health and Development Studies", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The HELP study was a clinical trial for adult inpatients recruited from a detoxification unit. Patients with no primary care physician were randomized to receive a multidisciplinary assessment and a brief motivational intervention or usual care, with the goal of linking them to primary medical care. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/HELPfull.csv", "filename": "HELPfull", "name": "Health Evaluation and Linkage to Primary Care", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The HELP study was a clinical trial for adult inpatients recruited from a detoxification unit. Patients with no primary care physician were randomized to receive a multidisciplinary assessment and a brief motivational intervention or usual care, with the goal of linking them to primary medical care. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/HELPmiss.csv", "filename": "HELPmiss", "name": "Health Evaluation and Linkage to Primary Care", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The HELP study was a clinical trial for adult inpatients recruited from a detoxification unit. Patients with no primary care physician were randomized to receive a multidisciplinary assessment and a brief motivational intervention or usual care, with the goal of linking them to primary medical care. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/HELPrct.csv", "filename": "HELPrct", "name": "Health Evaluation and Linkage to Primary Care", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rheumatoid self-assessment scores for 302 patients, measured on a five-level ordinal response scale at three follow-up times. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/multgee/arthritis.csv", "filename": "arthritis", "name": " Rheumatoid Arthritis Clinical Trial ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The classic data set used by Gossett (publishing as Student) for the introduction of the t-test. The design was a within subjects study with hours of sleep in a control condition compared to those in 3 drug conditions. 
Drug1 was 0.6 mg of L-Hyoscyamine, Drug 2L and Drug2R were said to be .6 mg of Left and Right isomers of Hyoscine. As discussed by Zabell (2008) these were not optical isomers. The delta1, delta2L and delta2R are changes from the baseline control. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/cushny.csv", "filename": "cushny", "name": " A data set from Cushny and Peebles (1905) on the effect of three drugs on hours of sleep, used by Student (1908) ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Francis Galton introduced the 'co-relation' in 1888 with a paper discussing how to measure the relationship between two variables. His primary example was the relationship between height and forearm length. The data table (cubits) is taken from Galton (1888). Unfortunately, there seem to be some errors in the original data table in that the marginal totals do not match the table. \nSir Francis Galton (1888) published the first demonstration of the correlation coefficient. The regression (or reversion to mediocrity) of the height to the length of the left forearm (a cubit) was found to be .8. The original table cubits is taken from Galton (1888). There seem to be some errors in the table as published in that the row sums do not agree with the actual row sums. These data are used to create a matrix using table2matrix for demonstrations of analysis and displays of the data. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/heights.csv", "filename": "heights", "name": "A data.frame of the Galton (1888) height and cubit data set.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "There are 628 data points in the original data, 575 of which have no missing values. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/quantreg/uis.csv", "filename": "uis", "name": "UIS Drug Treatment study data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data set issued from a study of the adverse events of a drug on 117 patients affected by Crohn's disease (a chronic inflammatory disease of the intestines). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/CrohnD.csv", "filename": "CrohnD", "name": "Crohn's Disease Adverse Events Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set was analyzed by Weisberg (1980) and Chambers et al. (1983). A catheter is passed into a major vein or artery at the femoral region and moved into the heart. The proper length of the introduced catheter has to be guessed by the physician. The aim of the data set is to describe the relation between the catheter length and the patient's height (X1) and weight (X2). \nThis data sets is used to demonstrate the effects caused by collinearity. 
The correlation between height and weight is so high that either variable almost completely determines the other. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/heart.csv", "filename": "heart", "name": "Heart Catherization Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Finney's data on vaso constriction in the skin of the digits. \nThe data taken from Finney (1947) were obtained in a carefully controlled study in human physiology where a reflex “vaso constriction” may occur in the skin of the digits after taking a single deep breath. The response y is the occurence (y = 1) or non-occurence (y = 0) of vaso constriction in the skin of the digits of a subject after he or she inhaled a certain volume of air at a certain rate. The responses of three subjects are available. The first contributed 9 responses, the second contributed 8 responses, and the third contributed 22 responses. \nAlthough the data represent repeated measurements, an analysis that assumes independent observations may be applied, as claimed by Pregibon (1981). ", + "description": "Finney's data on vaso constriction in the skin of the digits. \nThe data taken from Finney (1947) were obtained in a carefully controlled study in human physiology where a reflex \u201cvaso constriction\u201d may occur in the skin of the digits after taking a single deep breath. The response y is the occurence (y = 1) or non-occurence (y = 0) of vaso constriction in the skin of the digits of a subject after he or she inhaled a certain volume of air at a certain rate. The responses of three subjects are available. The first contributed 9 responses, the second contributed 8 responses, and the third contributed 22 responses. 
\nAlthough the data represent repeated measurements, an analysis that assumes independent observations may be applied, as claimed by Pregibon (1981). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/vaso.csv", "filename": "vaso", "name": "Vaso Constriction Skin Data Set", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Systolic blood pressure, weight and smoking status for a sample of 500 adults ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Blood1.csv", "filename": "Blood1", "name": "Blood Pressure, Weight, and Smoking Status", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An experiment on calcium supplements and blood pressure in 21 men \nThe purpose of this study was to see whether daily calcium supplements can lower blood pressure. The subjects were 21 men; each was randomly assigned either to a treatment group or to a control group. Those in the treatment group took a daily pill containing calcium. Those in the control group took a daily pill with no active ingredients. Each subject's blood pressure was measured at the beginning of the 12-week study, and again at the end. The decrease in blood pressure (begin-end) was recorded (so a negative value means blood pressure increased). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CalciumBP.csv", "filename": "CalciumBP", "name": "Do Calcium Supplements Lower Blood Pressure?", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Drug interaction study with oral contraceptives \nTwenty-two female subjects were allocated randomly to one of two treatment sequences in a two period crossover design. The two treatments were a new Drug D or placebo, both given concomitantly with a standard oral contraceptive which was given in both study periods. The oral contraceptive has two components, ethinyl estradiol (EE) and norethindrone (NET). The purpose of the study was to evaluate whether the presence of Drug D affected the bioavailability of each of the oral contraceptive components. Note that our dataset does not include the NET variable. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Contraceptives.csv", "filename": "Contraceptives", "name": "Drug Interaction with Contraceptives", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Digested calories with different types of fiber in crackers .\nTwelve female subjects were fed a controlled diet, with crackers before every meal. There were four different kinds of crackers: control, bran fiber, gum fiber, and a combination of both bran and gum fiber. Over the course of the study, each subject ate all four kinds of crackers, one kind at a time, for a stretch of several days. The order was randomized. 
The response is the number of digested calories, measured as the difference between calories eaten and calories passed through the system. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CrackerFiber.csv", "filename": "CrackerFiber", "name": "Effects of Cracker Fiber on Digested Calories", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Fertility measurements for a sample of women who have difficulty getting pregnant \nA medical doctor and her team of researchers collected a variety of data on women who were having trouble getting pregnant. A key method for assessing fertility is a count of antral follicles (LowAFC or MeanAFC) that can be performed with noninvasive ultrasound. Researchers are interested in how the other variables are related to these counts. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Fertility.csv", "filename": "Fertility", "name": "Fertility Data for Women Having Trouble Getting Pregnant", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Finger tap rates after drug administration \nScientists Scott and Chen published research that compared the effects of caffeine with those of theobromine (a similar chemical found in chocolate) and with those of a placebo. Their experiment used four human subjects and took place over several days. Each day each subject swallowed a tablet containing one of caffeine, theobromine, or the placebo. 
Two hours later they were timed while tapping a finger in a specified manner (that they had practiced earlier, to control for learning effects). The response is the number of taps in a fixed time interval. \n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Fingers.csv", "filename": "Fingers", "name": "Finger Tap Rates", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Finger tap rates after drug administration \nScientists Scott and Chen published research that compared the effects of caffeine with those of theobromine (a similar chemical found in chocolate) and with those of a placebo. Their experiment used four human subjects and took place over several days. Each day each subject swallowed a tablet containing one of caffeine, theobromine, or the placebo. Two hours later they were timed while tapping a finger in a specified manner (that they had practiced earlier, to control for learning effects). The response is the number of taps in a fixed time interval. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FranticFingers.csv", "filename": "FranticFingers", "name": "Finger Tap Rates", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Percentage of correctly identified words in a hearing test .\nAudiologists use standard lists of 50 words to test hearing; the words are calibrated, using subjects with normal hearing, to make all 50 words on the list equally hard to hear. 
The goal of the study described here was to see how four such lists, denoted by L1-L4 in this dataset, compared when played at low volume with a noisy background. The response is the percentage of words identified correctly. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/HearingTest.csv", "filename": "HearingTest", "name": "Correctly Identified Words in a Hearing Test", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data for a sample of 200 patients at an Intensive Care Unit (ICU) \nThis dataset contains information for a sample of 200 patients who were part of a larger study conducted in a hospital's Intensive Care Unit (ICU). Since an ICU often deals with serious, life-threatening cases, a key variable to study is patient survival, which is coded in the Survive variable as 1 if the patient lived to be discharged and 0 if the patient died. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ICU.csv", "filename": "ICU", "name": "Intensive Care Unit Patients", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Body measurements for a sample of 198 children \nThis dataset comes from a 1977 anthropometric study of body measurements for children. Subjects in this sample are between the ages of 8 and 18 years old, selected at random from the much larger dataset of the original study. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Kids198.csv", "filename": "Kids198", "name": "Body Measurements of Children", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Hours of sleep for teenagers \nData from a sample of 446 teens, aged 14 to 18, who answer the question, \"On an average school night, how many hours of sleep do you get?\" The outcome variable records whether or not each person averages at least 7 hours of sleep. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/LosingSleep.csv", "filename": "LosingSleep", "name": "Sleep Hours for Teenagers", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Comparing meniscus repair methods on cadaver knees \nEighteen, lightly embalmed, cadaveric knee specimens were used in a study to compare three different methods of meniscus repair. The specimens were randomly assigned to one of the three treatments: vertical suture, meniscus arrow, FasT-Fix. They were evaluated on three different response variables: load at failure, stiffness, and displacement. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Meniscus.csv", "filename": "Meniscus", "name": "Meniscus Repair Methods", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dopamine levels with different amounts of phenylalanine in diets \nPhenylketonuria (PKU) is an enzyme deficiency that keeps a person from being able to synthesize enough dopamine. The amino acid phenylalanine inhibits the enzyme needed to synthesize dopamine, and so to some extent, a diet low in phenylalanine can moderate the symptoms of PKU. In short, less phenylalanine in the diet should lead to more dopamine in the brain. The dopamine level for each patient was measured after a normal diet and after a week on a low phenylalanine diet. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/PKU.csv", "filename": "PKU", "name": "Dopamine levels with PKU in diets", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A student measured her pulse several times a day over 26 days. \nA student measured her pulse in the morning, at noon, at 1:00, and in the evening for each of 26 days. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/RepeatedPulse.csv", "filename": "RepeatedPulse", "name": "Pulse Rates at Various Times of Day", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Heart rates for a sample of six tree shrews at each of three stages of sleep. \nHeart rates were recorded for a sample of six tree shrews at each of three stages of sleep. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/SleepingShrews.csv", "filename": "SleepingShrews", "name": "Shrew Heart Rates at Stages of Sleep", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An experiment to see if special exercises help babies learn to walk sooner \nScientists wondered if they could get babies to walk sooner by prescribing a set of special exercises. Their experimental design included four groups of babies and the following treatments:\n\nSpecial exercises: Parents were shown the special exercises and encouraged to use them with their children. They were phoned weekly to check on their child's progress.\n\nExercise control: These parents were not shown the special exercises, but they were told to make sure their babies spent at least 15 minutes a day exercising. \n\nWeekly report: Parents in this group were not given instructions about exercise. Like the parents in the treatment group, however, they received a phone call each week to check on progress.\n\nFinal report: These parents were not given weekly phone calls or instructions about exercises. They reported at the end of the study. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/WalkingBabies.csv", "filename": "WalkingBabies", "name": "Effects of Exercise on First Walking", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An experiment to see if financial incentives improve weight loss \nResearchers investigated whether financial incentives would help people lose weight more successfully. Some participants in the study were randomly assigned to a treatment group that was offered financial incentives for achieving weight loss goals, while others were assigned to a control group that did not use financial incentives. All participants were monitored over a four month period and the net weight change (Before - After in pounds) at the end of this period was recorded for each individual. Then the individuals were left alone for three months with a followup weight check at the seven-month mark to see whether weight losses persisted after the original four months of treatment.\nThe 4-month data alone (with missing values omitted) is stored in WeightLossIncentive4. \nThe 7-month data alone (with missing values omitted) is stored in WeightLossIncentive7. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/WeightLossIncentive.csv", "filename": "WeightLossIncentive", "name": "Do Financial Incentives Improve Weight Loss?", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Details \nResearchers investigated whether financial incentives would help people lose weight more successfully. 
Some participants in the study were randomly assigned to a treatment group that was offered financial incentives for achieving weight loss goals, while others were assigned to a control group that did not use financial incentives. All participants were monitored over a four month period and the net weight change (Before - After in pounds) at the end of this period was recorded for each individual. Then the individuals were left alone for three months with a followup weight check at the seven-month mark to see whether weight losses persisted after the original four months of treatment. This dataset has only the non-missing 4-month data. The 7-month data are in WeightLossIncentive7 and both measurements (including missing values) are in WeightLossIncentive. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/WeightLossIncentive4.csv", "filename": "WeightLossIncentive4", "name": "Do Financial Incentives Improve Weight Loss? (4 Months)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Weight loss after seven months with/without a financial incentive \nResearchers investigated whether financial incentives would help people lose weight more successfully. Some participants in the study were randomly assigned to a treatment group that was offered financial incentives for achieving weight loss goals, while others were assigned to a control group that did not use financial incentives. All participants were monitored over a four month period and the net weight change (Before - After in pounds) at the end of this period was recorded for each individual. 
Then the individuals were left alone for three months with a followup weight check at the seven-month mark to see whether weight losses persisted after the original four months of treatment. This dataset has only the non-missing 7-month data. The 4-month data are in WeightLossIncentive4 and both measurements (including missing values) are in WeightLossIncentive. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/WeightLossIncentive7.csv", "filename": "WeightLossIncentive7", "name": "Do Financial Incentives Improve Weight Loss? (7 Months)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from the Youth Risk Behavior Surveillance System \nThis dataset is derived from the 2007 Youth Risk Behavior Surveillance System (YRBSS), which is an annual survey conducted by the Centers for Disease Control and Prevention (CDC) to monitor the prevalence of health-risk youth behaviors. This dataset focuses on whether or not youths have recently (in past 30 days) ridden with a drunk driver. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/YouthRisk.csv", "filename": "YouthRisk", "name": "Annual survey of health-risk youth behaviors", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Partial results from a trial of laser coagulation for the treatment of diabetic retinopathy. \nThe 197 patients in this dataset were a 50% random sample of the patients with \"high-risk\" diabetic retinopathy as defined by the Diabetic Retinopathy Study (DRS). 
Each patient had one eye randomized to laser treatment and the other eye received no treatment. For each eye, the event of interest was the time from initiation of treatment to the time when visual acuity dropped below 5/200 two visits in a row. Thus there is a built-in lag time of approximately 6 months (visits were every 3 months). Survival times in this dataset are therefore the actual time to blindness in months, minus the minimum possible time to event (6.5 months). Censoring was caused by death, dropout, or end of the study. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/diabetic.csv", "filename": "diabetic", "name": "Diabetic retinopathy", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is a stratified random sample containing 1/2 of the subjects from a study of the relationship between serum free light chain (FLC) and mortality. The original sample contains samples on approximately 2/3 of the residents of Olmsted County aged 50 or greater. \nIn 1995 Dr. Robert Kyle embarked on a study to determine the prevalence of monoclonal gammopathy of undetermined significance (MGUS) in Olmsted County, Minnesota, a condition which is normally only found by chance from a test (serum electrophoresis) which is ordered for other causes. Later work suggested that one component of immunoglobulin production, the serum free light chain, might be a possible marker for immune dysregulation. In 2010 Dr. Angela Dispenzieri and colleagues assayed FLC levels on those samples from the original study for which they had patient permission and from which sufficient material remained for further testing. They found that elevated FLC levels were indeed associated with higher death rates. 
\nPatients were recruited when they came to the clinic for other appointments, with a final random sample of those who had not yet had a visit since the study began. An interesting side question is whether there are differences between early, mid, and late recruits. \nThis data set contains an age and sex stratified random sample that includes 7874 of the original 15759 subjects. The original subject identifiers and dates have been removed to protect patient identity. Subsampling was done to further protect this information. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/flchain.csv", "filename": "flchain", "name": "Assay of serum free light chain for 7874 subjects.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Survival of patients on the waiting list for the Stanford heart transplant program.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/heart.csv", "filename": "heart_", "name": "Stanford Heart Transplant data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on the recurrence times to infection, at the point of insertion of the catheter, for kidney patients using portable dialysis equipment. Catheters may be removed for reasons other than infection, in which case the observation is censored. Each patient has exactly 2 observations. \nThis data has often been used to illustrate the use of random effects (frailty) in a survival model. However, one of the males (id 21) is a large outlier, with much longer survival than his peers. 
If this observation is removed no evidence remains for a random subject effect. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/kidney.csv", "filename": "kidney", "name": "Kidney catheter data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Plasma cells are responsible for manufacturing immunoglobulins, an important part of the immune defense. At any given time there are estimated to be about 10^6 different immunoglobulins in the circulation. When a patient has a plasma cell malignancy the distribution will become dominated by a single isotype, the product of the malignant clone, visible as a spike on a serum protein electrophoresis. Monoclonal gammopathy of undetermined significance (MGUS) is the presence of such a spike, but in a patient with no evidence of overt malignancy. This data set of 241 sequential subjects at Mayo Clinic was the groundbreaking study defining the natural history of such subjects. Due to the diligence of the principal investigator 0 subjects have been lost to follow-up. \nThree subjects had MGUS detected on the day of death. In data set mgus1 these subjects have the time to MGUS coded as .5 day before the death in order to avoid tied times. \nThese data sets were updated in Jan 2015 to correct some small errors. \nNatural history of 241 subjects with monoclonal gammopathy of undetermined significance (MGUS). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/mgus.csv", "filename": "mgus", "name": "Monoclonal gammopathy data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Natural history of 1341 sequential patients with monoclonal gammopathy of undetermined significance (MGUS). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/mgus2.csv", "filename": "mgus2", "name": "Monoclonal gammopathy data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data is from the Mayo Clinic trial in primary biliary cirrhosis (PBC) of the liver conducted between 1974 and 1984. A total of 424 PBC patients, referred to Mayo Clinic during that ten-year interval, met eligibility criteria for the randomized placebo controlled trial of the drug D-penicillamine. The first 312 cases in the data set participated in the randomized trial and contain largely complete data. The additional 112 cases did not participate in the clinical trial, but consented to have basic measurements recorded and to be followed for survival. Six of those cases were lost to follow-up shortly after diagnosis, so the data here are on an additional 106 cases as well as the 312 randomized participants. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/pbc.csv", "filename": "pbc", "name": "Mayo Clinic Primary Biliary Cirrhosis Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A trial of laser coagulation as a treatment to delay diabetic retinopathy. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/retinopathy.csv", "filename": "retinopathy", "name": "Diabetic Retinopathy", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This contains the Stanford Heart Transplant data in a different format. The main data set is in heart. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/stanford2.csv", "filename": "stanford2", "name": "More Stanford Heart Transplant data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Subjects on a liver transplant waiting list from 1990-1999, and their disposition: received a transplant, died while waiting, withdrew from the list, or censored. \nThis represents the transplant experience in a particular region, over a time period in which liver transplant became much more widely recognized as a viable treatment modality. The number of liver transplants rises over the period, but the number of subjects added to the liver transplant waiting list grew much faster. 
Important questions addressed by the data are the change in waiting time, who waits, and whether there was a consequent increase in deaths while on the list. \nBlood type is an important consideration. Donor livers from subjects with blood type O can be used by patients with A, B, AB or O blood types, whereas an AB liver can only be used by an AB recipient. Thus type O subjects on the waiting list are at a disadvantage, since the pool of competitors is larger for type O donor livers. \nThis data is of historical interest and provides a useful example of competing risks, but it has little relevance to current practice. Liver allocation policies have evolved and now depend directly on each individual patient's risk and need, assessments of which are regularly updated while a patient is on the waiting list. The overall organ shortage remains acute, however. \nThe transplant data set was a version used early in the analysis; transplant2 has several additions and corrections, and was the final data set and matches the paper. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/transplant.csv", "filename": "transplant", "name": "Liver transplant waiting list", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a trial of ursodeoxycholic acid (UDCA) in patients with primary biliary cirrhosis (PBC). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/udca.csv", "filename": "udca", "name": "Data from a trial of ursodeoxycholic acid ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Liver related laboratory data from a randomized, blind, parallel group clinical trial with 4 doses of a drug. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/texmex/liver.csv", "filename": "liver", "name": "Liver related laboratory data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Koch & Edwards (1988) from a double-blind clinical trial investigating a new treatment for rheumatoid arthritis. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Arthritis.csv", "filename": "Arthritis", "name": "Arthritis Treatment Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data from Ashford & Sowden (1970) given by Agresti (1990) on the association between two pulmonary conditions, breathlessness and wheeze, in a large sample of coal miners who were smokers with no radiological evidence of pneumoconlosis, aged between 20–64 when examined. This data is frequently used as an example of fitting models for bivariate, binary responses. 
", + "description": "Data from Ashford & Sowden (1970) given by Agresti (1990) on the association between two pulmonary conditions, breathlessness and wheeze, in a large sample of coal miners who were smokers with no radiological evidence of pneumoconiosis, aged between 20-64 when examined. This data is frequently used as an example of fitting models for bivariate, binary responses. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/CoalMiners.csv", "filename": "CoalMiners", "name": "Breathlessness and Wheeze in Coal Miners", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The table relates the length of stay (in years) of 132 long-term schizophrenic patients in two London mental hospitals with the frequency of visits. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Hospital.csv", "filename": "Hospital", "name": "Hospital data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Westlund & Kurland (1953) on the diagnosis of multiple sclerosis (MS): two samples of patients, one from Winnipeg and one from New Orleans, were each rated by two neurologists (one from each city) in four diagnostic categories. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/MSPatients.csv", "filename": "MSPatients", "name": "Diagnosis of Multiple Sclerosis", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Kendall & Stuart (1961) on unaided vision among 3,242 men and 7,477 women, all aged 30-39 and employed in the U.K. Royal Ordnance factories 1943-1946. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/VisualAcuity.csv", "filename": "VisualAcuity", "name": "Visual Acuity in Left and Right Eyes", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Common" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Down's syndrome is a genetic disorder caused by an extra chromosome 21 or a part of chromosome 21 being translocated to another chromosome. The incidence of Down's syndrome is highly dependent on the mother's age and rises sharply after age 30. In the 1960's a large scale study of the effect of maternal age on the incidence of Down's syndrome was conducted at the British Columbia Health Surveillance Registry. These are the data which was collected in that study. \nMothers were classified by age. Most groups correspond to the age in years but the first group comprises all mothers with ages in the range 15-17 and the last is those with ages 46-49. No data for mothers over 50 or below 15 were collected. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/downs.bc.csv", "filename": "downs_bc", "name": " Incidence of Down's Syndrome in British Columbia\n", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Burt data frame has 27 rows and 4 columns. The “data” were simply (and notoriously) manufactured. The same data are in the dataset “twins\" in the alr3 package, but with different labels. ", + "description": "The Burt data frame has 27 rows and 4 columns. The \u201cdata\u201d were simply (and notoriously) manufactured. The same data are in the dataset \u201ctwins\" in the alr3 package, but with different labels. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Burt.csv", "filename": "Burt", "name": "Fraudulent Data on IQs of Twins Raised Apart", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data come to us from Hosmer and Lemeshow (2000). Called the low birth weight (lbw) data, the response is a binary variable, low, which indicates whether the birth weight of a baby is under 2500g (low=1), or over (low=0). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/lbw.csv", "filename": "lbw", "name": "lbw", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is a simulated data of four SNPs with their alleles coded in characters. 
The variable y contains phenotypes (1=case, 0=control). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/fsnps.csv", "filename": "fsnps", "name": "A case-control data involving four SNPs with missing genotype", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains HLA markers DRB, DQA, DQB and phenotypes of 271 Schizophrenia patients (y=1) and controls (y=0). Genotypes for 3 HLA loci have prefixes name (e.g., \"DQB\") and a suffix for each of two alleles (\".a1\" and \".a2\"). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/hla.csv", "filename": "hla", "name": "The HLA data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A multi-generational pedigree containing individual, father, mother IDs and sex. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/lukas.csv", "filename": "lukas", "name": "An example pedigree", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Galton (1886) presented these data in a table, showing a cross-tabulation of 928 adult children born to 205 fathers and mothers, by their height and their mid-parent's height. 
He visually smoothed the bivariate frequency distribution and showed that the contours formed concentric and similar ellipses, thus setting the stage for correlation, regression and the bivariate normal distribution. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Galton.csv", "filename": "Galton", "name": " Galton's data on the heights of parents and their children ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set lists the individual observations for 934 children in 205 families on which Galton (1886) based his cross-tabulation. \nIn addition to the question of the relation between heights of parents and their offspring, for which this data is mainly famous, Galton had another purpose which the data in this form allows to address: Does marriage selection indicate a relationship between the heights of husbands and wives, a topic he called assortative mating? Keen [p. 297-298](2010) provides a brief discussion of this topic. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/GaltonFamilies.csv", "filename": "GaltonFamilies", "name": " Galton's data on the heights of parents and their children, by child ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The IndianIrish data frame has 18 rows and 4 columns. 
The data are genotype frequencies for two locations, for Xavante Indian and Irish populations respectively ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/hwde/IndianIrish.csv", "filename": "IndianIrish", "name": "Observed genotype frequencies at MN and S loci, for 2 populations", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The twins data frame has 24 rows and 3 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/KMsurv/twins.csv", "filename": "twins", "name": "data from Exercise 7.14, p225", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Experiment comparing twins (one urban, one rural) ability to clear airborne radioactive particles from their lungs \nTo assess lung health, the scientists measured \"tracheobronchial clearance rate,\" that is, in English, \"How fast do your lungs get rid of nasty stuff?\" Each subject agreed to inhale an aerosol of radioactive Teflon particles. A Geiger counter held to the chest measured the radioactivity just after inhaling, and again one hour later. The clearance rate was the percentage of radioactivity remaining – the lower the better. Subjects were 15 sets of identical twins, each pair with one twin living in an urban environment and the other in a rural environment. 
", + "description": "Experiment comparing twins (one urban, one rural) ability to clear airborne radioactive particles from their lungs \nTo assess lung health, the scientists measured \"tracheobronchial clearance rate,\" that is, in English, \"How fast do your lungs get rid of nasty stuff?\" Each subject agreed to inhale an aerosol of radioactive Teflon particles. A Geiger counter held to the chest measured the radioactivity just after inhaling, and again one hour later. The clearance rate was the percentage of radioactivity remaining - the lower the better. Subjects were 15 sets of identical twins, each pair with one twin living in an urban environment and the other in a rural environment. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/RadioactiveTwins.csv", "filename": "RadioactiveTwins", "name": "Comparing Twins Ability to Clear Radioactive Particles", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Experiment comparing twins (one urban, one rural) ability to clear airborne radioactive particles from their lungs \nThis dataset is from a study to compare the effect of living environment (rural or urban) on human lung function, where the researchers were able to locate seven pairs of twins with one twin in each pair living in the country, the other in a city. To measure lung function, twins inhaled an aerosol of radioactive Teflon particles. By measuring the level of radioactivity immediately and then again after an hour, the scientists could measure the rate of “tracheobronchial clearance.\" The percentage of radioactivity remaining in the lungs after an hour told how quickly subjects' lungs cleared the inhaled particles. \nThis dataset was renamed as RadioactiveTwins for the second edition. 
", + "description": "Experiment comparing twins (one urban, one rural) ability to clear airborne radioactive particles from their lungs \nThis dataset is from a study to compare the effect of living environment (rural or urban) on human lung function, where the researchers were able to locate seven pairs of twins with one twin in each pair living in the country, the other in a city. To measure lung function, twins inhaled an aerosol of radioactive Teflon particles. By measuring the level of radioactivity immediately and then again after an hour, the scientists could measure the rate of \u201ctracheobronchial clearance.\" The percentage of radioactivity remaining in the lungs after an hour told how quickly subjects' lungs cleared the inhaled particles. \nThis dataset was renamed as RadioactiveTwins for the second edition. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/TwinsLungs.csv", "filename": "TwinsLungs", "name": "Comparing Twins Ability to Clear Radioactive Particles", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are from a placebo controlled trial of gamma interferon in chronic granulotomous disease (CGD). Contains the data on time to serious infections observed through end of study for each patient. \nThe cgd0 data set is in the form found in the references, with one line per patient and no recoding of the variables. The cgd data set (this one) has been cast into (start, stop] format with one line per event, and covariates such as center recoded as factors to include meaningful labels. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/cgd.csv", "filename": "cgd", "name": "Chronic Granulotamous Disease data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Results of a randomized trial of rhDNase for the treatment of cystic fibrosis. \nIn patients with cystic fibrosis, extracellular DNA is released by leukocytes that accumulate in the airways in response to chronic bacterial infection. This excess DNA thickens the mucus, which then cannot be cleared from the lung by the cilia. The accumulation leads to exacerbations of respiratory symptoms and progressive deterioration of lung function. At the time of this study more than 90% of cystic fibrosis patients eventually died of lung disease. \nDeoxyribonuclease I (DNase I) is a human enzyme normally present in the mucus of human lungs that digests extracellular DNA. Genentech, Inc. cloned a highly purified recombinant DNase I (rhDNase or Pulmozyme) which when delivered to the lungs in an aerosolized form cuts extracellular DNA, reducing the viscoelasticity of airway secretions and improving clearance. In 1992 the company conducted a randomized double-blind trial comparing rhDNase to placebo. Patients were then monitored for pulmonary exacerbations, along with measures of lung volume and flow. The primary endpoint was the time until first pulmonary exacerbation; however, data on all exacerbations were collected for 169 days. \nThe definition of an exacerbation was an infection that required the use of intravenous (IV) antibiotics. Subjects had 0–5 such episodes during the trial, those with more than one have multiple rows in the data set, those with none have NA for the IV start and end times. 
A few subjects were infected at the time of enrollment, subject 173 for instance has a first infection interval of -21 to 7. We do not count this first infection as an \"event\", and the subject first enters the risk set at day 7. Subjects who have an event are not considered to be at risk for another event during the course of antibiotics, nor for an additional 6 days after they end. (If the symptoms reappear immediately after cessation then from a medical standpoint this would not be a new infection.) \nThis data set reproduces the data in Therneau and Grambsch, is does not exactly reproduce those in Therneau and Hamilton due to data set updates. ", + "description": "Results of a randomized trial of rhDNase for the treatment of cystic fibrosis. \nIn patients with cystic fibrosis, extracellular DNA is released by leukocytes that accumulate in the airways in response to chronic bacterial infection. This excess DNA thickens the mucus, which then cannot be cleared from the lung by the cilia. The accumulation leads to exacerbations of respiratory symptoms and progressive deterioration of lung function. At the time of this study more than 90% of cystic fibrosis patients eventually died of lung disease. \nDeoxyribonuclease I (DNase I) is a human enzyme normally present in the mucus of human lungs that digests extracellular DNA. Genentech, Inc. cloned a highly purified recombinant DNase I (rhDNase or Pulmozyme) which when delivered to the lungs in an aerosolized form cuts extracellular DNA, reducing the viscoelasticity of airway secretions and improving clearance. In 1992 the company conducted a randomized double-blind trial comparing rhDNase to placebo. Patients were then monitored for pulmonary exacerbations, along with measures of lung volume and flow. The primary endpoint was the time until first pulmonary exacerbation; however, data on all exacerbations were collected for 169 days. 
\nThe definition of an exacerbation was an infection that required the use of intravenous (IV) antibiotics. Subjects had 0-5 such episodes during the trial, those with more than one have multiple rows in the data set, those with none have NA for the IV start and end times. A few subjects were infected at the time of enrollment, subject 173 for instance has a first infection interval of -21 to 7. We do not count this first infection as an \"event\", and the subject first enters the risk set at day 7. Subjects who have an event are not considered to be at risk for another event during the course of antibiotics, nor for an additional 6 days after they end. (If the symptoms reappear immediately after cessation then from a medical standpoint this would not be a new infection.) \nThis data set reproduces the data in Therneau and Grambsch; it does not exactly reproduce those in Therneau and Hamilton due to data set updates. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/rhDNase.csv", "filename": "rhDNase", "name": "rhDNASE data set", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Genetics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "neuro is a matrix containing times of observed firing of a neuron in windows of 250ms either side of the application of a stimulus to a human subject. Each row of the matrix is a replication of the experiment and there were a total of 469 replicates. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/neuro.csv", "filename": "neuro", "name": "Neurophysiological Point Process Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Friendly data frame has 30 rows and 2 columns. The data are from an experiment on subjects' ability to remember words based on the presentation format. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Friendly.csv", "filename": "Friendly", "name": "Format Effects on Recall", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The head.injury data frame has 3121 rows and 11 columns. The data were simulated according to a simple logistic regression model to match roughly the clinical characteristics of a sample of individuals who suffered minor head injuries. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/head.injury.csv", "filename": "headinjury", "name": "Minor Head Injury (Simulated) Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The headInjury data frame has 3121 rows and 11 columns. The data were simulated according to a simple logistic regression model to match roughly the clinical characteristics of a sample of individuals who suffered minor head injuries. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/headInjury.csv", "filename": "headInjury", "name": "Minor Head Injury (Simulated) Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains APOE/APOC1 markers and Chinese Alzheimer's patients and controls. Variable id is subject id and y takes value 0 for controls and 2 for Alzheimer's. \nThe last six variables are age, sex and genotypes for APOE and APOC with suffixes for each of two alleles (\".a1\" and \".a2\"). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/apoeapoc.csv", "filename": "apoeapoc", "name": "APOE/APOC1 markers and Alzheimer's", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The markers are both with actual allele sizes and allele numbers. The dataset is distributed with the GENECOUNTING version 2.0 illustrating gene counting method involving chromosome X. A total of 183 patients and 157 controls (150 males, 190 females) were available, together with five markers in MAOA (monoamine oxidase A) region with alleles 12, 9, 6, 5, 3, and the first three markers were genotyped in all individuals while the fourth and fifth were genotyped for 294 and 304 individuals. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/mao.csv", "filename": "mao", "name": "A study of Parkinson's disease and MAO gene", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is a study of the neprilysin gene and sporadic Alzheimer's disease in Chinese. There are 257 cases and 242 controls, each with eight SNPs detecting through denaturing high-performance liquid chromatography (DHPLC). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/nep499.csv", "filename": "nep499", "name": "A study of Alzheimer's disease with eight SNPs and APOE", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A study of Parkinson's disease and controls with APOE, LRRK2 markers rs10506151, rs10784486, rs1365763, rs1388598, rs1491938, rs1491941 and SNCA markers m770, int4 and SNCA. The column abc indicates if a subject is familial Parkinson's (+), sporadic (-), or controls (Control). Races involved are American Indians (AI), African American (B), and the rest are Caucasians. Diagnosis also included possible (POS), probable (PRO) and definite PDs. AON is the age at onset. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/PD.csv", "filename": "PD", "name": "A study of Parkinson's disease and APOE, LRRK2, SNCA makers", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The seizure data frame has 59 rows and 7 columns. The dataset has the number of epiliptic seizures in each of four two-week intervals, and in a baseline eight-week inverval, for treatment and control groups with a total of 59 individuals. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/geepack/seizure.csv", "filename": "seizure", "name": "Epiliptic Seizures", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A randomised clinical trial investigating the effect of an anti-epileptic drug. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/epilepsy.csv", "filename": "epilepsy", "name": " Epilepsy Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on sex differences in the age of onset of schizophrenia. \nA sex difference in the age of onset of schizophrenia was noted by Kraepelin (1919). Subsequently epidemiological studies of the disorder have consistently shown an earlier onset in men than in women. 
One model that has been suggested to explain this observed difference is known as the subtype model which postulates two types of schizophrenia, one characterised by early onset, typical symptoms and poor premorbid competence, and the other by late onset, atypical symptoms, and good premorbid competence. The early onset type is assumed to be largely a disorder of men and the late onset largely a disorder of women. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/schizophrenia.csv", "filename": "schizophrenia", "name": " Age of Onset of Schizophrenia Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Thought disorder and early onset of schizophrenia. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/schizophrenia2.csv", "filename": "schizophrenia2", "name": " Schizophrenia Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Thall and Vail (1990) give a data set on two-week seizure counts for 59 epileptics. The number of seizures was recorded for a baseline period of 8 weeks, and then patients were randomly assigned to a treatment group or a control group. Counts were then recorded for four successive two-week periods. The subject's age is the only covariate. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/epil.csv", "filename": "epil", "name": " Seizure Counts for Epileptics ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a clinical trial of 59 patients with epilepsy (Breslow, 1996) in order to illustrate diagnostic techniques in Poisson regression. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/epilepsy.csv", "filename": "epilepsy_", "name": "Epilepsy Attacks Data Set", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The kyphosis data frame has 81 rows and 4 columns. 
representing data on children who have had corrective spinal surgery ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/rpart/kyphosis.csv", "filename": "kyphosis", "name": "Data on Children who have had Corrective Spinal Surgery", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Brain tissue pH at time of death ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/BrainpH.csv", "filename": "BrainpH", "name": "Brain pH Measurements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dementia study comparing two groups of patients \nBrain MRIs were used to study the brains of patients with Dementia with Lewy Bodies, some of whom also were diagnosed with Alzheimer's Disease. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/LewyBody2Groups.csv", "filename": "LewyBody2Groups", "name": "Lewy Bodies and Dimentia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dementia Study with Lewy Bodies \nBrain MRIs were used to study the brains of patients with Dementia with Lewy Bodies. These are the cases that were also diagnosed with Alzheimer's Disease. 
This is a subset of LewyBody2Groups ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/LewyDLBad.csv", "filename": "LewyDLBad", "name": "Lewy Bodies and Dimentia with Alzheimer's", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A study investigated whether a handheld device that sends a magnetic pulse into a person's head might be an effective treatment for migraine headaches. Researchers recruited 200 subjects who suffered from migraines and randomly assigned them to receive either the TMS (transcranial magnetic stimulation) treatment or a sham (placebo) treatment from a device that did not deliver any stimulation. Subjects were instructed to apply the device at the onset of migraine symptoms and then assess how they felt two hours later. This dataset is a two-way table of the results. ", "url": "http://vincentarelbundock.github.io/Rdatasets/doc/Stat2Data/Migraines.html", "filename": "Migraines", "name": "Migraines and TMS", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A study investigated whether a handheld device that sends a magnetic pulse into a person's head might be an effective treatment for migraine headaches. Researchers recruited 200 subjects who suffered from migraines and randomly assigned them to receive either the TMS (transcranial magnetic stimulation) treatment or a sham (placebo) treatment from a device that did not deliver any stimulation. Subjects were instructed to apply the device at the onset of migraine symptoms and then assess how they felt two hours later. 
This dataset is a two-way table of the results. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/TMS.csv", "filename": "TMS", "name": "Effects of transcranial magnetic stimulation (TMS) on migraine headaches ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Neurology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Cowles data frame has 1421 rows and 4 columns. These data come from a study of the personality determinants of volunteering for psychological research. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Cowles.csv", "filename": "Cowles", "name": "Cowles and Davis's Data on Volunteering", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Ginzberg data frame has 82 rows and 6 columns. The data are for psychiatric patients hospitalized for depression. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Ginzberg.csv", "filename": "Ginzberg", "name": "Data on Depression", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set illustrates analysis of a multifactor observational study, with response given by subject's score on a vocabulary test, and factors for age group, education level, natality status, gender and year of the survey. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/GSSvocab.csv", "filename": "GSSvocab", "name": " Data from the General Social Survey (GSS) from the National Opinion Research Center of the University of Chicago. ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Guyer data frame has 20 rows and 3 columns. The data are from an experiment in which four-person groups played a prisoner's dilemma game for 30 trails, each person making either a cooperative or competitive choice on each trial. Choices were made either anonymously or in public; groups were composed either of females or of males. The observations are 20 groups. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Guyer.csv", "filename": "Guyer", "name": "Anonymity and Cooperation", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Moore data frame has 45 rows and 4 columns. The data are for subjects in a social-psychological experiment, who were faced with manipulated disagreement from a partner of either of low or high status. The subjects could either conform to the partner's judgment or stick with their own judgment. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Moore.csv", "filename": "Moore", "name": "Status, Authoritarianism, and Conformity", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Wong data frame has 331 row and 7 columns. The observations are longitudinal data on recovery of IQ after comas of varying duration for 200 subjects.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Wong.csv", "filename": "Wong", "name": " Post-Coma Recovery of IQ ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Six tests were given to 112 individuals. The covariance matrix is given in this object. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/ability.cov.csv", "filename": "abilityCov", "name": "Ability and Intelligence Tests", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A correlation matrix of 24 psychological tests given to 145 seventh and eight-grade children in a Chicago suburb by Holzinger and Swineford. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/Harman74.cor.csv", "filename": "Harman74cor", "name": "Harman Example 7.4", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Cushny and Peebles (1905) studied the effects of hydrobromides related to scopolamine and atropine in producing sleep. The sleep of mental patients was measured without hypnotic (Control) and after treatment with one of three drugs: L. hyoscyamine hydrobromide (L_hyoscyamine), L. hyoscine hydrobromide (L_hyoscyine), and a mixture (racemic) form, DL_hyoscine, called atropine. The L (levo) and D (detro) form of a given molecule are optical isomers (mirror images). \nThe drugs were given on alternate evenings, and the hours of sleep were compared with the intervening control night. Each of the drugs was tested in this manner a varying number of times in each subject. The average number of hours of sleep for each treatment is the response. \nStudent (1908) used these data to illustrate the paired-sample t-test in small samples, testing the hypothesis that the mean difference between a given drug and the control condition was zero. This data set became well known when used by Fisher (1925). Both Student and Fisher had problems labeling the drugs correctly (see Senn & Richardson (1994)), and consequently came to wrong conclusions. \nBut as well, the sample sizes (number of nights) for each mean differed widely, ranging from 3-9, and this was not taken into account in their analyses. To allow weighted analyses, the number of observations for each mean is contained in the data frame CushnyPeeblesN. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/CushnyPeebles.csv", "filename": "CushnyPeebles", "name": " Cushny-Peebles Data: Soporific Effects of Scopolamine Derivatives ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In a remarkable brief note in Nature, 1871, W. Stanley Jevons described the results of an experiment he had conducted on himself to determine the limits of the number of objects an observer could comprehend immediately without counting them. This was an important philosophical question: How many objects can the mind embrace at once? \nHe carried out 1027 trials in which he tossed an \"uncertain number\" of uniform black beans into a box and immediately attempted to estimate the number \"without the least hesitation\". His questions, procedure and analysis anticipated by 75 years one of the most influential papers in modern cognitive psychology by George Miller (1956), \"The magical number 7 plus or minus 2: Some limits on ...\" For Jevons, the magical number was 4.5, representing an empirical law of complete accuracy. \n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Jevons.csv", "filename": "Jevons", "name": " W. 
Stanley Jevons' data on numerical discrimination ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a psychiatric screening questionnaire ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/GHQ.csv", "filename": "GHQ", "name": " General Health Questionnaire ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The framing data contains 265 rows and 15 columns of data from a framing experiment conducted by Brader, Valentino and Suhay (2008). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mediation/framing.csv", "filename": "framing", "name": "Brader, Valentino and Suhay (2008) Framing Experiment Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A recurring question in the study of affect is the proper dimensionality and the relationship to various personality dimensions. Here is a data set taken from two studies of mood and arousal using movies to induce affective states. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/affect.csv", "filename": "affect", "name": "Two data sets of affect and arousal scores as a function of personality and movie conditions ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "25 personality self report items taken from the International Personality Item Pool (ipip.ori.org) were included as part of the Synthetic Aperture Personality Assessment (SAPA) web based personality assessment project. The data from 2800 subjects are included here as a demonstration set for scale construction, factor analysis, and Item Response Theory analysis. Three additional demographic variables (sex, education, and age) are also included. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/bfi.csv", "filename": "bfi", "name": "25 Personality items representing 5 factors", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Cyril Burt reported an early factor analysis with a circumplex structure of 11 emotional variables in 1915. 8 of these were subsequently used by Harman in his text on factor analysis. Unfortunately, it seems as if Burt made a mistake for the matrix is not positive definite. With one change from .87 to .81 the matrix is positive definite. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/burt.csv", "filename": "burt", "name": "11 emotional variables from Burt (1915)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rindskopf and Rose (1988) use this data set to demonstrate confirmatory second order factor models. It is a nice example data set to explore hierarchical structure and alternative factor solutions. It contains measures of fluid and crystallized intelligence. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/cattell.csv", "filename": "cattell", "name": "12 cognitive variables from Cattell (1963) ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dwyer (1937) introduced a technique for factor extension and used 8 cognitive variables from Thurstone. This is the example data set used in his paper. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Dwyer.csv", "filename": "Dwyer", "name": "8 cognitive variables used by Dwyer for an example. ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The EPI is and has been a very frequently administered personality test with 57 measuring two broad dimensions, Extraversion-Introversion and Stability-Neuroticism, with an additional Lie scale. Developed by Eysenck and Eysenck, 1964. 
Eventually replaced with the EPQ which measures three broad dimensions. This data set represents 3570 observations collected in the early 1990s at the Personality, Motivation and Cognition lab at Northwestern. An additional data set (epiR) has test and retest information for 474 participants. The data are included here as demonstration of scale construction and test-retest reliability. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/epi.csv", "filename": "epi", "name": "Eysenck Personality Inventory (EPI) data for 3570 participants", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A small data set of 5 scales from the Eysenck Personality Inventory, 5 from a Big 5 inventory, a Beck Depression Inventory, and State and Trait Anxiety measures. Used for demonstrations of correlations, regressions, graphic displays. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/epi.bfi.csv", "filename": "epi_bfi", "name": "13 personality scales from the Eysenck Personality Inventory and Big 5 inventory", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The EPI is and has been a very frequently administered personality test with 57 measuring two broad dimensions, Extraversion-Introversion and Stability-Neuroticism, with an additional Lie scale. Developed by Eysenck and Eysenck, 1964. Eventually replaced with the EPQ which measures three broad dimensions. This data set represents 3570 observations collected in the early 1990s at the Personality, Motivation and Cognition lab at Northwestern. 
An additional data set (epiR) has test and retest information for 474 participants. The data are included here as demonstration of scale construction and test-retest reliability. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/epi.dictionary.csv", "filename": "epi_", "name": "Eysenck Personality Inventory (EPI) data for 3570 participants", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Five classic data sets reported by Harman (1967) are 9 psychological (cognitive) variables taken from Holzinger and 8 emotional variables taken from Burt. Two others are socioeconomic and political data sets. Additionally, 8 physical variables. All five of these are used for tests and demonstrations of various factoring algorithms. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Harman.5.csv", "filename": "Harman", "name": "Five data sets from Harman (1967). 9 cognitive variables from Holzinger and 8 emotional variables from Burt", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Five classic data sets reported by Harman (1967) are 9 psychological (cognitive) variables taken from Holzinger and 8 emotional variables taken from Burt. Two others are socioeconomic and political data sets. Additionally, 8 physical variables. All five of these are used for tests and demonstrations of various factoring algorithms. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Harman.8.csv", "filename": "Harman_8", "name": "Five data sets from Harman (1967). 9 cognitive variables from Holzinger and 8 emotional variables from Burt", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Five classic data sets reported by Harman (1967) are 9 psychological (cognitive) variables taken from Holzinger and 8 emotional variables taken from Burt. Two others are socioeconomic and political data sets. Additionally, 8 physical variables. All five of these are used for tests and demonstrations of various factoring algortithms. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Harman.political.csv", "filename": "Harman_political", "name": "Five data sets from Harman (1967). 9 cognitive variables from Holzinger and 8 emotional variables from Burt", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "16 multiple choice ability items taken from the Synthetic Aperture Personality Assessment (SAPA) web based personality assessment project. The data from 1525 subjects are included here as a demonstration set for scoring multiple choice inventories and doing basic item statistics. For more information on the development of an open source measure of cognitive ability, consult the readings available at the personality-project.org. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/iqitems.csv", "filename": "iqitems", "name": "16 multiple choice IQ items", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Emotions may be described either as discrete emotions or in dimensional terms. The Motivational State Questionnaire (MSQ) was developed to study emotions in laboratory and field settings. The data can be well described in terms of a two dimensional solution of energy vs tiredness and tension versus calmness. Additional items include what time of day the data were collected and a few personality questionnaire scores. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/msq.csv", "filename": "msq", "name": "75 mood items from the Motivational State Questionnaire for 3896 participantsv", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Emotions may be described either as discrete emotions or in dimensional terms. The Motivational State Questionnaire (MSQ) was developed to study emotions in laboratory and field settings. The data can be well described in terms of a two dimensional solution of energy vs tiredness and tension versus calmness. Alternatively, this space can be organized by the two dimensions of Positive Affect and Negative Affect. Additional items include what time of day the data were collected and a few personality questionnaire scores. 3032 unique participants took the MSQ at least once, 2753 at least twice, 446 three times, and 181 four times. 
The 3032 participants also took the sai state anxiety inventory at the same time. Some studies manipulated arousal by caffeine, other manipulations included affect inducing movies. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/msqR.csv", "filename": "msqR", "name": "75 mood items from the Motivational State Questionnaire for 3032 unique participants", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The NEO.PI.R is a widely used personality test to assess 5 broad factors (Neuroticism, Extraversion, Openness, Agreeableness and Conscientiousness) with six facet scales for each factor. The correlation matrix of the facets is reported in the NEO.PI.R manual for 1000 subjects. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/neo.csv", "filename": "neo", "name": "NEO correlation matrix from the NEO_PI_R manual", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "State Anxiety was measured two-three times in 11 studies at the Personality-Motivation-Cognition laboratory. Here are item responses for 11 studies (9 repeated twice, 2 repeated three times). In all studies, the first occasion was before a manipulation. In some studies, caffeine, or movies or incentives were then given to some of the participants before the second and third STAI was given. In addition, Trait measures are available and included in the tai data set (3032 subjects). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/sai.csv", "filename": "sai", "name": "State Anxiety data from the PMC lab over multiple occasions. ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "State Anxiety was measured two-three times in 11 studies at the Personality-Motivation-Cognition laboratory. Here are item responses for 11 studies (9 repeated twice, 2 repeated three times). In all studies, the first occasion was before a manipulation. In some studies, caffeine, or movies or incentives were then given to some of the participants before the second and third STAI was given. In addition, Trait measures are available and included in the tai data set (3032 subjects). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/sai.dictionary.csv", "filename": "sai_dictionary", "name": "State Anxiety data from the PMC lab over multiple occasions. ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Self reported scores on the SAT Verbal, SAT Quantitative and ACT were collected as part of the Synthetic Aperture Personality Assessment (SAPA) web based personality assessment project. Age, gender, and education are also reported. The data from 700 subjects are included here as a demonstration set for correlation and analysis. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/sat.act.csv", "filename": "sat_act", "name": "3 Measures of ability: SATV, SATQ, ACT", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The SPI (SAPA Personality Inventory) is a set of 135 items primarily selected from International Personality Item Pool (ipip.ori.org). This is an example data set collected using SAPA procedures the sapa-project.org web site. This data set includes 10 demographic variables as well. The data set with 4000 observations on 145 variables may be used for examples in scale construction and validation, as well as empirical scale construction to predict multiple criteria. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/spi.csv", "filename": "spi_", "name": "A sample from the SAPA Personality Inventory including an item dictionary and scoring keys.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Nurit Tal-Or, Jonanathan Cohen, Yariv Tasfati, and Albert Gunther (2010) examined the presumed effect of media on other people and change in attitudes. This data set is from Study 2, and examined the effect of presumed influence of the media upon subsequent actions. It is used as an example of mediation by Hayes (2013) and for the mediate function. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Tal_Or.csv", "filename": "Tal_Or", "name": " Data set testing causal direction in presumed media influence", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Nurit Tal-Or, Jonanathan Cohen, Yariv Tasfati, and Albert Gunther (2010) examined the presumed effect of media on other people and change in attitudes. This data set is from Study 2, and examined the effect of presumed influence of the media upon subsequent actions. It is used as an example of mediation by Hayes (2013) and for the mediate function. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Tal.Or.csv", "filename": "Tal__Or", "name": " Data set testing causal direction in presumed media influence", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Tucker and Lewis (1973) introduced a reliability coefficient for ML factor analysis. Their example data set was previously reported by Tucker (1958) and taken from Thurstone and Thurstone (1941). The correlation matrix is a 9 x 9 for 710 subjects and has two correlated factors of ability: Word Fluency and Verbal. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Tucker.csv", "filename": "Tucker", "name": " 9 Cognitive variables discussed by Tucker and Lewis (1973) ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Amyloid-beta and cognitive impairment for a sample of Catholic priests ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Amyloid.csv", "filename": "Amyloid", "name": "Amyloid-beta and Cognitive Impairment", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from an experiment on reaction times to audio or visual stimuli by Oberlin College students. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/AudioVisual.csv", "filename": "AudioVisual", "name": "Reaction Times to Audio and Visual Stimuli", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Age at first speaking and aptitude test scores ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ChildSpeaks.csv", "filename": "ChildSpeaks", "name": "Age at First Speaking", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from an experiment relating pupil dilation to sexual orientation. \nDilateDiff is, essentially, the difference in pupil dilation when looking at (a) same-sex nudes and (b) opposite-sex nude photographs. More specifically, multiple measurements of pupil size were taken under each of the two conditions, together with a third condition that involved a neutral stimulus. Within-subject z-scores were then computed, which led to the DilateDiff numbers used here. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Eyes.csv", "filename": "Eyes", "name": "Pupil Dilation and Sexual Orientation", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Ratings from a facial photo and actual faithfulness. 
\nCollege students were asked to look at a photograph of an opposite-sex adult face and to rate the person, on a scale from 1 (low) to 10 (high), for attractiveness. They were also asked to rate trustworthiness, faithfulness, and sexual dimorphism (i.e., how masculine a male face is and how feminine a female face is). Overall, 68 students (34 males and 34 females) rated 170 faces (88 men and 82 women). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FaithfulFaces.csv", "filename": "FaithfulFaces", "name": "Faithfulness from a Photo?", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Survey data to see if subjects can guess author's sex from handwriting specimens ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Handwriting.csv", "filename": "Handwriting", "name": "Guess Author's Sex from Handwriting?", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "True IQ and guessed IQ (from a photo) for 40 women \nOne hundred sixty raters (75 men and 85 women) took part in judging intelligence (on a 1=high to 7=low scale) based on photographs of students. The ratings were converted to z-scores and then put on an IQ scale to compare to actual measured IQ. There were photos of 80 students, 40 men and 40 women. This data set contains data for the 40 women. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/IQGuessing.csv", "filename": "IQGuessing", "name": "Guess IQ from a Photo?", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Admissions to a mental health emergency room and full moons \nSome researchers in the early 1970s set out to study whether there is a \"full-moon\" effect on emergency room admissions at a mental health hospital. They separated the data over 12 months into rates before the full moon (mean number of patients seen 4-13 days before the full moon), during the full moon (the number of patients seen on the full moon day), and after the full moon (mean number of patients seen 4-13 days after the full moon). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MentalHealth.csv", "filename": "MentalHealth", "name": "Mental Health Admissions", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Attitudes towards ethics of a famous Milgram experiment \nOne of the most famous and most disturbing psychological studies of the twentieth century took place in the laboratory of Stanley Milgram at Yale University. Milgram's subjects were asked to monitor the answers of a \"learner\" and to push a button to deliver shocks whenever the learner gave a wrong answer. The more wrong answers, the more powerful the shock. Even Milgram himself was surprised by the results: Every one of his subjects ended up delivering what they thought was a dangerous 300-volt shock to a slow \"learner\" as punishment for repeated wrong answers. 
\n\nEven though the \"shocks\" were not real and the \"learner\" was in on the secret, the results triggered a hot debate about ethics and experiments with human subjects. To study attitudes on this issue, Harvard graduate student Maryann de Mateo conducted a randomized comparative experiment. Her subjects were 37 high school teachers who did not know about the Milgram study. Using chance, Maryann assigned each teacher to one of three treatment groups:\n\nGroup 1: Actual results. Each subject in this group read a description of Milgram's study, including the actual results that every subject delivered the highest possible \"shock.\" \n\nGroup 2: Many complied. Each subject read the same description given to the subjects in Group 1, except that the actual results were replaced by fake results, that many but not all subjects complied. \n\nGroup 3. Most refused. For subjects in this group, the fake results said that most subjects refused to comply. \n\nAfter reading the description, each subject was asked to rate the study according to how ethical they thought it was, from 1 (not at all ethical) to 9 (completely ethical.) ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Milgram.csv", "filename": "Milgram", "name": "Ethics and a Milgram Experiment", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Estimates of 45 seconds with different music playing \nParticipants were asked to judge when 45 seconds had passed in silence (control), while listening to an upbeat song (Metropolis, by David Guetta and Nicky Romero), and while listening to a calm song (Bach's Das Wohltemperierte Klavier, Prelude in C Major). The order in which the three conditions were experienced was randomized for each participant. 
Time until subject guessed 45 seconds had elapsed (TimeGuess) and the magnitude of the difference from 45 (Accuracy) were recorded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MusicTime.csv", "filename": "MusicTime", "name": "Estimating Time with Different Music Playing", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Effect of a waiter leaving a joke or an advertisement on getting a tip \nCan telling a joke affect whether or not a waiter in a coffee bar receives a tip from a customer? A study investigated this question at a coffee bar at a famous resort on the west coast of France. The waiter randomly assigned coffee-ordering customers to one of three groups: When receiving the bill one group also received a card telling a joke, another group received a card containing an advertisement for a local restaurant, and a third group received no card at all. He recorded whether or not each customer left a tip. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/TipJoke.csv", "filename": "TipJoke", "name": "Improve Chances of Getting a Tip?", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Ratings of an OCD symptom in psychotherapy sessions \nA patient had been diagnosed with OCD (obsessive/compulsive disorder) and underwent a series of psychotherapy sessions. Notes from the sessions were presented to three different experienced therapists who rated sessions with a particular OCD symptom (defense of undoing) on a 1 to 4 scale (smaller values indicating worse symptoms). 
If all three judges agreed on the stage of a session, that determined the category. Otherwise, they discussed until they reached a consensus. The sessions were also grouped into six groups with I being the earliest sessions and VI being the latest. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Undoing.csv", "filename": "Undoing", "name": "Defense of Undoing OCD Symptoms in Psychotherapy", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Experiment to compare visual and verbal performance \nSubjects carried out two kinds of tasks, one visual (identify letters), one verbal (identify sentences), and reported the results in either of two ways, one visual (pointing at a response), one verbal (speaking a response). Time to complete each task was recorded in seconds. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/VisualVerbal.csv", "filename": "VisualVerbal", "name": "Visual versus Verbal Performance", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Hout et al. (1987) given by Agresti (1990) summarizing the responses of married couples to the questionnaire item: Sex is fun for me and my partner: (a) never or occasionally, (b) fairly often, (c) very often, (d) almost always. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/SexualFun.csv", "filename": "SexualFun", "name": "Sex is Fun", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Psychology" } ] }, { "name": "Nature", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set is part of a long study into body temperature regulation in beavers. Four adult female beavers were live-trapped and had a temperature-sensitive radio transmitter surgically implanted. Readings were taken every 10 minutes. The location of the beaver was also recorded and her activity level was dichotomized by whether she was in the retreat or outside of it since high-intensity activities only occur outside of the retreat. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/beaver.csv", "filename": "beaver", "name": "Beaver Body Temperature Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "144 adult (over 2kg in weight) cats used for experiments with the drug digitalis had their heart and body weight recorded. 47 of the cats were female and 97 were male. The catsM data frame consists of the data for the male cats. 
The full data are in dataset cats in package MASS.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/catsM.csv", "filename": "catsM", "name": "Weight Data for Domestic Cats", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on the cardiac oxygen consumption and left ventricular pressure were gathered on 7 domestic dogs.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/dogs.csv", "filename": "dogs", "name": "Cardiac Data for Domestic Dogs ", "number_format": 0, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Each row of the data frame represents a male duck who is a second generation cross of mallard and pintail ducks. For 11 such ducks a behavioural and plumage index were calculated. These were measured on scales devised for this experiment which was to examine whether there was any link between which species the ducks resembled physically and which they resembled in behaviour. The scale for the physical appearance ranged from 0 (identical in appearance to a mallard) to 20 (identical to a pintail). The behavioural traits of the ducks were on a scale from 0 to 15 with lower numbers indicating closer to mallard-like in behaviour. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/ducks.csv", "filename": "ducks", "name": "Behavioral and Plumage Characteristics of Hybrid Ducks ", "number_format": 0, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Sections were prepared from the brain of adult guinea pigs. Spontaneous currents that flowed into individual brain cells were then recorded and the peak amplitude of each current measured. The aim of the experiment was to see if the current flow was quantal in nature (i.e. that it is not a single burst but instead is built up of many smaller bursts of current). If the current was indeed quantal then it would be expected that the distribution of the current amplitude would be multimodal with modes at regular intervals. The modes would be expected to decrease in magnitude for higher current amplitudes. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/paulsen.csv", "filename": "paulsen", "name": " Neurotransmission in Guinea Pig Brains ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data form a 3x4 factorial experiment, the factors being three poisons and four treatments. 
Each combination of the two factors was used for four animals, the allocation to animals having been completely randomized.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/poisons.csv", "filename": "poisons", "name": "Animal Survival Times", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data measured the survival percentages of batches of rats who were given varying doses of radiation. At each of 6 doses there were two or three replications of the experiment. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/survival.csv", "filename": "survival", "name": "Survival of Rats after Radiation Doses ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data come from an aerial line transect survey of Southern Bluefin Tuna in the Great Australian Bight. An aircraft with two spotters on board flies randomly allocated line transects. Each school of tuna sighted is counted and its perpendicular distance from the transect measured. The survey was conducted in summer when tuna tend to stay on the surface. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/tuna.csv", "filename": "tuna", "name": " Tuna Sighting Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Wolf depredations of livestock on Minnesota farms, 1976-1998. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Depredations.csv", "filename": "Depredations", "name": " Minnesota Wolf Depredation Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is a three-factor experiment with each factor at three levels, for a total of 27 runs. Samples of worsted yarn with different levels of the three factors were given a cyclic load until the sample failed. The goal is to understand how cycles to failure depends on the factors. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Wool.csv", "filename": "Wool", "name": "Wool data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set considers 6 binary attributes for 20 animals. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/cluster/animals.csv", "filename": "animals", "name": "Attributes of Animals", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The fishing data is adapted from Zuur, Hilbe and Ieno (2013) to determine whether the data appears to be generated from more than one generating mechanism. The data are originally adapted from Bailey et al. (2008) who were interested in how certain deep-sea fish populations were impacted when commercial fishing began in locations with deeper water than in previous years. 
Given that there are 147 sites that were researched, the model is of (1) the total number of fish counted per site (totabund); (2) on the mean water depth per site (meandepth); (3) adjusted by the area of the site (sweptarea); (4) the log of which is the model offset. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/fishing.csv", "filename": "fishing", "name": "fishing", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Squirrel data set (nuts) from Zuur, Hilbe, and Ieno (2013). As originally reported by Flaherty et al. (2012), researchers recorded information about squirrel behavior and forest attributes across various plots in Scotland's Abernethy Forest. The study focused on the following variables. Response: cones, the number of cones stripped by red squirrels per plot. Predictors: sntrees, the standardized number of trees per plot; sheight, the standardized mean tree height per plot; scover, the standardized percentage of canopy cover per plot. The stripped cone count was only taken when the mean diameter of trees was under 0.6m (dbh). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/nuts.csv", "filename": "nuts", "name": "nuts", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Numbers of aberrant crypt foci (ACF) in the section 1 of the colons of 22 rats subjected to a single dose of the carcinogen azoxymethane (AOM), sacrificed at 3 different times. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/ACF1.csv", "filename": "ACF1", "name": "Aberrant Crypt Foci in Rat Colons", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The cfseal data frame has 30 rows and 11 columns consisting of weight measurements for various organs taken from 30 Cape Fur Seals that died as an unintended consequence of commercial fishing. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/cfseal.csv", "filename": "cfseal", "name": "Cape Fur Seal Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are from trials that studied the mortality response of codling moth to fumigation with methyl bromide. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/codling.csv", "filename": "codling", "name": "Dose-mortality data, for fumigation of codling moth with methyl bromide", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data compare mean length, mean breadth, and egg color, between cuckoos and their hosts. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/cuckoohosts.csv", "filename": "cuckoohosts", "name": "Comparison of cuckoo eggs with host eggs", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Length and breadth measurements of 120 eggs lain in the nests of six different species of host bird. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/cuckoos.csv", "filename": "cuckoos", "name": "Cuckoo Eggs Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data record, for each of 2000 administrative regions, whether or not dengue was recorded at any time between 1961 and 1990. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/dengue.csv", "filename": "dengue", "name": "Dengue prevalence, by administrative region", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The fossum data frame consists of nine morphometric measurements on each of 43 female mountain brushtail possums, trapped at seven sites from Southern Victoria to central Queensland. This is a subset of the possum data frame. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/fossum.csv", "filename": "fossum", "name": "Female Possum Measurements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The frogs data frame has 212 rows and 11 columns. The data are on the distribution of the Southern Corroboree frog, which occurs in the Snowy Mountains area of New South Wales, Australia. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/frogs.csv", "filename": "frogs", "name": "Frogs Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The kiwishade data frame has 48 rows and 4 columns. The data are from a designed experiment that compared different kiwifruit shading treatments. There are four vines in each plot, and four plots (one for each of four treatments: none, Aug2Dec, Dec2Feb, and Feb2May) in each of three blocks (locations: west, north, east). Each plot has the same number of vines, each block has the same number of plots, with each treatment occurring the same number of times. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/kiwishade.csv", "filename": "kiwishade", "name": "Kiwi Shading Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on the body and brain weights of 20 mice, together with the size of the litter. Two mice were taken from each litter size. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/litters.csv", "filename": "litters", "name": "Mouse Litters", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The lung vector consists of weight measurements of lungs taken from 30 Cape Fur Seals that died as an unintended consequence of commercial fishing. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/lung.csv", "filename": "lung", "name": "Cape Fur Seal Lung Measurements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The milk data frame has 17 rows and 2 columns. Each of 17 panelists compared two milk samples for sweetness. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/milk.csv", "filename": "milk", "name": "Milk Sweetness Study", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The moths data frame has 41 rows and 4 columns. These data are from a study of the effect of habitat on the densities of two species of moth (A and P). Transects were set across the search area. Within transects, sections were identified according to habitat type. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/moths.csv", "filename": "moths", "name": "Moths Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The possum data frame consists of nine morphometric measurements on each of 104 mountain brushtail possums, trapped at seven sites from Southern Victoria to central Queensland. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/possum.csv", "filename": "possum", "name": "Possum Measurements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The possumsites data frame consists of Longitudes, Latitudes, and altitudes for the seven sites from Southern Victoria to central Queensland where the possum observations were made. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/possumsites.csv", "filename": "possumsites", "name": "Possum Sites", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A subset of Animals data frame from the MASS library. It contains the average body and brain measurements of five primates. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/primates.csv", "filename": "primates", "name": "Primate Body and Brain Weights", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ChickWeight data frame has 578 rows and 4 columns from an experiment on the effect of diet on early growth of chicks. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/ChickWeight.csv", "filename": "ChickWeight", "name": "Weight versus age of chicks on different diets", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An experiment was conducted to measure and compare the effectiveness of various feed supplements on the growth rate of chickens. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/chickwts.csv", "filename": "chickwts", "name": "Chicken Weights by Feed Type", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The DNase data frame has 176 rows and 3 columns of data obtained during development of an ELISA assay for the recombinant protein DNase in rat serum. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/DNase.csv", "filename": "DNase", "name": "Elisa assay of DNase", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The counts of insects in agricultural experimental units treated with different insecticides. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/InsectSprays.csv", "filename": "InsectSprays", "name": "Effectiveness of Insect Sprays", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Annual numbers of lynx trappings for 1821–1934 in Canada. Taken from Brockwell & Davis (1991), this appears to be the series considered by Campbell & Walker (1977). ", + "description": "Annual numbers of lynx trappings for 1821-1934 in Canada. Taken from Brockwell & Davis (1991), this appears to be the series considered by Campbell & Walker (1977). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/lynx.csv", "filename": "lynx", - "name": "Annual Canadian Lynx trappings 1821–1934", - "number_format": 31, - "remove_quotes": true, + "name": "Annual Canadian Lynx trappings 1821-1934", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An experiment was conducted to assess the potency of various constituents of orchard sprays in repelling honeybees, using a Latin square design. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/OrchardSprays.csv", "filename": "OrchardSprays", "name": "Potency of Orchard Sprays", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The response is the length of odontoblasts (cells responsible for tooth growth) in 60 guinea pigs. Each animal received one of three dose levels of vitamin C (0.5, 1, and 2 mg/day) by one of two delivery methods, orange juice or ascorbic acid (a form of vitamin C and coded as VC). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/ToothGrowth.csv", "filename": "ToothGrowth", "name": "The Effect of Vitamin C on Tooth Growth in Guinea Pigs", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The number of immobile daphnids –in contrast to mobile daphnids– out of a total of 20 daphnids was counted for several concentrations of a toxic substance. ", + "description": "The number of immobile daphnids -in contrast to mobile daphnids- out of a total of 20 daphnids was counted for several concentrations of a toxic substance. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/daphnids.csv", "filename": "daphnids", "name": "Daphnia test", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset was obtained from a toxicity test using earthworms, and it contains the number of earthworms remaining in a container that was contaminated with a toxic substance (not disclosed) at various doses; so the number of earthworms not migrating to the neighbouring uncontaminated container. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/earthworms.csv", "filename": "earthworms", "name": "Earthworm toxicity test", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "For three days, moths of the tobacco budworm (Heliothis virescens) were exposed to doses of the pyrethroid trans-cypermethrin. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/H.virescens.csv", "filename": "Hvirescens", "name": "Mortality of tobacco budworms", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset contains measurements of mean arterial pressure (mmHG) and heart rate (b/min) for a baroreflex curve. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/heartrate.csv", "filename": "heartrate", "name": "Heart rate baroreflexes for rabbits", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Juvenile mysid shrimp (Mysidopsis bahia) were exposed to up to 32% effluent in a 7-day survival and growth test. The average weight per treatment replicate of surviving organisms was measured. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/M.bahia.csv", "filename": "M_bahia", "name": "Effect of an effluent on the growth of mysid shrimp", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data consist of average body weight gain of chickens being treated with one of the two methionine sources DLM and HMTBA. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/methionine.csv", "filename": "methionine", "name": "Weight gain for different methionine sources", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Test data from a 21 day fish test following the guidelines OECD GL204, using the test organism Rainbow trout Oncorhynchus mykiss. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/O.mykiss.csv", "filename": "O_mykiss", "name": "Test data from a 21 day fish test", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Fathead minnows (Pimephales promelas) were exposed to sodium pentachlorophenate concentrations ranging from 32 to 512 micro g/L in a 7-day larval survival and growth test. The average dry weight was measured. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/P.promelas.csv", "filename": "P_promelas", "name": "Effect of sodium pentachlorophenate on growth of fathead minnow", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dose-response experiment with vinclozolin in an AR reporter gene assay. The basic dose-response experiment was repeated 6 times on different days. Chinese Hamster Ovary cells were exposed to various concentrations of vinclozolin for 22 hours and the resulting luminescense effects were recorded. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/vinclozolin.csv", "filename": "vinclozolin", "name": "Vinclozolin from AR in vitro assay", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 1182 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Fishing.csv", "filename": "Fishing", "name": "Choice of Fishing Mode ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 13705 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Tuna.csv", "filename": "Tuna", "name": "Choice of Brand for Tuna ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Annual sheep livestock numbers in Asia (in million head). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/livestock.csv", "filename": "livestock", "name": "Livestock (sheep) in Asia, 1961-2007.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A data frame attributed to Meyer (1989). 
\n“The pedigrees for each of these 282 animals derive from an additional 24 base population (Generation 0) animals that do not have records of their own but, nevertheless, are of interest with respect to the inference on their own additive genetic values. Furthermore, it is presumed that these original 24 base animals are not related to each other. Therefore, the row dimension of u is 306 (282+24).” (Templeman \\& Rosa 2004) ", + "description": "A data frame attributed to Meyer (1989). \n\u201cThe pedigrees for each of these 282 animals derive from an additional 24 base population (Generation 0) animals that do not have records of their own but, nevertheless, are of interest with respect to the inference on their own additive genetic values. Furthermore, it is presumed that these original 24 base animals are not related to each other. Therefore, the row dimension of u is 306 (282+24).\u201d (Templeman \\& Rosa 2004) ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/meyer.csv", "filename": "meyer", "name": "A pedigree data on 282 animals deriving from two generations", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dietox data frame has 861 rows and 7 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/geepack/dietox.csv", "filename": "dietox", "name": "Growth curves of pigs in a 3x3 factorial experiment", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is an updated and expanded version of the mammals sleep dataset. Updated sleep times and weights were taken from V. M. 
Savage and G. B. West. A quantitative, theoretical framework for understanding mammalian sleep. Proceedings of the National Academy of Sciences, 104 (3):1051-1056, 2007. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/msleep.csv", "filename": "msleep", "name": "An updated and expanded version of the mammals sleep dataset", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This vector field was produced from the data described in Brillinger, D.R., Preisler, H.K., Ager, A.A. and Kie, J.G. \"An exploratory data analysis (EDA) of the paths of moving animals\". J. Statistical Planning and Inference 122 (2004), 43-63, using the methods of Brillinger, D.R., \"Learning a potential function from a trajectory\", Signal Processing Letters. December (2007). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/seals.csv", "filename": "seals", "name": "Vector field of seal movements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data are from a foster feeding experiment with rat mothers and litters of four different genotypes. The measurement is the litter weight after a trial feeding period. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/foster.csv", "filename": "foster", "name": " Foster Feeding Experiment ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Percentage incidence of the 13 characteristics of water voles in 14 areas. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/watervoles.csv", "filename": "watervoles", "name": " Water Voles Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data arise from an experiment to study the gain in weight of rats fed on four different diets, distinguished by amount of protein (low and high) and by source of protein (beef and cereal). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/weightgain.csv", "filename": "weightgain", "name": " Gain in Weight of Rats ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The baboon data frame has 25 rows and 2 columns. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/KMsurv/baboon.csv", "filename": "baboon", "name": "data from Exercise 5.8, p147", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Contagious bovine pleuropneumonia (CBPP) is a major disease of cattle in Africa, caused by a mycoplasma. This dataset describes the serological incidence of CBPP in zebu cattle during a follow-up survey implemented in 15 commercial herds located in the Boji district of Ethiopia. The goal of the survey was to study the within-herd spread of CBPP in newly infected herds. Blood samples were quarterly collected from all animals of these herds to determine their CBPP status. These data were used to compute the serological incidence of CBPP (new cases occurring during a given time period). Some data are missing (lost to follow-up). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lme4/cbpp.csv", "filename": "cbpp", "name": "Contagious bovine pleuropneumonia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Number of ticks on the heads of red grouse chicks sampled in the field (grouseticks) and an aggregated version (grouseticks_agg); see original source for more details ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lme4/grouseticks.csv", "filename": "grouseticks", "name": " Data on red grouse ticks from Elston et al. 
2001 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Average brain and body weights for 28 species of land animals. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Animals.csv", "filename": "Animals", "name": " Brain and Body Weights for 28 Species ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Reynolds (1994) describes a small part of a study of the long-term temperature dynamics of beaver Castor canadensis in north-central Wisconsin. Body temperature was measured by telemetry every 10 minutes for four females, but data from a one period of less than a day for each of two animals is used there. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/beav1.csv", "filename": "beav1", "name": " Body Temperature Series of Beaver 1 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The heart and body weights of samples of male and female cats used for digitalis experiments. The cats were all adult, over 2 kg body weight. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/cats.csv", "filename": "cats", "name": " Anatomical Data from Domestic Cats ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The crabs data frame has 200 rows and 8 columns, describing 5 morphological measurements on 50 crabs each of two colour forms and both sexes, of the species Leptograpsus variegatus collected at Fremantle, W. Australia. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/crabs.csv", "filename": "crabs", "name": " Morphological Measurements on Leptograpsus Crabs ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Knight and Skagen collected during a field study on the foraging behaviour of wintering Bald Eagles in Washington State, USA data concerning 160 attempts by one (pirating) Bald Eagle to steal a chum salmon from another (feeding) Bald Eagle. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/eagles.csv", "filename": "eagles", "name": " Foraging Ecology of Bald Eagles ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a foster feeding experiment with rat mothers and litters of four different genotypes: A, B, I and J. Rat litters were separated from their natural mothers at birth and given to foster mothers to rear. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/genotype.csv", "filename": "genotype", "name": " Rat Genotype Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame with average brain and body weights for 62 species of land mammals. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/mammals.csv", "filename": "mammals", "name": " Brain and Body Weights for 62 Species of Land Mammals ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Five rabbits were studied on two occasions, after treatment with saline (control) and after treatment with the 5-HT_3 antagonist MDL 72222. After each treatment ascending doses of phenylbiguanide were injected intravenously at 10 minute intervals and the responses of mean blood pressure measured. The goal was to test whether the cardiogenic chemoreflex elicited by phenylbiguanide depends on the activation of 5-HT_3 receptors. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Rabbit.csv", "filename": "Rabbit\n", "name": " Blood Pressure in Rabbits ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the numbers of rotifers falling out of suspension for different fluid densities. 
There are two species, pm Polyartha major and kc, Keratella cochlearis and for each species the number falling out and the total number are given. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/rotifer.csv", "filename": "rotifer", "name": " Numbers of Rotifers by Fluid Density ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A numeric vector with 18 determinations by different laboratories of the amount (percentage of the declared total weight) of shrimp in shrimp cocktail. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/shrimp.csv", "filename": "shrimp", "name": " Percentage of Shrimp in Shrimp Cocktail ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Groups of 20 snails were held for periods of 1, 2, 3 or 4 weeks in carefully controlled conditions of temperature and relative humidity. There were two species of snail, A and B, and the experiment was designed as a 4 by 3 by 4 by 2 completely randomized design. At the end of the exposure time the snails were tested to see if they had survived; the process itself is fatal for the animals. The object of the exercise was to model the probability of survival in terms of the stimulus variables, and in particular to test for differences between species. \nThe data are unusual in that in most cases fatalities during the experiment were fairly small. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/snails.csv", "filename": "snails", "name": " Snail Mortality Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The waders data frame has 15 rows and 19 columns. The entries are counts of waders in summer. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/waders.csv", "filename": "waders", "name": " Counts of Waders at 15 Sites in South Africa ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Observations on the maximal running speed of mammal species and their body mass. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/quantreg/Mammals.csv", "filename": "Mammals", "name": "Garland(1983) Data on Running Speed of Mammals", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame with average brain and body weights for 62 species of land mammals and three others. \nNote that this is simply the union of Animals and mammals. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/Animals2.csv", "filename": "Animals2", "name": "Brain and Body Weights for 65 Species of Land Animals", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Possum diversity data: As issued from a study of the diversity of possum (arboreal marsupials) in the Montane ash forest (Australia), this dataset was collected in view of the management of hardwood forest to take conservation and recreation values, as well as wood production, into account. \nThe study is fully described in the two references. The number of different species of arboreal marsupials (possum) was observed on 151 different 3ha sites with uniform vegetation. For each site the nine variable measures (see below) were recorded. The problem is to model the relationship between diversity and these other variables. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/possumDiv.csv", "filename": "possumDiv", "name": "Possum Diversity Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from an experiment to see it the number of bee stings depends on previous stings. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/BeeStings.csv", "filename": "BeeStings", "name": "Do Bee Stings Depend on Previous Stings?", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An experiment on the effects of a hormone on blood calcium levels in robins \nAn experiment looked at the effects of treatment with a hormone for increasing the concentration of calcium in birds. Twenty birds (robins) were used in the study, ten male and ten female, equally divided between the hormone and no hormone treatments. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/BirdCalcium.csv", "filename": "BirdCalcium", "name": "Effect of a Hormone on Bird Calcium Levels", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Nest and species characteristics for North American passerines \nAmy R. Moore, as a student at Grinnell College in 1999, wanted to study the relationship between species characteristics and the type of nest a bird builds, using data collected from available sources. For the study, she collected data by species for 84 separate species of North American passerines. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/BirdNest.csv", "filename": "BirdNest", "name": " Nest Characteristics for Different Bird Species ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Body measurements for a sample of blue jays ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/BlueJays.csv", "filename": "BlueJays", "name": "Blue Jay Measurements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements for a sample of butterflies in Greenland ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ButterfliesBc.csv", "filename": "ButterfliesBc", "name": "Butterfly (Boloria chariclea) Measurements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements on a sample of Manduca Sexta caterpillars ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Caterpillars.csv", "filename": "Caterpillars", "name": "Measurements of Manduca Sexta Caterpillars", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Oxygen intake of crabs with different noise sources \nAnimals that are stressed 
might increase their oxygen consumption. Biologists measured oxygen consumption of shore crabs that were either exposed to 7.5 minutes of ship noise or 7.5 minutes of ambient harbor noise. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CrabShip.csv", "filename": "CrabShip", "name": "Crab Oxygen Intake", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Lengths of cuckoo eggs laid in other birds' nests \nCuckoos are known to lay their eggs in the nests of other (host) birds. The eggs are then adopted and hatched by the host birds. The data give the lengths of cuckoo eggs found in nests of various other bird species. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Cuckoo.csv", "filename": "Cuckoo", "name": "Measurements of Cuckoo Eggs", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The rate of lactic acid turnover was measured by two methods for normal and diabetic dogs. \nFive dogs had their pancreas removed to make them diabetic (Operation=yes), the other five were normal (Operation=no). The rate of turnover of lactic acid was measured for each dog by two methods, infusion and injection. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/DiabeticDogs.csv", "filename": "DiabeticDogs", "name": "Lactic Acid Turnover in Dogs", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame with 28 observations on the following 4 variables. \nThe question of interest is whether a volcanic eruption or asteroid strike had created a dust cloud that led to extinction of most dinosaurs. Rock samples taken in Gubbio, Italy were measured for the concentration of iridium (a rare metal which is more common in asteroids). The deeper the sample, the older the rocks are. A sudden increase in iridium at some point in time would lend support for the asteroid hypothesis. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Dinosaurs.csv", "filename": "Dinosaurs", "name": "Iridium Levels in Rock Layers to Investigate Dinosaur Extinction", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Age and height of male African elephants \nData on 138 male African elephants that lived through droughts in the first two years of life. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ElephantsFB.csv", "filename": "ElephantsFB", "name": "Measurements of Male African Elephants", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Age and height of African elephants \nData on 288 African elephants that lived through droughts in the first two years of life. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ElephantsMF.csv", "filename": "ElephantsMF", "name": "Measurements of African Elephants", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Experiment on effects of diets on weight gain of rats \nData from this experiment compared weight gain for 60 baby rats that were fed different diets. Half of the rats had low-protein diets (Lo) and the rest had high-protein (Hi). The source of protein was either beef, cereal, or pork. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FatRats.csv", "filename": "FatRats", "name": "Diet and Weight of Rats", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Fertility measurement for eggs from a sample of 35 lake trout \nResearchers collected samples of female lake trout from Lake Ontario in September and November of 2002 through 2004. A goal of the study was to investigate the fertility of fish that had been stocked in the lake. 
One measure of the viability of fish eggs is percent dry mass (PctDM) which reflects the energy potential stored in the eggs by recording the percentage of the total egg material that is solid. Values of the PctDM for a sample of 35 lake trout (14 in September and 21 in November) are given in this dataset along with the age (in years) of the fish. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FishEggs.csv", "filename": "FishEggs", "name": "Fertility of Fish Eggs", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Body measurements for a sample of 28 mammal species from a Fitch paper on acoustic allometry \nData on mammal species from a Zoology paper about acoustic allometry by W. Tecumseh Fitch. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Fitch.csv", "filename": "Fitch", "name": "Body Measurements of Mammal Species", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Sexual activity and lifetimes of fruit flies \nHanley and Shapiro (1994) report on a study conducted by Partridge and Farquhar (1981) about the sexual behavior of fruit flies. It was already known that increased reproduction leads to shorter life spans for female fruit flies. But the question remained whether an increase in sexual activity would also reduce the life spans of male fruit flies. The researchers designed an experiment to answer this question. They had a total of 125 male fruit flies to use and they randomly assigned each of the 125 to one of the following five groups. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FruitFlies.csv", "filename": "FruitFlies", "name": "Fruit Fly Sexual Activity and Longevity", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Results from an experiment on male fruit flies with different levels of sexual activity and competition from other males \n\nResearchers randomly assigned virgin male fruit flies to one of two treatments: live alone or live in an environment where they can sense one other male fly. Flies were randomly allocated to either have mating opportunities with female flies or to not have such opportunities. Those flies that were given mating opportunities were given 3, 4, or 5 opportunities to mate (Mating measures this number). Researchers also measured size, lifespan and activity levels of the fruit flies. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FruitFlies2.csv", "filename": "FruitFlies2", "name": "Fruit Fly Sexual Activity and Male Competition", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Brightness and fecundity of female glow-worms \nData on 26 female glow-worms captured in Finland. Female glow-worms attract males by glowing with part of their abdomen (lantern). Researchers believe the brightness of glow might be related to mating success. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/GlowWorms.csv", "filename": "GlowWorms", "name": "Female Glow-worms", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements for a sample of goldenrod galls .\nBiology students collected measurements on goldenrod galls at the Brown Family Environmental Center at Kenyon College. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Goldenrod.csv", "filename": "Goldenrod", "name": "Goldenrod Galls", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Presence/absence of gunnels (eels) at shoreline quadrats \nThis dataset comes from a study on the habitat preferences of a species of eel, called a gunnel. Biologist Jake Shorty sampled quadrats along a coastline and recorded whether or not the species was found in the quadrat. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Gunnels.csv", "filename": "Gunnels", "name": "Present", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data for a samples of hawks from three different species \nStudents and faculty at Cornell College in Mount Vernon, Iowa, collected data over many years at the hawk blind at Lake MacBride near Iowa City, Iowa. 
The data set that we are analyzing here is a subset of the original data set, using only those species for which there were more than 10 observations. Data were collected on random samples of three different species of hawks: Red-tailed, Sharp-shinned, and Cooper's hawks. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Hawks.csv", "filename": "Hawks", "name": "Measurements on Three Hawk Species", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Tail lengths for two hawk species \nTail lengths measured for a sample of 838 hawks observed in Mount Vernon, Iowa. Note: HawkTail2 has these data in unstacked format and they are a subset of the data in Hawks which has a third species (Cooper's hawk). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/HawkTail.csv", "filename": "HawkTail", "name": "Tail Lengths of Hawks", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Tail lengths for two hawk species \nTail lengths measured for a sample of hawks observed in Mount Vernon, Iowa. Note: HawkTail has similar data in stacked format. The Hawks dataset has more variables and a third species (Cooper's hawk). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/HawkTail2.csv", "filename": "HawkTail2", "name": "Tail Lengths of Hawks (Unstacked)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Price and related characteristics of horses listed for sale on the internet \nUndergraduate students at Cal Poly collected data on prices of 50 horses advertised for sale on the internet. Predictor variables of price include the age and height of the horse (in hands), as well as its sex. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/HorsePrices.csv", "filename": "HorsePrices", "name": "Prices of Horses", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Body size and metabolic rate of Manduca Sexta caterpillars \nMarisa Stearns collected and analyzed body size and metabolic rates for Manduca Sexta caterpillars. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MetabolicRate.csv", "filename": "MetabolicRate", "name": "Metabolic Rate of Caterpillars", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Body size and eggs produced for a species of moths \nResearchers were interested in an association between body size and the number of eggs produced by a species of moths. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MothEggs.csv", "filename": "MothEggs", "name": "Moth Eggs", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Effects of altering serotonin levels on social interactions of mice \nSerotonin is a chemical that influences mood balance in humans. But how does it affect mice? Scientists genetically altered mice by \"knocking out\" the expression of a gene, tryptophan hydroxylase 2 (Tph2), that regulates serotonin production. With careful breeding, the scientists produced three types of mice that we label as “Minus” for Tph2-/-, “Plus” for Tph2+/+, “Mixed” for Tph2+/-. The variable Genotype records Minus/Plus/Mixed. The variable Contacts is the number of social contacts that a mouse had with other mice during an experiment and the variable Sex is “M” for males and “F” for females. ", + "description": "Effects of altering serotonin levels on social interactions of mice \nSerotonin is a chemical that influences mood balance in humans. But how does it affect mice? Scientists genetically altered mice by \"knocking out\" the expression of a gene, tryptophan hydroxylase 2 (Tph2), that regulates serotonin production. With careful breeding, the scientists produced three types of mice that we label as \u201cMinus\u201d for Tph2-/-, \u201cPlus\u201d for Tph2+/+, \u201cMixed\u201d for Tph2+/-. The variable Genotype records Minus/Plus/Mixed. The variable Contacts is the number of social contacts that a mouse had with other mice during an experiment and the variable Sex is \u201cM\u201d for males and \u201cF\u201d for females. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MouseBrain.csv", "filename": "MouseBrain", "name": "Effects of Serotonin in Mice", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Comparing methods for measuring the size of oysters \nIn 2001 engineers at an R&D lab Agri-Tech, Inc, in Woodstock, Virginia, designed a 3-D system that they hoped would improve on the existing 2-D system for measuring the size of oysters. The 3-D system used computer scanning to estimate an oyster volume, whereas the old 2-D system estimated a cross-sectional area. Data shows the result of both systems, as well as the actual weight and volume of each oyster used in calibration. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Oysters.csv", "filename": "Oysters", "name": "Size of Oysters", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Size of perch caught in a Finnish lake \nThis dataset comes from a sample of fish (perch) caught at Lake Laengelmavesi in Finland. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Perch.csv", "filename": "Perch", "name": "Perch Sizes", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A scientist in Iowa was interested in additives to standard pig chow that might increase the rate at which the pigs gained weight. 
Two factors of interest were vitamin B12 and antibiotics. To perform the experiment, the scientist randomly assigned 12 pigs, three to each of the diet combinations (Antibiotic only, B12 only, both, and neither). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/PigFeed.csv", "filename": "PigFeed", "name": "Additives in Pig Feed", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Ant counts on samples of different kinds of sandwiches \nAs young students, Dominic Kelly and his friends enjoyed watching ants gather on pieces of sandwiches. Later, as a university student, Dominic decided to study this with a more formal experiment. He chose three types of sandwich fillings (vegemite, peanut butter, and ham & pickles), four types of bread (multigrain, rye, white, and wholemeal), and put butter on some of the sandwiches. \nTo conduct the experiment he randomly chose a sandwich, broke off a piece, and left it on the ground near an ant hill. After several minutes he placed a jar over the sandwich bit and counted the number of ants. He repeated the process, allowing time for ants to return to the hill after each trial, until he had two samples for each combination of the three factors. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/SandwichAnts.csv", "filename": "SandwichAnts", "name": "Ants on Sandwiches", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Metamorphose rates for sea slugs exposed to different water samples \nSea slugs, common on the coast of southern California, live on vaucherian seaweed. The larvae from these sea slugs need to locate this type of seaweed to survive. A study was done to try to determine whether chemicals that leach out of the seaweed attract the larvae. Seawater was collected over a patch of this kind of seaweed at 5-minute intervals as the tide was coming in and, presumably, mixing with the chemicals. The idea was that as more seawater came in, the concentration of the chemicals was reduced. Each sample of water was divided into 6 parts. Fifteen larvae were then introduced to this seawater to see what percentage metamorphosed (an indication that the desired chemical was detected). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/SeaSlugs.csv", "filename": "SeaSlugs", "name": "Sea Slug Larvae", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Weight and wing length for a sample of Savannah sparrows \nPriscilla Erickson from Kenyon College collected data on a stratified random sample of 116 Savannah sparrows at Kent Island. Nests that were reduced, controlled (no change), or enlarged. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Sparrows.csv", "filename": "Sparrows", "name": "Sparrow Measurements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Land area and number of mammal species for islands in Southeast Asia \nThis dataset shows the number of mammal species and the area for 13 islands in Southeast Asia. Biologists have speculated that the number of species is related to the size of an island and would like to be able to predict the number of species given the size of an island. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/SpeciesArea.csv", "filename": "SpeciesArea", "name": "Land Area and Mammal Species", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Comparing intestine lengths for tadpoles with and without exposure to Bd fugus \nBiologists wondered whether tadpoles can adjust the relative length of their intestines if they are exposed to a fungus called Batrachochytrium dendrobatidis (Bd). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Tadpoles.csv", "filename": "Tadpoles", "name": "Effects of a Fungus on Tadpoles", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rat treatment data from Mantel et al. 
Three rats were chosen from each of 100 litters, one of which was treated with a drug, and then all followed for tumor incidence. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/rats.csv", "filename": "rats", "name": "Rat treatment data from Mantel et al", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Fisher et al. (1943) giving the number of tokens found for each of 501 species of butterflies collected in Malaya. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Butterfly.csv", "filename": "Butterfly", "name": "Butterfly Species in Malaya", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from von Bortkiewicz (1898), given by Andrews \\& Herzberg (1985), on number of deaths by horse or mule kicks in 14 corps of the Prussian army. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/VonBort.csv", "filename": "VonBort", "name": "Von Bortkiewicz Horse Kicks Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Galápagos Islands about 600 miles off the coast of Ecuador provide an excellent laboratory for studying the factors that influence the development and survival of different life species. They were the site of much of Charles Darwin’s original research leading later to publication of his “Origin of Species”. 
Descending from a few stranded ancestors and cut off from the rest of the world, the Galápagos animals offer much more obvious proofs of the fact of evolution than can be seen in the more intricate complexities of life in most environments. Darwin wrote \nThe natural history of these islands is eminently curious, and well deserves attention. Most of the organic productions are aboriginal creations, found nowhere else; there is even a difference between the inhabitants of the different islands; yet all show a marked relationship with those of America, though separated from that continent by an open space of ocean, between 500 and 600 miles in width. The archipelago is a little world in itself, or rather a satellite attached to America, whence it has derived a few stray colonists and has received the general character of its indigenous productions. Considering the small size of the islands, we feel the more astonished at the number of their aboriginal beings, and at their confined range. Seeing every height crowned with its crater, and the boundaries of most of the lava-streams still distinct, we are led to believe that within a period geologically recent the unbroken ocean was here spread out. Hence, both in space and time, we seem to be brought somewhere near to that great fact---that mystery of mysteries---the first appearance of new beings on earth.\nAnd from elsewhere in Darwin's diary:\nI never dreamed that islands 50 or 60 miles apart, and most of them in sight of each other, formed of precisely the same rocks, placed under a quite similar climate, rising to a nearly equal height, would have been differently tenanted … It is the circumstance that several of the islands possess their own species of the tortoise, mocking-thrush, finches and numerous plants, these species having the same general habits, occupying analogous situations, and obviously filling the same place in the natural economy of the archipelago, that strikes me with wonder.\nM.P. 
Johnson and P.H. Raven, “Species number and endemism: The Gal�pagos Archipelago revisited”, Science, 179, 893-895 (1973), have presented data giving the number of plant species and related variables for 29 different islands. Counts are given for both the total number of species and the number of species that occur only in the Gal�pagos (the endemics). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nIsland\n\nName of Island\n\nPlants\n\nNumber of plant species\n\nPlantEnd\n\nNumber of endemic plant species\n\nFinches\n\nNumber of finch species\n \nFinchEnd\n\nNumber of endemic finch species\n \nFinchGenera\n \nNumber of finch genera\n \nArea\n \nArea (km^2)\n \nElevation\n \nMaximum elevation (m)\n \nNearest\n \nDistance from to nearest island (km)\n \nStCruz\n \nDistance to Santa Cruz Island (km)\n\nAdjacent\n \nArea of adjacent island (km^2)\n\n\n\n", + "description": "The Gal\u00e1pagos Islands about 600 miles off the coast of Ecuador provide an excellent laboratory for studying the factors that influence the development and survival of different life species. They were the site of much of Charles Darwin\u2019s original research leading later to publication of his \u201cOrigin of Species\u201d. Descending from a few stranded ancestors and cut off from the rest of the world, the Gal\u00e1pagos animals offer much more obvious proofs of the fact of evolution than can be seen in the more intricate complexities of life in most environments. Darwin wrote \nThe natural history of these islands is eminently curious, and well deserves attention. Most of the organic productions are aboriginal creations, found nowhere else; there is even a difference between the inhabitants of the different islands; yet all show a marked relationship with those of America, though separated from that continent by an open space of ocean, between 500 and 600 miles in width. 
The archipelago is a little world in itself, or rather a satellite attached to America, whence it has derived a few stray colonists and has received the general character of its indigenous productions. Considering the small size of the islands, we feel the more astonished at the number of their aboriginal beings, and at their confined range. Seeing every height crowned with its crater, and the boundaries of most of the lava-streams still distinct, we are led to believe that within a period geologically recent the unbroken ocean was here spread out. Hence, both in space and time, we seem to be brought somewhere near to that great fact---that mystery of mysteries---the first appearance of new beings on earth.\nAnd from elsewhere in Darwin's diary:\nI never dreamed that islands 50 or 60 miles apart, and most of them in sight of each other, formed of precisely the same rocks, placed under a quite similar climate, rising to a nearly equal height, would have been differently tenanted \u2026 It is the circumstance that several of the islands possess their own species of the tortoise, mocking-thrush, finches and numerous plants, these species having the same general habits, occupying analogous situations, and obviously filling the same place in the natural economy of the archipelago, that strikes me with wonder.\nM.P. Johnson and P.H. Raven, \u201cSpecies number and endemism: The Gal\u00e1pagos Archipelago revisited\u201d, Science, 179, 893-895 (1973), have presented data giving the number of plant species and related variables for 29 different islands. Counts are given for both the total number of species and the number of species that occur only in the Gal\u00e1pagos (the endemics). 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nIsland\n\nName of Island\n\nPlants\n\nNumber of plant species\n\nPlantEnd\n\nNumber of endemic plant species\n\nFinches\n\nNumber of finch species\n \nFinchEnd\n\nNumber of endemic finch species\n \nFinchGenera\n \nNumber of finch genera\n \nArea\n \nArea (km^2)\n \nElevation\n \nMaximum elevation (m)\n \nNearest\n \nDistance from to nearest island (km)\n \nStCruz\n \nDistance to Santa Cruz Island (km)\n\nAdjacent\n \nArea of adjacent island (km^2)\n\n\n\n", "url": "http://www.statsci.org/data/general/galapagos.txt", "filename": "galapagos", - "name": "Galápagos Island Species Data", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, + "name": "Gal\u00e1pagos Island Species Data", "use_first_row_for_vectorname": true } ], "name": "Animals" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Charles Darwin conducted an experiment to examine the superiority of cross-fertilized plants over self-fertilized plants. 15 pairs of plants were used. Each pair consisted of one cross-fertilized plant and one self-fertilized plant which germinated at the same time and grew in the same pot. The plants were measured at a fixed time after planting and the difference in heights between the cross- and self-fertilized plants are recorded in eighths of an inch.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/darwin.csv", "filename": "darwin", "name": "Darwin's Plant Height Differences ", "number_format": 0, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The location of living bramble canes in a 9m square plot was recorded. 
We take 9m to be the unit of distance so that the plot can be thought of as a unit square. The bramble canes were also classified by their age. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/brambles.csv", "filename": "brambles", "name": "Spatial Location of Bramble Canes ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The cane data frame has 180 rows and 5 columns. The data frame represents a randomized block design with 45 varieties of sugar-cane and 4 blocks.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/cane.csv", "filename": "cane", "name": "Sugar-cane Disease Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The number of balsam-fir seedlings in each quadrant of a grid of 50 five foot square quadrants were counted. The grid consisted of 5 rows of 10 quadrants in each row. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/fir.csv", "filename": "fir", "name": "Counts of Balsam-fir Seedlings ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "8 characteristics for 18 popular flowers.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/cluster/flower.csv", "filename": "flower", "name": "Flower Characteristics", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset constitutes a description of 136 plant species according to biological attributes (morphological or reproductive) ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/cluster/plantTraits.csv", "filename": "plantTraits", "name": "Plant Species Traits Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data frames have averages by blocks (parcels) for the treatment 111. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/ant111b.csv", "filename": "ant111b", "name": "Averages by block of corn yields, for treatment 111 only", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data frames have yield averages by blocks (parcels). The ant111b data set is a subset of this. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/antigua.csv", "filename": "antigua", "name": "Averages by block of yields for the Antigua Corn data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Each of 20 tasters each assessed three out of the four varieties. The experiment was conducted according to a balanced incomplete block design. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/appletaste.csv", "filename": "appletaste", "name": "Tasting experiment that compared four apple varieties", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The biomass data frame has 135 rows and 8 columns. The rainforest data frame is a subset of this one. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/biomass.csv", "filename": "biomass", "name": "Biomass Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are from a study that examined how the electrical resistance of a slab of kiwifruit changed with the apparent juice content. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/fruitohms.csv", "filename": "fruitohms", "name": "Electrical Resistance of Kiwi Fruit", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Leaf length, width and petiole measurements taken at various sites in Australia. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/leafshape.csv", "filename": "leafshape", "name": "Full Leaf Shape Data Set", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The leafshape17 data frame has 61 rows and 8 columns. These are leaf length, width and petiole measurements taken at several sites in Australia. This is a subset of the leafshape data frame. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/leafshape17.csv", "filename": "leafshape17", "name": "Subset of Leaf Shape Data Set", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data consist of measurements of vapour pressure and of the difference between leaf and air temperature. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/leaftemp.csv", "filename": "leaftemp", "name": "Leaf and Air Temperature Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The leaftemp.all data frame has 62 rows and 9 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/leaftemp.all.csv", "filename": "leaftempAll", "name": "Full Leaf and Air Temperature Data Set", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data which compare the heights of crossed plants with self-fertilized plants. Plants were paired within the pots in which they were grown, with one on one side and one on the other. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/mignonette.csv", "filename": "mignonette", "name": "Darwin's Wild Mignonette Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The rainforest data frame has 65 rows and 7 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/rainforest.csv", "filename": "rainforest", "name": "Rainforest Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data were taken from species lists for South Australia, Victoria and Tasmania. Species were classified as CC, CR, RC and RR, with C denoting common and R denoting rare. The first code relates to South Australia and Victoria, and the second to Tasmania. They were further classified by habitat according to the Victorian register, where D = dry only, W = wet only, and WD = wet or dry. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/rareplants.csv", "filename": "rareplants", "name": "Rare and Endangered Plant Species", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The rice data frame has 72 rows and 7 columns. The data are from an experiment that compared wild type (wt) and genetically modified rice plants (ANU843), each with three different chemical treatments (F10, NH4Cl, and NH4NO3). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/rice.csv", "filename": "rice", "name": "Genetically Modified and Wild Type Rice Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The roller data frame has 10 rows and 2 columns. Different weights of roller were rolled over different parts of a lawn, and the depression was recorded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/roller.csv", "filename": "roller", "name": "Lawn Roller Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The seedrates data frame has 5 rows and 2 columns on the effect of seeding rate of barley on yield. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/seedrates.csv", "filename": "seedrates", "name": "Barley Seeding Rate Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Concentration-time measurements on different varieties of apples under methyl bromide injection. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/sorption.csv", "filename": "sorption", "name": "sorption data set", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data frames have yield averages by blocks (parcels). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/stVincent.csv", "filename": "stVincent", "name": "Averages by block of yields for the St. Vincent Corn data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The sugar data frame has 12 rows and 2 columns. They are from an experiment that compared an unmodified wild type plant with three different genetically modified forms. The measurements are weights of sugar that were obtained by breaking down the cellulose. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/sugar.csv", "filename": "sugar", "name": "Description", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The tomato data frame has 24 rows and 2 columns. They are from an experiment that exposed tomato plants to four different 'nutrients'. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/tomato.csv", "filename": "tomato", "name": "Root weights of tomato plants exposed to 4 different treatments", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data frames have averages by blocks (parcels) for the treatment 111. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/vince111b.csv", "filename": "vince111b", "name": "Averages by block of corn yields, for treatment 111 only", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This famous (Fisher's or Anderson's) iris data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv", "filename": "iris", "name": "Edgar Anderson's Iris Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This famous (Fisher's or Anderson's) iris data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris3.csv", "filename": "iris3", "name": "Edgar Anderson's Iris Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Loblolly data frame has 84 rows and 3 columns of records of the growth of Loblolly pine trees. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/Loblolly.csv", "filename": "Loblolly", "name": "Growth of Loblolly pine trees", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Orange data frame has 35 rows and 3 columns of records of the growth of orange trees. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/Orange.csv", "filename": "Orange", "name": "Growth of Orange Trees", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Results from an experiment to compare yields (as measured by dried weight of plants) obtained under a control and two different treatment conditions. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/PlantGrowth.csv", "filename": "PlantGrowth", "name": "Results from an Experiment on Plant Growth", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Operational data of a plant for the oxidation of ammonia to nitric acid. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/stackloss.csv", "filename": "stackloss", "name": "Brownlee's Stack Loss Plant Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Contains normalized tree-ring widths in dimensionless units. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/treering.csv", "filename": "treering", - "name": " Yearly Treering Data, -6000–1979 ", - "number_format": 31, - "remove_quotes": true, + "name": " Yearly Treering Data, -6000-1979 ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set provides measurements of the girth, height and volume of timber in 31 felled black cherry trees. Note that girth is the diameter of the tree (in inches) measured at 4 ft 6 in above the ground. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/trees.csv", "filename": "trees", "name": "Girth, Height and Volume for Black Cherry Trees", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dataset from an experiment exploring the effect of increasing concentrations of a herbicide on the volume of the treated algae. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/algae.csv", "filename": "algae", "name": "Volume of algae as function of increasing concentrations of a herbicide", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "MCPA, 2,4-D, mecorprop and dichorlprop were applied either as technical grades materials (h = 1, 2, 3, 4) or as commercial formulations (herb = 5, 6, 7, 8). Each experimental unit consisted of five 1-week old seedlings grown together in a pot of nutrient solution during 14 days. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/auxins.csv", "filename": "auxins", "name": "Effect of technical grade and commercially formulated auxin herbicides", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Germination data from tests of chickweed seeds from chlorsulfuron resistant and sensitive biotypes ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/chickweed.csv", "filename": "chickweed", "name": " Germination of common chickweed (Stellaria media) ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Germination data from tests of chickweed seeds from chlorsulfuron resistant and sensitive biotypes ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/chickweed0.csv", "filename": "chickweed0", "name": " Germination of common chickweed (Stellaria media) ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Quantal assay data from an experiment where the insectide deguelin was applied to Macrosiphoniella sanborni. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/deguelin.csv", "filename": "deguelin", "name": "Deguelin applied to chrysanthemum aphis", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Small plants of Galium aparine, growing in pots in a green house, were sprayed with the technical grade phenmidipham herbicide either alone or in mixture with an ester of oleic acid. The plants were allowed to grow in the green house for 14 days after herbicide treatment. Then the dry matter was measured per pot. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/G.aparine.csv", "filename": "GAparine", "name": "Herbicide applied to Galium aparine", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Germination data were obtained from experiments involving the three species mungbean, rice, and wheat, which were opposed to different temperatures between 10 and 40 degrees Celsius. Experiments lasted at most 18 days. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/germination.csv", "filename": "germination", "name": " Germination of three crops ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In an experiment barley was grown in a hydroponic solution with a herbicide. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/leaflength.csv", "filename": "leaflength", "name": "Leaf length of barley", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are from an experiment where isobutylalcohol was dissolved in a nutrient solution in which lettuce (Lactuca sativa) plants were grown. The plant biomass of the shoot was determined af 21 days. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/lettuce.csv", "filename": "lettuce", "name": "Hormesis in lettuce plants", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data consist of 5 mixture, 6 dilutions, three replicates, and 12 common controls; in total 102 onservations. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/mecter.csv", "filename": "mecter", "name": "Mechlorprop and terbythylazine tested on Lemna minor", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "To assess the competitive ability between two biotypes of Lolium rigidum, one resistant to glyphosate and the other a sensitive wild type, the density of resistant and sensitive biotypes was counted after germination. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/RScompetition.csv", "filename": "RScompetition", "name": "Competition between two biotypes", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A single dose-response curve. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/ryegrass.csv", "filename": "ryegrass", "name": "Effect of ferulic acid on growth of ryegrass", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are from an experiment, comparing the potency of the two herbicides glyphosate and bentazone in white mustard Sinapis alba. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/S.alba.csv", "filename": "S_alba", "name": "Potency of two herbicides", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Green alga (Selenastrum capricornutum) was exposed to cadmium chloride concentrations ranging from 5 to 80 micro g/L in geometric progression in 4-day population growth test. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/S.capricornutum.csv", "filename": "S_capricornutum", "name": "Effect of cadmium on growth of green alga", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data stem from an experiment assessing the inhibitory effect of secalonic acids on plant growth. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/secalonic.csv", "filename": "secalonic", "name": "Root length measurements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from an experiment investigating the inhibition of photosynthesis in response to two synthetic photosystem II inhibitors, the herbicides diuron and bentazon. More specifically, the effect of oxygen consumption of thylakoid membranes (chloroplasts) from spinach was measured after incubation with the synthetic inhibitors in 5 assays, resulting in 5 dose-response curves. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/spinach.csv", "filename": "spinach", "name": "Inhibition of photosynthesis", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Test on the effect of terbuthylazin on Lemna minor, performed on an aseptic culture according to the OECD-guidelines. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/terbuthylazin.csv", "filename": "terbuthylazin", "name": "The effect of terbuthylazin on growth rate", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total annual rice production (million metric tons) for Guinea. 1970-2011. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/guinearice.csv", "filename": "guinearice", "name": "Rice production (Guinea)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Impact of ozone on the growth of sitka spruce trees. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/geepack/sitka89.csv", "filename": "sitka89", "name": "Growth of Sitka Spruce Trees", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The spruce data frame has 1027 rows and 6 columns. The data consists of measurements on 79 sitka spruce trees over two growing seasons. The trees were grown in four controlled environment chambers, of which the first two, containing 27 trees each, were treated with introduced ozone at 70 ppb whilst the remaining two, containing 12 and 13 trees, were controls. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/geepack/spruce.csv", "filename": "spruce", "name": "Log-size of 79 Sitka spruce trees", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Darwin (1876) studied the growth of pairs of zea may (aka corn) seedlings, one produced by cross-fertilization and the other produced by self-fertilization, but otherwise grown under identical conditions. His goal was to demonstrate the greater vigour of the cross-fertilized plants. The data recorded are the final height (inches, to the nearest 1/8th) of the plants in each pair. \nIn the Design of Experiments, Fisher (1935) used these data to illustrate a paired t-test (well, a one-sample test on the mean difference, cross - self). Later in the book (section 21), he used this data to illustrate an early example of a non-parametric permutation test, treating each paired difference as having (randomly) either a positive or negative sign. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/ZeaMays.csv", "filename": "ZeaMays", "name": " Darwin's Heights of Cross- and Self-fertilized Zea May Pairs ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The mendel3 data frame has 27 rows and 4 columns. Data are from Mendel (1886), and are reproduced in Fisher (1936) and Weir (1996). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/hwde/mendelABC.csv", "filename": "mendelABC", "name": "Mendel's F2 trifactorial data for seed shape (A: round or wrinkled), cotyledon color (B: albumen yellow or green), and seed coat color (C: grey-brown or white)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total yield in bushels per acre for 10 varieties at 6 sites in each of two years. \nThese data are yields in bushels per acre, of 10 varieties of barley grown in 1/40 acre plots at University Farm, St. Paul, and at the five branch experiment stations located at Waseca, Morris, Crookston, Grand Rapids, and Duluth (all in Minnesota). The varieties were grown in three randomized blocks at each of the six stations during 1931 and 1932, different land being used each year of the test. \nImmer et al. (1934) present the data for each Year*Site*Variety*Block. The data here is the average yield across the three blocks. \nImmer et al. (1934) refer (once) to the experiment as being conducted in 1930 and 1931, then later refer to it (repeatedly) as being conducted in 1931 and 1932. Later authors have continued the confusion. \nCleveland (1993) suggests that the data for the Morris site may have had the years switched. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lattice/barley.csv", "filename": "H_barley", "name": " Yield data from a Minnesota barley trial ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on genetic variation in responses to fertilization and simulated herbivory in Arabidopsis ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lme4/Arabidopsis.csv", "filename": "Arabidopsis", "name": " Arabidopsis clipping/fertilization data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The cabbages data set has 60 observations and 4 variables ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/cabbages.csv", "filename": "cabbages", "name": " Data from a cabbage field trial ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A numeric vector of 15 measurements by different laboratories of the pesticide DDT in kale, in ppm (parts per million) using the multiple pesticide residue measurement. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/DDT.csv", "filename": "DDT", "name": " DDT in Kale ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The farms data frame has 20 rows and 4 columns. The rows are farms on the Dutch island of Terschelling and the columns are factors describing the management of grassland. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/farms.csv", "filename": "farms", "name": " Ecological Factors in Farm Management ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The immer data frame has 30 rows and 4 columns. Five varieties of barley were grown in six locations in each of 1931 and 1932. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/immer.csv", "filename": "immer", "name": " Yields from a Barley Field Trial ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The yield of oats from a split-plot field trial using three varieties and four levels of manurial treatment. The experiment was laid out in 6 blocks of 3 main plots, each split into 4 sub-plots. The varieties were applied to the main plots and the manurial treatments to the sub-plots. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/oats.csv", "filename": "oats", "name": " Data from an Oats Field Trial ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Sitka data frame has 395 rows and 4 columns. It gives repeated measurements on the log-size of 79 Sitka spruce trees, 54 of which were grown in ozone-enriched chambers and 25 were controls. The size was measured five times in 1988, at roughly monthly intervals. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Sitka.csv", "filename": "Sitka", "name": " Growth Curves for Sitka Spruce Trees in 1988 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Sitka89 data frame has 632 rows and 4 columns. It gives repeated measurements on the log-size of 79 Sitka spruce trees, 54 of which were grown in ozone-enriched chambers and 25 were controls. The size was measured eight times in 1989, at roughly monthly intervals. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Sitka89.csv", "filename": "Sitka89", "name": " Growth Curves for Sitka Spruce Trees in 1989 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from an experiment to test whether exposure to mites protects against Wilt Disease in cotton plants. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Mites.csv", "filename": "Mites", "name": "Mites and Wilt Disease", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 171 observations \nnumber of observations : 1026 \nobservation : farms \ncountry : Indonesia ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/RiceFarms.csv", "filename": "RiceFarms", "name": "Production of Rice in Indonesia ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Two of the earliest examples of the correlation coefficient were Francis Galton's data sets on the relationship between mid parent and child height and the similarity of parent generation peas with child peas. This is the data set for the Galton height. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/galton.csv", "filename": "galton", "name": "Galton's Mid parent child height data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Francis Galton introduced the correlation coefficient with an analysis of the similarities of the parent and child generation of 700 sweet peas. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/peas.csv", "filename": "peas", "name": "Galton's Peas", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set was used by Campbell (1984) to locate bushfire scars. The dataset contains satelite measurements on five frequency bands, corresponding to each of 38 pixels. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/bushfire.csv", "filename": "bushfire", "name": " Campbell Bushfire Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The damage carrots data set from Phelps (1982) was used by McCullagh and Nelder (1989) in order to illustrate diagnostic techniques because of the presence of an outlier. In a soil experiment trial with three blocks, eight levels of insecticide were applied and the carrots were tested for insect damage. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/carrots.csv", "filename": "carrots", "name": "Insect Damages on Carrots", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements of aspects pulp fibers and the paper produced from them. Four properties of each are measured in sixty-two samples. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/pulpfiber.csv", "filename": "pulpfiber", "name": "Pulp Fiber and Paper Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Growth of alfalfa sprouts in acidic conditions ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Alfalfa.csv", "filename": "Alfalfa", "name": " Alfalfa Growth ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily prices and trading volume of Apple stock from July 21st to August 21st in 2016 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/AppleStock.csv", "filename": "AppleStock", "name": "Daily Price and Volume of Apple Stock", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame with 252 observations on the following 5 variables. \nData on samples of leaves from the species Dodonaea viscosa subsp. angustissima (common name hopbush), which have been collected in a certain region of South Australia for many years. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/LeafWidth.csv", "filename": "LeafWidth", "name": "Leaf Measurements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements of the pesticide fenthion in olive oil over time \nFenthion is a pesticide used against the olive fruit fly in olive groves. It is toxic to humans so it is important that there be no residue left on the fruit or in olive oil that will be consumed. One theory was that if there is residue of the pesticide left in the olive oil, it would dissipate over time. Chemists set out to test that theory by taking a random sample of small amounts of olive oil with fenthion residue and measuring the amount of fenthion in the oil at three different times over the year - day 0, day 281 and day 365. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Olives.csv", "filename": "Olives", "name": "Fenthion in Olive Oil", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset contains data from an experiment conducted by the Department of Biology at Kenyon College at a site near the campus in Gambier, Ohio. In April 1990, student and faculty volunteers planted 1000 white pine (Pinus strobes) seedlings at the Brown Family Environmental Center. These seedlings were planted in two grids, distinguished by 10- and 15-foot spacings between the seedlings. Several variables were measured and recorded for each seedling over time (in 1990, 1996, and 1997). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/PKU.csv", "filename": "Pines", "name": "Measurements of Pine Tree Seedlings", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Plants" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data values are monthly averages of the daily stages (heights) of the Rio Negro at Manaus. Manaus is 18km upstream from the confluence of the Rio Negro with the Amazon but because of the tiny slope of the water surface and the lower courses of its flatland affluents, they may be regarded as a good approximation of the water level in the Amazon at the confluence. The data here cover 90 years from January 1903 until December 1992. \nThe Manaus gauge is tied in with an arbitrary bench mark of 100m set in the steps of the Municipal Prefecture; gauge readings are usually referred to sea level, on the basis of a mark on the steps leading to the Parish Church (Matriz), which is assumed to lie at an altitude of 35.874 m according to observations made many years ago under the direction of Samuel Pereira, an engineer in charge of the Manaus Sanitation Committee Whereas such an altitude cannot, by any means, be considered to be a precise datum point, observations have been provisionally referred to it. The measurements are in metres. 
\n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/manaus.csv", "filename": "manaus", "name": "Average Heights of the Rio Negro river at Manaus", "number_format": 0, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Nitrofen is a herbicide that was used extensively for the control of broad-leaved and grass weeds in cereals and rice. Although it is relatively non-toxic to adult mammals, nitrofen is a significant tetragen and mutagen. It is also acutely toxic and reproductively toxic to cladoceran zooplankton. Nitrofen is no longer in commercial use in the U.S., having been the first pesticide to be withdrawn due to tetragenic effects. \nThe data here come from an experiment to measure the reproductive toxicity of nitrofen on a species of zooplankton (Ceriodaphnia dubia). 50 animals were randomized into batches of 10 and each batch was put in a solution with a measured concentration of nitrofen. Then the number of live offspring in each of the three broods to each animal was recorded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/nitrofen.csv", "filename": "nitrofen", "name": "Toxicity of Nitrofen in Aquatic Systems ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Biweekly averages of the water salinity and river discharge in Pamlico Sound, North Carolina were recorded between the years 1972 and 1977. The data in this set consists only of those measurements in March, April and May. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/salinity.csv", "filename": "salinity", "name": "Water Salinity and River Discharge ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Heights, stored as a multivariate time series, are for the lakes Erie, Michigan/Huron, Ontario and St Clair ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/greatLakes.csv", "filename": "greatLakes", "name": " Yearly averages of Great Lake heights: 1918 - 2009 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Manitoba.lakes data frame has 9 rows and 2 columns. The areas and elevations of the nine largest lakes in Manitoba, Canada. The geography of Manitoba (a relatively flat province) can be divided crudely into three main areas: a very flat prairie in the south which is at a relatively high elevation, a middle region consisting of mainly of forest and Precambrian rock, and a northern region which drains more rapidly into Hudson Bay. All water in Manitoba, which does not evaporate, eventually drains into Hudson Bay. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/Manitoba.lakes.csv", "filename": "ManitobaLakes", "name": "The Nine Largest Lakes in Manitoba", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Annual measurements of the level, in feet, of Lake Huron 1875–1972. ", + "description": "Annual measurements of the level, in feet, of Lake Huron 1875-1972. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/LakeHuron.csv", "filename": "LakeHuron", - "name": "Level of Lake Huron 1875–1972", - "number_format": 31, - "remove_quotes": true, + "name": "Level of Lake Huron 1875-1972", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Measurements of the annual flow of the river Nile at Aswan (formerly Assuan), 1871–1970, in 10^8 m^3, “with apparent changepoint near 1898” (Cobb(1978), Table 1, p.249). ", + "description": "Measurements of the annual flow of the river Nile at Aswan (formerly Assuan), 1871-1970, in 10^8 m^3, \u201cwith apparent changepoint near 1898\u201d (Cobb(1978), Table 1, p.249). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/Nile.csv", "filename": "Nile", "name": "Flow of the River Nile", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This data set gives the lengths (in miles) of 141 “major” rivers in North America, as compiled by the US Geological Survey. 
", + "description": "This data set gives the lengths (in miles) of 141 \u201cmajor\u201d rivers in North America, as compiled by the US Geological Survey. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/rivers.csv", "filename": "rivers", "name": "Lengths of Major North American Rivers", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1980 \nnumber of observations : 659 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Somerville.csv", "filename": "Somerville", "name": "Visits to Lake Somerville ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data represent annual maximal levels of the River Nidd in Yorkshire. These data are suitable for analysis with gev. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/evir/nidd.annual.csv", "filename": "nidd_annual", "name": "The River Nidd Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data represent high river levels of the River Nidd in Yorkshire above a threshold value of 65. These data are suitable for analysis with gpd. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/evir/nidd.thresh.csv", "filename": "nidd_thresh", "name": "The River Nidd Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The original data set is the waterflow in January of the Kootenay river, measured at two locations, namely, Libby (Montana) and Newgate (British Columbia) for 13 consecutive years, 1931–1943. \nThe data set is of mostly interest because it has been used as example in innumerous didactical situations about robust regression. To this end, one number (in observation 4) has been modified from the original data from originally 44.9 to 15.7 (here). ", + "description": "The original data set is the waterflow in January of the Kootenay river, measured at two locations, namely, Libby (Montana) and Newgate (British Columbia) for 13 consecutive years, 1931-1943. \nThe data set is of mostly interest because it has been used as example in innumerous didactical situations about robust regression. To this end, one number (in observation 4) has been modified from the original data from originally 44.9 to 15.7 (here). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/kootenay.csv", "filename": "kootenay", "name": "Waterflow Measurements of Kootenay River in Libby and Newgate", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is a data set consisting of measurements of water salinity (i.e., its salt concentration) and river discharge taken in North Carolina's Pamlico Sound, recording some bi-weekly averages in March, April, and May from 1972 to 1977. This dataset was listed by Ruppert and Carroll (1980). In Carrol and Ruppert (1985) the physical background of the data is described. They indicated that observations 5 and 16 correspond to periods of very heavy discharge and showed that the discrepant observation 5 was masked by observations 3 and 16, i.e., only after deletion of these observations it was possible to identify the influential observation 5. \nThis data set is a prime example of the masking effect. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/salinity.csv", "filename": "salinity_", "name": "Salinity Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Hourly water temperatures from Gulf of Mexico near Key West, Florida \nHourly readings of water temperatures from a measuring device in the Gulf of Mexico near Key West, Florida. The hourly temperatures are provided from October 3, 2016 to October 3, 2017 and were obtained from station 8724580. A few missing values have been interpolated to provide a complete series. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/KeyWestWater.csv", "filename": "KeyWestWater", "name": "Key West Water Temperatures", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Factors relating to Mississippi River levee failure \nThe goal of this investigation was to test the relative importance of geologic, geomorphic, and other physical factors that have led to levee failures through the past century along much of the Mississippi River. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/LeveeFailures.csv", "filename": "LeveeFailures", "name": "Levee Failures along the Mississippi River", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Amounts of iron in water samples of four rivers \nSome geologists were interested in the water chemistry of rivers in upstate New York. They took water samples at three different locations in four rivers (Grasse, Oswegatchie, Raquette, and St. Regis). The sampling sites were chosen to investigate how the composition of the water changes as it flows from the source to the mouth of each river. The sampling sites were labeled as upstream, midstream, and downstream. This dataset contains the concentrations of iron in the samples. The dataset RiverElements has similar concentration data for many other elements. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/RiverIron.csv", "filename": "RiverIron", "name": "Iron in River Water Samples", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Area of sea ice in the Arctic measured yearly in September (1979 to 2015) \nClimatologists have been measuring the amount of sea ice in both the Arctic and Antarctic regions for a number of years. This datafile gives information about the amount of sea ice in the arctic region as measured in September (the time when the amount of ice is at its least) since 1979. The basic research question is to see if we can use time to model the amount of sea ice. \nIn fact, there are two ways to measure the amount of sea ice: Area and Extent. Area measures the actual amount of space taken up by ice. Extent measures the area inside the outer boundaries created by the ice. If there are areas inside the outer boundaries that are not ice (think about a slice of swiss cheese), then the Extent will be a larger number than the Area. In fact, this is almost always true. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/SeaIce.csv", "filename": "SeaIce", "name": "Arctic Sea Ice (1979-2015)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rainfall, wave-surge, Port Pirie and River Nidd data sets. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/texmex/nidd.csv", "filename": "nidd", "name": "Rain, wavesurge, portpirie and nidd datasets.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rain, wavesurge, portpirie and nidd datasets.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/texmex/portpirie.csv", "filename": "portpirie", "name": "Rain, wavesurge, portpirie and nidd datasets.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rainfall, wave-surge, Port Pirie and River Nidd data sets. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/texmex/rain.csv", "filename": "rain", "name": "Rain, wavesurge, portpirie and nidd datasets.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rainfall, wave-surge, Port Pirie and River Nidd data sets. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/texmex/wavesurge.csv", "filename": "wavesurge", "name": "Rain, wavesurge, portpirie and nidd datasets.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Rivers" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data are the pole positions from a paleomagnetic study of New Caledonian laterites. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/polar.csv", "filename": "polar", "name": "Pole Positions of New Caledonian Laterites ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Southern Oscillation Index (SOI) is the difference in barometric pressure at sea level between Tahiti and Darwin. Annual SOI and Australian rainfall data, for the years 1900-2001, are given. Australia's annual mean rainfall is an area-weighted average of the total annual precipitation at approximately 370 rainfall stations around the country. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/bomsoi.csv", "filename": "bomsoi", "name": "Southern Oscillation Index Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The geophones data frame has 56 rows and 2 columns. Thickness of a layer of Alberta substratum as measured by a line of geophones. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/geophones.csv", "filename": "geophones", "name": "Seismic Timing Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "K-Ar Ages (millions of years) and distances (km) from Kilauea along the trend of the chain of Hawaian volcanic islands and other seamounts that are believed to have been created by a moving \"hot spot\". The age of Kilauea is given as 0-0.4 Ma. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/hotspots.csv", "filename": "hotspots", "name": "Hawaian island chain hotspot Potassium-Argon ages", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Ar-Ar Ages (millions of years) and distances (km) from Kilauea along the trend of the chain of Hawaian volcanic islands and other seamounts that are believed to have been created by a moving \"hot spot\". ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/hotspots2006.csv", "filename": "hotspots2006", "name": "Hawaian island chain hotspot Argon-Argon ages", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data characterise rock art at 103 sites in the Pacific. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/rockArt.csv", "filename": "rockArt", "name": "Pacific Rock Art features", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data gives peak accelerations measured at various observation stations for 23 earthquakes in California. The data have been used by various workers to estimate the attenuating affect of distance on ground acceleration. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/attenu.csv", "filename": "attenu", - "name": "The Joyner–Boore Attenuation Data", - "number_format": 31, - "remove_quotes": true, + "name": "The Joyner-Boore Attenuation Data", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data set give the locations of 1000 seismic events of MB > 4.0. The events occurred in a cube near Fiji since 1964. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/quakes.csv", "filename": "quakes", "name": "Locations of Earthquakes off Fiji", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements on 48 rock samples from a petroleum reservoir.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/rock.csv", "filename": "rock", "name": "Measurements on Petroleum Rock Samples", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Maunga Whau (Mt Eden) is one of about 50 volcanos in the Auckland volcanic field. This data set gives topographic information for Maunga Whau on a 10m by 10m grid. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/volcano.csv", "filename": "volcano", "name": "Topographic Information on Auckland's Maunga Whau Volcano", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A 2d density estimate of the waiting and eruptions variables data faithful. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/faithfuld.csv", "filename": "faithfuld", "name": "2d density estimate of Old Faithful data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Henry Cavendish carried out a series of experiments in 1798 to determine the mean density of the earth, as an indirect means to calculate the gravitational constant, G, in Newton's formula for the force (f) of gravitational attraction, f = G m M / r^2 between two bodies of mass m and M. \nStigler (1977) used these data to illustrate properties of robust estimators with real, historical data. For these data sets, he found that trimmed means performed as well or better than more elaborate robust estimators", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Cavendish.csv", "filename": "Cavendish", "name": " Cavendish's Determinations of the Density of the Earth ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A numeric vector of 31 determinations of nickel content (ppm) in a Canadian syenite rock. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/abbey.csv", "filename": "abbey", "name": " Determinations of Nickel Content ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A version of the eruptions data from the ‘Old Faithful’ geyser in Yellowstone National Park, Wyoming. This version comes from Azzalini and Bowman (1990) and is of continuous measurement from August 1 to August 15, 1985. \nSome nocturnal duration measurements were coded as 2, 3 or 4 minutes, having originally been described as ‘short’, ‘medium’ or ‘long’. ", + "description": "A version of the eruptions data from the \"Old Faithful\" geyser in Yellowstone National Park, Wyoming. This version comes from Azzalini and Bowman (1990) and is of continuous measurement from August 1 to August 15, 1985. \nSome nocturnal duration measurements were coded as 2, 3 or 4 minutes, having originally been described as \"short\", \"medium\" or \"long\". ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/geyser.csv", "filename": "geyser", "name": "Old Faithful Geyser Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset was collected on a line transect survey in gilgai territory in New South Wales, Australia. Gilgais are natural gentle depressions in otherwise flat land, and sometimes seem to be regularly distributed. The data collection was stimulated by the question: are these patterns reflected in soil properties? 
At each of 365 sampling locations on a linear grid of 4 meters spacing, samples were taken at depths 0-10 cm, 30-40 cm and 80-90 cm below the surface. pH, electrical conductivity and chloride content were measured on a 1:5 soil:water extract from each sample. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/gilgais.csv", "filename": "gilgais", "name": " Line Transect of Soil in Gilgai Territory ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Skye data frame has 23 rows and 3 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Skye.csv", "filename": "Skye", "name": " AFM Compositions of Aphyric Skye Lavas ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Cobar Ore data from Green and Silverman (1994). The data consists of measurements on the \"true width\" of an ore-bearing rock layer from a mine in Cobar, Australia. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/quantreg/CobarOre.csv", "filename": "CobarOre", "name": " Cobar Ore data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dataset with pH-value and Calcium content in soil samples, collected in different communities of the Condroz region in Belgium. The data pertain to a subset of 428 samples with a pH-value between 7.0 and 7.5. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/condroz.csv", "filename": "condroz", "name": " Condroz Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset investigates the effect from inorganic and organic Phosphorus in the soil upon the phosphorus content of the corn grown in this soil, from Prescott (1975). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/phosphor.csv", "filename": "phosphor", "name": "Phosphorus Content Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Geology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data collected at Winnipeg International Airport (Canada) on periods (in days) between rain events. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/droughts.csv", "filename": "droughts", "name": "Periods Between Rain Events", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Carbon dioxide record from the EPICA (European Project for Ice Coring in Antarctica) Dome C ice core covering 0 to 800 kyr BP. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/edcCO2.csv", "filename": "edcCO2", "name": "EPICA Dome C Ice Core 800KYr Carbon Dioxide Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Temperature record, using Deuterium as a proxy, from the EPICA (European Project for Ice Coring in Antarctica) Dome C ice core covering 0 to 800 kyr BP. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/edcT.csv", "filename": "edcT", "name": "EPICA Dome C Ice Core 800KYr Temperature Estimates", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Details are given of atmospheric pressure at landfall, estimated damage in millions of dollars, and deaths, for named hurricanes that made landfall in the US mainland from 1950 through to 2012. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/hurricNamed.csv", "filename": "hurricNamed", "name": " Named US Atlantic Hurricanes ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly provisional mean total ozone (in Dobson units) at Halley Bay (approximately corrected to Bass-Paur). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/ozone.csv", "filename": "ozone", "name": "Ozone Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily air quality measurements in New York, May to September 1973. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/airquality.csv", "filename": "airquality", "name": "New York Air Quality Measurements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Atmospheric concentrations of CO2 are expressed in parts per million (ppm) and reported in the preliminary 1997 SIO manometric mole fraction scale. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/CO2.csv", "filename": "co2", "name": "Mauna Loa Atmospheric CO2 Concentration", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Waiting time between eruptions and the duration of the eruption for the Old Faithful geyser in Yellowstone National Park, Wyoming, USA. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/faithful.csv", "filename": "faithful", "name": "Old Faithful Geyser Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly numbers of sunspots, as from the World Data Center, aka SIDC. This is the version of the data that will occasionally be updated when new counts become available. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/sunspot.month.csv", "filename": "sunspotmonth", "name": "Monthly Sunspot Data, from 1749 to \"Present\"", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Yearly numbers of sunspots from 1700 to 1988 (rounded to one digit). \nNote that monthly numbers are available as sunspot.month, though starting slightly later. \n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/sunspot.year.csv", "filename": "sunspotyear", - "name": "Yearly Sunspot Data, 1700–1988", - "number_format": 31, - "remove_quotes": true, + "name": "Yearly Sunspot Data, 1700-1988", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly mean relative sunspot numbers from 1749 to 1983. Collected at Swiss Federal Observatory, Zurich until 1960, then Tokyo Astronomical Observatory. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/sunspots.csv", "filename": "sunspots", - "name": "Monthly Sunspot Numbers, 1749–1983", - "number_format": 31, - "remove_quotes": true, + "name": "Monthly Sunspot Numbers, 1749-1983", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1972 \nnumber of observations : 30 \nobservation : regional \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Airq.csv", "filename": "Airq", "name": "Air Quality for Californian Metropolitan Areas ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 1827 \nobservation : individuals \ncountry : Australia ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Kakadu.csv", "filename": "Kakadu", "name": "Willingness to Pay for the Preservation of the Kakadu National Park ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1987 \nnumber of observations : 312 \nobservation : individuals \ncountry : Portugal ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/NaturalPark.csv", "filename": "NaturalPark", "name": "Willingness to Pay for the Preservation of the Alentejo Natural Park ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, 
"use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Maximum annual temperatures (degrees Celsius) for Moorabbin Airport, Melbourne. 1971-2016. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/maxtemp.csv", "filename": "maxtemp", "name": "Maximum annual temperatures at Moorabbin Airport, Melbourne", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Anomalies, for the years 1880 to 2010, from the 1951 - 1980 average. These are the GISS (Goddard Institute for Space Studies) Land-Ocean Temperature Index (LOTI) data ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gamclass/loti.csv", "filename": "loti", "name": " Global temperature anomalies ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Counts of the number of yeast cells were made each of 400 regions in a 20 x 20 grid on a microscope slide, comprising a 1 sq. mm. area. This experiment was repeated four times, giving samples A, B, C and D. \nStudent (1906) used these data to investigate the errors in random sampling. He says \"there are two sources of error: (a) the drop taken may not be representative of the bulk of the liquid; (b) the distribution of the cells over the area which is examined is never exactly uniform, so that there is an 'error of random sampling.'\" \nThe data in the paper are provided in the form of discrete frequency distributions for the four samples. 
Each shows the frequency distribution squares containing a count of 0, 1, 2, ... yeast cells. These are combined here in Yeast. In addition, he gives a table (Table I) showing the actual number of yeast cells counted in the 20 x 20 grid for sample D, given here as YeastD.mat. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Yeast.csv", "filename": "Yeast", "name": " Student's (1906) Yeast Cell Counts ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Counts of the number of yeast cells were made each of 400 regions in a 20 x 20 grid on a microscope slide, comprising a 1 sq. mm. area. This experiment was repeated four times, giving samples A, B, C and D. \nStudent (1906) used these data to investigate the errors in random sampling. He says \"there are two sources of error: (a) the drop taken may not be representative of the bulk of the liquid; (b) the distribution of the cells over the area which is examined is never exactly uniform, so that there is an 'error of random sampling.'\" \nThe data in the paper are provided in the form of discrete frequency distributions for the four samples. Each shows the frequency distribution squares containing a count of 0, 1, 2, ... yeast cells. These are combined here in Yeast. In addition, he gives a table (Table I) showing the actual number of yeast cells counted in the 20 x 20 grid for sample D, given here as YeastD.mat. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/YeastD.mat.csv", "filename": "YeastD_mat", "name": " Student's (1906) Yeast Cell Counts ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Several meteorological measurements for a period between 1920 and 1931. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/meteo.csv", "filename": "meteo", "name": " Meteorological Measurements for 11 Years ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily measurements of ozone concentration, wind speed, temperature and solar radiation in New York City from May to September of 1973. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lattice/environmental.csv", "filename": "environmental", "name": " Atmospheric environmental conditions in New York City ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame with 17 observations on boiling point of water and barometric pressure in inches of mercury. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/forbes.csv", "filename": "forbes", "name": " Forbes' Data on Boiling Points in the Alps ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The topo data frame has 52 rows and 3 columns, of topographic heights within a 310 feet square. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/topo.csv", "filename": "topo", "name": " Spatial Topographic Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Mr Derek Whiteside of the UK Building Research Station recorded the weekly gas consumption and average external temperature at his own house in south-east England for two heating seasons, one of 26 weeks before, and one of 30 weeks after cavity-wall insulation was installed. The object of the exercise was to assess the effect of the insulation on gas consumption. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/whiteside.csv", "filename": "whiteside", "name": " House Insulation: Whiteside's Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Official snowfall data by month and season for Grand Rapids, MI, going back to 1893. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/SnowGR.csv", "filename": "SnowGR", "name": "Snowfall data for Grand Rapids, MI", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "2016-17 weather in several cities ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Weather.csv", "filename": "Weather", "name": "Weather", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set is a subset of the data from the 2006 ASA Data expo challenge, http://stat-computing.org/dataexpo/2006/. The data are monthly ozone averages on a very coarse 24 by 24 grid covering Central America, from Jan 1995 to Dec 2000. The data is stored in a 3d area with the first two dimensions representing latitude and longitude, and the third representing time. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plyr/ozone.csv", "filename": "ozone_", "name": "Monthly ozone measurements over Central America.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Air Quality Data Set for May 1973, from Chambers et al. (1983). The whole data set consists of daily readings of air quality values from May 1, 1973 to September 30, 1973, but here are included only the values for May. This data set is an example of the special treatment of the missing values. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/airmay.csv", "filename": "airmay", "name": "Air Quality Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset contains daily means (from midnight to midnight) of NOx, i.e., mono-nitrogen oxides, in [ppb] at 13 sites in central Switzerland and Aarau for the year 2004. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/ambientNOxCH.csv", "filename": "ambientNOxCH", "name": " Daily Means of NOx (mono-nitrogen oxides) in air", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An agricultural experiment in which different tillage methods were implemented. The effects of tillage on plant (maize) biomass were subsequently determined by modeling biomass accumulation for each tillage treatment using a 3 parameter Weibull function. \nA datset where the total biomass is modeled conditional on a three value factor, and hence vector parameters are used. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/biomassTill.csv", "filename": "biomassTill", "name": "Biomass Tillage Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A typical medium sized environmental data set with hourly measurements of NOx pollution content in the ambient air. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/NOxEmissions.csv", "filename": "NOxEmissions", "name": "NOx Air Pollution Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rainfall amounts from a cloud seeding experiment (winter only) \nResearchers were interested in whether seeded clouds would produce more rainfall. An experiment was conducted in Tasmania between 1964 and 1971 and rainfall amounts were measured in inches per rainfall period. The researchers measured the amount of rainfall in two target areas: East (TE) and West (TW). They also measured the amount of rainfall in three control locations. Clouds were coded as being either seeded (treatment) or unseeded (control). This is a subset (only Winter months) of the larger CloudSeeding2 dataset. All rainfall amounts are in inches. \n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CloudSeeding.csv", "filename": "CloudSeeding", "name": "Cloud Seeding Experiment (Winter Only)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rainfall amounts from a cloud seeding experiment \nResearchers were interested in whether seeded clouds would produce more rainfall. An experiment was conducted in Tasmania between 1964 and 1971 and rainfall amounts were measured in inches per rainfall period. The researchers measured the amount of rainfall in two target areas: East (TE) and West (TW). They also measured the amount of rainfall in three control locations. 
Clouds were coded as being either seeded (treatment) or unseeded (control). A subset (only Winter months) of these data is stored in CloudSeeding. All rainfall amounts are in inches. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CloudSeeding2.csv", "filename": "CloudSeeding2", "name": "Cloud Seeding Experiment (Four Seasons)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily carbon dioxide measurements for April through November 2011 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CO2.csv", "filename": "CO2", "name": "Daily CO2 Measurements in Germany", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly carbon dioxide readings at Mauna Loa, Hawaii \nMonthly average carbon dioxide readings (1988 - 2017) at the Mauna Loa Observatory in Hawaii. Data collected and disseminated by ERSL (Earth System Research Laboratory) of the U.S. NOAA (National Oceanic and Atmospheric Administration. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CO2Hawaii.csv", "filename": "CO2Hawaii", "name": "CO2 Readings in Hawaii", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly carbon dioxide readings at the South Pole \nMonthly average carbon dioxide readings (1988 - 2016) at the South Pole. 
Data collected and disseminated by ERSL (Earth System Research Laboratory) of the U.S. NOAA (National Oceanic and Atmospheric Administration. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CO2SouthPole.csv", "filename": "CO2SouthPole", "name": "CO2 Readings at the South Pole", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Air pollution data from Leeds (U.K.) city centre, collected from 1994 to 1998. The summer data set corresponds to the months of April to July inclusive. The winter data set corresponds to the months of November to February inclusive. Some outliers have been removed, as discussed by Heffernan and Tawn, 2004. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/texmex/summer.csv", "filename": "summer", "name": "Air pollution data, separately for summer and winter months", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Air pollution data from Leeds (U.K.) city centre, collected from 1994 to 1998. The summer data set corresponds to the months of April to July inclusive. The winter data set corresponds to the months of November to February inclusive. Some outliers have been removed, as discussed by Heffernan and Tawn, 2004. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/texmex/winter.csv", "filename": "winter", "name": "Air pollution data, separately for summer and winter months", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataframe contains a simulated data set to illustrate the models for ecological inference. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/eidat.csv", "filename": "eidat", "name": "Simulation Data for Ecological Inference", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Other" } ] }, { "name": "Statistics", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The measurements are the population (in 1000's) of 49 U.S. cities in 1920 and 1930. The 49 cities are a random sample taken from the 196 largest cities in 1920. The city data frame consists of the first 10 observations in bigcity.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/bigcity.csv", "filename": "bigcity", "name": "Population of U.S. Cities", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", "column_description_0": "index", "column_description_1": "census year. ", "column_description_2": "Population, in millions ", - "comment_character": "#", - "create_index_column": false, - "description": "The CanPop data frame has 16 rows and 2 columns. Decennial time-series of Canadian population, 1851–2001. 
", + "description": "The CanPop data frame has 16 rows and 2 columns. Decennial time-series of Canadian population, 1851-2001. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/CanPop.csv", "filename": "CanPop", "name": "Canadian Population Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Channing House is a retirement centre in Palo Alto, California. These data were collected between the opening of the house in 1964 until July 1, 1975. In that time 97 men and 365 women passed through the centre. For each of these, their age on entry and also on leaving or death was recorded. A large number of the observations were censored mainly due to the resident being alive on July 1, 1975 when the data was collected. Over the time of the study 130 women and 46 men died at Channing House. Differences between the survival of the sexes, taking age into account, was one of the primary concerns of this study. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/channing.csv", "filename": "channing", "name": " Channing House Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The measurements are the population (in 1000's) of 49 U.S. cities in 1920 and 1930. The 49 cities are a random sample taken from the 196 largest cities in 1920. The city data frame consists of the first 10 observations in bigcity. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/city.csv", "filename": "city", "name": "Population of U.S. 
Cities ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Angell data frame has 43 rows and 4 columns. The observations are 43 U. S. cities around 1950. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Angell.csv", "filename": "Angell", "name": "Moral Integration of American Cities", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Anscombe data frame has 51 rows and 4 columns. The observations are the U. S. states plus Washington, D. C. in 1970. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Anscombe.csv", "filename": "Anscombe", "name": "U. S. State Public-School Expenditures", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Chirot data frame has 32 rows and 5 columns. The observations are counties in Romania. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Chirot.csv", "filename": "Chirot", "name": "The 1907 Romanian Peasant Rebellion", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Duncan data frame has 45 rows and 4 columns. Data on the prestige and other characteristics of 45 U. S. 
occupations in 1950. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Duncan.csv", "filename": "Duncan", "name": "Duncan's Occupational Prestige Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Ericksen data frame has 66 rows and 9 columns. The observations are 16 large cities, the remaining parts of the states in which these cities are located, and the other U. S. states. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Ericksen.csv", "filename": "Ericksen", "name": "The 1980 U.S. Census Undercount", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Leinhardt data frame has 105 rows and 4 columns. The observations are nations of the world around 1970. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Leinhardt.csv", "filename": "Leinhardt", "name": "Data on Infant-Mortality", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Migration data frame has 90 rows and 8 columns. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Migration.csv", "filename": "Migration", "name": "Canadian Interprovincial Migration Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Minneapolis Demographic Data 2015, by Neighborhood, from the 2015 American Community Survey ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/MplsDemo.csv", "filename": "MplsDemo", "name": " Minneapolis Demographic Data 2015, by Neighborhood ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The States data frame has 51 rows and 8 columns. The observations are the U. S. states and Washington, D. C. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/States.csv", "filename": "States", "name": "Education and Related Statistics for the U.S. States", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "National health, welfare, and education statistics for 213 places, mostly UN members, but also other areas like Hong Kong that are not independent countries. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/UN.csv", "filename": "UN", - "name": " National Statistics from the United Nations, Mostly From 2009–2011 ", - "number_format": 31, - "remove_quotes": true, + "name": " National Statistics from the United Nations, Mostly From 2009-2011 ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Social indicators data on 207 nations distributed by the United Nations circa 1998. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/UN98.csv", "filename": "UN98", "name": " United Nations Social Indicators Data 1998] ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The USPop data frame has 22 rows and 1 columns. This is a decennial time-series, from 1790 to 2000. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/USPop.csv", "filename": "USPop", "name": "Population of the United States", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Vocab data frame has 30,351 rows and 4 columns. The observations are respondents to U.S. General Social Surveys, 1972-2016. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Vocab.csv", "filename": "Vocab", "name": "Vocabulary and Education", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on whether or not households in Bangladesh changed the wells that they were using. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Wells.csv", "filename": "Wells", "name": "Well Switching in Bangladesh", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Latitudes and longitudes for Adelaide, Alice, Brisbane, Broome, Cairns, Canberra, Darwin, Melbourne, Perth and Sydney; i.e., for the cities to which the road distances in audists relate. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/aulatlong.csv", "filename": "aulatlong", "name": "Latitudes and longitudes for ten Australian cities", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Population figures for Australian states and territories for 1917, 1927, ..., 1997. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/austpop.csv", "filename": "austpop", "name": "Population figures for Australian States and Territories", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Australian regional temperature data, Australian regional rainfall data, and Annual SOI, are given for the years 1900-2008 or 1900-2011 or 1900-2012. The regional rainfall and temperature data are area-weighted averages for the respective regions. The Southern Oscillation Index (SOI) is the difference in barometric pressure at sea level between Tahiti and Darwin. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/bomregions.csv", "filename": "bomregions", "name": "Australian and Related Historical Annual Climate Data, by region", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Australian regional temperature data, Australian regional rainfall data, and Annual SOI, are given for the years 1900-2008 or 1900-2011 or 1900-2012. The regional rainfall and temperature data are area-weighted averages for the respective regions. The Southern Oscillation Index (SOI) is the difference in barometric pressure at sea level between Tahiti and Darwin. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/bomregions2011.csv", "filename": "bomregions2011", "name": "Australian and Related Historical Annual Climate Data, by region", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Australian regional temperature data, Australian regional rainfall data, and Annual SOI, are given for the years 1900-2008 or 1900-2011 or 1900-2012. The regional rainfall and temperature data are area-weighted averages for the respective regions. The Southern Oscillation Index (SOI) is the difference in barometric pressure at sea level between Tahiti and Darwin. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/bomregions2012.csv", "filename": "bomregions2012", "name": "Australian and Related Historical Annual Climate Data, by region", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The corrected Boston housing data (from http://lib.stat.cmu.edu/datasets/). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/bostonc.csv", "filename": "bostonc", - "name": "Boston Housing Data – Corrected", - "number_format": 31, - "remove_quotes": true, + "name": "Boston Housing Data - Corrected", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Population estimates for several Canadian cities. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/cities.csv", "filename": "cities", "name": "Populations of Major Canadian Cities (1992-96)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The science data frame has 1385 rows and 7 columns. \nThe data are on attitudes to science, from a survey where there were results from 20 classes in private schools and 46 classes in public schools. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/science.csv", "filename": "science", "name": "School Science Survey Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Numbers (in thousands) of Australian residents measured quarterly from March 1971 to March 1994. The object is of class \"ts\". ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/austres.csv", "filename": "austres", "name": " Quarterly Time Series of the Number of Australian Residents ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Distribution of hair and eye color and sex in 592 statistics students. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/HairEyeColor.csv", "filename": "HairEyeColor", "name": "Hair and Eye Color of Statistics Students", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The areas in thousands of square miles of the landmasses which exceed 10,000 square miles. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/islands.csv", "filename": "islands", "name": "Areas of the World's Major Landmasses", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The mean annual temperature in degrees Fahrenheit in New Haven, Connecticut, from 1912 to 1971. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/nhtemp.csv", "filename": "nhtemp", "name": "Average Yearly Temperatures in New Haven", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A time series object containing average air temperatures at Nottingham Castle in degrees Fahrenheit for 20 years. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/nottem.csv", "filename": "nottem", - "name": " Average Monthly Temperatures at Nottingham, 1920–1939 ", - "number_format": 31, - "remove_quotes": true, + "name": " Average Monthly Temperatures at Nottingham, 1920-1939 ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The average amount of precipitation (rainfall) in inches for each of 70 United States (and Puerto Rico) cities. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/precip.csv", "filename": "precip", "name": "Annual Precipitation in US Cities", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A time series giving the monthly totals of accidental deaths in the USA. The values for the first six months of 1979 are 7798 7406 8363 8460 9217 9316. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/USAccDeaths.csv", "filename": "USAccDeaths", - "name": " Accidental Deaths in the US 1973–1978 ", - "number_format": 31, - "remove_quotes": true, + "name": " Accidental Deaths in the US 1973-1978 ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This data set gives the population of the United States (in millions) as recorded by the decennial census for the period 1790–1970. ", + "description": "This data set gives the population of the United States (in millions) as recorded by the decennial census for the period 1790-1970. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/uspop.csv", "filename": "uspop", "name": "Populations Recorded by the US Census", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Death rates per 1000 in Virginia in 1940. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/VADeaths.csv", "filename": "VADeaths", "name": "Death Rates in Virginia (1940)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 250 \nobservation : households \ncountry : California ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/HC.csv", "filename": "HC", "name": "Heating and Cooling System Choice in Newly Built Houses in California ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 900 \nobservation : households \ncountry : California ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Heating.csv", "filename": "Heating", "name": "Heating System Choice in California Houses ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "quarterly 
observations from 1960-1 to 2001-4 \nnumber of observations : 168 \nobservation : country \ncountry : Canada ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Hstarts.csv", "filename": "Hstarts", "name": "Housing Starts ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 125 observations from 1960 to 1985 \nnumber of observations : 3250 \nobservation : country \ncountry : World ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/SumHes.csv", "filename": "SumHes", "name": "The Penn Table ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total Murdered women, per 100 000 standard population. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/wmurders.csv", "filename": "wmurders", "name": "Annual female murder rate (per 100,000 standard population) in the USA. 
1950-2004.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Demographic information of midwest counties ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/midwest.csv", "filename": "midwest", "name": "Midwest demographics", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A table indicating month by month, for the years 1812-1854, the number of prostitutes on the registers of the administration of the city of Paris. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Prostitutes.csv", "filename": "Prostitutes", "name": " Parent-Duchatelet's time-series data on the number of prostitutes in Paris ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Birth and death rates for 69 countries. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/birthdeathrates.csv", "filename": "birthdeathrates", "name": " Birth and Death Rates Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a study carried out to investigate the causes of jeering or baiting behaviour by a crowd when a person is threatening to commit suicide by jumping from a high building. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/suicides.csv", "filename": "suicides", "name": " Crowd Baiting Behaviour and Suicides ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Heights in inches of the singers in the New York Choral Society in 1979. The data are grouped according to voice part. The vocal range for each voice part increases in pitch according to the following order: Bass 2, Bass 1, Tenor 2, Tenor 1, Alto 2, Alto 1, Soprano 2, Soprano 1. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lattice/singer.csv", "filename": "singer", "name": " Heights of New York Choral Society singers ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These datasets record mortality rates across all ages in the USA by cause of death, sex, and rural/urban status, 2011–2013. 
The two datasets represent the national aggregate rates and the region-wise rates for each administrative region under the Department of Health and Human Services (HHS). ", + "description": "These datasets record mortality rates across all ages in the USA by cause of death, sex, and rural/urban status, 2011-2013. The two datasets represent the national aggregate rates and the region-wise rates for each administrative region under the Department of Health and Human Services (HHS). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lattice/USMortality.csv", "filename": "USMortality", "name": " Mortality Rates in US by Cause and Gender ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These datasets record mortality rates across all ages in the USA by cause of death, sex, and rural/urban status, 2011–2013. The two datasets represent the national aggregate rates and the region-wise rates for each administrative region under the Department of Health and Human Services (HHS). ", + "description": "These datasets record mortality rates across all ages in the USA by cause of death, sex, and rural/urban status, 2011-2013. The two datasets represent the national aggregate rates and the region-wise rates for each administrative region under the Department of Health and Human Services (HHS). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lattice/USRegionalMortality.csv", "filename": "USRegionalMortality", "name": " Mortality Rates in US by Cause and Gender ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A regular time series giving the monthly totals of accidental deaths in the USA. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/accdeaths.csv", "filename": "accdeaths", "name": " Accidental Deaths in the US 1973-1978 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on the cross-classification of people in Caithness, Scotland, by eye and hair colour. The region of the UK is particularly interesting as there is a mixture of people of Nordic, Celtic and Anglo-Saxon origin. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/caith.csv", "filename": "caith", "name": " Colours of Eyes and Hair of People in Caithness ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data provide per capita alcohol consumption values for many countries in 2005 and 2008. There are also a few countries for which there are data in other years. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Alcohol.csv", "filename": "Alcohol", "name": "Alcohol Consumption per Capita", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A day by day record of the number of births in each US State. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Birthdays.csv", "filename": "Birthdays", "name": "US Births in 1969 - 1988", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Number of births each day from 1968 to 1988 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Births.csv", "filename": "Births", "name": "US Births", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A day by day record of the number of births in the United States in 2015. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Births2015.csv", "filename": "Births2015", "name": "US Births in 2015", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A day by day record of the number of births in the United States in 1978. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Births78.csv", "filename": "Births78", "name": "US Births in 1978", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Current Population Survey (CPS) is used to supplement census information between census years. These data consist of a random sample of persons from the CPS85, with information on wages and other characteristics of the workers, including sex, number of years of education, years of work experience, occupational status, region of residence and union membership. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/CPS85.csv", "filename": "CPS85", "name": "Data from the 1985 Current Population Survey (CPS85)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on houses in Saratoga County, New York, USA in 2006 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/SaratogaHouses.csv", "filename": "SaratogaHouses", "name": "Houses in Saratoga County (2006)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Housing status for 362 severely mentally ill homeless subjects measured at baseline and at three follow-up times. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/multgee/housing.csv", "filename": "housing_", "name": " Homeless Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Deaths by year, by corp, from horse kicks. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/prussian.csv", "filename": "prussian", "name": "Prussian army horse kick data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Airline distances between 11 US cities may be used as an example for multidimensional scaling or cluster analysis. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/cities.csv", "filename": "cities_", "name": "Distances between 11 US cities", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Contains information on 20 Schools from the Mid-Atlantic and New England States, drawn from a population studied by Coleman et al. (1966). Mosteller and Tukey (1977) analyze this sample consisting of measurements on six different variables, one of which will be treated as a response. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/coleman.csv", "filename": "coleman", "name": "Coleman Data Set", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data consists of 150 randomly selected persons from a survey with information on over 2000 elderly US citizens, where the response, indicates participation in the U.S. Food Stamp Program. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/foodstamp.csv", "filename": "foodstamp", "name": "Food Stamp Program Participation", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Infant mortality rates in the United States by decade (1920-2010) \nInfant mortality (deaths within one year of birth per 1,000 births) in the US from 1920 - 2010 (by decade). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/InfantMortality2010.csv", "filename": "InfantMortality2010", "name": "Infant Mortality Rates", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Health services data for 83 metropolitan areas \nThe U.S. Census Bureau regularly collects information for many metropolitan areas in the United States, including data on number of physicians and number (and size) of hospitals. This dataset has such information for 83 different metropolitan areas. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MetroHealth83.csv", "filename": "MetroHealth83", "name": "Health Services in Metropolitan Areas", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from births in North Carolina in 2001 \nThis dataset contains data on a sample of 1450 birth records that statistician John Holcomb selected from the North Carolina State Center for Health and Environmental Statistics. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/NCbirths.csv", "filename": "NCbirths", "name": "North Carolina Birth Records", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Demographics and a measurement of racial animus in cities based on Google searches \nProfessor Seth Stephens-Davidowitz studies the level of racial animus across different areas in America by measuring the percent of Google search queries that include racially charged language. A measurement, Animus, is derived from his algorithm and is scaled to be between 0 (low racial animus) and 250 (high racial animus). The dataset includes those values along with demographic information about each media market. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/RacialAnimus.csv", "filename": "RacialAnimus", "name": "Racial Animus and City Demographics", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Sample of homes in Northampton, MA to see whether being close to a bike trail enhances the value of the home \nThis dataset comprises 104 homes in Northampton, MA that were sold in 2007. The authors measured the shortest distance from each home to a railtrail on streets and pathways with Google maps and recorded the Zillow.com estimate of each home's price in 1998 and 2011. Additional attributes such as square footage, number of bedrooms and number of bathrooms are available from a realty database from 2007. We divide the houses into two groups based on distance to the trail (DistGroup). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/RailsTrails.csv", "filename": "RailsTrails", "name": "Homes in Northampton MA Near Rail Trails", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on religiosity of countries from the Pew Global Attitudes Project \nThe Pew Research Center's Global Attitudes Project surveyed people around the world and asked (among many other questions) whether they agreed that \"belief in God is necessary for morality,\" whether religion is very important in their lives, and whether they pray at least once per day. The variable Religiosity is the sum of the percentage of positive responses on these three items, measured in each of 44 countries. 
The dataset also includes the per capita GDP for each country and indicator variables that record the part of the world the country is in. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ReligionGDP.csv", "filename": "ReligionGDP", "name": "Religion and GDP for Countries", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on serious suicide attempts in Shandong, China \nData from a study of serious suicide attempts over three years in a predominantly rural population in Shandong, China. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/SuicideChina.csv", "filename": "SuicideChina", "name": "Suicide Attempts in Shandong, China", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "State teen pregnancy rates, Civil War participation, and church attendance. \nState level data on teen pregnancies, church attendance, and role in the U.S. Civil War. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/TeenPregnancy.csv", "filename": "TeenPregnancy", "name": "State Teen Pregnancy Rates", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "US population by age and sex, for 2000 through 2020", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/uspop2.csv", "filename": "uspop2", "name": "Projected US Population", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from the Danish Welfare Study about broken marriages or permanent relationships depending on gender and social rank. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/BrokenMarriage.csv", "filename": "BrokenMarriage", "name": "Broken Marriage Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Geissler, cited in Sokal & Rohlf (1969) and Lindsey (1995) on gender distributions in families in Saxony in the 19th century. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Saxony.csv", "filename": "Saxony", "name": "Families in Saxony", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Heuer (1979) on suicide rates in West Germany classified by age, sex, and method of suicide. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Suicide.csv", "filename": "Suicide", "name": "Suicide Rates in Germany", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These five datasets are part of a larger set of 10 multiply imputed data sets describing individual preferences toward immigration policy. Imputation was performed via Amelia. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/immi1.csv", "filename": "immigration", "name": "Individual Preferences Over Immigration Policy", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains five sociomatrices of simulated social network data.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/sna.ex.csv", "filename": "sna_ex", "name": "Simulated Example of Social Network Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains turnout and demographic data from a sample of respondents to the 2000 Current Population Survey (CPS). The states represented are South Carolina and Arkansas. The data represent only a sample and results from this example should not be used in publication. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/voteincome.csv", "filename": "voteincome", "name": "Sample Turnout and Demographic Data from the 2000 Current Population Survey", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Demographics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In a study into the effect that warning signs have on speeding patterns, Cambridgeshire County Council considered 14 pairs of locations. The locations were paired to account for factors such as traffic volume and type of road. 
One site in each pair had a sign erected warning of the dangers of speeding and asking drivers to slow down. No action was taken at the second site. Three sets of measurements were taken at each site. Each set of measurements was nominally of the speeds of 100 cars but not all sites have exactly 100 measurements. These speed measurements were taken before the erection of the sign, shortly after the erection of the sign, and again after the sign had been in place for some time. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/amis.csv", "filename": "amis", "name": "Car Speeding and Warning Signs ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Proschan (1963) reported on the times between failures of the air-conditioning equipment in 10 Boeing 720 aircraft. The aircondit data frame contains the intervals for the ninth aircraft while aircondit7 contains those for the seventh aircraft. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/aircondit.csv", "filename": "aircondit", "name": "Failures of Air-conditioning Equipment ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Proschan (1963) reported on the times between failures of the air-conditioning equipment in 10 Boeing 720 aircraft. The aircondit data frame contains the intervals for the ninth aircraft while aircondit7 contains those for the seventh aircraft. \nBoth data frames have just one column. Note that the data have been sorted into increasing order. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/aircondit7.csv", "filename": "aircondit7", "name": "Failures of Air-conditioning Equipment ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The motor data frame has 94 rows and 4 columns. The rows are obtained by removing replicate values of time from the dataset mcycle. Two extra columns are added to allow for strata with a different residual variance in each stratum. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/motor.csv", "filename": "motor", "name": "Data from a Simulated Motorcycle Accident", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data comes from an unpublished master's paper by Carl Hoffstedt. They relate the automobile accident rate, in accidents per million vehicle miles to several potential terms. The data include 39 sections of large highways in the state of Minnesota in 1973. The goal of this analysis was to understand the impact of design variables, Acpts, Slim, Sig, and Shld that are under the control of the highway department, on accidents. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Highway1.csv", "filename": "Highway1", "name": "Highway Accidents", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data set used in McCullagh & Nelder (1989), Hardin & Hilbe (2003), and other sources. The data contains values on the number of reported accidents for ships belonging to a company over a given time period. When a ship was constructed is also recorded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/ships.csv", "filename": "ships", "name": "ships", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data is an observation-based version of the 1912 Titanic passenger survival log. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/titanic.csv", "filename": "titanic", "name": "titanic", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data is a grouped version of the 1912 Titanic passenger survival log. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/titanicgrp.csv", "filename": "titanicgrp", "name": "titanicgrp", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - 
"create_index_column": false, "description": "A non-experimental \"control\" group, used in various studies of the effect of a labor training program, alternative to the experimental control group in nswdemo. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/cps3.csv", "filename": "cps3", "name": "Labour Training Evaluation Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Estimates of total worldwide carbon emissions from fossil fuel use. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/fossilfuel.csv", "filename": "fossilfuel", "name": "Fossil Fuel Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The record times in 1984 for 35 Scottish hill races. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/hills.csv", "filename": "hills", "name": "Scottish Hill Races Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "US data, for 1997-2002, from police-reported car crashes in which there is a harmful event (people or property), and from which at least one vehicle was towed. 
Data are restricted to front-seat occupants, include only a subset of the variables recorded, and are restricted in other ways also.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/nassCDS.csv", "filename": "nassCDS", "name": "Airbag and other influences on accident fatalities", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Progression in world record times for track and road races.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/progression.csv", "filename": "progression", "name": "Progression of Record times for track races, 1912 - 2008", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The record times in 2000 for 77 Scottish long distance races. We believe the data are, for the most part, trustworthy. However, the dist variable for Caerketton (record 58) seems to have been variously recorded as 1.5 mi and 2.5 mi. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/races2000.csv", "filename": "races2000", "name": "Scottish Hill Races Data - 2000", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data are from an experiment that aimed to model the effects of the tinting of car windows on visual performance. 
The authors were mainly interested in effects on side window vision, and hence in visual recognition tasks that would be performed when looking through side windows. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/tinting.csv", "filename": "tinting", "name": "Car Window Tinting Experiment Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Record times for track and road races, at August 9th 2006", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/worldRecords.csv", "filename": "worldRecords", "name": "Record times for track and road races, at August 9th 2006", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The revenue passenger miles flown by commercial airlines in the United States for each year from 1937 to 1960. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/airmiles.csv", "filename": "airmiles", - "name": "Passenger Miles on Commercial US Airlines, 1937–1960", - "number_format": 31, - "remove_quotes": true, + "name": "Passenger Miles on Commercial US Airlines, 1937-1960", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The classic Box & Jenkins airline data. Monthly totals of international airline passengers, 1949 to 1960. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/AirPassengers.csv", "filename": "AirPassengers", "name": "Monthly Airline Passenger Numbers 1949-1960", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the speed of cars and the distances taken to stop. Note that the data were recorded in the 1920s. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/cars.csv", "filename": "cars", "name": "Speed and Stopping Distances of Cars", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data was extracted from the 1974 Motor Trend US magazine, and comprises fuel consumption and 10 aspects of automobile design and performance for 32 automobiles (1973–74 models). ", + "description": "The data was extracted from the 1974 Motor Trend US magazine, and comprises fuel consumption and 10 aspects of automobile design and performance for 32 automobiles (1973-74 models). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/mtcars.csv", "filename": "mtcars", "name": "Motor Trend Car Road Tests", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "UKDriverDeaths is a time series giving the monthly totals of car drivers in Great Britain killed or seriously injured Jan 1969 to Dec 1984. Compulsory wearing of seat belts was introduced on 31 Jan 1983. 
\nSeatbelts is more information on the same problem. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/Seatbelts.csv", "filename": "UKDriverDeaths", - "name": " Road Casualties in Great Britain 1969–84 ", - "number_format": 31, - "remove_quotes": true, + "name": " Road Casualties in Great Britain 1969-84 ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This data set provides information on the fate of passengers on the fatal maiden voyage of the ocean liner ‘Titanic’, summarized according to economic status (class), sex, age and survival. ", + "description": "This data set provides information on the fate of passengers on the fatal maiden voyage of the ocean liner \"Titanic\", summarized according to economic status (class), sex, age and survival. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/Titanic.csv", "filename": "Titanic", "name": "Survival of passengers on the Titanic", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Quarterly UK gas consumption from 1960Q1 to 1986Q4, in millions of therms. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/UKgas.csv", "filename": "UKgas", "name": "UK Quarterly Gas Consumption", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Accident.csv", "filename": "Accident", "name": "Ship Accidents ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 4654 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Car.csv", "filename": "Car", "name": "Stated Preferences for Car Choice ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 48 observations from 1982 to 1988 \nnumber of observations : 336 \nobservation : regional \ncountry : United States \n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Fatality.csv", "filename": "Fatality", "name": "Drunk Driving Laws and Traffic Deaths ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 18 observations from 1960 to 1978 \nnumber of observations : 
342 \nobservation : country \ncountry : OECD ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Gasoline.csv", "filename": "Gasoline", "name": "Gasoline Consumption ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 840 \nobservation : individuals \ncountry : Australia ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/ModeChoice.csv", "filename": "ModeChoice", "name": "Data to Study Travel Mode Choice ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1987 \nnumber of observations : 2929 \nobservation : individuals \ncountry : Netherlands ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Train.csv", "filename": "Train", "name": "Stated Preferences for Train Traveling ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 25 \nobservation : regional \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/TranspEq.csv", "filename": "TranspEq", "name": "Statewide Data on Transportation Equipment Manufacturing ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - 
"comment_character": "#", - "create_index_column": false, "description": "Quarterly international arrivals (in thousands) to Australia from Japan, New Zealand, UK and the US. 1981Q1 - 2012Q3. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/arrivals.csv", "filename": "arrivals", "name": "International Arrivals to Australia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total annual air passengers (in millions) including domestic and international aircraft passengers of air carriers registered in Australia. 1970-2016. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/ausair.csv", "filename": "ausair", "name": "Air Transport Passengers Australia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total international visitors to Australia (in millions). 1980-2015. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/austa.csv", "filename": "austa", "name": "International visitors to Australia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Quarterly visitor nights (in millions) spent by international tourists to Australia. 1999-2015. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/austourists.csv", "filename": "austourists", "name": "International Tourists to Australia: Total visitor nights.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Overseas departures from Australia: permanent departures, long-term (more than one year) residents departing, long-term (more than one year) visitors departing, short-term (less than one year) residents departing and short-term (less than one year) visitors departing. January 1976 - November 2016. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/departures.csv", "filename": "departures", "name": "Total monthly departures from Australia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Air traffic numbers are in thousands, and divided into first class, business class and economy class. There was a major pilots' industrial dispute during the data period resulting in some weeks with zero traffic. There were also at least two changes in the definitions of passenger classes. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/melsyd.csv", "filename": "melsyd", - "name": "Total weekly air passenger numbers on Ansett airline flights between Melbourne and Sydney, 1987–1992.", - "number_format": 31, - "remove_quotes": true, + "name": "Total weekly air passenger numbers on Ansett airline flights between Melbourne and Sydney, 1987-1992.", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total quarterly visitor nights (in millions) from 1998-2016 for twenty regions of Australia within six states. The states are: New South Wales, Queensland, South Australia, Victoria, Western Australia, and Other. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/visnights.csv", "filename": "visnights", "name": "Quarterly visitor nights for various regions of Australia.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Aircraft Crash Data ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gamclass/airAccs.csv", "filename": "airAccs", "name": " Aircraft Crash data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are from the US FARS (Fatality Analysis Recording System) archive that is intended to include every accident in which there was at least one fatality. Data are limited to vehicles where the front seat passenger seat was occupied. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gamclass/FARS.csv", "filename": "FARS", "name": " US fatal road accident data for automobiles, 1998 to 2010 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are included on variables that may be relevant to assessing airbag and seatbelt effectiveness in preventing fatal injury. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gamclass/fars2007.csv", "filename": "fars2007", "name": " US fatal road accident data, 2007 and 2008 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are a 3-way table, indexed by state, a set of variable names, and years ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gamclass/FARSmiss.csv", "filename": "FARSmiss", "name": " Summary information on records omitted from the FARS dataset ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Spanish Armada (Spanish: Grande y Felicisima Armada, literally \"Great and Most Fortunate Navy\") was a Spanish fleet of 130 ships that sailed from La Coruna in August 1588. During its preparation, several accounts of its formidable strength were circulated to reassure allied powers of Spain or to intimidate its enemies. One such account was given by Paz Salas et Alvarez (1588). 
The intent was to bring the forces of Spain to invade England, overthrow Queen Elizabeth I, and re-establish Spanish control of the Netherlands. However the Armada was not as fortunate as hoped: it was all destroyed in one week's fighting. \nde Falguerolles (2008) reports the table given here as Armada as an early example of data to which multivariate methods might be applied. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Armada.csv", "filename": "Armada", "name": " La Felicisima Armada ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Michael Florent van Langren (1598-1675) was a Dutch mathematician and astronomer, who served as a royal mathematician to King Phillip IV of Spain, and who worked on one of the most significant problems of his time— the accurate determination of longitude, particularly for navigation at sea. \nIn order to convince the Spanish court of the seriousness of the problem (often resulting in great losses through ship wrecks), he prepared a 1-dimensional line graph, showing all the available estimates of the distance in longitude between Toledo and Rome, which showed large errors, for even this modest distance. This 1D line graph, from Langren (1644), is believed to be the first known graph of statistical data (Friendly etal., 2010). It provides a compelling example of the notions of statistical variability and bias. \nThe data frame Langren1644 gives the estimates and other information derived from the previously known 1644 graph. It turns out that van Langren produced other versions of this graph, as early as 1628. The data frame Langren.all gives the estimates derived from all known versions of this graph. 
", + "description": "Michael Florent van Langren (1598-1675) was a Dutch mathematician and astronomer, who served as a royal mathematician to King Phillip IV of Spain, and who worked on one of the most significant problems of his time\u2014 the accurate determination of longitude, particularly for navigation at sea. \nIn order to convince the Spanish court of the seriousness of the problem (often resulting in great losses through ship wrecks), he prepared a 1-dimensional line graph, showing all the available estimates of the distance in longitude between Toledo and Rome, which showed large errors, for even this modest distance. This 1D line graph, from Langren (1644), is believed to be the first known graph of statistical data (Friendly etal., 2010). It provides a compelling example of the notions of statistical variability and bias. \nThe data frame Langren1644 gives the estimates and other information derived from the previously known 1644 graph. It turns out that van Langren produced other versions of this graph, as early as 1628. The data frame Langren.all gives the estimates derived from all known versions of this graph. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Langren.all.csv", "filename": "Langren", "name": " van Langren's Data on Longitude Distance between Toledo and Rome ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Gas mileage, horsepower, and other information for 392 vehicles.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/Auto.csv", "filename": "Auto", "name": " Auto Data Set ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A regular time series giving the monthly totals of car drivers in Great Britain killed or seriously injured Jan 1969 to Dec 1984. Compulsory wearing of seat belts was introduced on 31 Jan 1983 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/drivers.csv", "filename": "drivers", "name": " Deaths of Car Drivers in Great Britain 1969-84 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame giving a series of measurements of head acceleration in a simulated motorcycle accident, used to test crash helmets. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/mcycle.csv", "filename": "mcycle", "name": " Data from a Simulated Motorcycle Accident ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame with the annual deaths in road accidents for half the US states. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/road.csv", "filename": "road", "name": " Road Accident Deaths in US States ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data frame giving the number of damage incidents and aggregate months of service by ship type, year of construction, and period of operation. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/ships.csv", "filename": "ships_", "name": " Ships Damage Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "An experiment was performed in Sweden in 1961–2 to assess the effect of a speed limit on the motorway accident rate. The experiment was conducted on 92 days in each year, matched so that day j in 1962 was comparable to day j in 1961. On some days the speed limit was in effect and enforced, while on other days there was no speed limit and cars tended to be driven faster. The speed limit days tended to be in contiguous blocks. 
", + "description": "An experiment was performed in Sweden in 1961-2 to assess the effect of a speed limit on the motorway accident rate. The experiment was conducted on 92 days in each year, matched so that day j in 1962 was comparable to day j in 1961. On some days the speed limit was in effect and enforced, while on other days there was no speed limit and cars tended to be driven faster. The speed limit days tended to be in contiguous blocks. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Traffic.csv", "filename": "Traffic", "name": " Effect of Swedish Speed Limits on Accidents ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Pioneer Valley Planning Commission (PVPC) collected data north of Chestnut Street in Florence, MA for ninety days from April 5, 2005 to November 15, 2005. Data collectors set up a laser sensor, with breaks in the laser beam recording when a rail-trail user passed the data collection station. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/RailTrail.csv", "filename": "RailTrail", "name": "Volume of Users of a Rail Trail", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Pioneer Valley Planning Commission (PVPC) collected data north of Chestnut Street in Florence, MA for ninety days from April 5, 2005 to November 15, 2005. Data collectors set up a laser sensor, with breaks in the laser beam recording when a rail-trail user passed the data collection station. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Riders.csv", "filename": "Riders", "name": "Volume of Users of a Massachusetts Rail Trail", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Aircraft Data, deals with 23 single-engine aircraft built over the years 1947-1979, from Office of Naval Research. The dependent variable is cost (in units of \\$100,000) and the explanatory variables are aspect ratio, lift-to-drag ratio, weight of plane (in pounds) and maximal thrust. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/aircraft.csv", "filename": "aircraft", "name": "Aircraft Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Delivery Time Data, from Montgomery and Peck (1982). The aim is to explain the time required to service a vending machine (Y) by means of the number of products stocked (X1) and the distance walked by the route driver (X2). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/delivery.csv", "filename": "delivery", "name": "Delivery Time Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Length of stay for 201 patients that stayed at the University Hospital of Lausanne during the year 2000. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/los.csv", "filename": "los", "name": " Length of Stay Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Ontime arrivals for American and Delta airlines at LaGuardia and O'Hare airports ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Airlines.csv", "filename": "Airlines", "name": "Ontime Records for Two Airlines at Two Airports", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements of noise levels for different filters to reduce pollution levels of automobiles. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/AutoPollution.csv", "filename": "AutoPollution", "name": " Noise Levels of Filters to Reduce Automobile Pollution ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Flight response of Pacific Brant to overflights of helicopters \nA 1994 study collected data on the effects of air traffic on the behavior of the Pacific Brant (a small migratory goose). The data represent the flight response to helicopter \"overflights\" to see what the relationship between the proximity of a flight, both lateral and altitudinal, would be to the propensity of the Brant to flee the area. 
For this experiment, air traffic was restricted to helicopters because previous study had ascertained that helicopters created more radical flight response than other aircraft. The data are in FlightResponse. Each case represents a flock of Brant that has been observed during one overflight in the study. Flocks were determined observationally as contiguous collections of Brants, flock sizes varying from 10 to 30,000 birds. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FlightResponse.csv", "filename": "FlightResponse", "name": "Response of Migratory Geese to Helicopter Overflights", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Commute times for four cities \nThe data are distances (miles) and times (minutes) of daily commute (one-way) for random samples of 500 commuters in each of four cities (Boston, Houston, Minneapolis, Washington) in 2007. The random samples were taken from the Metropolitan Public Use File of the 2007 American Housing Survey ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MetroCommutes.csv", "filename": "MetroCommutes", "name": "Commute Times", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly traffic (in 1,000's) across the Peace Bridge between Canada and the U.S. \nMonthly traffic (in thousands of vehicles) across the Peace Bridge between the U.S. and Canada near Niagara Falls between January 2003 and December 2015. Note PeaceBridge2012 has only the last four years of this series. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/PeaceBridge2003.csv", "filename": "PeaceBridge2003", "name": "Monthly Peace Bridge Traffic ( 2003-2015)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly traffic (in 1,000's) across the Peace Bridge between Canada and the U.S. \nMonthly traffic (in thousands of vehicles) across the Peace Bridge between the U.S. and Canada near Niagara Falls between January 2012 and December 2015. Note PeaceBridge2003 has similar data starting in 2003. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/PeaceBridge2012.csv", "filename": "PeaceBridge2012", "name": "Monthly Peace Bridge Traffic ( 2012-2015)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Highway fatality rates 1987-2007 \nIn 1987 the federal government allowed the speed limit on interstate highways to be 65 mph in most areas. In 1995 federal restrictions were eliminated, so that states assumed control of setting speed limits on interstate highways. This data set compares fatality rates for years before and after the states assumed control for highway speed limits. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Speed.csv", "filename": "Speed", "name": "Highway Fatality Rates (Yearly)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Risky behavior (riding with a drinking driver) in youths \nThis dataset is derived from the 2007 Youth Risk Behavior Surveillance System (YRBSS), which is an annual survey conducted by the Centers for Disease Control and Prevention (CDC) to monitor the prevalence of health-risk youth behaviors. This dataset focuses on whether or not youths have recently (in past 30 days) ridden with a drunk driver. \nThis dataset renamed as YouthRisk for the second edition. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/YouthRisk2007.csv", "filename": "YouthRisk2007", "name": "Riding with a Driver Who Has Been Drinking", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Mersey (1912) about the 18 (out of 20) lifeboats launched before the sinking of the S. S. Titanic. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Lifeboats.csv", "filename": "Lifeboats", "name": "Lifeboats on the Titanic", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a study in England in two periods from November 1969 to October 1971 and November 1971 to October 1973. 
A new compulsory safety measure for trucks was introduced in October 1971. Therefore, the question is whether the safety measure had an effect on the number of accidents and on the point of collision on the truck. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Trucks.csv", "filename": "Trucks", "name": "Truck Accidents Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Traffic" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The excess return for the Acme Cleveland Corporation are recorded along with those for all stocks listed on the New York and American Stock Exchanges were recorded over a five year period. These excess returns are relative to the return on a risk-less investment such a U.S. Treasury bills. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/acme.csv", "filename": "acme", "name": "Monthly Excess Returns", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data relate to the construction of 32 light water reactor (LWR) plants constructed in the U.S.A in the late 1960's and early 1970's. The data was collected with the aim of predicting the cost of construction of further LWR plants. 6 of the power plants had partial turnkey guarantees and it is possible that, for these plants, some manufacturers' subsidies may be hidden in the quoted capital costs. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/nuclear.csv", "filename": "nuclear", "name": " Nuclear Power Station Construction Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Each week that the market is open the Australian Wool Corporation set a floor price which determines their policy on intervention and is therefore a reflection of the overall price of wool for the week in question. Actual prices paid can vary considerably about the floor price. The series here is the log of the ratio between the price for fine grade wool and the floor price, each market week between July 1976 and Jun 1984. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/wool.csv", "filename": "wool", "name": " Australian Relative Wool Prices ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Mroz data frame has 753 rows and 8 columns. The observations, from the Panel Study of Income Dynamics (PSID), are married women. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Mroz.csv", "filename": "Mroz", "name": "U.S. Women's Labor-Force Participation", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Ornstein data frame has 248 rows and 4 columns. The observations are the 248 largest Canadian firms with publicly available information in the mid-1970s. 
The names of the firms were not available. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Ornstein.csv", "filename": "Ornstein", "name": "Interlocking Directorates Among Major Canadian Firms", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Prestige data frame has 102 rows and 6 columns. The observations are occupations. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Prestige.csv", "filename": "Prestige", "name": "Prestige of Canadian Occupations", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Sahlins data frame has 20 rows and 2 columns. The observations are households in a Central African village. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Sahlins.csv", "filename": "Sahlins", "name": "Agricultural Production in Mazulu Village", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The 2008-09 nine-month academic salary for Assistant Professors, Associate Professors and Professors in a college in the U.S. The data were collected as part of the on-going effort of the college's administration to monitor salary differences between male and female faculty members. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Salaries.csv", "filename": "Salaries", "name": " Salaries for Professors ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The SLID data frame has 7425 rows and 5 columns. The data are from the 1994 wave of the Canadian Survey of Labour and Income Dynamics, for the province of Ontario. There are missing data, particularly for wages. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/SLID.csv", "filename": "SLID", "name": "Survey of Labour and Income Dynamics", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on transaction times in branch offices of a large Australian bank. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Transact.csv", "filename": "Transact", "name": "Transaction data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Womenlf data frame has 263 rows and 4 columns. The data are from a 1977 survey of the Canadian population. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Womenlf.csv", "filename": "Womenlf", "name": "Canadian Women's Labour-Force Participation", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Gross National Product (GNP) per capita and percentage of the population working in agriculture for each country belonging to the European Union in 1993. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/cluster/agriculture.csv", "filename": "agriculture", "name": "European Union Agricultural Workforces", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a subset of the German Socio-Economic Panel (SOEP). The subset was created by Rabe-Hesketh and Skrondal (2005). Only working women are included in these data. Beginning in 1997, German health reform in part entailed a 200 co-payment as well as limits in provider reimbursement. Patients were surveyed for the one year panel (1996) prior to and the one year panel (1998) after reform to assess whether the number of physician visits by patients declined - which was the goal of reform legislation. The response, or variable to be explained by the model, is numvisit, which indicates the number of patient visits to a physician's office during a three month period. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/mdvis.csv", "filename": "mdvis", "name": "mdvis", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "U.S. data extracted from Cars93, a data frame in the MASS package. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/carprice.csv", "filename": "carprice", "name": "US Car Price Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Cars93.summary data frame has 6 rows and 4 columns created from information in the Cars93 data set in the Venables and Ripley MASS package. Each row corresponds to a different class of car (e.g. Compact, Large, etc.). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/Cars93.summary.csv", "filename": "Cars93Summary", "name": "A Summary of the Cars93 Data set", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Numbers are given in different categories of worker, in each of two investigations. The first source of information is the Board of Trade Census that was conducted on 1886. The second is a relatively informal survey conducted by US Bureau of Labor representatives in 1889, for use in official reports. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/cottonworkers.csv", "filename": "cottonworkers", "name": "Occupation and wage profiles of British cotton workers", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are annual apparent alcohol consumption in Australia and New Zealand, in liters of pure alcohol content per annum, separately for beer, wine, and spirits (including spirit-based products). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/grog.csv", "filename": "grog", "name": "Alcohol consumption in Australia and New Zealand", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The houseprices data frame consists of the floor area, price, and the number of bedrooms for a sample of houses sold in Aranda in 1999. Aranda is a suburb of Canberra, Australia. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/houseprices.csv", "filename": "houseprices", "name": "Aranda House Prices", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The number of workers in the Canadian labour force broken down by region (BC, Alberta, Prairies, Ontario, Quebec, Atlantic) for the 24-month period from January, 1995 to December, 1996 (a time when Canada was emerging from a deep economic recession). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/jobs.csv", "filename": "jobs", "name": "Canadian Labour Force Summary Data (1995-96)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data frame Lottario is a summary of 122 weekly draws of an Ontario lottery, beginning in November, 1978. Each draw consists of 7 numbered balls, drawn without replacement from an urn consisting of balls numbered from 1 through 39. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/Lottario.csv", "filename": "Lottario", "name": "Ontario Lottery Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The medExpenses data frame contains average weekly medical expenses including drugs for 33 families randomly sampled from a community of 600 families which contained 2700 individuals. These data were collected in the 1970's at an unknown location. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/medExpenses.csv", "filename": "medExpenses", "name": "Family Medical Expenses", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data frame contains 445 rows and 10 columns. 
These data are from an investigation of the effect of training on changes, between 1974-1975 and 1978, in the earnings of individuals who had experienced employment difficulties Data are for the male experimental control and treatment groups. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/nsw74demo.csv", "filename": "nsw74demo", "name": "Labour Training Evaluation Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data frame contains 2675 rows and 10 columns. These data are pertinent to an investigation of the way that earnings changed, between 1974-1975 and 1978, in the absence of training. Data for the experimental treatment group (NSW) were combined with control data results from the Panel Study of Income Dynamics (PSID) study. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/nsw74psid1.csv", "filename": "nsw74psid1", "name": "Labour Training Evaluation Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data are pertinent to an investigation of the way that earnings changed, between 1974-1975 and 1978, in the absence of training. The data frame combines data for the experimental treatment group (NSW, 185 observations), using as control data results from the PSID (Panel Study of Income Dynamics) study (128 observations). The latter were chosen to mimic the characteristics of the NSW training and control groups. These are a subset of the nsw74psid1 data. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/nsw74psid3.csv", "filename": "nsw74psid3", "name": "Labour Training Evaluation Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The nsw74psidA data frame has 252 rows and 10 columns. See nsw74psid1 for more information. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/nsw74psidA.csv", "filename": "nsw74psidA", "name": "A Subset of the nsw74psid1 Data Set", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The nswdemo data frame contains 722 rows and 10 columns. These data are pertinent to an investigation of the way that earnings changed, between 1974-1975 and 1978, for an experimental treatment who were given job training as compared with a control group who did not receive such training. \nThe psid1 data set is an alternative non-experimental \"control\" group. psid2 and psid3 are subsets of psid1, designed to be better matched to the experimental data than psid1. Note also the cps1, cps2 and cps3 datasets (DAAGxtras) that have been proposed as non-experimental controls. 
\n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/nswdemo.csv", "filename": "nswdemo", "name": "Labour Training Evaluation Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data frame contains 2787 rows and 10 columns. These data are pertinent to an investigation of the way that earnings changed, between 1974-1975 and 1978, in the absence of training. Data for the experimental treatment group in nswdemo are combined with the psid1 control data from the Panel Study of Income Dynamics (PSID) study. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/nswpsid1.csv", "filename": "nswpsid1", "name": "Labour Training Evaluation Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A non-experimental \"control\" group, used in various studies of the effect of a labor training program, alternative to the experimental control group in nswdemo. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/psid1.csv", "filename": "psid1", "name": "Labour Training Evaluation Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A non-experimental \"control\" group, used in various studies of the effect of a labor training program, alternative to the experimental control group in nswdemo. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/psid2.csv", "filename": "psid2", "name": "Labour Training Evaluation Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A non-experimental \"control\" group, used in various studies of the effect of a labor training program, alternative to the experimental control group in nswdemo. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/psid3.csv", "filename": "psid3", "name": "Labour Training Evaluation Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a survey on social and other kinds of support. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/socsupport.csv", "filename": "socsupport", "name": "Social Support Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data on objects appearing in three windows on a video lottery terminal, together with the prize payout (usually 0). Observations were taken on two successive days in late 1994 at a hotel lounge north of Winnipeg, Manitoba. Each observation cost 25 cents (Canadian). The game played was ‘Double Diamond’. ", + "description": "Data on objects appearing in three windows on a video lottery terminal, together with the prize payout (usually 0). 
Observations were taken on two successive days in late 1994 at a hotel lounge north of Winnipeg, Manitoba. Each observation cost 25 cents (Canadian). The game played was \"Double Diamond\". ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/vlt.csv", "filename": "vlt", "name": "Video Lottery Terminal Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The wages1833 data frame gives the wages of Lancashire cotton factory workers in 1833. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/wages1833.csv", "filename": "wages1833", "name": "Wages of Lancashire Cotton Factory Workers in 1833", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "From a survey of the clerical employees of a large financial organization, the data are aggregated from the questionnaires of the approximately 35 employees for each of 30 (randomly selected) departments. 
The numbers give the percent proportion of favourable responses to seven questions in each department.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/attitude.csv", "filename": "attitude", - "name": "The Chatterjee–Price Attitude Data", - "number_format": 31, - "remove_quotes": true, + "name": "The Chatterjee-Price Attitude Data", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The sales time series BJsales and leading indicator BJsales.lead each contain 150 observations. The objects are of class \"ts\". ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/BJsales.csv", "filename": "BJsales", "name": "Sales Data with Leading Indicator", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Conversion rates between the various Euro currencies.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/euro.csv", "filename": "euro", "name": "Conversion Rates of Euro Currencies", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Contains the daily closing prices of major European stock indices: Germany DAX (Ibis), Switzerland SMI, France CAC, and UK FTSE. The data are sampled in business time, i.e., weekends and holidays are omitted. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/EuStockMarkets.csv", "filename": "EuStockMarkets", - "name": " Daily Closing Prices of Major European Stock Indices, 1991–1998 ", - "number_format": 31, - "remove_quotes": true, + "name": " Daily Closing Prices of Major European Stock Indices, 1991-1998 ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly earnings (dollars) per Johnson & Johnson share 1960–80. ", + "description": "Quarterly earnings (dollars) per Johnson & Johnson share 1960-80. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/JohnsonJohnson.csv", "filename": "JohnsonJohnson", "name": "Quarterly Earnings per Johnson & Johnson Share", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data on the savings ratio 1960–1970. ", + "description": "Data on the savings ratio 1960-1970. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/LifeCycleSavings.csv", "filename": "LifeCycleSavings", "name": "Intercountry Life-Cycle Savings Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A macroeconomic data set which provides a well-known example for a highly collinear regression. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/longley.csv", "filename": "longley", "name": "Longley's Economic Regression Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Cross-classification of a sample of British males according to each subject's occupational status and his father's occupational status. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/occupationalStatus.csv", "filename": "occupationalStatus", "name": "Occupational Status of Fathers and their Sons", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set consists of United States personal expenditures (in billions of dollars) in the categories; food and tobacco, household operation, medical and health, personal care, and private education for the years 1940, 1945, 1950, 1955 and 1960. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/USPersonalExpenditure.csv", "filename": "USPersonalExpenditure", "name": "Personal Expenditure Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 6 observations from 1970 to 1984 \nnumber of observations : 90 \nobservation : production units \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Airline.csv", "filename": "Airline", "name": "Cost for U.S. 
Airlines ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data.frame identifying which of 70 countries had a banking crisis each year 1800:2010. The first column is year. The remaining columns carry the names of the countries; those columns are 1 for years with banking crises and 0 otherwise. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/bankingCrises.csv", "filename": "bankingCrises", "name": " Countries in Banking Crises ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1972 \nnumber of observations : 4877 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Benefits.csv", "filename": "Benefits", "name": "Unemployment of Blue Collar Workers ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 126 \nobservation : production units \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Bids.csv", "filename": "Bids", "name": "Bids Received By U.S. 
Firms ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1980 \nnumber of observations : 23972 \nobservation : households \ncountry : Spain ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/BudgetFood.csv", "filename": "BudgetFood", "name": "Budget Share of Food for Spanish Households ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1973 to 1992 \nnumber of observations : 1729 \nobservation : households \ncountry : Italy ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/BudgetItaly.csv", "filename": "BudgetItaly", "name": "Budget Shares for Italian Households ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1980 to 1982 \nnumber of observations : 1519 \nobservation : households \ncountry : United Kingdown ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/BudgetUK.csv", "filename": "BudgetUK", "name": "Budget Shares of British Households ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1994 \nnumber of observations : 1472 
\nobservation : individuals \ncountry : Belgium ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Bwages.csv", "filename": "Bwages", "name": "Wages in Belgium ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "monthly observations from 1960–01 to 2002–12 \nnumber of observations : 516 ", + "description": "monthly observations from 1960-01 to 2002-12 \nnumber of observations : 516 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Capm.csv", "filename": "Capm", "name": "Stock Market Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1990 \nnumber of observations : 400 \nobservation : production units \ncountry : Netherland ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Clothing.csv", "filename": "Clothing", "name": "Sales Data of Men's Fashion Stores ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "quarterly observations from 1947-1 to 1996-4 \nnumber of observations : 200 \nobservation : country \ncountry : Canada ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Consumption.csv", "filename": "Consumption", "name": "Quarterly Data on Consumption and Expenditure ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, 
"use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1998 \nnumber of observations : 11130 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/CPSch3.csv", "filename": "CPSch3", "name": "Earnings from the Current Population Survey ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 3292 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Cracker.csv", "filename": "Cracker", "name": "Choice of Brand for Crakers ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 2000 \nnumber of observations : 308 \nobservation : goods \ncountry : Singapore ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Diamond.csv", "filename": "Diamond", "name": "Pricing the C's of Diamond Stones ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "weekly observations from 1975 to 1989 \nnumber of observations : 778 \nobservation : country \ncountry : Germany ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/DM.csv", "filename": "DM", "name": "DM Dollar Exchange Rate ", - 
"number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1988-1989 \nnumber of observations : 4266 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Earnings.csv", "filename": "Earnings", "name": "Earnings for Three Age Groups ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1970 to 1970 \nnumber of observations : 158 \nobservation : production units \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Electricity.csv", "filename": "Electricity", "name": "Cost Function for Electricity Producers ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "monthly observations from 1979–01 to 2001–12 \nnumber of observations : 276 ", + "description": "monthly observations from 1979-01 to 2001-12 \nnumber of observations : 276 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Forward.csv", "filename": "Forward", "name": "Exchange Rates of US Dollar Against Other Currencies ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": 
"daily observations from 1980–01 to 1987–05–21 \nnumber of observations : 1867 \nobservation : country \ncountry : World ", + "description": "daily observations from 1980-01 to 1987-05-21 \nnumber of observations : 1867 \nobservation : country \ncountry : World ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Garch.csv", "filename": "Garch", "name": "Daily Observations on Exchange Rates of the US Dollar Against Other Currencies ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1980 \nnumber of observations : 758 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Griliches.csv", "filename": "Griliches", "name": "Wage Datas ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 10 observations from 1935 to 1954 \nnumber of observations : 200 \nobservation : production units \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Grunfeld.csv", "filename": "Grunfeld", "name": "Grunfeld Investment Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1997-1998 \nnumber of observations : 2381 observation : individuals country : United States \nIn package version 0.2-9 and earlier this dataset was called Hdma. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Hdma.csv", "filename": "Hmda", "name": "The Boston HMDA Data Set ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 506 \nobservation : regional \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Hedonic.csv", "filename": "Hedonic", "name": "Hedonic Prices of Census Tracts in Boston ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1993 \nnumber of observations : 22272 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/HI.csv", "filename": "HI", "name": "Health Insurance and Hours Worked By Wives ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1987 \nnumber of observations : 546 \nobservation : goods \ncountry : Canada ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Housing.csv", "filename": "Housing", "name": "Sales Prices of Houses in the City of Windsor ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": 
false, "description": "Data on quantiles of the distributions of family incomes in the United States. This combines three data sources: \n(1) US Census Table F-1 for the central quantiles \n(2) Piketty and Saez for the 95th and higher quantiles \n(3) Gross Domestic Product and implicit price deflators from MeasuringWorth.com ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/incomeInequality.csv", "filename": "incomeInequality", "name": " Income Inequality in the US ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "quarterly observations from 1971–1 to 1985–2 \nnumber of observations : 58 \nobservation : country \ncountry : United Kingdown ", + "description": "quarterly observations from 1971-1 to 1985-2 \nnumber of observations : 58 \nobservation : country \ncountry : United Kingdom ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/IncomeUK.csv", "filename": "IncomeUK", "name": "Seasonally Unadjusted Quarterly Data on Disposable Income and Expenditure ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 2000 \nnumber of observations : 180 \nobservation : goods ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Journals.csv", "filename": "Journals", "name": "Economic Journals Dat Set ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": 
"description": "annual observations from 1920 to 1941 \nnumber of observations : 22 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Klein.csv", "filename": "Klein", "name": "Klein's Model I ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 532 observations from 1979 to 1988 \nnumber of observations : 5320 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/LaborSupply.csv", "filename": "LaborSupply", "name": "Wages and Hours Worked ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1996 \nnumber of observations : 569 \nobservation : production units \ncountry : Belgium ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Labour.csv", "filename": "Labour", "name": "Belgian Firms ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "annual observations from 1791 to 1990 \nnumber of observations : 200 \nobservation : country \ncountry : United Kingdown ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/LT.csv", "filename": "LT", "name": "Dollar Sterling Exchange Rate ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": 
"yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "quarterly observations from 1959-1 to 2000-4 \nnumber of observations : 168 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Macrodat.csv", "filename": "Macrodat", "name": "Macroeconomic Time Series for the United States ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "annual observations from 1947 to 1971 \nnumber of observations : 25 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/ManufCost.csv", "filename": "ManufCost", "name": "Manufacturing Costs ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "monthly observations from 1950-2 to 1990-12 \nnumber of observations : 491 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Mishkin.csv", "filename": "Mishkin", "name": "Inflation and Interest Rates ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1982 \nnumber of observations : 50 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Mofa.csv", "filename": "Mofa", - "name": "International Expansion of U.S. 
Mofa's (majority–owned Foreign Affiliates in Fire (finance, Insurance and Real Estate) ", - "number_format": 31, - "remove_quotes": true, + "name": "International Expansion of U.S. Mofa's (majority-owned Foreign Affiliates in Fire (finance, Insurance and Real Estate) ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "quarterly observations from 1967-1 to 1998-4 \nnumber of observations : 128 \nobservation : country \ncountry : Canada ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Money.csv", "filename": "Money", "name": "Money, GDP and Interest Rate in Canada ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "quarterly observations from 1954–01 to 1994–12 \nnumber of observations : 164 \ncountry : United States ", + "description": "quarterly observations from 1954-01 to 1994-12 \nnumber of observations : 164 \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/MoneyUS.csv", "filename": "MoneyUS", "name": "Macroeconomic Series for the United States ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "annual observations from 1900 to 1989 \nnumber of observations : 90 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Mpyr.csv", "filename": "Mpyr", "name": "Money, National Product and Interest Rate ", - "number_format": 
31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 265 observations from 1979 to 1987 \nnumber of observations : 2385 \nobservation : regional \ncountry : Sweden ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/MunExp.csv", "filename": "MunExp", "name": "Municipal Expenditure Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "quarterly observations from 1963-3 to 1975-4 \nnumber of observations : 50 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/MW.csv", "filename": "MW", "name": "Growth of Disposable Income and Treasury Bill Rate ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1955 to 1955 \nnumber of observations : 159 \nobservation : production units \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Nerlove.csv", "filename": "Nerlove", "name": "Cost Function for Electricity Producers, 1955 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1969 to 1992 \nnumber of observations : 53 \nobservation : 
production units \ncountry : United Kingdom ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Oil.csv", "filename": "Oil", "name": "Oil Investment ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 872 \nobservation : individuals \ncountry : Switzerland ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Participation.csv", "filename": "Participation", "name": "Labor Force Participation ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 346 observations from 1975 to 1979 \nnumber of observations : 1730 \nobservation : production units \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/PatentsHGH.csv", "filename": "PatentsHGH", "name": "Dynamic Relation Between Patents and R\\&D ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 181 observations from 1983 to 1991 \nnumber of observations : 1629 \nobservation : production units \ncountry : world ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/PatentsRD.csv", "filename": "PatentsRD", "name": "Patents, R\\&D and Technological Spillovers for a Panel of Firms ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false,
"use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "annual observations from 1800 to 1931 \nnumber of observations : 132 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/PE.csv", "filename": "PE", "name": "Price and Earnings Index ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "weekly observations from 1975 to 1989 \nnumber of observations : 778 \nobservation : country \ncountry : Germany ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Pound.csv", "filename": "Pound", "name": "Pound-dollar Exchange Rate ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "monthly observations from 1981–01 to 1996–06 \nnumber of observations : 186 \nobservation : country \ncountry : France and Italy ", + "description": "monthly observations from 1981-01 to 1996-06 \nnumber of observations : 186 \nobservation : country \ncountry : France and Italy ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/PPP.csv", "filename": "PPP", "name": "Exchange Rates and Price Indices for France and Italy ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "monthly observations from 1959–02 to 1993–11 \nnumber of observations : 
418 ", + "description": "monthly observations from 1959-02 to 1993-11 \nnumber of observations : 418 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Pricing.csv", "filename": "Pricing", "name": "Returns of Size-based Portfolios ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 48 observations from 1970 to 1986 \nnumber of observations : 816 \nobservation : regional \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Produc.csv", "filename": "Produc", "name": "Us States Production ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1993 \nnumber of observations : 4856 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/PSID.csv", "filename": "PSID", "name": "Panel Survey of Income Dynamics ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "quarterly observations from 1950-1 to 1996-4 \nnumber of observations : 188 \nobservation : country \ncountry : Canada ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Tbrate.csv", "filename": "Tbrate", "name": "Interest Rate, GDP and Inflation ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, 
"use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1995-96 \nnumber of observations : 2724 \nobservation : individuals \ncountry : Belgium ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Tobacco.csv", "filename": "Tobacco", "name": "Households Tobacco Budget Share ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1974 \nnumber of observations : 2675 \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Treatment.csv", "filename": "Treatment", "name": "Evaluating Treatment Effect of Training on Earnings ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data.frame giving the profits of the finance industry in the United States as a proportion of total corporate domestic profits. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/USFinanceIndustry.csv", "filename": "USFinanceIndustry", "name": " US Finance Industry Profits ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 595 observations from 1976 to 1982 \nnumber of observations : 4165 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Wages.csv", "filename": "Wages", "name": "Panel Datas of Individual Wages ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 595 observations from 1976 to 1982 \nnumber of observations : 3294 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Wages1.csv", "filename": "Wages1", "name": "Wages, Experience and Schooling ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1987 \nnumber of observations : 3382 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Workinghours.csv", "filename": "Workinghours", "name": "Wife Working Hours ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": 
"yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "weekly observations from 1975 to 1989 \nnumber of observations : 778 \nobservation : country \ncountry : Japan ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Yen.csv", "filename": "Yen", "name": "Yen-dollar Exchange Rate ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data are the daily log returns on BMW share price from Tuesday 2nd January 1973 until Tuesday 23rd July 1996. The data are contained in a numeric vector. The dates of each observation are contained in a times attribute, which is an object of class \"POSIXct\" (see DateTimeClasses). Note that these data form an irregular time series because no trading takes place at the weekend. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/evir/bmw.csv", "filename": "bmw", "name": "Daily Log Returns on BMW Share Price", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data describe large fire insurance claims in Denmark from Thursday 3rd January 1980 until Monday 31st December 1990. The data are contained in a numeric vector. The dates of each observation are contained in a times attribute, which is an object of class \"POSIXct\" (see DateTimeClasses). They were supplied by Mette Rytgaard of Copenhagen Re. Note that these data form an irregular time series. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/evir/danish.csv", "filename": "danish", "name": "Danish Fire Insurance Claims", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data are the daily log returns on Siemens share price from Tuesday 2nd January 1973 until Tuesday 23rd July 1996. The data are contained in a numeric vector. The dates of each observation are contained in a times attribute, which is an object of class \"POSIXct\" (see DateTimeClasses). Note that these data form an irregular time series because no trading takes place at the weekend. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/evir/siemens.csv", "filename": "siemens", "name": "Daily Log Returns on Siemens Share Price", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Australian monthly gas production: 1956–1995. ", + "description": "Australian monthly gas production: 1956-1995. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/forecast/gas.csv", "filename": "gas", "name": "Australian monthly gas production", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Daily morning gold prices in US dollars. 1 January 1985 – 31 March 1989. ", + "description": "Daily morning gold prices in US dollars. 1 January 1985 - 31 March 1989. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/forecast/gold.csv", "filename": "gold", "name": "Daily morning gold prices", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Australian total wine sales by wine makers in bottles <= 1 litre. Jan 1980 – Aug 1994. ", + "description": "Australian total wine sales by wine makers in bottles <= 1 litre. Jan 1980 - Aug 1994. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/forecast/wineind.csv", "filename": "wineind", "name": "Australian total wine sales", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total quarterly beer production in Australia (in megalitres) from 1956:Q1 to 2010:Q2. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/ausbeer.csv", "filename": "ausbeer", "name": "Quarterly Australian Beer production", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The total monthly expenditure on cafes, restaurants and takeaway food services in Australia ($billion). April 1982 - September 2017. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/auscafe.csv", "filename": "auscafe", "name": "Monthly expenditure on eating out in Australia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Annual electricity sales for South Australia in GWh from 1989 to 2008. Electricity used for hot water has been excluded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/elecsales.csv", "filename": "elecsales", "name": "Electricity sales to residential customers in South Australia.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Quarterly retail trade index in the Euro area (17 countries), 1996-2011, covering wholesale and retail trade, and repair of motor vehicles and motorcycles. (Index: 2005 = 100). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/euretail.csv", "filename": "euretail", "name": "Quarterly retail trade: Euro area.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Weekly data beginning 2 February 1991, ending 20 January 2017. Units are \"million barrels per day\". 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/gasoline.csv", "filename": "gasoline", "name": "US finished motor gasoline product supplied.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Closing stock prices of GOOG from the NASDAQ exchange, for 1000 consecutive trading days between 25 February 2013 and 13 February 2017. Adjusted for splits. goog200 contains the first 200 observations from goog. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/goog.csv", "filename": "goog", "name": "Daily closing stock prices of Google Inc", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly quotations and monthly television advertising expenditure for a US insurance company. January 2002 to April 2005. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/insurance.csv", "filename": "insurance", "name": "Insurance quotations and advertising expenditure.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Annual oil production (millions of tonnes), Saudi Arabia, 1965-2013. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/oil.csv", "filename": "oil", "name": "Annual oil production in Saudi Arabia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total quarterly electricity production in Australia (in billion kWh) from 1956:Q1 to 2010:Q2. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/qauselec.csv", "filename": "qauselec", "name": "Quarterly Australian Electricity production", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total quarterly production of Portland cement in Australia (in millions of tonnes) from 1956:Q1 to 2014:Q1. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/qcement.csv", "filename": "qcement", "name": "Quarterly Australian Portland Cement production", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Total quarterly gas production in Australia (in petajoules) from 1956:Q1 to 2010:Q2. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/qgas.csv", "filename": "qgas", "name": "Quarterly Australian Gas Production", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Percentage changes in quarterly personal consumption expenditure, personal disposable income, production, savings and the unemployment rate for the US, 1960 to 2016. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/uschange.csv", "filename": "uschange", "name": "Growth rates of personal consumption and personal income in the USA.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame with 1000 observations on the following 21 variables. 
\nV1\na factor with levels A11 A12 A13 A14\nV2\na numeric vector\nV3\na factor with levels A30 A31 A32 A33 A34\nV4\na factor with levels A40 A41 A410 A42 A43 A44 A45 A46 A48 A49\nV5\na numeric vector\nV6\na factor with levels A61 A62 A63 A64 A65\nV7\na factor with levels A71 A72 A73 A74 A75\nV8\na numeric vector\nV9\na factor with levels A91 A92 A93 A94\nV10\na factor with levels A101 A102 A103\nV11\na numeric vector\nV12\na factor with levels A121 A122 A123 A124\nV13\na numeric vector\nV14\na factor with levels A141 A142 A143\nV15\na factor with levels A151 A152 A153\nV16\na numeric vector\nV17\na factor with levels A171 A172 A173 A174\nV18\na factor with levels good bad\nV19\na factor with levels A191 A192\nV20\na factor with levels A201 A202\nV21\na numeric vector", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gamclass/german.csv", "filename": "german", "name": " German credit scoring data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A dataset containing the prices and other attributes of almost 54,000 diamonds. The variables are as follows: ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/diamonds.csv", "filename": "diamonds", "name": "Prices of 50,000 round cut diamonds", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset was produced from US economic time series data available from http://research.stlouisfed.org/fred2. economics is in \"wide\" format, economics_long is in \"long\" format. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/economics.csv", "filename": "economics", "name": "US economic time series", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset contains a subset of the fuel economy data that the EPA makes available on http://fueleconomy.gov. It contains only models which had a new release every year between 1999 and 2008 - this was used as a proxy for the popularity of the car. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/mpg.csv", "filename": "mpg", "name": "Fuel economy data from 1999 and 2008 for 38 popular models of car", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Information about the housing market in Texas provided by the TAMU real estate center, http://recenter.tamu.edu/. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/txhousing.csv", "filename": "txhousing", "name": "Housing sales in TX", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In one of the first statistical textbooks, Arthur Bowley (1901) used these data to illustrate an arithmetic and graphical analysis of time-series data using the total value of British and Irish exports from 1855-1899. He presented a line graph of the time-series data, supplemented by overlaid line graphs of 3-, 5- and 10-year moving averages. 
His goal was to show that while the initial series showed wide variability, moving averages made the series progressively smoother. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Bowley.csv", "filename": "Bowley", "name": " Bowley's data on values of British and Irish trade, 1855-1899 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In 1910, Karl Pearson weighed in on the debate, fostered by the temperance movement, on the evils done by alcohol not only to drinkers, but to their families. The report \"A first study of the influence of parental alcoholism on the physique and ability of their offspring\" was an ambitious attempt to bring the new methods of statistics to bear on an important question of social policy, to see if the hypothesis that children were damaged by parental alcoholism would stand up to statistical scrutiny. \nWorking with his assistant, Ethel M. Elderton, Pearson collected voluminous data in Edinburgh and Manchester on many aspects of health, stature, intelligence, etc. of children classified according to the drinking habits of their parents. His conclusions were almost invariably negative: the tendency of parents to drink appeared unrelated to anything he had measured. \nThe firestorm that this report set off is well described by Stigler (1999), Chapter 1. The data set DrinksWages is just one of Pearson's many tables that he published in a letter to The Times, August 10, 1910.
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/DrinksWages.csv", "filename": "DrinksWages", "name": " Elderton and Pearson's (1910) data on drinking and wages ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Stigler (1997, 1999) recounts the history of one of the oldest continuous schemes of sampling inspection carried out by the Royal Mint in London for about eight centuries. The Trial of the Pyx was the final, ceremonial stage in a process designed to ensure that the weight and quality of gold and silver coins from the mint met the standards for coinage. \nAt regular intervals, coins would be taken from production and deposited into a box called the Pyx. When a Trial of the Pyx was called, the contents of the Pyx would be counted, weighed and assayed for content, and the results would be compared with the standard set for the Royal Mint. \nThe data frame Pyx gives the results for the year 1848 (Great Britain, 1848) in which 10,000 gold sovereigns were assayed. The coins in each bag were classified according to the deviation from the standard content of gold for each coin, called the Remedy, R = 123 * (12/5760) = .25625, in grains, for a single sovereign. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Pyx.csv", "filename": "Pyx", "name": " Trial of the Pyx ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Playfair (1821) used a graph, showing parallel time-series of the price of wheat and the typical weekly wage for a \"good mechanic\" from 1565 to 1821 to argue that working men had never been as well-off in terms of purchasing power as they had become toward the end of this period. \nHis graph is a classic in the history of data visualization, but commits the sin of showing two non-commensurable Y variables on different axes. Scatterplots of wages vs. price or plots of ratios (e.g., wages/price) are in some ways better, but both of these ideas were unknown in 1821. \nIn this version, information on the reigns of British monarchs is provided in a separate data.frame, Wheat.monarch. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Wheat.csv", "filename": "Wheat", "name": " Playfair's Data on Wages and the Price of Wheat ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Playfair (1821) used a graph, showing parallel time-series of the price of wheat and the typical weekly wage for a \"good mechanic\" from 1565 to 1821 to argue that working men had never been as well-off in terms of purchasing power as they had become toward the end of this period. \nHis graph is a classic in the history of data visualization, but commits the sin of showing two non-commensurable Y variables on different axes. Scatterplots of wages vs. 
price or plots of ratios (e.g., wages/price) are in some ways better, but both of these ideas were unknown in 1821. \nIn this version, information on the reigns of British monarchs is provided in a separate data.frame, Wheat.monarch. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Wheat.monarchs.csv", "filename": "Wheat_monarchs", "name": " Playfair's Data on Wages and the Price of Wheat ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Forbes 2000 list is a ranking of the world's biggest companies, measured by sales, profits, assets and market value. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/Forbes2000.csv", "filename": "Forbes2000", "name": " The Forbes 2000 Ranking of the World's Biggest Companies (Year 2004) ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a survey from 1974 / 1975 asking both female and male responders about their opinion on the statement: Women should take care of running their homes and leave running the country up to men. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/womensrole.csv", "filename": "womensrole", "name": " Womens Role in Society ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data contains 5822 real customer records. 
Each record consists of 86 variables, containing sociodemographic data (variables 1-43) and product ownership (variables 44-86). The sociodemographic data is derived from zip codes. All customers living in areas with the same zip code have the same sociodemographic attributes. Variable 86 (Purchase) indicates whether the customer purchased a caravan insurance policy. Further information on the individual variables can be obtained at http://www.liacs.nl/~putten/library/cc2000/data.html ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/Caravan.csv", "filename": "Caravan", "name": "The Insurance Company (TIC) Benchmark ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A simulated data set containing sales of child car seats at 400 different stores. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/Carseats.csv", "filename": "Carseats", "name": "Sales of Child Car Seats ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A simulated data set containing information on ten thousand customers. The aim here is to predict which customers will default on their credit card debt. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/Default.csv", "filename": "Default", "name": "Credit Card Default Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data contains 1070 purchases where the customer either purchased Citrus Hill or Minute Maid Orange Juice. A number of characteristics of the customer and product are recorded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/OJ.csv", "filename": "OJ", "name": "Orange Juice Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A simple simulated data set containing 100 returns for each of two assets, X and Y. The data is used to estimate the optimal fraction to invest in each asset to minimize investment risk of the combined portfolio. One can then use the Bootstrap to estimate the standard error of this estimate. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/Portfolio.csv", "filename": "Portfolio", "name": "Portfolio Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily percentage returns for the S&P 500 stock index between 2001 and 2005. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/Smarket.csv", "filename": "Smarket", "name": "S&P Stock Market Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Wage and other data for a group of 3000 male workers in the Mid-Atlantic region. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/Wage.csv", "filename": "Wage", "name": "Mid-Atlantic Wage Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Cars93 data frame has 93 rows and 27 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Cars93.csv", "filename": "Cars93", "name": " Data from 93 Cars on Sale in the USA in 1993 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The housing data frame has 72 rows and 5 variables. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/housing.csv", "filename": "housing", "name": " Frequency Table from a Copenhagen Housing Conditions Survey ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data given in data frame Insurance consist of the numbers of policyholders of an insurance company who were exposed to risk, and the numbers of car insurance claims made by those policyholders in the third quarter of 1973. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Insurance.csv", "filename": "Insurance", "name": " Numbers of Car Insurance claims ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on the locations, porosity and permeability (a measure of oil flow) on 104 oil wells in the US Naval Petroleum Reserve No. 1 in California. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/npr1.csv", "filename": "npr1", "name": " US Naval Petroleum Reserve No. 1 data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Job Search Intervention Study (JOBS II). JOBS II is a randomized field experiment that investigates the efficacy of a job training intervention on unemployed workers. The program is designed to not only increase reemployment among the unemployed but also enhance the mental health of the job seekers. 
In the JOBS II field experiment, 1,801 unemployed workers received a pre-screening questionnaire and were then randomly assigned to treatment and control groups. Those in the treatment group participated in job-skills workshops. In the workshops, respondents learned job-search skills and coping strategies for dealing with setbacks in the job-search process. Those in the control condition received a booklet describing job-search tips. In follow-up interviews, the two key outcome variables were measured; a continuous measure of depressive symptoms based on the Hopkins Symptom Checklist, and a binary variable, representing whether the respondent had become employed. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mediation/jobs.csv", "filename": "jobs_", "name": "JOBS II data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from utility bills at a residence. Utilities2 is a similar data set with some additional variables. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Utilities.csv", "filename": "Utilities", "name": "Utility bills", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from utility bills at a private residence. This is an augmented version of Utilities. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Utilities2.csv", "filename": "Utilities2", "name": "Utility bills", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 46 observations from 1963 to 1992 \ntotal number of observations : 1380 \nobservation : regional \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/Cigar.csv", "filename": "Cigar_", "name": "Cigarette Consumption ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An unbalanced panel of 140 observations from 1976 to 1984 \ntotal number of observations : 1031 \nobservation : firms \ncountry : United Kingdom ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/EmplUK.csv", "filename": "EmplUK", "name": "Employment and Wages in the United Kingdom", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A panel of 18 observations from 1960 to 1978 \ntotal number of observations : 342 \nobservation : country \ncountry : OECD ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/Gasoline.csv", "filename": "Gasoline_", "name": "Gasoline Consumption", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - 
"comment_character": "#", - "create_index_column": false, "description": "A balanced panel of 10 observational units (firms) from 1935 to 1954 \ntotal number of observations : 200 \nobservation : production units \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/Grunfeld.csv", "filename": "Grunfeld_", "name": "Grunfeld's Investment Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A cross-section \nnumber of observations : 506 \nobservation : regional \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/Hedonic.csv", "filename": "Hedonic_", "name": "Hedonic Prices of Census Tracts in the Boston Area", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A panel of 532 observations from 1979 to 1988 \nnumber of observations : 5320 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/LaborSupply.csv", "filename": "LaborSupply_", "name": "Wages and Hours Worked", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A panel of 104 quarterly observations from 1973Q1 to 1998Q4 \ntotal number of observations : 1768 \nobservation : country \ncountry : OECD ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/Parity.csv", "filename": "Parity", "name": "Purchasing Power Parity and other parity 
relationships", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A panel of 48 observations from 1970 to 1986 \ntotal number of observations : 816 \nobservation : regional \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/Produc.csv", "filename": "Produc_", "name": "US States Production", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A panel of 738 observations from 1983 to 1990 \ntotal number of observations: 5904 \nobservation: firms \ncountry: Spain ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/Snmesp.csv", "filename": "Snmesp", "name": "Employment and Wages in Spain", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A panel of 595 individuals from 1976 to 1982, taken from the Panel Study of Income Dynamics (PSID).\n\nThe data are organized as a stacked time series/balanced panel, see Examples on how to convert to a pdata.frame. 
\ntotal number of observations : 4165 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/Wages.csv", "filename": "Wages_", "name": "Panel Data of Individual Wages", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Cross-national data on relative size of the trade unions and predictors, in 20 countries. Two of the predictors are highly collinear, and are the source of a debate between Stephens and Wallerstein (1991), later reviewed by Western and Jackman (1994). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/unionDensity.csv", "filename": "unionDensity", "name": "cross national rates of trade union density", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "US census data on family income from 2008 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/income.csv", "filename": "income", "name": "US family income from US census 2008 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Version of the Barro Growth Data used in Koenker and Machado(1999). This is a regression data set consisting of 161 observations on determinants of cross country GDP growth rates. There are 13 covariates with dimnames corresponding to the original Barro and Lee source. See http://www.nber.org/pub/barro.lee/. 
The first 71 observations are on the period 1965-75, remainder on 1975-85. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/quantreg/barro.csv", "filename": "barro", "name": "Barro Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Engel food expenditure data used in Koenker and Bassett (1982). This is a regression data set consisting of 235 observations on income and expenditure on food for Belgian working class households. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/quantreg/engel.csv", "filename": "engel", "name": "Engel Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Time Series of Weekly US Gasoline Prices: 1990:8 – 2003:26 ", + "description": "Time Series of Weekly US Gasoline Prices: 1990:8 - 2003:26 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/quantreg/gasprice.csv", "filename": "gasprice", "name": "Time Series of US Gasoline Prices ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "One waiter recorded information about each tip he received over a period of a few months working in one restaurant. 
He collected several variables: ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/reshape2/tips.csv", "filename": "tips", "name": "Tipping data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The total 1981 premium income of pension funds of Dutch firms, for 18 Professional Branches, from de Wit (1982). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/pension.csv", "filename": "pension", "name": "Pension Funds Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Wagner (1994) investigates the rate of employment growth (y) as function of percentage of people engaged in production activities (PA) and higher services (HS) and of the growth of these percentages (GPA, GHS) during three time periods in 21 geographical regions of the greater Hannover area. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/wagnerGrowth.csv", "filename": "wagnerGrowth", "name": " Wagner's Hannover Employment Growth Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The car.test.frame data frame has 60 rows and 8 columns, giving data on makes of cars taken from the April, 1990 issue of Consumer Reports. This is part of a larger dataset, some columns of which are given in cu.summary. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/rpart/car.test.frame.csv", "filename": "car_test_frame", "name": "Automobile Data from 'Consumer Reports' 1990", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data on 111 cars, taken from pages 235–255, 281–285 and 287–288 of the April 1990 Consumer Reports Magazine. ", + "description": "Data on 111 cars, taken from pages 235-255, 281-285 and 287-288 of the April 1990 Consumer Reports Magazine. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/rpart/car90.csv", "filename": "car90", "name": "Automobile Data from 'Consumer Reports' 1990", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The cu.summary data frame has 117 rows and 5 columns, giving data on makes of cars taken from the April, 1990 issue of Consumer Reports. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/rpart/cu.summary.csv", "filename": "cu_summary", "name": "Automobile Data from 'Consumer Reports' 1990", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Firm-level panel data on innovation and institutional ownership from 1991 to 1999 over 803 firms. The observations refer to different firms over different years. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/sandwich/InstInnovation.csv", "filename": "InstInnovation", "name": "Innovation and Institutional Ownership", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "US data for fitting an investment equation. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/sandwich/Investment.csv", "filename": "Investment", "name": "US Investment Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data for Klein's (1950) simple econometric model of the U. S. economy. \nThe Klein data frame has 22 rows and 10 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/sem/Klein.csv", "filename": "Klein_", "name": "Klein's Data on the U. S. Economy", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These are partly contrived data from Kmenta (1986), constructed to illustrate estimation of a simultaneous-equation model. \nThe Kmenta data frame has 20 rows and 5 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/sem/Kmenta.csv", "filename": "Kmenta", "name": "Partly Artificial Data on the U. S. 
Economy", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Age, price, and mileage of used Honda Accords in 2017 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/AccordPrice.csv", "filename": "AccordPrice", "name": "Prices of Used Honda Accords (in 2017)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Poll attitudes towards British trade unions ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/BritishUnions.csv", "filename": "BritishUnions", "name": "Attitudes Towards British Trade Unions", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on 60 customers at a clothing retailer \nThis dataset represents a random sample of 60 customers from a large clothing retailer. The manager of the store is interested in predicting how much a customer will spend on his or her next purchase based on one or more of the available explanatory variables. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Clothing.csv", "filename": "Clothing_", "name": "Sales for a Clothing Retailer", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Variables that might be related to whether students overdraw a checking account. \nResearchers conducted a survey of 450 undergraduates in large introductory courses at either Mississippi State University or the University of Mississippi. There were close to 150 questions on the survey, but only four of these variables are included in this dataset. (You can consult the paper to learn how the variables beyond these 4 affect the analysis.) The primary interest for the researchers was factors relating to whether or not a student has ever overdrawn a checking account. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CreditRisk.csv", "filename": "CreditRisk", "name": "Overdrawn Checking Account?", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Price and characteristics for a sample of 351 diamonds \nData for a sample of diamonds. The clarity of the diamonds ranges from IF (internally flawless) through VVS1 (very,very slightly included), VS1 (very slightly included), to SI3 (slightly included) in the order listed above. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Dinosaurs.csv", "filename": "Diamonds", "name": "Characteristics of a Sample of Diamonds", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A subset of 307 cases with the most frequent colors from the Diamonds data \nA subset of the Diamonds data, containing only those with most frequent colors D, E, F, and G. The clarity of the diamonds ranges from IF (internally flawless) through VVS1 (very,very slightly included), VS1 (very slightly included), to SI3 (slightly included) in the order listed above. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Diamonds2.csv", "filename": "Diamonds2", "name": "Characteristics of a Subset of the Diamond Sample", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on houses sold between 2005 and 2015 in Grinnell, Iowa.\nA local Grinnell realtor, Matt Karjalahti, put these data together to see what patterns might be found, perhaps with an improvement in how one sells houses or buys them. He asked Grinnell College economists, Lee Logan and Eric Ohrn, to help with the analysis and we obtained the data from them. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/GrinnellHouses.csv", "filename": "GrinnellHouses", "name": "House Sales in Grinnell, Iowa", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame with 36 observations on the following 5 variables. \nGrocery stores and product manufacturers are always interested in how well the products on the store shelves sell. An experiment was designed to test whether the amount of discount given on products affected the amount of sales of that product. There were three levels of discount, 5%, 10%, and 15%, and sales were held for a week. The total number of products sold during the week of the sale was recorded. The researchers also recorded the wholesale price of the item put on sale. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Grocery.csv", "filename": "Grocery", "name": "Grocery Sales and Discounts", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Selling price and characteristics for a sample of 20 houses in a small town \nThis dataset contains selling prices for 20 houses that were sold in 2008 in a small midwestern town. The file also contains data on the size of each house (in square feet) and the size of the lot (in square feet) that the house is on. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Houses.csv", "filename": "Houses", "name": "House Prices, Sizes, and Lot Areas", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "House prices for a sample of houses in Canton NY \nData scraped from Zillow.com for a sample of houses near the 13617 area code (Canton, NY a small town in upstate NY). Houses on lots bigger than five acres (often farms) were excluded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/HousesNY.csv", "filename": "HousesNY", "name": "House Prices in Rural NY", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Consumer Price Index (CPI) each month for 2009 through 2016 \nMonthly Consumer Price Index for 2009 to 2016 as produced by the Bureau of Labor Statistics (Series Id. CUUR0000SA0). Based on prices for all items in U.S. city average for all consumers (not seasonally) Base period is 1982-1984-100. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Inflation.csv", "filename": "Inflation", "name": "Monthly Consumer Price Index (2009-2016)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset contains data on daily changes from two stock markets over 56 days from 06-Aug-09 to 02-Nov-09. 
The Dow Jones Industrial Average is based in New York and the Nikkei 225 is a stock index in Japan. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Markets.csv", "filename": "Markets", "name": "Daily Change in Dow Jones and Nikkei Stock Market Indices", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Survey of college students to look at factors related to having overdrawn a checking account. \nResearchers conducted a survey of 450 undergraduates in large introductory courses at either Mississippi State University or the University of Mississippi. There were close to 150 questions on the survey, but only four of these variables are included in this dataset. (You can consult the paper to learn how the variables beyond these 4 affect the analysis.) The primary interest for the researchers was factors relating to whether or not a student has ever overdrawn a checking account. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Overdrawn.csv", "filename": "Overdrawn", "name": "Overdrawn Checking Account?", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Compare prices for Porsche and Jaguar cars offered for sale at an internet site \nTwo students collected samples of Porsche and Jaguar cars that were offered for sale at an internet site. In addition to asking price, they recorded the model year (converting to age) and mileage of each advertised car. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/PorscheJaguar.csv", "filename": "PorscheJaguar", "name": "Porsche and Jaguar Prices", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Prices for Porsche cars offered for sale at an internet site. \nA student was interested in prices for used Porsche sports cars being sold on the internet. He selected a random sample of 30 Porsches from the ones being advertised at autotrader.com. For each car he recorded the asking price, mileage, and model year (which he converted to age). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/PorschePrice.csv", "filename": "PorschePrice", "name": "Porsche Prices", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Quarterly production of residual oil in the U.S. from 1983 to 2016 \nThe U.S. Energy Information Administration tracks the production and distribution of various types of petroleum products. The category for this dataset is called residual oil, which are heavier oils (often called No. 5. and No. 6) that remain after lighter oils (such as No. 4 home heating oil) are distilled away in the refining process. It is used in steam-powered ships, power plants, and other industrial applications. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ResidualOil.csv", "filename": "ResidualOil", "name": "US Residual Oil Production (Quarterly 1983-2016)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Contributions to a supplemental retirement account (1997-2012) \nA faculty member opened a supplemental retirement account (SRA) in 1997 to invest money for retirement. This dataset shows the annual contributions to that account. Annual contributions were adjusted downward during sabbatical years in order to maintain a steady family income. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Retirement.csv", "filename": "Retirement", "name": "Yearly Contributions to a Supplemental Retirement Account", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily closing prices of Apple, Google, and Microsoft stocks (12/1/2015 to 12/1/2017) \nClosing price of Apple (AAPL), Google/Alphabet (GOOG) and Microsoft (MSFT) stocks for each trading day in a two-year period from 12/1/2015 to 12/1/2017. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/TechStocks.csv", "filename": "TechStocks", "name": "Daily Prices of Three Tech Stocks ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Prices and number of pages for a sample of college textbooks \nTwo undergraduate students at Cal Poly - San Luis Obispo took a random sample of 30 textbooks from the campus bookstore in the fall of 2006. They recorded the price and number of pages in each book, in order to investigate the question of whether number of pages can be used to predict price. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/TextPrices.csv", "filename": "TextPrices", "name": "Textbook Prices", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Compare prices for Porsche, Jaguar, and BMW cars offered for sale at an internet site \nTwo students collected samples of Porsche, Jaguar, and BMW cars that were offered for sale at an internet site. In addition to asking price, they recorded the model year (converting to age) and mileage of each advertised car. The PorschePrice dataset (from the first edition) has only the Porsche data and the PorscheJaguar dataset has the data for those two models. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ThreeCars.csv", "filename": "ThreeCars", "name": "Prices of Three Used Car Models (2007)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from cars.com for a sample of three different models of used cars in 2017 \nData for a sample of cars from three models (Mazda6, Honda Accord, Toyota Maxima) from a website. The dataset AccordPrice is a subset of this file. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ThreeCars2017.csv", "filename": "ThreeCars2017", "name": "Price, Age, and Mileage of Three Used Car Models", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Price of US stamp for first class mail 1885-2012 \nThe data record the year and price for each change in price for a US first class (1 ounce, domestic letter) stamp since 1885. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/USstamps.csv", "filename": "USstamps", "name": "Price of US Stamps", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Intergenerational occupational mobility data with covariates. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/logan.csv", "filename": "logan", "name": "Data from the 1972-78 GSS data used by Logan", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Economists fit a parametric censored data model called the ‘tobit’. These data are from Tobin's original paper. ", + "description": "Economists fit a parametric censored data model called the \"tobit\". These data are from Tobin's original paper. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/tobin.csv", "filename": "tobin", "name": "Tobin's Tobit data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from the Danish Welfare Study. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/DanishWelfare.csv", "filename": "DanishWelfare", "name": "Danish Welfare Study Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a 1974 Danish study given by Andersen (1991) on the employees who had been laid off. The workers are classified by their employment status on 1975-01-01, the cause of their layoff and the length of employment before they were laid off. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Employment.csv", "filename": "Employment", "name": "Employment Status", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Petersen (1968) about the job satisfaction of 715 blue collar workers, selected from Danish Industry in 1968. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/JobSatisfaction.csv", "filename": "JobSatisfaction", "name": "Job Satisfaction Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Information on 665 households of Rochdale, Lancashire, UK. The study was conducted to identify influence factors on economical activity of wives. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Rochdale.csv", "filename": "Rochdale", "name": "Rochdale Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Selected macroeconomic indicators for Austria, Belgium, Canada, Denmark, Finland, France, Italy, Japan, the Netherlands, Norway, Sweden, the United Kingdom, the United States, and West Germany for the period 1966-1990. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/macro.csv", "filename": "macro", "name": "Macroeconomic Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Political Economic Risk Data from 62 Countries in 1987. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/PErisk.csv", "filename": "PErisk", "name": "Political Economic Risk Data from 62 Countries in 1987", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on bilateral sanctions behavior for selected years during the general period 1939-1983. This data contains errors that have since been corrected. Please contact Lisa Martin before using this data for publication. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/sanction.csv", "filename": "sanction", "name": "Multilateral Economic Sanctions", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data lists the quarterly unemployment figures (as a percentage) for the period 1993-1996 for five countries. 
The countries are Australia, Canada, New Zealand, the United Kingdom and the United States.", "url": "http://www.statsci.org/data/oz/unemfive.txt", "filename": "unemfive", "name": "Unemployment Data for Five Countries", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "J. Crew is a clothing company known for its preppy\nfashions, including jeans, khakis, and other basic items\nsold to young professionals through its catalogs, websites,\nand some 260 retail and outlet stores in the United States.\n(Michelle Obama shops there.) The data reports their\nquarterly revenue from Q1 2003 through the first quarter of 2013", "url": "https://dasl.datadescription.com/download/data/3302", "filename": "JCrew", "name": "JCrew", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The price (per barrel) of oil has fluctuated over time. Various attempts to model it are generally not successful. The data include the monthly crude oil price in $/barrel from January 2001 to March 2007. ", "url": "https://dasl.datadescription.com/download/data/3378", "filename": "Oil-prices-monthly", "name": "Oil prices monthly", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A company that traditionally made rulers and\nyardsticks is setting up a manufacturing process to make\nmetersticks. 
Obviously, accuracy is important for their new\nproduct. The company runs a 24-hour production process.\nUsing a calibration set of data they found:\nX = 1.000\nR = 0.0016\nThey took a sample of 3 metersticks each hour and recorded\nthe results.", "url": "https://dasl.datadescription.com/download/data/3431", "filename": "Rulers", "name": "Rulers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Economics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data are simulated successive observations from a process in equilibrium. The process is assumed to have specification limits (5.49, 5.79). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/capability.csv", "filename": "capability", "name": "Simulated Manufacturing Process Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The cloth data frame has 32 rows and 2 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/cloth.csv", "filename": "cloth", "name": "Number of Flaws in Cloth ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data frame gives the dates of 191 explosions in coal mines which resulted in 10 or more fatalities. The time span of the data is from March 15, 1851 until March 22 1962. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/coal.csv", "filename": "coal", "name": "Dates of Coal Mining Disasters ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The “experimenters” were the actual subjects of the study. They collected ratings of the apparent success of people in pictures who were pre-selected for their average appearance of success. The experimenters were told prior to collecting data that particular subjects were either high or low in their tendency to rate appearance of success, and were instructed to get good data, scientific data, or were given no such instruction. Each experimenter collected ratings from 18 randomly assigned subjects. This version of the Adler data is taken from Erickson and Nosanchuk (1977). The data described in the original source, Adler (1973), have a more complex structure. ", + "description": "The \u201cexperimenters\u201d were the actual subjects of the study. They collected ratings of the apparent success of people in pictures who were pre-selected for their average appearance of success. The experimenters were told prior to collecting data that particular subjects were either high or low in their tendency to rate appearance of success, and were instructed to get good data, scientific data, or were given no such instruction. Each experimenter collected ratings from 18 randomly assigned subjects. This version of the Adler data is taken from Erickson and Nosanchuk (1977). The data described in the original source, Adler (1973), have a more complex structure. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Adler.csv", "filename": "Adler", "name": "Experimenter Expectations", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Counts of new PhDs in the mathematical sciences for 2008-09 and 2011-12 categorized by type of institution, gender, and US citizenship status. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/AMSsurvey.csv", "filename": "AMSsurvey", "name": " American Math Society Survey Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Baumann data frame has 66 rows and 6 columns. The data are from an experimental study conducted by Baumann and Jones, as reported by Moore and McCabe (1993) Students were randomly assigned to one of three experimental groups. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Baumann.csv", "filename": "Baumann", "name": "Methods of Teaching Reading Comprehension", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Mandel data frame has 8 rows and 3 columns. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Mandel.csv", "filename": "Mandel", "name": "Contrived Collinear Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Quartet data frame has 11 rows and 5 columns. These are contrived data. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Quartet.csv", "filename": "Quartet", "name": "Four Regression Datasets", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Information on the survival status, sex, age, and passenger class of 1309 passengers in the Titanic disaster of 1912. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/TitanicSurvival.csv", "filename": "TitanicSurvival", "name": "Survival of Passengers on the Titanic", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from the World Values Surveys 1995-1997 for Australia, Norway, Sweden, and the United States. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/WVS.csv", "filename": "WVS", "name": "World Values Surveys", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is a small rounded subset of the C-horizon data chorizon from package mvoutlier. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/cluster/chorSub.csv", "filename": "chorSub", "name": "Subset of C-horizon of Kola Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Ruspini data set, consisting of 75 points in four groups that is popular for illustrating clustering techniques. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/cluster/ruspini.csv", "filename": "ruspini", "name": "Ruspini Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An artificial data set consisting of 3000 points in 3 quite well-separated clusters. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/cluster/xclara.csv", "filename": "xclara", "name": "Bivariate Data Set with 3 Clusters", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Fair (1978). 
Although Fair used a tobit model with the data, the outcome measure can be modeled as a count. In fact, Greene (2003) modeled it as Poisson, but given the amount of overdispersion in the data, employing a negative binomial model is an appropriate strategy. The data is stored in the affairs data set. Naffairs is the response variable, indicating the number of affairs reported by the participant in the past year. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/affairs.csv", "filename": "affairs", "name": "affairs", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are taken from Loomis (2003). The study relates to a survey taken on reported frequency of visits to national parks during the year. The survey was taken at park sites, thus incurring possible effects of endogenous stratification. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/loomis.csv", "filename": "loomis", "name": "loomis", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The allbacks data frame gives measurements on the volume and weight of 15 books, some of which are softback (pb) and some of which are hardback (hb). Area of the hardback covers is also included. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/allbacks.csv", "filename": "allbacks", "name": "Measurements on a Selection of Books", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements of sugar content in frosted flakes breakfast cereal. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/cerealsugar.csv", "filename": "cerealsugar", "name": "Percentage of Sugar in Breakfast Cereal", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A non-experimental \"control\" group, used in various studies of the effect of a labor training program, alternative to the experimental control group in nswdemo. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/cps1.csv", "filename": "cps1", "name": "Labour Training Evaluation Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A non-experimental \"control\" group, used in various studies of the effect of a labor training program, alternative to the experimental control group in nswdemo. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/cps2.csv", "filename": "cps2", "name": "Labour Training Evaluation Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dewpoint data frame has 72 rows and 3 columns. Monthly data were obtained for a number of sites (in Australia) and a number of months. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/dewpoint.csv", "filename": "dewpoint", "name": "Dewpoint Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The elastic1 data frame has 7 rows and 2 columns giving, for each amount by which an elastic band is stretched over the end of a ruler, the distance that the band traveled when released. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/elastic1.csv", "filename": "elastic1", "name": "Elastic Band Data Replicated", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The elastic2 data frame has 9 rows and 2 columns giving, for each amount by which an elastic band is stretched over the end of a ruler, the distance that the band traveled when released. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/elastic2.csv", "filename": "elastic2", "name": "Elastic Band Data Replicated Again", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The elasticband data frame has 7 rows and 2 columns giving, for each amount by which an elastic band is stretched over the end of a ruler, the distance that the band traveled when released. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/elasticband.csv", "filename": "elasticband", "name": "Elastic Band Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The frosted flakes data frame has 101 rows and 2 columns giving the sugar concentration (in percent) for 25 g samples of a cereal as measured by 2 methods – high performance liquid chromatography (a slow accurate lab method) and a quick method using the infra-analyzer 400. ", + "description": "The frosted flakes data frame has 101 rows and 2 columns giving the sugar concentration (in percent) for 25 g samples of a cereal as measured by 2 methods - high performance liquid chromatography (a slow accurate lab method) and a quick method using the infra-analyzer 400. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/frostedflakes.csv", "filename": "frostedflakes", "name": "Frosted Flakes data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The modelcars data frame has 12 rows and 2 columns. The data are for an experiment in which a model car was released three times at each of four different distances up a 20 degree ramp. The experimenter recorded distances traveled from the bottom of the ramp across a concrete floor. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/modelcars.csv", "filename": "modelcars", "name": "Model Car Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "SASname and longname are from the SAS XPT file nass9702cor.XPT that is available from the webite noted below. The name shortname is the name used in the data frame nass9702cor, not included in this package, but available from my website that is noted below. It is also used in nassCDS, for columns that nassCDS includes. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/nasshead.csv", "filename": "nasshead", "name": "Documentation of names of columns in nass9702cor", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data giving thickness (mm), height (cm), width (cm) and weight (g), of 12 books. 
Books were selected so that thickness decreased as page area increased ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/oddbooks.csv", "filename": "oddbooks", "name": "Measurements on 12 books", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Record of the number and type of O-ring failures prior to the tragic Challenger mission in January, 1986. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/orings.csv", "filename": "orings", "name": "Challenger O-rings Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The pair65 data frame has 9 rows and 2 columns. Eighteen elastic bands were divided into nine pairs, with bands of similar stretchiness placed in the same pair. One member of each pair was placed in hot water (60-65 degrees C) for four minutes, while the other was left at ambient temperature. After a wait of about ten minutes, the amounts of stretch, under a 1.35 kg weight, were recorded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/pair65.csv", "filename": "pair65", "name": "Heated Elastic Bands", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is a subset of the allbacks data frame which gives measurements on the volume and weight of 8 paperback books. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/softbacks.csv", "filename": "softbacks", "name": "Measurements on a Selection of Paperback Books", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Closing numbers for S and P 500 Index, Jan. 1, 1990 through early 2000. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/SP500close.csv", "filename": "SP500close", "name": "Closing Numbers for S and P 500 Index", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Closing numbers for S and P 500 Index, Jan. 1, 1990 through early 2000. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/SP500W90.csv", "filename": "SP500W90", "name": "Closing Numbers for S and P 500 Index - First 100 Days of 1990", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Four x-y datasets which have the same traditional statistical properties (mean, variance, correlation, regression line, etc.), yet are quite different. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/anscombe.csv", "filename": "anscombe", - "name": "Anscombe's Quartet of ‘Identical’ Simple Linear Regressions", - "number_format": 31, - "remove_quotes": true, + "name": "Anscombe's Quartet of \"Identical\" Simple Linear Regressions", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The numbers of “great” inventions and scientific discoveries in each year from 1860 to 1959. ", + "description": "The numbers of \u201cgreat\u201d inventions and scientific discoveries in each year from 1860 to 1959. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/discoveries.csv", "filename": "discoveries", "name": "Yearly Numbers of Important Discoveries", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Freeny's data on quarterly revenue and explanatory variables. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/freeny.csv", "filename": "freeny", "name": "Freeny's Revenue Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A correlation matrix of eight physical measurements on 305 girls between ages seven and seventeen. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/Harman23.cor.csv", "filename": "Harman23cor", "name": "Harman Example 2.3", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A classical N, P, K (nitrogen, phosphate, potassium) factorial experiment on the growth of peas conducted on 6 blocks. Each half of a fractional factorial design confounding the NPK interaction was used on 3 of the plots. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/npk.csv", "filename": "npk", "name": " Classical N, P, K Factorial Experiment ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "400 triples of successive random numbers were taken from the VAX FORTRAN function RANDU running under VMS 1.5. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/randu.csv", "filename": "randu", "name": "Random Numbers from Congruential Generator RANDU", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set gives the number of warp breaks per loom, where a loom corresponds to a fixed length of yarn. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/warpbreaks.csv", "filename": "warpbreaks", "name": "The Number of Breaks in Yarn during Weaving", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 2798 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Catsup.csv", "filename": "Catsup", "name": "Choice of Brand for Catsup ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "a cross-section from 2002–03 \nnumber of observations : 227 \nobservation : individuals \ncountry : United States ", + "description": "a cross-section from 2002-03 \nnumber of observations : 227 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/FriendFoe.csv", "filename": "FriendFoe", "name": "Data from the Television Game Show Friend Or Foe ? 
", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "four–weekly observations from 1951–03–18 to 1953–07–11 \nnumber of observations : 30 \nobservation : country \ncountry : United States ", + "description": "four-weekly observations from 1951-03-18 to 1953-07-11 \nnumber of observations : 30 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Icecream.csv", "filename": "Icecream", "name": "Ice Cream Consumption ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "monthly observations from 1946–12 to 1991–02 \nnumber of observations : 531 \nobservation : country \ncountry : United–States ", + "description": "monthly observations from 1946-12 to 1991-02 \nnumber of observations : 531 \nobservation : country \ncountry : United-States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Irates.csv", "filename": "Irates", "name": "Monthly Interest Rates ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 4956 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Ketchup.csv", "filename": "Ketchup", "name": "Choice of Brand for Ketchup ", - "number_format": 31, - "remove_quotes": true, "separator": ",", 
"simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 453 \nobservation : individuals ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Mode.csv", "filename": "Mode", "name": "Mode Choice ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "monthly observations from 1948-01 to 2001-06 \nnumber of observations : 642 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Orange.csv", "filename": "Orange_", "name": "The Orange Juice Data Set ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "daily observations from 1981–01 to 1991–04 \nnumber of observations : 2783 ", + "description": "daily observations from 1981-01 to 1991-04 \nnumber of observations : 2783 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/SP500.csv", "filename": "SP500", "name": "Returns on Standard & Poor's 500 Index ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 2412 \nobservation : individuals \ncountry : United States ", "url": 
"http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Yogurt.csv", "filename": "Yogurt", "name": "Choice of Brand for Yogurts ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The daily closing values of the S&P index from Monday 4th January 1960 until Friday 11th June 1993. The data are contained in a numeric vector. The dates of each observation are contained in a times attribute, which is an object of class \"POSIXct\" (see DateTimeClasses). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/evir/sp.raw.csv", "filename": "sp_raw", "name": "SP Data to June 1993", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The daily log returns on the S&P index value from Tuesday 5th January 1960 until Friday 16 October 1987. The data are contained in a numeric vector. The dates of each observation are contained in a times attribute, which is an object of class \"POSIXct\" (see DateTimeClasses). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/evir/spto87.csv", "filename": "spto87", "name": "SP Return Data to October 1987", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly production of woollen yarn in Australia: tonnes. Mar 1965 – Sep 1994. ", + "description": "Quarterly production of woollen yarn in Australia: tonnes. Mar 1965 - Sep 1994. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/forecast/woolyrnq.csv", "filename": "woolyrnq", "name": "Quarterly production of woollen yarn in Australia", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains a case-control indicator and twelve microsatellite markers. An extra unphased individual with the following genotype \n 2 7 7 7 1 3 2 2 2 2 6 3\n 3 8 10 8 3 9 3 4 2 2 7 5\nhas not been included. \nThe inter-marker distances (Morgan) are as follows, \n0.03, 0.065, 0.00125, 0.00125, 0.00125, 0.00125, 0.00125, 0.00125, 0.00125, 0.00125, 0.045 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/fa.csv", "filename": "fa", "name": "Friedreich Ataxia data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data contains data on 51 individuals in a pedigree. Below it is used for comparing results from various packages. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/l51.csv", "filename": "l51", "name": "An example pedigree data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is the companion data for ACEnucfam. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gap/mfblong.csv", "filename": "mfblong", "name": "Example data for ACEnucfam", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The koch data frame has 288 rows and 4 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/geepack/koch.csv", "filename": "koch", "name": "Ordinal Data from Koch", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "All built-in colors() translated into Luv colour space. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/luv_colours.csv", "filename": "luv_colours", "name": "colors() in Luv space", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Edgeworth (1885) took the first 75 lines in Book XI of Virgil's Aeneid and classified each of the first four \"feet\" of the line as a dactyl (one long syllable followed by two short ones) or not. \nGrouping the lines in blocks of five gave a 4 x 25 table of counts, represented here as a data frame with ordered factors, Foot and Lines. Edgeworth used this table in what was among the first examples of analysis of variance applied to a two-way classification. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Dactyl.csv", "filename": "Dactyl", "name": " Edgeworth's counts of dactyls in Virgil's Aeneid ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Andre-Michel Guerry (1833) was the first to systematically collect and analyze social data on such things as crime, literacy and suicide with the view to determining social laws and the relations among these variables. \nThe Guerry data frame comprises a collection of 'moral variables' on the 86 departments of France around 1830. A few additional variables have been added from other sources", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Guerry.csv", "filename": "Guerry", "name": " Data from A.-M. Guerry, \"Essay on the Moral Statistics of France\" ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In 1693 the famous English astronomer Edmond Halley studied the birth and death records of the city of Breslau, which had been transmitted to the Royal Society by Caspar Neumann. He produced a life table showing the number of people surviving to any age from a cohort born the same year. He also used his table to compute the price of life annuities. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/HalleyLifeTable.csv", "filename": "HalleyLifeTable", "name": " Halley's Life Table ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Charles Joseph Minard's graphic depiction of the fate of Napoleon's Grand Army in the Russian campaign of 1812 has been called the \"greatest statistical graphic ever drawn\" (Tufte, 1983). Friendly (2002) describes some background for this graphic, and presented it as Minard's Challenge: to reproduce it using modern statistical or graphic software, in a way that showed the elegance of some computer language to both describe and produce this graphic. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Minard.cities.csv", "filename": "Minard", "name": " Data from Minard's famous graphic map of Napoleon's march on Moscow ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Charles Joseph Minard's graphic depiction of the fate of Napoleon's Grand Army in the Russian campaign of 1812 has been called the \"greatest statistical graphic ever drawn\" (Tufte, 1983). Friendly (2002) describes some background for this graphic, and presented it as Minard's Challenge: to reproduce it using modern statistical or graphic software, in a way that showed the elegance of some computer language to both describe and produce this graphic. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Minard.temp.csv", "filename": "Minard_temp", "name": " Data from Minard's famous graphic map of Napoleon's march on Moscow ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Charles Joseph Minard's graphic depiction of the fate of Napoleon's Grand Army in the Russian campaign of 1812 has been called the \"greatest statistical graphic ever drawn\" (Tufte, 1983). Friendly (2002) describes some background for this graphic, and presented it as Minard's Challenge: to reproduce it using modern statistical or graphic software, in a way that showed the elegance of some computer language to both describe and produce this graphic. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Minard.troops.csv", "filename": "Minard_troops", "name": " Data from Minard's famous graphic map of Napoleon's march on Moscow ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data set is concerned with the problem of aligning the coordinates of points read from old maps (1688 - 1818) of the Great Lakes area. 39 easily identifiable points were selected in the Great Lakes area, and their (lat, long) coordinates were recorded using a grid overlaid on each of 11 old maps, and using linear interpolation. 
\nIt was conjectured that maps might be systematically in error in five key ways: (a) constant error in latitude; (b)constant error in longitude; (c) proportional error in latitude; (d)proportional error in longitude; (e) angular error from a non-zero difference between true North and the map's North. \nOne challenge from these data is to produce useful analyses and graphical displays that relate to these characteristics or to other aspects of the data. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/OldMaps.csv", "filename": "OldMaps", "name": " Latitudes and Longitudes of 39 Points in 11 Old Maps ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Meta-analysis of studies comparing two different toothpastes. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/toothpaste.csv", "filename": "toothpaste", "name": " Toothpaste Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on the breakage angle of chocolate cakes made with three different recipes and baked at six different temperatures. This is a split-plot design with the recipes being whole-units and the different temperatures being applied to sub-units (within replicates). The experimental notes suggest that the replicate numbering represents temporal ordering. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lme4/cake.csv", "filename": "cake", "name": "Breakage Angle of Chocolate Cakes", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These are the item responses to a questionnaire on verbal aggression. These data are used throughout De Boeck and Wilson, Explanatory Item Response Models (Springer, 2004) to illustrate various forms of item response models. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lme4/VerbAgg.csv", "filename": "VerbAgg", "name": "Verbal Aggression item responses", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Dyestuff data frame provides the yield of dyestuff (Naphthalene Black 12B) from 5 different preparations from each of 6 different batches of an intermediate product (H-acid). The Dyestuff2 data were generated data in the same structure but with a large residual variance relative to the batch variance. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lme4/Dyestuff2.csv", "filename": "Dyestuff2", "name": "Yield of dyestuff by batch", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A list of two vectors, giving the wear of shoes of materials A and B for one foot each of ten boys. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/shoes.csv", "filename": "shoes", "name": " Shoe wear data of Box, Hunter and Hunter ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The UScereal data frame has 65 rows and 11 columns. The data come from the 1993 ASA Statistical Graphics Exposition, and are taken from the mandatory F&DA food label. The data have been normalized here to a portion of one American cup. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/UScereal.csv", "filename": "UScereal", "name": " Nutritional and Marketing Information on US Cereals ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A random subsample of the simulated data used in Imai, Tingley, Yamamoto (2012). The data contains 1000 rows and 7 columns with no missing values. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mediation/boundsdata.csv", "filename": "boundsdata", "name": "Example Data for the Design Functions", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A randomly generated dataset containing 2000 rows and 7 columns with no missing values. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mediation/CEDdata.csv", "filename": "CEDdata", "name": "Example Data for the Crossover Encouragement Design", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A character vector with two or three character representations of each card in a standard 52-card deck. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Cards.csv", "filename": "Cards", "name": "Standard Deck of Cards", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Weights of a sample of dimes. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Dimes.csv", "filename": "Dimes", "name": "Weight of dimes", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "http://vincentarelbundock.github.io/Rdatasets/doc/mosaicData/KidsFeet.html", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/KidsFeet.csv", "filename": "KidsFeet", "name": "Foot measurements in children", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Holzinger-Swineford (1937) introduced the bifactor model of a general factor and uncorrelated 
group factors. The Holzinger data sets are original 14 * 14 matrix from their paper as well as a 9 *9 matrix used as an example by Joreskog. The Thurstone correlation matrix is a 9 * 9 matrix of correlations of ability items. The Reise data set is 16 * 16 correlation matrix of mental health items. The Bechtholdt data sets are both 17 x 17 correlation matrices of ability tests. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Bechtoldt.csv", "filename": "Bechtoldt", "name": "Seven data sets showing a bifactor solution.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Holzinger-Swineford (1937) introduced the bifactor model of a general factor and uncorrelated group factors. The Holzinger data sets are original 14 * 14 matrix from their paper as well as a 9 *9 matrix used as an example by Joreskog. The Thurstone correlation matrix is a 9 * 9 matrix of correlations of ability items. The Reise data set is 16 * 16 correlation matrix of mental health items. The Bechtholdt data sets are both 17 x 17 correlation matrices of ability tests. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Bechtoldt.1.csv", "filename": "Bechtoldt_1", "name": "Seven data sets showing a bifactor solution.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Holzinger-Swineford (1937) introduced the bifactor model of a general factor and uncorrelated group factors. The Holzinger data sets are original 14 * 14 matrix from their paper as well as a 9 *9 matrix used as an example by Joreskog. 
The Thurstone correlation matrix is a 9 * 9 matrix of correlations of ability items. The Reise data set is 16 * 16 correlation matrix of mental health items. The Bechtholdt data sets are both 17 x 17 correlation matrices of ability tests. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Bechtoldt.2.csv", "filename": "Bechtoldt_2", "name": "Seven data sets showing a bifactor solution.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Normally, min.res factor analysis and maximum likelihood produce very similar results. This data set (from Alexandra Blant) does not. Warnings are given for the min.res solution, the pa solution, but not the old.min nor the mle solution. Included as a test case for the factor analysis function. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/blant.csv", "filename": "blant", "name": "A 29 x 29 matrix that produces weird factor analytic results", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "35 items for 150 subjects from Bond's Logical Operations Test. A good example of Item Response Theory analysis using the Rasch model. One parameter (Rasch) analysis and two parameter IRT analyses produce somewhat different results. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/blot.csv", "filename": "blot", - "name": "Bond's Logical Operations Test – BLOT ", - "number_format": 31, - "remove_quotes": true, + "name": "Bond's Logical Operations Test - BLOT ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Gleser, Cronbach and Rajaratnam (1965) discuss the estimation of variance components and their ratios as part of their introduction to generalizability theory. This is a adaptation of their \"illustrative data for a completely matched G study\" (Table 3). 12 patients are rated on 6 symptoms by two judges. Components of variance are derived from the ANOVA. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Gleser.csv", "filename": "Gleser", "name": " Example data from Gleser, Cronbach and Rajaratnam (1965) to show basic principles of generalizability theory. ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Gorsuch (1997) suggests an alternative to the classic Dwyer (1937) factor extension technique. This data set is taken from that article. Useful for comparing link{fa.extension} with and without the correct=TRUE option. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Gorsuch.csv", "filename": "Gorsuch", "name": "Example data set from Gorsuch (1997) for an example factor extension. 
", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Holzinger-Swineford (1937) introduced the bifactor model of a general factor and uncorrelated group factors. The Holzinger data sets are original 14 * 14 matrix from their paper as well as a 9 *9 matrix used as an example by Joreskog. The Thurstone correlation matrix is a 9 * 9 matrix of correlations of ability items. The Reise data set is 16 * 16 correlation matrix of mental health items. The Bechtholdt data sets are both 17 x 17 correlation matrices of ability tests. \nHolzinger and Swineford (1937) introduced the bifactor model (one general factor and several group factors) for mental abilities. This is a nice demonstration data set of a hierarchical factor structure that can be analyzed using the omega function or using sem. The bifactor model is typically used in measures of cognitive ability. \nThere are several ways to analyze such data. One is to use the omega function to do a hierarchical factoring using the Schmid-Leiman transformation. This can then be done as an exploratory and then as a confirmatory model using omegaSem. Another way is to do a regular factor analysis and use either a bifactor or biquartimin rotation. These latter two functions implement the Jennrich and Bentler (2011) bifactor and biquartimin transformations. The bifactor rotation suffers from the problem of local minima (Mansolf and Reise, 2016) and thus a mixture of exploratory and confirmatory analysis might be preferred. \nThe 14 variables are ordered to reflect 3 spatial tests, 3 mental speed tests, 4 motor speed tests, and 4 verbal tests. The sample size is 355. 
\nAnother data set from Holzinger (Holzinger.9) represents 9 cognitive abilities (Holzinger, 1939) and is used as an example by Karl Joreskog (2003) for factor analysis by the MINRES algorithm and also appears in the LISREL manual as example NPV.KM. \nAnother classic data set is the 9 variable Thurstone problem which is discussed in detail by R. P. McDonald (1985, 1999) and is used as an example in the sem package as well as in the PROC CALIS manual for SAS. These nine tests were grouped by Thurstone and Thurstone, 1941 (based on other data) into three factors: Verbal Comprehension, Word Fluency, and Reasoning. The original data came from Thurstone and Thurstone (1941) but were reanalyzed by Bechthold (1961) who broke the data set into two. McDonald, in turn, selected these nine variables from the larger set of 17 found in Bechtoldt.2. The sample size is 213. \nAnother set of 9 cognitive variables attributed to Thurstone (1933) is the data set of 4,175 students reported by Professor Brigham of Princeton to the College Entrance Examination Board. This set does not show a clear bifactor solution but is included as a demonstration of the differences between a maximum likelihood factor analysis solution versus a principal axis factor solution. \nTucker (1958) uses 9 variables from Thurstone and Thurstone (1941) for his example of interbattery factor analysis. \nMore recent applications of the bifactor model are to the measurement of psychological status. The Reise data set is a correlation matrix based upon >35,000 observations to the Consumer Assessment of Health Care Providers and Systems survey instrument. Reise, Morizot, and Hays (2007) describe a bifactor solution based upon 1,000 cases. \nThe five factors from Reise et al. reflect Getting care quickly (1-3), Doctor communicates well (4-7), Courteous and helpful staff (8,9), Getting needed care (10-13), and Health plan customer service (14-16). 
\nThe two Bechtoldt data sets are two samples from Thurstone and Thurstone (1941). They include 17 variables, 9 of which were used by McDonald to form the Thurstone data set. The sample sizes are 212 and 213 respectively. The six proposed factors reflect memory, verbal, words, space, number and reasoning with three markers for all except the rote memory factor. 9 variables from this set appear in the Thurstone data set. \nTwo more data sets with similar structures are found in the Harman data set. This includes another 9 variables (with 696 subjects) from Holzinger used by Harman link{Harman.Holzinger} as well as 8 affective variables from link{burt}. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Holzinger.csv", "filename": "Holzinger", "name": "Seven data sets showing a bifactor solution.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Holzinger-Swineford (1937) introduced the bifactor model of a general factor and uncorrelated group factors. The Holzinger data sets are original 14 * 14 matrix from their paper as well as a 9 *9 matrix used as an example by Joreskog. The Thurstone correlation matrix is a 9 * 9 matrix of correlations of ability items. The Reise data set is 16 * 16 correlation matrix of mental health items. The Bechtholdt data sets are both 17 x 17 correlation matrices of ability tests. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Reise.csv", "filename": "Reise", "name": "Seven data sets showing a bifactor solution.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "John Schmid and John M. Leiman (1957) discuss how to transform a hierarchical factor structure to a bifactor structure. Schmid contains the example 12 x 12 correlation matrix. schmid.leiman is a 12 x 12 correlation matrix with communalities on the diagonal. This can be used to show the effect of correcting for attenuation. Two additional data sets are taken from Chen et al. (2006). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Schmid.csv", "filename": "Schmid", "name": "12 variables created by Schmid and Leiman to show the Schmid-Leiman Transformation", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Shapiro and ten Berge use the Schutz correlation matrix as an example for Minimum Rank Factor Analysis. The Schutz data set is also a nice example of how normal minres or maximum likelihood will lead to a Heywood case, but minrank factoring will not. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Schutz.csv", "filename": "Schutz", "name": " The Schutz correlation matrix example from Shapiro and ten Berge", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Holzinger-Swineford (1937) introduced the bifactor model of a general factor and uncorrelated group factors. The Holzinger data sets are original 14 * 14 matrix from their paper as well as a 9 *9 matrix used as an example by Joreskog. The Thurstone correlation matrix is a 9 * 9 matrix of correlations of ability items. The Reise data set is 16 * 16 correlation matrix of mental health items. The Bechtholdt data sets are both 17 x 17 correlation matrices of ability tests. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Thurstone.csv", "filename": "Thurstone", "name": "Seven data sets showing a bifactor solution.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Holzinger-Swineford (1937) introduced the bifactor model of a general factor and uncorrelated group factors. The Holzinger data sets are original 14 * 14 matrix from their paper as well as a 9 *9 matrix used as an example by Joreskog. The Thurstone correlation matrix is a 9 * 9 matrix of correlations of ability items. The Reise data set is 16 * 16 correlation matrix of mental health items. The Bechtholdt data sets are both 17 x 17 correlation matrices of ability tests. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Thurstone.33.csv", "filename": "Thurstone_33", "name": "Seven data sets showing a bifactor solution.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Holzinger-Swineford (1937) introduced the bifactor model of a general factor and uncorrelated group factors. The Holzinger data sets are original 14 * 14 matrix from their paper as well as a 9 *9 matrix used as an example by Joreskog. The Thurstone correlation matrix is a 9 * 9 matrix of correlations of ability items. The Reise data set is 16 * 16 correlation matrix of mental health items. The Bechtholdt data sets are both 17 x 17 correlation matrices of ability tests. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/Thurstone.9.csv", "filename": "Thurstone_9", "name": "Seven data sets showing a bifactor solution.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A demonstration that a correlation may be decomposed to a within group correlation and a between group correlations and these two correlations are independent. Between group correlations are sometimes called ecological correlations, the decomposition into within and between group correlations is a basic concept in multilevel modeling. This data set shows the composite correlations between 9 variables, representing 16 cases with four groups. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/withinBetween.csv", "filename": "withinBetween", "name": "An example of the distinction between within group and between group correlations", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data was collected from a sensory experiment conducted at Iowa State University in 2004. The investigators were interested in the effect of using three different fryer oils had on the taste of the fries. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/reshape2/french_fries.csv", "filename": "french_fries", "name": "Sensory data from a french fries experiment.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A small demo dataset describing John and Mary Smith. Used in the introductory vignette. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/reshape2/smiths.csv", "filename": "smiths", "name": "Demo data describing the Smiths.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This is an artificial data set, cleverly construced and used by Antille and May to demonstrate ‘problems’ with LMS and LTS. ", + "description": "This is an artificial data set, cleverly construced and used by Antille and May to demonstrate \"problems\" with LMS and LTS. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/exAM.csv", "filename": "exAM", "name": "Example Data of Antille and May - for Simple Regression", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Artificial Data Set generated by Hawkins, Bradu, and Kass (1984). The data set consists of 75 observations in four dimensions (one response and three explanatory variables). It provides a good example of the masking effect. The first 14 observations are outliers, created in two groups: 1–10 and 11–14. Only observations 12, 13 and 14 appear as outliers when using classical methods, but can be easily unmasked using robust distances computed by, e.g., MCD - covMcd(). ", + "description": "Artificial Data Set generated by Hawkins, Bradu, and Kass (1984). The data set consists of 75 observations in four dimensions (one response and three explanatory variables). It provides a good example of the masking effect. The first 14 observations are outliers, created in two groups: 1-10 and 11-14. Only observations 12, 13 and 14 appear as outliers when using classical methods, but can be easily unmasked using robust distances computed by, e.g., MCD - covMcd(). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/hbk.csv", "filename": "hbk", "name": "Hawkins, Bradu, Kass's Artificial Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A small counterexample data set devised by Andrew Siegel. 
Six (out of nine) data points lie on the line y = 0 such that some robust regression estimators exhibit the “exact fit” property. ", + "description": "A small counterexample data set devised by Andrew Siegel. Six (out of nine) data points lie on the line y = 0 such that some robust regression estimators exhibit the \u201cexact fit\u201d property. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/SiegelsEx.csv", "filename": "SiegelsEx", "name": "Siegel's Exact Fit Example Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The monthly use of steam (Steam) in a factory may be modeled and described as function of the operating days per month (Operating.Days) and mean outside temperature per month (Temperature). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/steamUse.csv", "filename": "steamUse", "name": "Steam Usage Data (Excerpt)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Artificial balanced panel data set from Petersen (2009) for illustrating and benchmarking clustered standard errors. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/sandwich/PetersenCL.csv", "filename": "PetersenCL", "name": "Petersen's Simulated Data for Assessing Clustered Standard Errors", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Nutrition content for a sample of 36 different brands of breakfast cereals \nData give nutrition contents (per serving) for 36 breakfast cereals. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Cereal.csv", "filename": "Cereal", "name": "Nutrition Content of Breakfast Cereals", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Grip strength, attractiveness, and shoulder-hip ratio for men \nFacial attractiveness of several men was rated by female college students. Maximum grip strength was also measured, along with shoulder to hip ratio, age of first sex, and number of sex partners. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Faces.csv", "filename": "Faces", "name": "Facial Attractiveness of Men", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Film data from Maltin's Movie and Video Guide \nOne statistician movie fan decided to use statistics to study the movie ratings in his favorite movie guide, Movie and Video Guide (1996), by Leonard Maltin. 
Maltin rates movies on a one-star to four-star system, in increments of half-stars, with higher numbers being better. The guide also includes additional information on each film. The statistician used a random number generator to select a simple random sample of 100 movies rated by the Guide. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Film.csv", "filename": "Film", "name": "Film Data from Leonard Maltin's Guide", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly US residential consumption of fuel oil (1983-2016) \nU.S. residential consumption of distillate fuel oil each month from January 1983 through December 2016. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/HeatingOil.csv", "filename": "HeatingOil", "name": "Heating Oil Consumption", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Which \"lost\" letters will be returned by the public? \nIn 1999 Grinnell College students Laurelin Muir and Adam Gratch conducted an experiment for an introductory statistics class. They intentionally \"lost\" 140 letters in either the city of Des Moines, the town of Grinnell, or on the Grinnell College campus. Half of each sample were addressed to Friends of the Confederacy and the other half to Iowa Peaceworks. The students kept track of which letters were eventually returned. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/LostLetter.csv", "filename": "LostLetter", "name": "Return Rates for \"Lost\" Letters", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Unpopped kernels in bags of microwave popcorn \nTwo students, Lara and Lisa, conducted an experiment to compare Orville Redenbacher's Light Butter Flavor vs. Seaway microwave popcorn. They made 12 batches of popcorn, 6 of each type, cooking each batch for four minutes. They noted that the microwave oven seemed to get warmer as they went along so they kept track of six trials and randomly chose which brand would go first for each trial. For a response variable they counted the number of unpopped kernels and then adjusted the count for Seaway for having more ounces per bag of popcorn (3.5 vs 3.0). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Popcorn.csv", "filename": "Popcorn", "name": "Popcorn Popping Success", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements for a hypothetical set of nine rectangles. \nAreas for rectangles of width 1, 4, or 10 and length of 1, 4, or 10. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Rectangles.csv", "filename": "Rectangles", "name": "Measurements of Rectangles", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Results from the online game Words with Friends (solo play) ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/WordsWithFriends.csv", "filename": "WordsWithFriends", "name": "Words with Friends Scores", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Results from an experiment to move wet/dry objects with wrinkled/dry fingers \nEach of 20 participants were measured doing a \"transfer task\" several times under each of four conditions. The transfer task was to pick up an item with the right hand thumb and index finger, pass the item through a small hole and grab it with the left hand, and then put the item into a box that had a hole in the lid. Sometimes the participant's fingers were wrinkled; sometimes the items were sitting in water. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Wrinkle.csv", "filename": "Wrinkle", "name": "Moving Wet Objects with Wrinkled Fingers", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data from Mosteller & Wallace (1984) investigating the use of certain keywords (‘may’ in this data set) to identify the author of 12 disputed ‘Federalist Papers’ by Alexander Hamilton, John Jay and James Madison. ", + "description": "Data from Mosteller & Wallace (1984) investigating the use of certain keywords (\"may\" in this data set) to identify the author of 12 disputed \"Federalist Papers\" by Alexander Hamilton, John Jay and James Madison. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Federalist.csv", "filename": "Federalist", - "name": "‘May’ in Federalist Papers", - "number_format": 31, - "remove_quotes": true, + "name": "\"May\" in Federalist Papers", "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data about non-response for a Danish survey in 1965. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/NonResponse.csv", "filename": "NonResponse", "name": "Non-Response Survey Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Thornes \\& Collard (1979), reported in Gilbert (1981), on pre- and extra-marital sex and divorce. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/PreSex.csv", "filename": "PreSex", "name": "Pre-marital Sex and Divorce", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Pearson (1900) about the frequency of 5s and 6s in throws of 12 dice. Weldon tossed the dice 26,306 times and reported his results in a letter to Francis Galton on 1894-02-02. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/WeldonDice.csv", "filename": "WeldonDice", "name": "Weldon's Dice Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Jinkinson \\& Slater (1981) and Hoaglin \\& Tukey (1985) reporting the frequency distribution of females in 100 queues of length 10 in a London Underground station. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/WomenQueue.csv", "filename": "WomenQueue", "name": "Women in Queues", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Sample data for the bivariate probit regression. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/bivariate.csv", "filename": "bivariate", "name": "Sample data for bivariate probit regression", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dataframe contains 20 annual observations from 1935 to 1954 of 7 variables for two firms General Electric and Westinghouse. Columns are Year; Ige and Iw = Gross investment for GE and W,respectively; Fge and Fw=Market value of Firm as of begin of the year; Cge and Cw= Capital stock measure as of begin of the year. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/grunfeld.csv", "filename": "grunfeld", "name": "Simulation Data for model Seemingly Unrelated Regression (sur) that corresponds to method SUR of systemfit", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dataframe contains annual observations of US economy from 1920 to 1940. The columns are, Year, C=Consumption, P=Corporate profits, P1=Previous year corporate profit,Wtot=Total wage, Wp=Private wage bill, Wg=Government wage bill,I=Investment, K1=Previous year capital stock,X=GNP,G=Government spending, T=Taxes, X1=Previous year GNP, Tm=Year-1931. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/klein.csv", "filename": "klein", "name": "Simulation Data for model Two-Stage Least Square (twosls) that corresponds to method 2SLS of systemfit", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Dataframe contains 20 annual observations of a supply/demand model with 5 variables. Columns are q=Food consumption per capita, p=Ratio of food price to general consumer prices, d=Disposable income in contstant dollars, f=Ratio of preceding year's prices received by farmers to general consumer prices, a=Time index. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/kmenta.csv", "filename": "kmenta", "name": "Simulation Data for model Three-Stage Least Square (threesls) that corresponds to method 3SLS of systemfit", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Table of links for help.zelig for the companion MatchIt package. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/MatchIt.url.csv", "filename": "MatchIt_url", "name": "Table of links for Zelig", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The original painters data contain the subjective assessment, on a 0 to 20 integer scale, of 54 classical painters. 
The newpainters data discretizes the subjective assessment by quartiles with thresholds 25%, 50%, 75%. The painters were assessed on four characteristics: composition, drawing, colour and expression. The data is due to the Eighteenth century art critic, de Piles. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/newpainters.csv", "filename": "newpainters", "name": "The Discretized Painter's Data of de Piles", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Table of links for help.zelig for the core Zelig package. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/Zelig.url.csv", "filename": "Zelig_url", "name": "Table of links for Zelig", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Other" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on police treatment of individuals arrested in Toronto for simple possession of small quantities of marijuana. The data are part of a larger data set featured in a series of articles in the Toronto Star newspaper. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Arrests.csv", "filename": "Arrests", "name": "Arrests for Marijuana Possession", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data are drawn from the 1997-2001 British Election Panel Study (BEPS). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/BEPS.csv", "filename": "BEPS", "name": "British Election Panel Study", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Bfox data frame has 30 rows and 7 columns. Time-series data on Canadian women's labor-force participation, 1946–1975. ", + "description": "The Bfox data frame has 30 rows and 7 columns. Time-series data on Canadian women's labor-force participation, 1946-1975. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Bfox.csv", "filename": "Bfox", "name": "Canadian Women's Labour-Force Participation", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are drawn from the 2011 Canadian National Election Study, including a question on banning abortion and variables related to the sampling design.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/CES11.csv", "filename": "CES11", "name": " 2011 Canadian National Election Study, With Attitude Toward Abortion", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Chile data frame has 2700 rows and 8 columns. The data are from a national survey conducted in April and May of 1988 by FLACSO/Chile. There are some missing data. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Chile.csv", "filename": "Chile", "name": "Voting Intentions in the 1988 Chilean Plebiscite", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Florida data frame has 67 rows and 11 columns. Vote by county in Florida for President in the 2000 election. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Florida.csv", "filename": "Florida", "name": "Florida County Voting", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Freedman data frame has 110 rows and 4 columns. The observations are U. S. metropolitan areas with 1968 populations of 250,000 or more. There are some missing data. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Freedman.csv", "filename": "Freedman", "name": "Crowding and Crime in U. S. Metropolitan Areas", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Greene data frame has 384 rows and 7 columns. These are cases filed in 1990, in which refugee claimants rejected by the Canadian Immigration and Refugee Board asked the Federal Court of Appeal for leave to appeal the negative ruling of the Board. 
\n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Greene.csv", "filename": "Greene", "name": "Refugee Appeals", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Hartnagel data frame has 38 rows and 7 columns. The data are an annual time-series from 1931 to 1968. There are some missing data. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Hartnagel.csv", "filename": "Hartnagel", "name": "Canadian Crime-Rates Time Series", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Results of nearly all stops made by the Minneapolis Police Department for the year 2017. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/MplsStops.csv", "filename": "MplsStops", "name": " Minneapolis Police Department 2017 Stop Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set is originally from Rossi et al. (1980), and is used as an example in Allison (1995). The data pertain to 432 convicts who were released from Maryland state prisons in the 1970s and who were followed up for one year after release. Half the released convicts were assigned at random to an experimental treatment in which they were given financial aid; half did not receive aid. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Rossi.csv", "filename": "Rossi", "name": "Rossi et al.'s Criminal Recidivism Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame with the percents of votes given to the republican candidate in presidential elections from 1856 to 1976. Rows represent the 50 states, and columns the 31 elections. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/cluster/votes.repub.csv", "filename": "votes_repub", "name": "Votes for Republican Candidate in Presidential Elections", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data of 3000 male criminals over 20 years old undergoing their sentences in the chief prisons of England and Wales. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/crimtab.csv", "filename": "crimtab", "name": "Student's 3000 Criminals Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The (approximately) quarterly approval rating for the President of the United States from the first quarter of 1945 to the last quarter of 1974. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/presidents.csv", "filename": "presidents", "name": "Quarterly Approval Ratings of US Presidents", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains statistics, in arrests per 100,000 residents for assault, murder, and rape in each of the 50 US states in 1973. Also given is the percent of the population living in urban areas. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/USArrests.csv", "filename": "USArrests", "name": "Violent Crime Rates by US State", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Lawyers' ratings of state judges in the US Superior Court. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/USJudgeRatings.csv", "filename": "USJudgeRatings", "name": "Lawyers' Ratings of State Judges in the US Superior Court", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 90 observations from 1981 to 1987 \nnumber of observations : 630 \nobservation : regional \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Crime.csv", "filename": "Crime", "name": "Crime in North Carolina ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 601 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Fair.csv", "filename": "Fair", "name": "Extramarital Affairs Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Global Terrorism Database (GTD) \"is a database of incidents of terrorism from 1970 onward\". Through 2015, this database contains information on 141,966 incidents. \nterrorism provides a few summary statistics along with an ordered factor methodology, which Pape et al. 
insisted is necessary, because an increase of over 70 percent in suicide terrorism between 2007 and 2013 is best explained by a methodology change in GTD that occurred on 2011-11-01; Pape's own Suicide Attack Database showed a 19 percent decrease over the same period. \n\nAs noted with the \"description\" above, Pape et al. noted that the GTD reported an increase in suicide terrorism of over 70 percent between 2007 and 2013, while their Suicide Attack Database showed a 19 percent decrease over the same period. Pape et al. insisted that the most likely explanation for this difference is the change in the organization responsible for managing that data collection from ISVG to START. \nIf the issue is restricted to how incidents are classified as \"suicide terrorism\", this concern does not affect the other variables in this summary. \nHowever, if it also impacts what incidents are classified as \"terrorism\", it suggests larger problems. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/incidents.byCountryYr.csv", "filename": "terrorism", "name": " Global Terrorism Database yearly summaries ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "annual observations from 1947 to 1962 \nnumber of observations : 16 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Longley.csv", "filename": "Longley", "name": "The Longley Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Global Terrorism Database (GTD) \"is a database of incidents of 
terrorism from 1970 onward\". Through 2015, this database contains information on 141,966 incidents. \nterrorism provides a few summary statistics along with an ordered factor methodology, which Pape et al. insisted is necessary, because an increase of over 70 percent in suicide terrorism between 2007 and 2013 is best explained by a methodology change in GTD that occurred on 2011-11-01; Pape's own Suicide Attack Database showed a 19 percent decrease over the same period. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/nkill.byCountryYr.csv", "filename": "terrorism2", "name": " Global Terrorism Database yearly summaries ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data.frame describing names containing character codes rare or non-existent in standard English text, e.g., with various accent marks that may not be coded consistently in different locales or by different software. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/nonEnglishNames.csv", "filename": "nonEnglishNames", "name": " Names with Character Set Problems ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1968 to 1976 \nnumber of observations : 62 \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Strike.csv", "filename": "Strike", "name": "Strike Duration Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1968 to 1976 \nnumber of observations : 566 \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/StrikeDur.csv", "filename": "StrikeDur", "name": "Strikes Duration ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "monthly observations from 1968(1) to 1976 (12) \nnumber of observations : 108 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/StrikeNb.csv", "filename": "StrikeNb", "name": "Number of Strikes in Us Manufacturing ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, 
"description": "The Global Terrorism Database (GTD) \"is a database of incidents of terrorism from 1970 onward\". Through 2015, this database contains information on 141,966 incidents. \nterrorism provides a few summary statistics along with an ordered factor methodology, which Pape et al. insisted is necessary, because an increase of over 70 percent in suicide terrorism between 2007 and 2013 is best explained by a methodology change in GTD that occurred on 2011-11-01; Pape's own Suicide Attack Database showed a 19 percent decrease over the same period. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/terrorism.csv", "filename": "terrorism3", "name": "Global Terrorism Database yearly summaries ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Journal of Business Economics and Statistics web site : http://amstat.tandfonline.com/loi/ubes20 \nnumber of observations : 3343 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/UnempDur.csv", "filename": "UnempDur", "name": "Unemployment Duration ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1993 \nnumber of observations : 452 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Unemployment.csv", "filename": "Unemployment", "name": "Unemployment Duration ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": 
"yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on classification activity of the United States government. \nFitzpatrick (2013) notes that the dramatic jump in derivative classification activity (DerivClassActivity) that occurred in 2009 coincided with \"New guidance issued to include electronic environment\". Apart from the jump in 2009, the DerivClassActivity tended to increase by roughly 12 percent per year (with a standard deviation of the increase in the natural logarithm of DerivClassActivity of 0.18). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/USclassifiedDocuments.csv", "filename": "USclassifiedDocuments", "name": " Official Secrecy of the United States Government ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "It is commonly claimed that Franklin Roosevelt (FDR) did not end the Great Depression: World War II (WW2) did. This is supported by the 10.6 percent growth per year in Gross Domestic Product (GDP) per capita seen in the standard GDP estimates from 1940 to 1945. It is also supported by the rapid decline in unemployment during the war. \nHowever, no comparable growth spurts in GDP per capita catch the eye in a plot of log(GDP per capita) from 1790 to 2015, whether associated with a war or not, using the Measuring Worth data. The only other features of that plot that seem visually comparable are the economic disaster of Herbert Hoover's presidency (when GDP per capita fell by 10 percent per year, 1929-1932), the impressive growth of the US economy during the first seven years of Franklin Roosevelt's presidency (6.4 percent per year, 1933-1940), and the post-World War II recession (when GDP per capita fell by 7.9 percent per year, 1945-1947). 
\nCloser inspection of this plot suggests that the US economy has generally grown faster after FDR than before. This might plausibly be attributed to \"The Keynesian Ascendancy 1939-1979\". \nUnemployment dropped during the First World War as it did during WW2. Comparable data are not available for the U.S. during other major wars, most notably the American Civil War and the Mexican-American War. \nThis data set provides a platform for testing the effects of presidency, war, and Keynes. It does this by combining the numbers for US population and real GDP per capita dollars from Measuring Worth with the presidency and a list of major wars and an estimate of the battle deaths by year per million population. US unemployment is also considered. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/USGDPpresidents.csv", "filename": "USGDPpresidents", "name": " US GDP per capita with presidents and wars ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The object returned by readUSstateAbbreviations() on May 20, 2013. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/USstateAbbreviations.csv", "filename": "USstateAbbreviations", "name": " Standard abbreviations for states of the United States ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Thousands of words in US tax law for 1995 to 2015 in 10 year intervals. This includes income taxes and all taxes in the code itself (written by congress) and regulations (written by government administrators). 
For 2015 only \"EntireTaxCodeAndRegs\" is given; for other years, this number is broken down by income tax vs. other taxes and code vs. regulations. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/UStaxWords.csv", "filename": "UStaxWords", "name": " Number of Words in US Tax Law", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "prison", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/prison.csv", "filename": "prison", "name": "prison", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The names of each president, the start and end date of their term, and their party of 11 US presidents from Eisenhower to Obama. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/presidential.csv", "filename": "presidential", "name": "Terms of 11 presidents from Eisenhower to Obama", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Waite (1915) was interested in analyzing the association of patterns in fingerprints, and produced a table of counts for 2000 right hands, classified by the number of fingers describable as a \"whorl\", a \"small loop\" (or neither). Because each hand contributes five fingers, the number of Whorls + Loops cannot exceed 5, so the contingency table is necessarily triangular. 
\nKarl Pearson (1904) introduced the test for independence in contingency tables, and by 1913 had developed methods for \"restricted contingency tables,\" such as the triangular table analyzed by Waite. The general formulation of such tests for association in restricted tables is now referred to as models for quasi-independence. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Fingerprints.csv", "filename": "Fingerprints", "name": " Waite's data on Patterns in Fingerprints ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Statistics Of Deadly Quarrels by Lewis Fry Richardson (1960) is one of the earlier attempts at quantification of historical conflict behavior. \nThe data set contains 779 dyadic deadly quarrels that cover a time period from 1809 to 1949. A quarrel consists of one pair of belligerents, and is identified by its beginning date and magnitude (log 10 of the number of deaths). Neither actor in a quarrel is identified by name. \nBecause Richardson took a dyad of belligerents as his unit, a given war, such as World War I or World War II comprises multiple observations, for all pairs of belligerents. For example, there are forty-four pairs of belligerents coded for World War I. \nFor each quarrel, the nominal variables include the type of quarrel, as well as political, cultural, and economic similarities and dissimilarities between the pair of combatants. 
\n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Quarrels.csv", "filename": "Quarrels", "name": " Statistics of Deadly Quarrels ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rearrests of juvenile felons by type of court in which they were tried. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/rearrests.csv", "filename": "rearrests", "name": " Rearrests of Juvenile Felons ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Voting results for 15 congressmen from New Jersey. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/voting.csv", "filename": "voting", "name": " House of Representatives Voting Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Criminologists are interested in the effect of punishment regimes on crime rates. This has been studied using aggregate data on 47 states of the USA for 1960 given in this data frame. The variables seem to have been re-scaled to convenient numbers. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/UScrime.csv", "filename": "UScrime", "name": " The Effect of Punishment Regimes on Crime Rates ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset pertains to children and their families in the United States and is intended to illustrate missing data issues. Note that although the original data are longitudinal, this extract is not. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mi/nlsyV.csv", "filename": "nlsyV", "name": " National Longitudinal Survey of Youth Extract ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A data frame containing country names as used by Gapminder and the maps package to facilitate conversation between the two. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Countries.csv", "filename": "Countries", "name": "Countries", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Marriage records from the Mobile County, Alabama, probate court. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/Marriage.csv", "filename": "Marriage", "name": "Marriage records", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 90 observational units (counties) from 1981 to 1987 \ntotal number of observations : 630 \nobservation : regional \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/Crime.csv", "filename": "Crime_", "name": "Crime in North Carolina", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A panel of 125 observations from 1960 to 1985 \ntotal number of observations : 3250 \nobservation : country \ncountry : World \n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/SumHes.csv", "filename": "SumHes_", "name": "The Penn World Table, v. 5", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Absentee ballot outcomes contrasted with machine ballots, cast in Pennsylvania State Senate elections, selected districts, 1982-1993. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/absentee.csv", "filename": "absentee", "name": "Absentee and Machine Ballots in Pennsylvania State Senate Races", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The results of 239 published opinion polls measuring vote intentions (1st preference vote intention in a House of Representatives election) between the 2004 and 2007 Australian Federal elections, from 4 survey houses. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/AustralianElectionPolling.csv", "filename": "AustralianElectionPolling", "name": " Political opinion polls in Australia, 2004-07 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Aggregate data on the 24 elections to Australia's House of Representatives, 1949 to 2016. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/AustralianElections.csv", "filename": "AustralianElections", "name": "elections to Australian House of Representatives, 1949-2016", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Election returns and identifying information, California's 53 congressional districts in the 2006 Congressional elections. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/ca2006.csv", "filename": "ca2006", "name": "California Congressional Districts in 2006", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "On October 11, 2002, the United States Senate voted 77-23 to authorize the use of military force against Iraq. This data set lists the “Ayes” and “Nays” for each Senator and some covariates. ", + "description": "On October 11, 2002, the United States Senate voted 77-23 to authorize the use of military force against Iraq. This data set lists the \u201cAyes\u201d and \u201cNays\u201d for each Senator and some covariates. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/iraqVote.csv", "filename": "iraqVote", "name": " U.S. Senate vote on the use of force against Iraq, 2002. ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Numeric codes and names of 85 political parties appearing in Poole and Rosenthal's collection of U.S. Congressional roll calls. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/partycodes.csv", "filename": "partycodes", "name": "political parties appearing in the U.S. 
Congress", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Interviewers administering the 2000 American National Election Studies assigned an ordinal rating to each respondent's \"general level of information\" about politics and public affairs. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/politicalInformation.csv", "filename": "politicalInformation", "name": "Interviewer ratings of respondent levels of political information", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Democratic share of the presidential vote, 1932-2016, in each state and the District of Columbia.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/presidentialElections.csv", "filename": "presidentialElections", "name": "elections for U.S. President, 1932-2016, by state", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Numeric codes and names of 50 states and the District of Columbia, required to parse Keith Poole and Howard Rosenthal's collections of U.S. Congressional roll calls. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/state.info.csv", "filename": "state_info", "name": "information about the American states needed for U.S. 
Congress", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Electoral returns, selected constituencies, 1992 general election for the British House of Commons", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/UKHouseOfCommons.csv", "filename": "UKHouseOfCommons", "name": "1992 United Kingdom electoral returns", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Survey data containing self-reports of vote choice in the 1992 U.S. Presidential election, with numerous covariates, from the 1992 American National Election Studies. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/vote92.csv", "filename": "vote92", "name": " Reports of voting in the 1992 U.S. Presidential election. ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set includes four measures of democracy at two points in time, 1960 and 1965, and three measures of industrialization in 1960, for 75 developing countries. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/sem/Bollen.csv", "filename": "Bollen", "name": " Bollen's Data on Industrialization and Political Democracy ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These variables are from the mailback questionnaire to the 1997 Canadian National Election Study, and are intended to tap attitude towards “traditional values.” ", + "description": "These variables are from the mailback questionnaire to the 1997 Canadian National Election Study, and are intended to tap attitude towards \u201ctraditional values.\u201d ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/sem/CNES.csv", "filename": "CNES", "name": "Variables from the 1997 Canadian National Election Study", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Congressional votes on the American Health Care Act in 2017 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/AHCAvote2017.csv", "filename": "AHCAvote2017", "name": "Congressional Votes on American Health Care Act (in 2017)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "US Senate party affiliation and votes on confirming Samuel Alito for the Supreme Court ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/AlitoConfirmation.csv", "filename": "AlitoConfirmation", "name": "US Senate 
Votes on Samuel Alito for the Supreme Court", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Senate votes for Corporate Average Fuel Economy (CAFE) bill ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CAFE.csv", "filename": "CAFE", "name": "US Senate Votes on Corporate Average Fuel Economy Bill", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "US Senate vote on Klobuchar amendment to lower drug prices ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CanadianDrugs.csv", "filename": "CanadianDrugs", "name": "Canadian Drugs Senate Vote", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "2016 US Democratic Presidential primary results ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ClintonSanders.csv", "filename": "ClintonSanders", "name": "Clinton/Sanders Primary Results (2016)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Medical facilities and doctors in a sample of counties. 
\nData compiled from information provided by the American Medical Association on the availability of health care in counties in the United States. A random sample of 53 counties was chosen from among counties with at least two community hospitals. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CountyHealth.csv", "filename": "CountyHealth", "name": "County Health Resources", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "State-by-state information from the 2008 U.S. presidential election \nThis dataset contains information from all 50 states and the District of Columbia for the 2008 U.S. presidential election. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Election08.csv", "filename": "Election08", "name": "2008 U.S. Presidential Election", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "2016 presidential election and state demographic data \nThis dataset contains information from all 50 states and the District of Columbia for the 2016 U.S. presidential election. It is similar to Election08 for the 2008 election. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Election16.csv", "filename": "Election16", "name": "2016 U.S. 
Presidential Election", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Florida death penalty cases by race of defendant and victim \nMike Radelet's data on imposition of the death penalty for murderers in Florida broken down by race of the victim and defendant. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FloridaDP.csv", "filename": "FloridaDP", "name": "Florida Death Penalty Cases", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Congressional votes on an ObamaCare health insurance bill in 2009 \nOn 7 November 2009 the U.S. House of Representatives voted, by the narrow margin of 220-215, for a bill to enact health insurance reform. Most Democrats voted yes while almost all Republicans voted no. This dataset contains data for each of the 435 representatives. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/InsuranceVote.csv", "filename": "InsuranceVote", "name": "Congressional Votes on a Health Insurance Bill", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Reporting rates for bi-weekly jury pools in Franklin County Court (Columbus, OH). \nTom Shields, jury commissioner for the Franklin County Municipal Court in Columbus, Ohio, is responsible for making sure that the judges have enough potential jurors to conduct jury trials. 
Jury duty for this court is two weeks long, so Tom must bring together a new group of potential jurors twenty-six times a year. Random sampling methods are used to obtain a sample of registered voters in Franklin County every two weeks, and these individuals are sent a summons to appear for jury duty. One of the most difficult aspects of Tom's job is to get those registered voters who receive a summons to actually appear at the courthouse for jury duty. This dataset contains the 1998 and 2000 data for the percentages of individuals who reported for jury duty after receiving a summons. The reporting dates vary slightly from year to year, so they are coded sequentially from 1, the first group to report in January, to 26, the last group to report in December. A variety of methods were used after 1998 to try to increase participation rates. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Jurors.csv", "filename": "Jurors", "name": "Reporting Rates for Jurors", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Characteristics of nursing homes in New Mexico. \nThe data were collected by the Department of Health and Social Services of the State of New Mexico and cover 52 of the 60 licensed nursing facilities in New Mexico in 1988. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Nursing.csv", "filename": "Nursing", "name": "Nursing Homes", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Votes for George Bush and Pat Buchanan in Florida counties for the 2000 U.S. 
presidential election \nThe race for the presidency of the United States in the fall of 2000 was very close, with the electoral votes from Florida determining the outcome. In the disputed final tally in Florida, George W. Bush won by just 537 votes over Al Gore, out of almost 6 million votes cast. About 2.3% of the votes cast in Florida were awarded to other candidates. One of those other candidates was Pat Buchanan, who did much better in Palm Beach County than he did anywhere else. Palm Beach County used a unique \"butterfly ballot\" that had candidate names on either side of the page with \"chads\" to be punched in the middle. This non-standard ballot seemed to confuse some voters, who punched votes for Buchanan that may have been intended for a different candidate. This dataset shows the number of votes for Bush and Buchanan in each Florida county. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/PalmBeach.csv", "filename": "PalmBeach", "name": "Palm Beach Butterfly Ballot", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Polls for 2008 U.S. presidential election \nThe file Pollster08 contains data from 102 polls that were taken during the 2008 U.S. Presidential campaign. These data include all presidential polls reported on the internet site pollster.com that were taken between August 29th, when John McCain announced that Sarah Palin would be his running mate as the Republican nominee for vice president, and the end of September. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Pollster08.csv", "filename": "Pollster08", "name": "2008 U.S. 
Presidential Election Polls", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Votes in the US Senate on Clarence Thomas nomination for the US Supreme Court \nData from the U.S. Senate vote on October 15, 1991 to confirm Clarence Thomas to a position on the Supreme Court. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ThomasConfirmation.csv", "filename": "ThomasConfirmation", "name": "US Senate Votes on Clarence Thomas Confirmation", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from 220 cases in Florida where a \"Stand your ground\" defense was used. \nInspired by the Trayvon Martin case, combined fatal and non-fatal cases of assault in Florida for which the defendant used the Stand Your Ground law in defense. These data show Simpson's Paradox. Race of the victim is more important than race of the defendant. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Zimmerman.csv", "filename": "Zimmerman", "name": "Stand Your Ground Simpson's Paradox ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Number of votes by province in the German Bundestag election 2005 (for the parties that eventually entered the parliament). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Bundestag2005.csv", "filename": "Bundestag2005", "name": "Votes in German Bundestag Election 2005", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a study of the Gallup Institute in Denmark in 1979 about the attitude of a random sample of 1,456 persons towards corporal punishment of children. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Punishment.csv", "filename": "Punishment", "name": "Corporal Punishment Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Reiss (1980) given by Fienberg (1980) about instances of repeat victimization for households in the U.S. National Crime Survey. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/RepVict.csv", "filename": "RepVict", "name": "Repeat Victimization Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly public opinion data for 2001-2006. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/approval.csv", "filename": "approval", "name": "U.S. 
Presidential Approval Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains survival data on government coalitions in parliamentary democracies (Belgium, Canada, Denmark, Finland, France, Iceland, Ireland, Israel, Italy, Netherlands, Norway, Portugal, Spain, Sweden, and the United Kingdom) for the period 1945-1987. For parsimony, country indicator variables are omitted in the sample data. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/coalition.csv", "filename": "coalition", "name": "Coalition Dissolution in Parliamentary Democracies", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains survival data on government coalitions in parliamentary democracies (Belgium, Canada, Denmark, Finland, France, Iceland, Ireland, Israel, Italy, Netherlands, Norway, Portugal, Spain, Sweden, and the United Kingdom) for the period 1945-1987. Country indicator variables are included in the sample data. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/coalition2.csv", "filename": "coalition2", "name": "Coalition Dissolution in Parliamentary Democracies, Modified Version", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A table with 150 observations and 12 variables. 
\nsex1 for men and 0 for women \nageAge of respondent in years \neducLevels of education, coded as a numeric variable with \n1No formal education \n2Less than primary school education \n3Completed primary school \n4Completed secondary school \n5Completed high school \n6Completed college \n7Completed post-graduate degree \ncountryCharacter strings consisting of \"Oceana\", \"Eurasia\", and \"Eastasia\", after Orwell's 1984. \nySelf assessment (see below). \nv1-v6Response to vignettes (see below). \nSurvey respondents were asked in almost the same language for a self-assessment and for an assessment of several hypothetical persons described by written vignettes. The self assessment (self, in the data set), \"How free do you think [name/you] [is/are] to express [him-her/your]self without fear of government reprisal?\" was first asked of the survey respondent with respect to him or herself, and then after each of vignette. The possible response categories are: \n1Completely free \n2Very free \n3Moderately free \n4Slightly free \n5Not free at all \nThe vignettes, ordered from most free to least free, are: \nvign1[Kay] does not like many of the government's policies. She frequently publishes her opinion in newspapers, criticizing decisions by officials and calling for change. She sees little reason these actions could lead to government reprisal. \nvign2[Michael] disagrees with many of the government's policies. Though he knows criticism is frowned upon, he doesn't believe the government would punish someone for expressing critical views. He makes his opinion known on most issues without regard to who is listening. \nvign3[Bob] has political views at odds with the government. He has heard of people occasionally being arrested for speaking out against the government, and government leaders sometimes make political speeches condemning those who criticize. He sometimes writes letters to newspapers about politics, but he is careful not to use his real name. 
\nvign4[Connie] does not like the government's stance on many issues. She has a friend who was arrested for being too openly critical of governmental leaders, and so she avoids voicing her opinions in public places. \nvign5[Vito] disagrees with many of the government's policies, and is very careful about whom he says this to, reserving his real opinions for family and close friends only. He knows several men who have been taken away by government officials for saying negative things in public. \nvign6[Sonny] lives in fear of being harassed for his political views. Everyone he knows who has spoken out against the government has been arrested or taken away. He never says a word about anything the government does, not even when he is at home alone with his family. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/free1.csv", "filename": "free1", "name": "Freedom of Speech Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Selection of individual-level survey data for freedom of speech. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/free2.csv", "filename": "free2", "name": "Freedom of Speech Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains annual social security expenditure (as percent of budget lagged by two years), the relative frequency of mentions social justice received in the party's platform in each year, and whether the president is Republican or Democrat. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/hoff.csv", "filename": "hoff", "name": "Social Security Expenditure Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset contains voting data for the 1988 Mexican presidential election. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/mexico.csv", "filename": "mexico", "name": "Voting Data from the 1988 Mexican Presidental Election", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A small sample from the militarized interstate disputes (MID) database. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/mid.csv", "filename": "mid", "name": "Militarized Interstate Disputes", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains time-series data of the seat shares in the lower legislative house of left leaning parties over time, as well as the level of unemployment. 
Data follows the style used in Hibbs (1977).", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/seatshare.csv", "filename": "seatshare", "name": "Left Party Seat Share in 11 OECD Countries", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataframe contains a matrix of votes cast by U.S. Supreme Court justices in all cases in the 2000 term. \nThe dataframe contains data for justices Rehnquist, Stevens, O'Connor, Scalia, Kennedy, Souter, Thomas, Ginsburg, and Breyer for the 2000 term of the U.S. Supreme Court. It contains data from 43 non-unanimous cases. The votes are coded liberal (1) and conservative (0) using the protocol of Spaeth (2003). The unit of analysis is the case citation (ANALU=0). We are concerned with formally decided cases issued with written opinions, after full oral argument and cases decided by an equally divided vote (DECTYPE=1,5,6,7).", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/SupremeCourt.csv", "filename": "SupremeCourt", "name": "U.S. Supreme Court Vote Matrix", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains individual-level turnout data. It pools several American National Election Surveys conducted during the 1992 presidential election year. Only the first 2,000 observations (from a total of 15,837 observations) are included in the sample data. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/turnout.csv", "filename": "turnout", "name": "Turnout Data Set from the National Election Survey", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains election results for 10 kreise (equivalent to precincts) from the 1932 Weimar (German) election. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/Weimar.csv", "filename": "Weimar", "name": "1932 Weimar election data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Administration" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data were collected in a study of how data on various characteristics of the blood varied with sport, body size and sex of the athlete. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/ais.csv", "filename": "ais", "name": "Australian athletes data set", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Year of birth, lifespan, etc, of British first class cricketers, born 1840-1960, whose handedness could be determined from information in the Who's who of cricketers. The status (alive=0, dead =1), and lifetime or lifespan, is for 1992. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/cricketer.csv", "filename": "cricketer", "name": "Lifespans of UK 1st class cricketers born 1840-1960", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data were from the 2007 calendar for the Northern Ireland Mountain Running Association. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/nihills.csv", "filename": "nihills", "name": "Record times for Northern Ireland mountain running events", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Winning times (in minutes) for the Boston Marathon Men's Open Division. 1897-2016. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/marathon.csv", "filename": "marathon", "name": "Boston marathon winning times since 1897", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Times in seconds for the gold-medal winner of the men's 400m track final at each Olympics since 1896. Missing values occur in 1916, 1940 and 1944 due to the World Wars. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/mens400.csv", "filename": "mens400", "name": "Winning times in Olympic men's 400m track final. 
1896-2016.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Results of the olympic heptathlon competition, Seoul, 1988. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/heptathlon.csv", "filename": "heptathlon", "name": " Olympic Heptathlon Seoul 1988 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Major League Baseball Data from the 1986 and 1987 seasons. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/Hitters.csv", "filename": "Hitters", "name": "Baseball Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "237 hunters were each offered one of 11 cash amounts (bids) ranging from $1 to $200 in return for their goose permits. Hunters returned either their permit or the cash. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/GoosePermits.csv", "filename": "GoosePermits", "name": "Goose Permit Study", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "World records for men and women over time from 1905 through 2004. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/SwimRecords.csv", "filename": "SwimRecords", "name": "100 m Swimming World Records", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Cherry Blossom 10 Mile Run is a road race held in Washington, D.C. in April each year. (The name comes from the famous cherry trees that are in bloom in April in Washington.) The results of this race are published. This data frame contains the results from the 2005 race. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/TenMileRace.csv", "filename": "TenMileRace", "name": "Cherry Blossom Race", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data frame contains batting statistics for a subset of players collected from http://www.baseball-databank.org/. There are a total of 21,699 records, covering 1,228 players from 1871 to 2007. Only players with more than 15 seasons of play are included. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plyr/baseball.csv", "filename": "baseball", "name": "Yearly batting records for all major league baseball players", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Batting averages for 18 major league baseball players, first 45 at bats of the 1970 season.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/EfronMorris.csv", "filename": "EfronMorris", "name": "Batting Averages for 18 major league baseball players, 1970", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Score results from an archery class ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ArcheryData.csv", "filename": "ArcheryData", "name": "Scores in an Archery Class", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Six-year graduation data for 214,555 students in 2004 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/AthleteGrad.csv", "filename": "AthleteGrad", "name": "Athletic Participation, Race, and Graduation", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Game times and boxscore 
information for baseball games ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/BaseballTimes.csv", "filename": "BaseballTimes", "name": "Baseball Game Times of One Day in 2008", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Times for one day's major league baseball games ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/BaseballTimes2017.csv", "filename": "BaseballTimes2017", "name": "Baseball Game Times of One Day in 2017", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Passing statistics for football quarterback Drew Brees in 2016 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/BreesPass.csv", "filename": "BreesPass", "name": "Drew Brees Passing Statistics (2016)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Shooting percentages for two Cav players ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/CavsShooting.csv", "filename": "CavsShooting", "name": "Cleveland Cavalier's Shooting (2016-2017)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Draft selection times for a 
fantasy baseball league \nTime (in seconds) for participants in a draft for a fantasy baseball league to make a selection at each round. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FantasyBaseball.csv", "filename": "FantasyBaseball", "name": "Selection Times in a Fantasy Baseball Draft", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Field goal results in the National Football League (NFL) by distance. This dataset summarizes all 8520 field goals attempted by place kickers in the National Football League (NFL) during regular season games for the 2000 through the 2008 seasons. Results are counts (attempted, made, and blocked) and proportions (made and blocked) for each distance. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FGByDistance.csv", "filename": "FGByDistance", "name": "Results of NFL Field Goal Attempts", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "NCAA Final Four by seed with indicator for Tom Izzo's teams from 1985 - 2010. \nEach year 64 college teams are selected for the NCAA Division I Men's Basketball tournament, with 16 teams placed in each of four regions. Within each region the teams are seeded from 1 to 16, with the (presumed) best team as the 1 seed and the (presumed) weakest team as the 16 seed; this practice of seeding teams began in 1979 for the NCAA tournament. Only one team from each region (so four teams each year) advances to the Final Four. 
This dataset is the same as FinalFourLong, except the data starts in 1985 and we have an extra column that is an indicator for Michigan State teams coached by Tom Izzo. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FinalFourIzzo.csv", "filename": "FinalFourIzzo", "name": "NCAA Final Four by Seed and Tom Izzo (through 2010)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "NCAA Final Four by seed with indicator for Tom Izzo's teams for 1985 - 2017 \nEach year 64 college teams are selected for the NCAA Division I Men's Basketball tournament, with 16 teams placed in each of four regions. Within each region the teams are seeded from 1 to 16, with the (presumed) best team as the 1 seed and the (presumed) weakest team as the 16 seed; this practice of seeding teams began in 1979 for the NCAA tournament. Only one team from each region (so four teams each year) advances to the Final Four. This dataset is an extension of FinalFourIzzo (that ended in 2017) and the same as FinalFourLong2017, except the data starts in 1985 and we have an extra column that is an indicator for Michigan State teams coached by Tom Izzo. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FinalFourIzzo17.csv", "filename": "FinalFourIzzo17", "name": "NCAA Final Four by Seed and Tom Izzo (through 2017)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "NCAA Final Four by seed with individual cases for each team each year \nEach year 64 college teams are selected for the NCAA Division I Men's Basketball tournament, with 16 teams placed in each of four regions. Within each region the teams are seeded from 1 to 16, with the (presumed) best team as the 1 seed and the (presumed) weakest team as the 16 seed; this practice of seeding teams began in 1979 for the NCAA tournament. Only one team from each region (so four teams each year) advances to the Final Four. This dataset has a row (case) for each team in the NCAA Division I Men's Basketball tournament from 1979 to 2010 along with its seed and an indicator for whether the team made the Final Four that year. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FinalFourLong.csv", "filename": "FinalFourLong", "name": "NCAA Final Four by Seed (Long Version through 2010)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "NCAA Final Four by seed with individual cases for each team each year \nEach year 64 college teams are selected for the NCAA Division I Men's Basketball tournament, with 16 teams placed in each of four regions. 
Within each region the teams are seeded from 1 to 16, with the (presumed) best team as the 1 seed and the (presumed) weakest team as the 16 seed; this practice of seeding teams began in 1979 for the NCAA tournament. Only one team from each region (so four teams each year) advances to the Final Four. This dataset has a row (case) for each team in the NCAA Division I Men's Basketball tournament from 1979 to 2017 along with its seed and an indicator for whether the team made the Final Four that year. This dataset is an extension of FinalFourLong (that went through 2010). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FinalFourLong17.csv", "filename": "FinalFourLong17", "name": "NCAA Final Four by Seed (Long Version through 2017)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "NCAA Final Four participation summarized each year by seed \nEach year 64 college teams are selected for the NCAA Division I Men's Basketball tournament, with 16 teams placed in each of four regions. Within each region the teams are seeded from 1 to 16, with the (presumed) best team as the 1 seed and the (presumed) weakest team as the 16 seed; this practice of seeding teams began in 1979 for the NCAA tournament. Only one team from each region (so four teams each year) advances to the Final Four. This dataset is similar to FinalFourLong, except that each row combines the count of the results (make/don't make the Final Four) for each seed, so that In+Out= 4 for each row. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FinalFourShort.csv", "filename": "FinalFourShort", "name": "NCAA Final Four by Seed (Short Version through 2010)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "NCAA Final Four participation summarized each year by seed \nEach year 64 college teams are selected for the NCAA Division I Men's Basketball tournament, with 16 teams placed in each of four regions. Within each region the teams are seeded from 1 to 16, with the (presumed) best team as the 1 seed and the (presumed) weakest team as the 16 seed; this practice of seeding teams began in 1979 for the NCAA tournament. Only one team from each region (so four teams each year) advances to the Final Four. This dataset is similar to FinalFourLong2017, except that each row combines the count of the results (make/don't make the Final Four) for each seed, so that In+Out= 4 for each row. This dataset is an extension of FinalFourShort (that went through 2010). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FinalFourShort17.csv", "filename": "FinalFourShort17", "name": "NCAA Final Four by Seed (Short Version through 2017)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on hiking trails for each of the 46 \"High Peaks\" in the Adirondack mountains \nForty-six mountains in the Adirondacks of upstate New York are known as the High Peaks with elevations near or above 4000 feet (although modern measurements show a couple of the peaks are actually slightly under 4000 feet). 
A goal for hikers in the region is to become a \"46er\" by scaling each of these peaks. This dataset gives information about the hiking trails up each of these peaks. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/HighPeaks.csv", "filename": "HighPeaks", "name": "Characteristics of Adirondack Hiking Trails", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from games played by the Grinnell College men's basketball team between 1997 and 2006 \nSince 1991, David Arseneault, men's basketball coach of Grinnell College, has developed a unique, fast-paced style of basketball that he calls \"the system.\" This dataset comes from the 147 games the Grinnell team played within its athletics conference between the 1997-98 season through the 2005-06 season. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Hoops.csv", "filename": "Hoops", "name": "Grinnell College Basketball Games", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Pitch-by-pitch data for baseball pitcher Clayton Kershaw in the 2013 season \nDataset includes information for 3,402 individual pitches thrown by Los Angeles Dodger baseball pitcher Clayton Kershaw during the 2013 regular season when he won the Cy Young award as the best pitcher in the National League. Many variables are measured using Major League Baseball's PITCHf/x system that uses camera systems in each ballpark to track characteristics of each pitch thrown. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Kershaw.csv", "filename": "Kershaw", "name": "Kershaw Pitch Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Winning distances in men's Olympic long jump competitions (1900 - 2008) \nGold medal winning distances for the men's long jump at the Olympics from 1900 to 2008. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/LongJumpOlympics.csv", "filename": "LongJumpOlympics", "name": "Olympic Men's Long Jump Gold Medal Distance (1900 - 2008)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Gold medal distance for Olympic men's long jump \nGold medal winning distances for the men's long jump at the Olympics from 1900 to 2016. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/LongJumpOlympics2016.csv", "filename": "LongJumpOlympics2016", "name": "Olympic Men's Long Jump Gold Medal Distance (1900 - 2016)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Training records for a marathon runner \nInformation from training records of a marathoner over a five-year period from 2002-2006. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Marathon.csv", "filename": "Marathon", "name": "Daily Training for a Marathon Runner", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data for Major League Baseball teams from the 2007 regular season \nData for all 30 Major League Baseball (MLB) teams for the 2007 regular season. This includes team batting statistics (BattingAvg through SLG) and team pitching statistics (ERA through WHIP) ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MLB2007Standings.csv", "filename": "MLB2007Standings", "name": "Standings and Team Statistics from the 2007 Baseball Season", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Major League Baseball (MLB) standings and team statistics for the 2016 season ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MLBStandings2016.csv", "filename": "MLBStandings2016", "name": "MLB Standings in 2016", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Standings for National Football League teams in 2007 \nData for all 32 National Football League (NFL) teams for the 2007 regular season. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/NFL2007Standings.csv", "filename": "NFL2007Standings", "name": "NFL Standings for 2007 Regular Season", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Standings and team statistics for National Football League (NFL) teams in the 2016 season \nStandings for the 2016 regular season of the National Football League (NFL) along with points scored and allowed for each team in its 16 games. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/NFLStandings2016.csv", "filename": "NFLStandings2016", "name": "NFL Standings for 2016 Regular Season", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily walking amounts recorded on a personal pedometer from September-December 2011 \nA statistics professor regularly keeps a pedometer in his pocket. It records not only the number of steps taken each day, but also the number of steps taken at a moderate pace, the number of minutes walked at a moderate pace, and the number of miles total that he walked. He also added to the data set the day of the week, whether it was rainy, sunny, or cold (on sunny days he often biked, but on rainy or cold days he did not), and whether it was a weekday or weekend. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Pedometer.csv", "filename": "Pedometer", "name": "Pedometer Walking Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Pulse rates before and after exercise for a sample of statistics students .\nStudents in a Stat2 class recorded resting pulse rates (in class), did three \"laps\" walking up/down a nearby set of stairs, and then measured their pulse rate after the exercise. They provided additional information about height, weight, exercise, and smoking habits via a survey. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Pulse.csv", "filename": "Pulse", "name": "Pulse Rates and Exercise", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Putting results for a golfing statistician \nA statistician golfer kept careful records of every putt he attempted when playing golf, recording the length of the putt and whether or not he was successful in making the putt. This dataset has one case for each of the 587 attempted putts. A different form of the same data (Putts2) accumulates counts of makes and misses for each putt length. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Putts1.csv", "filename": "Putts1", "name": "Putting Success by Length (Long Form)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Putting results for a golfing statistician (by length of the putts) \nA statistician golfer kept careful records of every putt he attempted when playing golf, recording the length of the putt and whether or not he was successful in making the putt. For each different length, this dataset records the number of putts made, missed, and the total number of attempts from that length. A similar dataset, Putts1, has one case for each of the 587 attempted putts, showing the length and outcome. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Putts2.csv", "filename": "Putts2", "name": "Putting Success by Length (Short Form)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Hypothetical putting results for a golfing statistician \nThis is a hypothetical revision of the table of putting success in Putts2 that helps demonstrate overdispersion. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Putts3.csv", "filename": "Putts3", "name": "Hypothetical Putting Data (Short Form)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A sample of 30 field goal attempts in the National Football League \nThis is a subset of just 30 field goal attempts selected at random from the larger sample of attempts made by NFL kickers that is summarized in FGByDistance. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/SampleFG.csv", "filename": "SampleFG", "name": "Field Goal Attempts in the NFL", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Rushing yards for each game LaDainian Tomlinson played in the 2006 National Football League (NFL regular) season. \nFor each of the sixteen games the San Diego Chargers played in the 2006 NFL regular season we have the number of times LaDainian Tomlinson ran the ball and the total yards he gained. \nThis data set from the first edition was replaced by BreesPass in the second edition. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/TomlinsonRush.csv", "filename": "TomlinsonRush", "name": " LaDainian Tomlinson Rushing Yards ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daily pedometer data for one of the authors \nOne of the authors recorded daily pedometer data, the weather, and whether or not he walked the dogs. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/WalkTheDogs.csv", "filename": "WalkTheDogs", "name": "Did the Author Walk the Dogs Today?", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Baseball data. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Baseball.csv", "filename": "Baseball", "name": "Baseball Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Results from the first German soccer league (1963-2008). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Bundesliga.csv", "filename": "Bundesliga", "name": "Ergebnisse der Fussball-Bundesliga", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set is deduced from the Baseball fielding data set: fielding performance basically includes the numbers of Errors, Putouts and Assists made by each player. In order to reduce the number of observations, the data set was compressed by calculating the mean number of errors, putouts and assists for each team and for only 6 positions (1B, 2B, 3B, C, OF, SS and UT). In addition, each of these three variables was scaled to a common range by dividing each variable by the maximum of the variable. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/Hitters.csv", "filename": "Hitters_", "name": "Hitters Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data from a Danish study in 1983 and 1985 about sports activities and the opinion about joint sports with the other gender among 16–19 year old high school students. ", + "description": "Data from a Danish study in 1983 and 1985 about sports activities and the opinion about joint sports with the other gender among 16-19 year old high school students. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/JointSports.csv", "filename": "JointSports", "name": "Opinions About Joint Sports", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Lee (1997), on the goals scored by Home and Away teams in the Premier Football League, 1995/6 season. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/UKSoccer.csv", "filename": "UKSoccer", "name": "UK Soccer Scores", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Game-by-game information for the 1998 season for Mark McGwire and Sammy Sosa. Data are a subset of the dataset provided in Simonoff (1998). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/homerun.csv", "filename": "homerun\n", "name": "Sample Data on Home Runs Hit By Mark McGwire and Sammy Sosa in 1998.", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Sport" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data consist of 4601 email items, of which 1813 items were identified as spam. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/spam7.csv", "filename": "spam7", "name": "Spam E-mail Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The number of telephones in various regions of the world (in thousands). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/WorldPhones.csv", "filename": "WorldPhones", "name": "The World's Telephones", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A time series of the numbers of users connected to the Internet through a server every minute. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/WWWusage.csv", "filename": "WWWusage", "name": "Internet Usage per Minute", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "data.frame of cyber security breaches involving health care records of 500 or more humans reported to the U.S. Department of Health and Human Services (HHS) as of June 27, 2014. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/breaches.csv", "filename": "breaches", "name": " Cyber Security Breaches ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1993 to 1995 \nnumber of observations : 6259 \nobservation : goods \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Computers.csv", "filename": "Computers", "name": "Prices of Personal Computers ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data casually collected on the number of packages on the Comprehensive R Archive Network (CRAN) at different dates. \nThis seems to provide the most widely available source for data on the growth of CRAN, manually recorded by John Fox and Spencer Graves. For a discussion of these and related data, see Fox (2009). \nFor more detail, see the CRAN packages data on Github maintained by Hadley Wickham. This contains the description file of every package uploaded to CRAN prior to the date of Hadley's most recent update. The current maintainer of the Ecdat and Ecfun packages would consider contributions along the following lines: \n1. It might be nice to have a more complete dataset or datasets showing CRAN growth. This might include code fitting multiple models and predicting future growth with error bounds computed using Bayesian Model Averaging. These model fits might make an interesting addition to the examples in this help file. With a little more effort, it might make an interesting note for R Journal. 
Functions written to fit those models might be added to the Ecfun package. \n2. It might be nice to have a function in Ecfun to download the CRAN packages data from Github and convert it to a format suitable for updating this dataset. \nThe current maintainer for Ecdat and Ecfun (Spencer Graves) might be willing to accept code and documentation for this but is not ready to do it himself at the present time. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/CRANpackages.csv", "filename": "CRANpackages", "name": " Growth of CRAN ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "daily observations from 1969-1-03 to 1998-12-31 \nnumber of observations : 2528 \nobservation : production units \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/CRSPday.csv", "filename": "CRSPday", "name": "Daily Returns from the CRSP Database ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "monthly observations from 1969-1 to 1998-12 \nnumber of observations : 360 \nobservation : production units \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/CRSPmon.csv", "filename": "CRSPmon", "name": "Monthly Returns from the CRSP Database ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Since October 2009 organizations in 
the U.S. that store data on human health are required to report any incident that compromises the confidentiality of 500 or more patients / human subjects (45 C.F.R. 164.408). These reports are publicly available. HHSCyberSecurityBreaches was downloaded from the Office for Civil Rights of the U.S. Department of Health and Human Services, 2015-02-26\n\nThis contains the breach report data downloaded 2015-02-26 from the US Health and Human Services. This catalogues reports starting 2009-10-21. Earlier downloads included a few breaches prior to 2009 when the law was enacted (inconsistently reported), and a date for breach occurrence in addition to the date of the report. \nThe following corrections were made to the file: * UCLA Health System, breach date 11/4/2011, had cover entity added as \"Healthcare Provider\" * Wyoming Department of Health, breach date 3/2/2010 had breach type changed to \"Unauthorized Access / Disclosure\" * Computer Program and Systems, Inc. (CPSI), breach date 3/30/2010 had breach type changed to \"Unauthorized Access / Disclosure\" * Aetna, breach date 7/27/2010 had breach type changed to \"Improper Disposal\" (see explanation below), breach date 5/24/2010 name changed to City of Charlotte, NC (Health Plan) and state changed to NC * Mercer, breach date 7/30/2010 state changed to MI * Not applicable, breach date 11/2/2011 name changed to Northridge Hospital Medical Center and state changed to CA * na, breach date 4/4/2011 name changed to Brian J Daniels DDS, Paul R Daniels DDS and state changed to AZ * NA, breach date 5/27/2011 name changed to Spartanburg Regional Healthcare System and state changed to SC * NA, breach date 7/4/2011 name changed to Yanz Dental Corporation and state changed to CA ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/HHSCyberSecurityBreaches.csv", "filename": "HHSCyberSecurityBreaches", "name": " Cybersecurity breaches reported to the US Department of Health and Human Services ", - "number_format": 31, 
- "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "annual observations from 1909 to 1949 \nnumber of observations : 41 \nobservation : country \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Solow.csv", "filename": "Solow", "name": "Solow's Technological Change Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Half-hourly electricity demand in England and Wales from Monday 5 June 2000 to Sunday 27 August 2000. Discussed in Taylor (2003), and kindly provided by James W Taylor. Units: Megawatts ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/forecast/taylor.csv", "filename": "taylor", "name": "Half-hourly electricity demand", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Five-minute call volume handled on weekdays between 7:00am and 9:05pm, from 3 March 2003 to 23 May 2003. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/calls.csv", "filename": "calls", "name": "Call volume for a large North American bank", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly retail debit card usage in Iceland (million ISK). 
January 2000 - August 2013. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/debitcards.csv", "filename": "debitcards", "name": "Retail debit card usage in Iceland.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "elecdemand is a half-hourly time series matrix with three columns: \nDemand: \nTotal electricity demand in GW for Victoria, Australia, every half-hour during 2014.\nWorkDay: \ntaking value 1 on work days, and 0 otherwise.\nTemperature: \nhalf-hourly temperatures for Melbourne (BOM site 086071). \n\nelecdaily is a daily time series matrix with three columns: \nDemand: \nTotal electricity demand in GW for Victoria, Australia, every day during 2014.\nWorkDay: \ntaking value 1 on work days, and 0 otherwise.\nTemperature: \nmaximum daily temperatures for Melbourne (BOM site 086071). \n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/elecdaily.csv", "filename": "elecdemand", "name": "Half-hourly and daily electricity demand for Victoria, Australia, in 2014", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Monthly manufacture of electrical equipment: computer, electronic and optical products. January 1996 - March 2012. Data adjusted by working days; Euro area (17 countries). Industry new orders index. 2005=100. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/elecequip.csv", "filename": "elecequip", "name": "Electrical equipment manufactured in the Euro area.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Hyndsight is Rob Hyndman's personal blog at https://robjhyndman.com/hyndsight. This series contains the daily pageviews for one year, beginning 30 April 2014. The frequency is set to 7, to allow the weekly pattern to be modelled. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/hyndsight.csv", "filename": "hyndsight", "name": "Daily pageviews for the Hyndsight blog. 30 April 2014 to 29 April 2015.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Electricity net generation measured in billions of kilowatt hours (kWh). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/usmelec.csv", "filename": "usmelec", "name": "Electricity monthly total net generation. January 1973 - June 2013.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Number of failures of piston rings in three legs of four steam-driven compressors. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/pistonrings.csv", "filename": "pistonrings", "name": " Piston Rings Failures ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements of root mean square bending moment by two different mooring methods. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/waves.csv", "filename": "waves", "name": " Electricity from Wave Power at Sea ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Dyestuff data frame provides the yield of dyestuff (Naphthalene Black 12B) from 5 different preparations from each of 6 different batchs of an intermediate product (H-acid). The Dyestuff2 data were generated data in the same structure but with a large residual variance relative to the batch variance. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lme4/Dyestuff.csv", "filename": "Dyestuff", "name": "Yield of dyestuff by batch", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A relative performance measure and characteristics of 209 CPUs. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/cpus.csv", "filename": "cpus", "name": " Performance of Computer CPUs ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The motors data frame has 40 rows and 3 columns. It describes an accelerated life test at each of four temperatures of 10 motorettes, and has rather discrete times. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/motors.csv", "filename": "motors", "name": " Accelerated Life Testing of Motorettes ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The yield of a petroleum refining process with four covariates. The crude oil appears to come from only 10 distinct samples. \nThese data were originally used by Prater (1956) to build an estimation equation for the yield of the refining process of crude oil to gasoline. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/petrol.csv", "filename": "petrol", "name": " N. L. Prater's Petrol Refinery Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A list object with the annual numbers of telephone calls, in Belgium. 
The components are: ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/phones.csv", "filename": "Belgian-phones", "name": " Belgium Phone Calls 1950-1973 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data frame from accelerated testing of tyre rubber. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Rubber.csv", "filename": "Rubber", "name": " Accelerated Testing of Tyre Rubber ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The shuttle data frame has 256 rows and 7 columns. The first six columns are categorical variables giving example conditions; the seventh is the decision. The first 253 rows are the training set, the last 3 the test conditions. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/shuttle.csv", "filename": "shuttle", "name": " Space Shuttle Autolander Problem ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Temperature and pressure in a saturated steam driven experimental device. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/steam.csv", "filename": "steam", "name": " The Saturated Steam Pressure Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Voter turnout data spanning 85 cable TV systems, randomly allocated to a voter mobilization experiment targetting 18-19 year olds with \"Rock the Vote\" television advertisments ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/RockTheVote.csv", "filename": "RockTheVote", "name": "Voter turnout experiment, using Rock The Vote ads", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Pilot-Plant data from Daniel and Wood (1971). The response variable corresponds to the acid content determined by titration and the explanatory variable is the organic acid content determined by extraction and weighing. This data set was analyzed also by Yale and Forsythe (1976). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/pilot.csv", "filename": "pilot", "name": "Pilot-Plant Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data were supplied by A. Frery. They are a part of a synthetic aperture satellite radar image corresponding to a suburb of Munich. Provided are coordinates and values corresponding to three frequency bands for each of 1573 pixels. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/radarImage.csv", "filename": "radarImage", "name": "Satellite Radar Image Data from near Munich", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Number of international calls from Belgium, taken from the Belgian Statistical Survey, published by the Ministry of Economy. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/telef.csv", "filename": "telef", "name": "Number of International Calls from Belgium", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The solder.balance data frame has 720 rows and 6 columns, representing a balanced subset of a designed experiment varying 5 factors on the soldering of components on printed-circuit boards. \nThe solder data frame is the full version of the data with 900 rows. It is located in both the rpart and the survival packages. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/rpart/solder.csv", "filename": "solder_balance", "name": "Soldering of Components on Printed-Circuit Boards", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Experiment to measure the effect of ultrasound on deapsorbing oil from sand \nThis data set is the result of a science fair experiment run by a high school student. 
The basic question was whether exposing sand with oil in it (think oil spill) to ultrasound could help the oil deapsorb from it better than sand that was not exposed to ultrasound. There were two levels of ultrasound tested (5 minutes and 10 minutes) and two levels of oil (5 ml and 10 ml). There was also a question of whether exposure to salt water or fresh water made a difference so half the samples had salt water, the others distilled water. Each combination of factor levels was replicated 5 times. There were also an equivalent number of control observations run, all factors being the same but without any exposure to ultrasound. Each experimental run was paired with an appropriate control run and the response variable is the difference in the amount of oil removed in the experimental run and the control run. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/OilDeapsorbtion.csv", "filename": "OilDeapsorbtion", "name": "Effect of Ultrasound on Oil Deapsorbtion", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Number of damaged O-rings on space shuttle launches and launch temperature \nThe space shuttle Challenger exploded shortly after liftoff in 1986. The subsequent investigation focused on the failure of O-ring seals, which allowed liquid hydrogen and oxygen to mix and explode. These failures might be related to temperature at the launch site which was near freezing (32 degrees F) on that day. This dataset shows the number of O-ring failures on previous shuttle launches, along with an indicator for whether the temperature was above or below 65 degrees F. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Orings.csv", "filename": "Orings", "name": "Space Shuttle O-Rings", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In 1988 an experiment was designed and implemented at one of AT&T's factories to investigate alternatives in the \"wave soldering\" procedure for mounting electronic componentes to printed circuit boards. The experiment varied a number of factors relevant to the process. The response, measured by eye, is the number of visible solder skips. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/survival/solder.csv", "filename": "solder", "name": "Data from a soldering experiment", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from Dalal et al. (1989) about O-ring failures in the NASA space shuttle program. The damage index comes from a discussion of the data by Tufte (1997). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/vcd/SpaceShuttle.csv", "filename": "SpaceShuttle", "name": "Space Shuttle O-ring Failures", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give 29 consecutive measurements on the inner diameter of a landing gear triunion. 
Each value given here is the average of four original observations.", "url": "http://www.statsci.org/data/general/diameter.txt", "filename": "diameter", "name": "Inner Diameter of Landing Gear Triunion", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "To see how much of a difference time of\nday made on the speed at which he could download files, a\ncollege sophomore performed an experiment. He placed a\nfile on a remote server and then proceeded to download it\nat three different time periods of the day. He downloaded\nthe file 48 times in all, 16 times at each Time of Day, and\nrecorded the Time in seconds that the download took.", "url": "https://dasl.datadescription.com/download/data/3173", "filename": "Downloading", "name": "Downloading", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Some camera lenses have an adjustable aperture, the hole that lets the light in. The\nsize of the aperture is expressed in a mysterious number called the f/stop. Each increase of one f/stop number corresponds to a halving of the light that is allowed to come through. When you halve the shutter speed, you cut down the light, so you have to open the\naperture one notch. We could experiment to find the best f/stop value for each shutter\nspeed. 
A table of recommended shutter speeds and f/stops for a camera lists the relationship like those given here:", "url": "https://dasl.datadescription.com/download/data/3198", "filename": "F-stops", "name": "F-stops", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Technology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Aggregate data on applicants to graduate school at Berkeley for the six largest departments in 1973 classified by admission and sex. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/UCBAdmissions.csv", "filename": "UCBAdmissions", "name": "Student Admissions at UC Berkeley", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1998-1999 \nnumber of observations : 420 \nobservation : schools \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Caschool.csv", "filename": "Caschool", "name": "The California Test Score Data Set ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 545 observations from 1980 to 1987 \nnumber of observations : 4360 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Males.csv", "filename": "Males", "name": "Wages and Education of Young Males ", - "number_format": 31, - "remove_quotes": true, "separator": ",", 
"simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1983 to 1986 \nnumber of observations : 609 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Mathlevel.csv", "filename": "Mathlevel", - "name": "Level of Calculus Attained for Students Taking Advanced Micro–economics ", - "number_format": 31, - "remove_quotes": true, + "name": "Level of Calculus Attained for Students Taking Advanced Micro-economics ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1997-1998 \nnumber of observations : 220 \nobservation : schools \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/MCAS.csv", "filename": "MCAS", "name": "The Massachusetts Test Score Data Set ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from McChesney and Nichols (2010) on domestic and international knowledge in Denmark, Finland, the UK and the US among college graduates, people with some college, and roughly 12th grade only. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/politicalKnowledge.csv", "filename": "politicalKnowledge", "name": " Political knowledge in the US and Europe ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a panel of 48 observations from 1970 to 1986 \nnumber of observations : 5225 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/RetSchool.csv", "filename": "RetSchool", "name": "Return to Schooling ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1976 \nnumber of observations : 3010 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Schooling.csv", "filename": "Schooling", "name": "Wages and Schooling ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1985-89 \nnumber of observations : 5748 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Star.csv", "filename": "Star", "name": "Effects on Learning of Small Class Sizes ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - 
"comment_character": "#", - "create_index_column": false, "description": "a cross-section from 1988 \nnumber of observations : 62 \nobservation : schools \ncountry : United Kingdown ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/University.csv", "filename": "University", "name": "Provision of University Teaching and Research ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Lecture room width estimated by students in two different units. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/roomwidth.csv", "filename": "roomwidth", "name": " Students Estimates of Lecture Room Width ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a sociological study, the number of days absent from school is the response variable. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/schooldays.csv", "filename": "schooldays", "name": " Days not Spent at School ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Students were administered two parallel forms of a test after a random assignment to three different treatments. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/students.csv", "filename": "students", "name": " Student Risk Taking ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Statistics for a large number of US Colleges from the 1995 issue of US News and World Report. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/College.csv", "filename": "College", "name": "U.S. News and World Report's College Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A simulated data set containing information on ten thousand customers. The aim here is to predict which customers will default on their credit card debt. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/ISLR/Credit.csv", "filename": "Credit", "name": "Credit Card Balance Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "University lecture evaluations by students at ETH Zurich, anonymized for privacy protection. This is an interesting “medium” sized example of a partially nested mixed effect model. ", + "description": "University lecture evaluations by students at ETH Zurich, anonymized for privacy protection. This is an interesting \u201cmedium\u201d sized example of a partially nested mixed effect model. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lme4/InstEval.csv", "filename": "InstEval", "name": "University Lecture/Instructor Evaluations by Students at ETH", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Minnesota high school graduates of 1938 were classified according to four factors, described below. The minn38 data frame has 168 rows and 5 columns. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/minn38.csv", "filename": "minn38", "name": " Minnesota High School Graduates of 1938 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Snijders and Bosker (1999) use as a running example a study of 2287 eighth-grade pupils (aged about 11) in 132 classes in 131 schools in the Netherlands. Only the variables used in our examples are supplied. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/nlschools.csv", "filename": "nlschools", "name": " Eighth-Grade Pupils in the Netherlands ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The subjective assessment, on a 0 to 20 integer scale, of 54 classical painters. The painters were assessed on four characteristics: composition, drawing, colour and expression. The data is due to the Eighteenth century art critic, de Piles. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/painters.csv", "filename": "painters", "name": " The Painter's Data of de Piles ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The quine data frame has 146 rows and 5 columns. Children from Walgett, New South Wales, Australia, were classified by Culture, Age, Sex and Learner status and the number of days absent from school in a particular school year was recorded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/quine.csv", "filename": "quine", "name": " Absenteeism from School in Rural New South Wales ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data frame contains the responses of 237 Statistics I students at the University of Adelaide to a number of questions. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/survey.csv", "filename": "survey", "name": " Student Survey Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The synth.tr data frame has 250 rows and 3 columns. The synth.te data frame has 100 rows and 3 columns. It is intended that synth.tr be used from training and synth.te for testing. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/synth.tr.csv", "filename": "synth_tr", "name": " Synthetic Classification Problem ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The original data source is the Education Longitudinal Study of 2002. To deal with the issue on individually identifiable information, we generated hypothetical student-level data using a multiple imputation method. The Education Longitudinal Study of 2002 used a two-stage sample selection process. First, a national sample of schools was selected using stratified probability proportional to size (PPS), and school contacting resulted in 1,221 eligible public, Catholic, and other private schools from a population of approximately 27,000 schools containing 10th grade students. Of the eligible schools, 752 participated in the study. In the second stage of sample selection, a sample of approximately 26 sophomores, from within each of the participating public and private schools was selected. Each school was asked to provide a list of 10th grade students, and quality assurance (QA) checks were performed on each list that was received.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mediation/school.csv", "filename": "school", "name": "School-level data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The original data source is the Education Longitudinal Study of 2002. To deal with the issue on individually identifiable information, we generated hypothetical student-level data using a multiple imputation method. 
The Education Longitudinal Study of 2002 used a two-stage sample selection process. First, a national sample of schools was selected using stratified probability proportional to size (PPS), and school contacting resulted in 1,221 eligible public, Catholic, and other private schools from a population of approximately 27,000 schools containing 10th grade students. Of the eligible schools, 752 participated in the study. In the second stage of sample selection, a sample of approximately 26 sophomores, from within each of the participating public and private schools was selected. Each school was asked to provide a list of 10th grade students, and quality assurance (QA) checks were performed on each list that was received.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mediation/student.csv", "filename": "student", "name": "Hypothetical student-level data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "SAT data assembled for a statistics education journal article on the link between SAT scores and measures of educational expenditures ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/SAT.csv", "filename": "SAT", "name": "State by State SAT data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A panel of 545 observations from 1980 to 1987 \ntotal number of observations : 4360 \nobservation : individuals \ncountry : United States ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/plm/Males.csv", "filename": "Males_", "name": "Wages and Education of Young Males", - 
"number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Ordinal ratings (faculty evaluations) of applicants to a Political Science PhD Program.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/admit.csv", "filename": "admit", "name": "Applications to a Political Science PhD Program", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A sample of 915 biochemistry graduate students. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/pscl/bioChemists.csv", "filename": "bioChemists", "name": "article production by graduate students in biochemistry Ph.D. programs", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "16 multiple choice ability items 1525 subjects taken from the Synthetic Aperture Personality Assessment (SAPA) web based personality assessment project are saved as iqitems. Those data are shown as examples of how to score multiple choice tests and analyses of response alternatives. When scored correct or incorrect, the data are useful for demonstrations of tetrachoric based factor analysis irt.fa and finding tetrachoric correlations. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/ability.csv", "filename": "ability", "name": "16 ability items scored as correct or incorrect.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Francis Galton introduced the 'co-relation' in 1888 with a paper discussing how to measure the relationship between two variables. His primary example was the relationship between height and forearm length. The data table (cubits) is taken from Galton (1888). Unfortunately, there seem to be some errors in the original data table in that the marginal totals do not match the table. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/psych/cubits.csv", "filename": "cubits", "name": "Galton's example of the relationship between height and 'cubit' or forearm length", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Education Expenditure Data, from Chatterjee and Price (1977, p.108). This data set, representing the education expenditure variables in the 50 US states, providing an interesting example of heteroscedacity. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/education.csv", "filename": "education", "name": "Education Expenditure Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Per capita expenditure on public schools and per capita income by state in 1979. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/sandwich/PublicSchools.csv", "filename": "PublicSchools", "name": "US Expenditures for Public Schools", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data are from the SAS manual and consist of six mental tests for 32 students, with some missing data. The three x variables are intended to load on a verbal factor, and the three y variables on a math factor. The data can be used to illustrate the estimation of a confirmatory factor analysis model by multinormal full-information maximum-likelihood in the presence of missing data. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/sem/Tests.csv", "filename": "Tests", "name": " Six Mental Tests ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Backpack weights for a sample of college students ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Backpack.csv", "filename": "Backpack", "name": "Weights of College Student Backpacks", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from a first day class survey in an introductory statistics course \nAn instructor at a small liberal arts college distributed a data survey on the first day of class. The data for two different sections of the course are given in this dataset. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Day1Survey.csv", "filename": "Day1Survey", "name": "First Day Survey of Statistics Students", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Predicting first-year college GPA .\nThe data in FirstYearGPA contains information from a sample of 219 first year students at a midwestern college that might be used to build a model to predict their first year GPA. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FirstYearGPA.csv", "filename": "FirstYearGPA", "name": "First Year GPA for College Students", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Semester enrollments in mathematics courses \nTotal enrollments in mathematics courses at a small liberal arts college were obtained for each semester from Fall 2001 to Spring 2012. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MathEnrollment.csv", "filename": "MathEnrollment", "name": "Enrollments in Math Courses", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Results from a Math Placement exam at a liberal arts college \nScores and course results for students taking a math placement exam at a college. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MathPlacement.csv", "filename": "MathPlacement", "name": "Math Placement Exam Results", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Medical school admission status and information on GPA and standardized test scores \nThis dataset has information gathered on 55 medical school applicants from a liberal arts college in the Midwest. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/MedGPA.csv", "filename": "MedGPA", "name": "GPA and Medical School Admission", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Survey of political activity for Grinnell College students \nStudents Jennifer Wolfson and Meredith Goulet conducted a survey in the spring of 1992 of Grinnell College students to ascertain patterns of political behavior. They took a simple random sample of 60 students who were U.S. citizens and conducted phone interviews. Using several \"call backs\" they obtained 59 responses. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Political.csv", "filename": "Political", "name": "Political Behavior of College Students", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on firefighter promotion exams as part of the Ricci v. DeStafano court case \nThe city of New Haven, Connecticut administered exams (both written and oral) in November and December of 2003 to firefighters hoping to qualify for promotion to either Lieutenant or Captain in the city fire department. A final score consisting of a 60% weight for the written exam and a 40% weight for the oral exam was computed for each person who took the exam. For each person who took the exams, there are measurements on their race (black, white, or Hispanic), which position they were trying for (Lieutenant, Captain), scores on the oral and written exams, and the combined score. 
These data were used as part of a court case (Ricci v. DeStefano) dealing with racial discrimination ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Ricci.csv", "filename": "Ricci", "name": "Firefighter Promotion Exam Scores", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A sample of SAT scores and grade point averages for statistics students \nIn recent years many colleges have re-examined the traditional role the scores on the Scholastic Aptitude Tests (SAT's) play in making decisions on which students to admit. Do SAT scores really help predict success in college? To investigate this question a group of 24 introductory statistics students supplied the data in this dataset showing their score on the Verbal and Math portions of the SAT as well as their current grade point average (GPA) on a 0.0-4.0 scale. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/SATGPA.csv", "filename": "SATGPA", "name": "SAT Scores and GPA", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Attitudes towards the Swahili language among Kenyan school children \nHamisi Babusa, a Kenyan scholar, administered a survey to 480 students from Pwani and Nairobi provinces about their attitudes towards the Swahili language. In addition, the students took an exam on Swahili. From each province, the students were from 6 schools (3 girls schools and 3 boys schools) with 40 students sampled at each school, so half of the students from each province were males and the other half females. 
The survey instrument contained 40 statements about attitudes towards Swahili and students rated their level of agreement to each. Of these questions, 30 were positive questions and the remaining 10 were negative questions. On an individual question the most positive response would be assigned a value of 5 while the most negative response would be assigned a value of 1. By summing (adding) the responses to each question, we can find an overall Attitude Score for each student. The highest possible score would be 200 (an individual who gave the most positive possible response to every question). The lowest possible score would be 40 (an individual who gave the most negative response to every question). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Swahili.csv", "filename": "Swahili", "name": "Attitudes Towards Swahili in Kenyan Schools", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Percentage of different types of words recalled \nOne hundred words were presented to each subject in a randomized order. The goal of the experiment was to see whether some kinds of words were easier to remember than others. In particular, are common words like potato, love, diet, and magazine easier to remember than less common words like manatee, hangnail, fillip, and apostasy? Are concrete words like coffee, dog, kale, and tambourine easier than abstract words like beauty, sympathy, fauna, and guile? There were 25 words each of four kinds, obtained by crossing the two factors of interest, Abstraction (concrete or abstract) and Frequency (common or rare). \nThis dataset appears in the first edition, but is not used in the second edition. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/WordMemory.csv", "filename": "WordMemory", "name": "Experiment on Word Memory", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains six sociomatrices of simulated data on friendship ties among schoolchildren.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Zelig/friendship.csv", "filename": "friendship", "name": "Simulated Example of Schoolchildren Friendship Network", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Education" } ] }, { "name": "Physics", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Between May 1934 and July 1935, the National Bureau of Standards in Washington D.C. conducted a series of experiments to estimate the acceleration due to gravity, g, at Washington. Each experiment produced a number of replicate estimates of g using the same methodology. Although the basic method remained the same for all experiments, that of the reversible pendulum, there were changes in configuration. \nThe gravity data frame contains the data from all eight experiments. The grav data frame contains the data from the experiments 7 and 8. The data are expressed as deviations from 980.000 in centimetres per second squared. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/grav.csv", "filename": "grav", "name": " Acceleration Due to Gravity ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Between May 1934 and July 1935, the National Bureau of Standards in Washington D.C. conducted a series of experiments to estimate the acceleration due to gravity, g, at Washington. Each experiment produced a number of replicate estimates of g using the same methodology. Although the basic method remained the same for all experiments, that of the reversible pendulum, there were changes in configuration. \nThe gravity data frame contains the data from all eight experiments. The grav data frame contains the data from the experiments 7 and 8. The data are expressed as deviations from 980.000 in centimetres per second squared. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/gravity.csv", "filename": "gravity", "name": "Acceleration Due to Gravity ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The toycars data frame has 27 rows and 3 columns. Observations are on the distance traveled by one of three different toy cars on a smooth surface, starting from rest at the top of a 16 inch long ramp tilted at varying angles. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/toycars.csv", "filename": "toycars", "name": "Toy Cars Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The original data are from Draper and Smith (1966) and were used to determine the influence of anatomical factors on wood specific gravity, with five explanatory variables and an intercept. These data were contaminated by replacing a few observations with outliers. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/wood.csv", "filename": "wood", "name": "Modified Data on Wood Specific Gravity", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Experiment with a ball swirling thorough a funnel \nData from a class experiment to see where a steel ball was rolled through a plastic tube into a long plastic funnel. The angle of the funnel and the angle of the tube with respect to the flat table could be adjusted by changing the height of either (Funnel measured from the table, Tube measured from the top of the funnel). The ball rolls down the tube, then swirls around the funnel until dropping out at the bottom. Total trip time was measured with a stopwatch. Heights were adjusted after every two drops in a randomized order. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/FunnelDrop.csv", "filename": "FunnelDrop", "name": "Funnel Drop Times", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Mechanics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "PET film is used in electrical insulation. In this accelerated life test the failure times for 44 samples in gas insulated transformers. 4 different voltage levels were used. ", "description_url": "http://vincentarelbundock.github.io/Rdatasets/doc/boot/hirose.html", "url": "https://vincentarelbundock.github.io/Rdatasets/csv/boot/hirose.csv", "filename": "hirose", "name": " Failure Time of PET Film ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Voltage drop over time as a capacitor discharges \nA capacitor was charged with a nine-volt battery and then a voltmeter recorded the voltage as the capacitor was discharged. Measurements were taken every 0.02 seconds. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Volts.csv", "filename": "Volts", "name": "Voltage Drop for a Discharging Capacitor", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Electronics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The tau particle is a heavy electron-like particle discovered in the 1970's by Martin Perl at the Stanford Linear Accelerator Center. 
Soon after its production the tau particle decays into various collections of more stable particles. About 86% of the time the decay involves just one charged particle. This rate has been measured independently 13 times. \nThe one-charged-particle event is made up of four major modes of decay as well as a collection of other events. The four main types of decay are denoted rho, pi, e and mu. These rates have been measured independently 6, 7, 14 and 19 times respectively. Due to physical constraints each experiment can only estimate the composite one-charged-particle decay rate or the rate of one of the major modes of decay. \nEach experiment consists of a major research project involving many years' work. One of the goals of the experiments was to estimate the rate of decay due to events other than the four main modes of decay. These are uncertain events and so cannot themselves be observed directly. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/boot/tau.csv", "filename": "tau", "name": " Tau Particle Decay Modes ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A classical data of Michelson (but not this one with Morley) on measurements done in 1879 on the speed of light. The data consists of five experiments, each consisting of 20 consecutive ‘runs’. The response is the speed of light measurement, suitably coded (km/sec, with 299000 subtracted). ", + "description": "A classical data set of Michelson (but not this one with Morley) on measurements done in 1879 on the speed of light. The data consists of five experiments, each consisting of 20 consecutive \"runs\". The response is the speed of light measurement, suitably coded (km/sec, with 299000 subtracted). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/morley.csv", "filename": "morley", "name": "Michelson Speed of Light Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Annual averages of the daily sunspot areas (in units of millionths of a hemisphere) for the full sun. Sunspots are magnetic regions that appear as dark spots on the surface of the sun. The Royal Greenwich Observatory compiled daily sunspot observations from May 1874 to 1976. Later data are from the US Air Force and the US National Oceanic and Atmospheric Administration. The data have been calibrated to be consistent across the whole history of observations. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/fpp2/sunspotarea.csv", "filename": "sunspotarea", "name": "Annual average sunspot area (1875-2015)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements made between 1675 and 1972 ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/gamclass/cvalues.csv", "filename": "cvalues", "name": " Historical speed of light measurements ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data frame Michelson gives Albert Michelson's measurements of the velocity of light in air, made from June 5 to July 2, 1879, reported in Michelson (1882). 
The given values + 299,000 are Michelson's measurements in km/sec. The number of cases is 100 and the \"true\" value on this scale is 734.5. \nStigler (1977) used these data to illustrate properties of robust estimators with real, historical data. For this purpose, he divided the 100 measurements into 5 sets of 20 each. These are contained in MichelsonSets. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Michelson.csv", "filename": "Michelson", "name": " Michelson's Determinations of the Velocity of Light ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Nuclear" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 1833 J. F. W. Herschel published two papers in the Memoirs of the Royal Astronomical Society detailing his investigations of calculating the orbits of twin stars from observations of their relative position angle and angular distance. \nIn the process, he invented the scatterplot, and the use of visual smoothing to obtain a reliable curve that surpassed the accuracy of individual observations (Friendly & Denis, 2005). His data on the recordings of the twin stars γ Virginis provide an accessible example of his methods. \n", + "description": "In 1833 J. F. W. Herschel published two papers in the Memoirs of the Royal Astronomical Society detailing his investigations of calculating the orbits of twin stars from observations of their relative position angle and angular distance. \nIn the process, he invented the scatterplot, and the use of visual smoothing to obtain a reliable curve that surpassed the accuracy of individual observations (Friendly & Denis, 2005). His data on the recordings of the twin stars \u03b3 Virginis provide an accessible example of his methods. 
\n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Virginis.csv", "filename": "Virginis", - "name": " John F. W. Herschel's Data on the Orbit of the Twin Stars γ Virginis ", - "number_format": 31, - "remove_quotes": true, + "name": " John F. W. Herschel's Data on the Orbit of the Twin Stars \u03b3 Virginis ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 1833 J. F. W. Herschel published two papers in the Memoirs of the Royal Astronomical Society detailing his investigations of calculating the orbits of twin stars from observations of their relative position angle and angular distance. \nIn the process, he invented the scatterplot, and the use of visual smoothing to obtain a reliable curve that surpassed the accuracy of individual observations (Friendly & Denis, 2005). His data on the recordings of the twin stars γ Virginis provide an accessible example of his methods. ", + "description": "In 1833 J. F. W. Herschel published two papers in the Memoirs of the Royal Astronomical Society detailing his investigations of calculating the orbits of twin stars from observations of their relative position angle and angular distance. \nIn the process, he invented the scatterplot, and the use of visual smoothing to obtain a reliable curve that surpassed the accuracy of individual observations (Friendly & Denis, 2005). His data on the recordings of the twin stars \u03b3 Virginis provide an accessible example of his methods. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HistData/Virginis.interp.csv", "filename": "Virginis_interp", - "name": " John F. W. Herschel's Data on the Orbit of the Twin Stars γ Virginis ", - "number_format": 31, - "remove_quotes": true, + "name": " John F. W. 
Herschel's Data on the Orbit of the Twin Stars \u03b3 Virginis ", "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Energy output and surface temperature for Star Cluster CYG OB1. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/CYGOB1.csv", "filename": "CYGOB1", "name": " CYG OB1 Star Cluster Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on planets outside the Solar System. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/planets.csv", "filename": "planets", "name": "Exoplanets", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A numeric vector of velocities in km/sec of 82 galaxies from 6 well-separated conic sections of an unfilled survey of the Corona Borealis region. Multimodality in such surveys is evidence for voids and superclusters in the far universe. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/galaxies.csv", "filename": "galaxies", "name": " Velocities for 82 Galaxies ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements of the speed of light in air, made between 5th June and 2nd July, 1879. 
The data consists of five experiments, each consisting of 20 consecutive runs. The response is the speed of light in km/s, less 299000. The currently accepted value, on this scale of measurement, is 734.5. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/michelson.csv", "filename": "michelson", "name": " Michelson's Speed of Light Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A numeric vector giving the ‘Third Series’ of measurements of the passage time of light recorded by Newcomb in 1882. The given values divided by 1000 plus 24 give the time in millionths of a second for light to traverse a known distance. The ‘true’ value is now considered to be 33.02. ", + "description": "A numeric vector giving the \"Third Series\" of measurements of the passage time of light recorded by Newcomb in 1882. The given values divided by 1000 plus 24 give the time in millionths of a second for light to traverse a known distance. The \"true\" value is now considered to be 33.02. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/newcomb.csv", "filename": "newcomb", "name": " Newcomb's Measurements of the Passage Time of Light ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Boscovich data used to estimate the ellipticity of the earth. There are five measurements of the arc length of one degree of latitude taken at 5 different latitudes. See Koenker (2005) for further details and references. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/quantreg/Bosco.csv", "filename": "Bosco", "name": "Boscovich Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data for the Hertzsprung-Russell Diagram of the Star Cluster CYG OB1, which contains 47 stars in the direction of Cygnus, from C.Doom. The first variable is the logarithm of the effective temperature at the surface of the star (Te) and the second one is the logarithm of its light intencity (L/L_0). \nIn the Hertzsprung-Russell diagram, which is the scatterplot of these data points, where the log temperature is plotted from left to right, two groups of points are seen:\nthe majority which tend to follow a steep band and four stars in the upper corner. In the astronomy the 43 stars are said to lie on the main sequence and the four remaining stars are called “giants” (the points 11, 20, 30, 34). ", + "description": "Data for the Hertzsprung-Russell Diagram of the Star Cluster CYG OB1, which contains 47 stars in the direction of Cygnus, from C.Doom. The first variable is the logarithm of the effective temperature at the surface of the star (Te) and the second one is the logarithm of its light intensity (L/L_0). \nIn the Hertzsprung-Russell diagram, which is the scatterplot of these data points, where the log temperature is plotted from left to right, two groups of points are seen:\nthe majority which tend to follow a steep band and four stars in the upper corner. In astronomy the 43 stars are said to lie on the main sequence and the four remaining stars are called \u201cgiants\u201d (the points 11, 20, 30, 34). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/starsCYG.csv", "filename": "starsCYG", "name": "Hertzsprung-Russell Diagram Data of Star Cluster CYG OB1", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Astronomy" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Temperature of a mug of water as it cools ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/CoolingWater.csv", "filename": "CoolingWater", "name": "CoolingWater", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data were collected by engineering students at Calvin College. The apparatus consists of concentric pipes insulated from the environment so that as nearly as can be managed the only heat exchange is between the hot and cold water. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/mosaicData/HeatX.csv", "filename": "HeatX", "name": "Data from a heat exchanger laboratory", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains the measurements concerning the cloud point of a Liquid, from Draper and Smith (1969). The cloud point is a measure of the degree of crystallization in a stock. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/cloud.csv", "filename": "cloud", "name": "Cloud point of a Liquid", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "Thermodynamics" } ] }, { "name": "Chemistry", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data give the chemical composition of ancient pottery found at four sites in Great Britain. They appear in Hand, et al. (1994), and are used to illustrate MANOVA in the SAS Manual. (Suggested by Michael Friendly.) ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Pottery.csv", "filename": "Pottery", "name": "Chemical Composition of Pottery", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Soil characteristics were measured on samples from three types of contours (Top, Slope, and Depression) and at four depths (0-10cm, 10-30cm, 30-60cm, and 60-90cm). The area was divided into 4 blocks, in a randomized block design. (Suggested by Michael Friendly.) ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/carData/Soils.csv", "filename": "Soils", "name": "Soil Compositions of Physical and Chemical Characteristics", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The pluton data frame has 45 rows and 4 columns, containing percentages of isotopic composition of 45 Plutonium batches. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/cluster/pluton.csv", "filename": "pluton", "name": "Isotopic Composition Plutonium Batches", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ironslag data frame has 53 rows and 2 columns. Two methods for measuring the iron content in samples of slag were compared, a chemical and a magnetic method. The chemical method requires greater effort than the magnetic method. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/ironslag.csv", "filename": "ironslag", "name": "Iron Content Measurements", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The BOD data frame has 6 rows and 2 columns giving the biochemical oxygen demand versus time in an evaluation of water quality. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/BOD.csv", "filename": "BOD", "name": " Biochemical Oxygen Demand ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data are from a chemical experiment to prepare a standard curve for the determination of formaldehyde by the addition of chromatropic acid and concentrated sulphuric acid and the reading of the resulting purple color on a spectrophotometer. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/Formaldehyde.csv", "filename": "Formaldehyde", "name": "Determination of Formaldehyde", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Indometh data frame has 66 rows and 3 columns of data on the pharmacokinetics of indometacin (or, older spelling, ‘indomethacin’). ", + "description": "The Indometh data frame has 66 rows and 3 columns of data on the pharmacokinetics of indometacin (or, older spelling, \"indomethacin\"). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/Indometh.csv", "filename": "Indometh", "name": "Pharmacokinetics of Indomethacin", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on the relation between temperature in degrees Celsius and vapor pressure of mercury in millimeters (of mercury). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/pressure.csv", "filename": "pressure", "name": "Vapor Pressure of Mercury as a Function of Temperature", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Puromycin data frame has 23 rows and 3 columns of the reaction velocity versus substrate concentration in an enzymatic reaction involving untreated cells or cells treated with Puromycin. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/Puromycin.csv", "filename": "Puromycin", "name": "Reaction Velocity of an Enzymatic Reaction", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Theoph data frame has 132 rows and 5 columns of data from an experiment on the pharmacokinetics of theophylline.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/Theoph.csv", "filename": "Theoph", "name": "Pharmacokinetics of Theophylline", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from an experiment where the chemicals acifluorfen and diquat tested on Lemna minor. The dataset has 7 mixtures used in 8 dilutions with three replicates and 12 common controls, in total 180 observations. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/acidiq.csv", "filename": "acidiq", "name": "Acifluorfen and diquat tested on Lemna minor.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The two decontaminants 1-hexadecylpyridium chloride and oxalic acid were used. Additionally there was a control group (coded as concentration 0 and only included under oxalic acid). 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/decontaminants.csv", "filename": "decontaminants", "name": " Performance of decontaminants used in the culturing of a micro-organism ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Relative growth rate in biomass of mixed sewage microorganisms (per hour) as a function of increasing concentrations of the antibiotic erythromycin (mg/l). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/etmotc.csv", "filename": "etmotc", "name": "Effect of erythromycin on mixed sewage microorganisms", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "For each of six concentrations of an insecticide the number of insects affected (out of the number of insects) was recorded. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/finney71.csv", "filename": "finney71", "name": "Example from Finney (1971)", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset has 7 mixtures, 8 dilutions, two replicates and 5 common controls. Four observations are missing, giving a total of 113 observations. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/glymet.csv", "filename": "glymet", "name": "Glyphosate and metsulfuron-methyl tested on algae.", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Estimation of the degradation profile of an agrochemical based on soil samples at depth 0-10cm from a calibration experiment.", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/lepidium.csv", "filename": "lepidium", "name": "Dose-response profile of degradation of agrochemical using lepidium", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are from a study of the response of the cyanobacterial self-luminescent metallothionein-based whole-cell biosensor Synechococcus elongatus PCC 7942 pBG2120 to binary mixtures of 6 heavy metals (Zn, Cu, Cd, Ag, Co and Hg). ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/metals.csv", "filename": "metals", "name": " Data from heavy metal mixture experiments ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Estimation of the degradation profile of an agrochemical based on soil samples at depth 0-10cm from a calibration experiment. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/nasturtium.csv", "filename": "nasturtium", "name": "Dose-response profile of degradation of agrochemical using nasturtium", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Comparison of toxicity of four types of selenium by means of dose-response analysis ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/drc/selenium.csv", "filename": "selenium", "name": " Data from toxicology experiments with selenium ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "a cross-section \nnumber of observations : 27 \nobservation : regional \ncountry : United States \n", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Metal.csv", "filename": "Metal", "name": "Production for SIC 33 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from an experiment investigating the use of massive amounts of silver iodide (100 to 1000 grams per cloud) in cloud seeding to increase rainfall. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/clouds.csv", "filename": "clouds", "name": " Cloud Seeding Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Plasma inorganic phosphate levels from 33 subjects. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/phosphate.csv", "filename": "phosphate", "name": " Phosphate Level Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Chemical composition of Romano-British pottery. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/pottery.csv", "filename": "pottery", "name": " Romano-British Pottery Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Engine exhaust fumes from burning ethanol ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lattice/ethanol.csv", "filename": "H_ethanol", "name": " Engine exhaust fumes from burning ethanol ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Strength of a chemical paste product; its quality depending on the delivery batch, and the cask within the delivery. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/lme4/Pastes.csv", "filename": "Pastes", "name": "Paste strength by batch and cask", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A numeric vector of 24 determinations of copper in wholemeal flour, in parts per million. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/chem.csv", "filename": "chem", "name": " Copper in Wholemeal Flour ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Seven specimens were sent to 6 laboratories in 3 separate batches and each analysed for Analyte. Each analysis was duplicated. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/coop.csv", "filename": "coop", "name": " Co-operative Trial in Analytical Chemistry ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The fgl data frame has 214 rows and 10 columns. It was collected by B. German on fragments of glass collected in forensic work. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/fgl.csv", "filename": "fgl", "name": " Measurements of Forensic Glass Fragments ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The stormer viscometer measures the viscosity of a fluid by measuring the time taken for an inner cylinder in the mechanism to perform a fixed number of revolutions in response to an actuating weight. The viscometer is calibrated by measuring the time taken with varying weights while the mechanism is suspended in fluids of accurately known viscosity. The data comes from such a calibration, and theoretical considerations suggest a nonlinear relationship between time, weight and viscosity, of the form Time = (B1*Viscosity)/(Weight - B2) + E where B1 and B2 are unknown parameters to be estimated, and E is error. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/stormer.csv", "filename": "stormer", "name": " The Stormer Viscometer Data ", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The solubility of alcohols in water is important in understanding alcohol transport in living organisms. This dataset from (Romanelli et al., 2001) contains physicochemical characteristics of 44 aliphatic alcohols. The aim of the experiment was the prediction of the solubility on the basis of molecular descriptors. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/alcohol.csv", "filename": "alcohol", "name": "Alcohol Solubility in Water Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data on the Calibration of an Instrument that Measures Lactic Acid Concentration in Blood, from Afifi and Azen (1979) - comparing the true concentration X with the measured value Y. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/lactic.csv", "filename": "lactic", "name": "Lactic Acid Concentration Measurement Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Daudin et al.(1988) give 8 readings on the composition of 86 containers of milk. They speak about 85 observations, but this can be explained with the fact that observations 63 and 64 are identical (as noted by Rocke (1996)). \nThe data set was used for analysing the stability of principal component analysis by the bootstrap method. In the same context, but using high breakdown point robust PCA, these data were analysed by Todorov et al. (1994). Atkinson (1994) used these data for ilustration of the forward search algorithm for identifying of multiple outliers. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/milk.csv", "filename": "milk_", "name": "Daudin's Milk Composition Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The aim of the experiment was to predict the toxicity of carboxylic acids on the basis of several molecular descriptors. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/robustbase/toxicity.csv", "filename": "toxicity", "name": "Toxicity of Carboxylic Acids Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Experiment on the effects of oxygen on sugar metabolism by bacteria \nExperiment on the effects of oxygen on sugar metabolism by bacteria \nMany biochemical reactions are slowed or prevented by the presence of oxygen. For example, there are two simple forms of fermentation, one which converts each molecule of sugar to two molecules of lactic acid, and a second which converts each molecule of sugar to one each of lactic acid, ethanol, and carbon dioxide. This experiment was designed to compare the inhibiting effect of oxygen on the metabolism of two different sugars, glucose and galactose, by Streptococcus bacteria. In this case there were four levels of oxygen that were applied to the two kinds of sugar. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Ethanol.csv", "filename": "Ethanol", "name": "Effects of Oxygen on Sugar Metabolism", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data from an experiment on calcium binding to proteins \nSuzanne Rohrback used a novel approach in a series of experiments to examine calcium binding proteins. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/Fluorescence.csv", "filename": "Fluorescence", "name": "Measuring Calcium Binding to Proteins", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Concentrations of elements in river water samples from upstate NY \nSome geologists were interested in the water chemistry of rivers in upstate New York. They took water samples at three different locations in four rivers (Grasse, Oswegatchie, Raquette, and St. Regis). The sampling sites were chosen to investigate how the composition of the water changes as it flows from the source to the mouth of each river. The sampling sites were labeled as upstream, midstream, and downstream. This dataset contains the concentrations (parts per million) of a variety of elements in those water samples. The dataset RiverIron contains the information for iron (FE) alone, along with the log of the concentration. 
", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/RiverElements.csv", "filename": "RiverElements", "name": "Elements in River Water Samples", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Experiment on the effects of oxygen on sugar metabolism by bacteria \nMany biochemical reactions are slowed or prevented by the presence of oxygen. For example, there are two simple forms of fermentation, one which converts each molecule of sugar to two molecules of lactic acid, and a second which converts each molecule of sugar to one each of lactic acid, ethanol, and carbon dioxide. This experiment was designed to compare the inhibiting effect of oxygen on the metabolism of two different sugars, glucose and galactose, by Streptococcus bacteria. In this case there were four levels of oxygen that were applied to the two kinds of sugar. ", "url": "http://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/SugarEthanol.csv", "filename": "SugarEthanol", "name": "Effects of Oxygen on Sugar Metabolism", - "number_format": 31, - "remove_quotes": true, "separator": ",", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true } ], "name": "General" } ] } ] } diff --git a/data/datasets/Socrata.json b/data/datasets/Socrata.json index c7a5ea9d4..25fd6241d 100644 --- a/data/datasets/Socrata.json +++ b/data/datasets/Socrata.json @@ -1,1090 +1,614 @@ { "name": "Socrata", "categories": [ { "name": "Statistics", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Since 1995, the White House has been required to deliver a report to Congress listing the title and salary of every White House Office employee. 
Consistent with President Obama's commitment to transparency, this report is being publicly disclosed on our website as it is transmitted to Congress. In addition, this report also contains the title and salary details of administration officials who work at the Office of Policy Development, including the Domestic Policy Council and the National Economic Council -- along with White House Office employees.", "url": "https://opendata.socrata.com/api/views/jv7a-cjdv/rows.csv?accessType=DOWNLOAD", "filename": "rows", "name": "2012 Annual Report to Congress on White House Staff", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Since 1995, the White House has been required to deliver a report to Congress listing the title and salary of every White House Office employee. Consistent with President Obama's commitment to transparency, this report is being publicly disclosed on our website as it is transmitted to Congress. In addition, this report also contains the title and salary details of administration officials who work at the Office of Policy Development, including the Domestic Policy Council and the National Economic Council -- along with White House Office employees. 
Note: Salaries listed do not reflect salary reductions staff have taken due to furloughs and commissioned officer salary reductions.", "url": "https://opendata.socrata.com/api/views/44xn-rs2p/rows.csv?accessType=DOWNLOAD", "filename": "Report-Congress", "name": "2013 Report to Congress on White House Staff", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "All employees of the State of South Carolina that make at least $50,000 per year. Imported from the South Carolina Budget and Control Board website.", "url": "https://opendata.socrata.com/api/views/67f6-9d58/rows.csv?accessType=DOWNLOAD", "filename": "State-Employee-Salary", "name": "South Carolina State Employee Salary Database", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "An LNP analysis of Pennsylvania Liquor Control Board data found that nearly 700 townships and boroughs – about 27 percent of the state’s 2,562 local governments – ban the sale of beer and liquor altogether or are at least dry in some form.", + "description": "An LNP analysis of Pennsylvania Liquor Control Board data found that nearly 700 townships and boroughs - about 27 percent of the state's 2,562 local governments - ban the sale of beer and liquor altogether or are at least dry in some form.", "url": "https://opendata.socrata.com/api/views/vr4q-nrmd/rows.csv?accessType=DOWNLOAD", "filename": "Dry-Municipalities", "name": "Pennsylvania's Dry Municipalities", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": 
true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Six Utah counties reported more registered voters than adults in 2010.", "url": "https://opendata.socrata.com/api/views/jinf-pspj/rows.csv?accessType=DOWNLOAD", "filename": "County", "name": "County comparison", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "EMS Incidents Projections ", "url": "https://opendata.socrata.com/api/views/evnp-32vr/rows.csv?accessType=DOWNLOAD", "filename": "Incidents", "name": "EMS Incidents Projections ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "WBEZ FOIA-obtained contact cards for the 19th District of the Chicago Police Department for 2012 and 2013 year to date.", "url": "https://opendata.socrata.com/api/views/6gqi-4u4s/rows.csv?accessType=DOWNLOAD", "filename": "Contact-Cards", "name": "Chicago's 19th District Contact Cards For 2012-2013", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Index of U.S. Government Departments and Agencies", "url": "https://opendata.socrata.com/api/views/2yen-rixh/rows.csv?accessType=DOWNLOAD", "filename": "Government-Departments", "name": "Index of U.S. 
Government Departments and Agencies", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "OCCC Faculty Staff Directory", "url": "https://opendata.socrata.com/api/views/bzra-p8dk/rows.csv?accessType=DOWNLOAD", "filename": "Faculty-Staff-Directory", "name": "OCCC Faculty Staff Directory", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Since 1995, the White House has been required to deliver a report to Congress listing the title and salary of every White House Office employee. Consistent with President Obama's commitment to transparency, this report is being publicly disclosed on our website as it is transmitted to Congress. In addition, this report also contains the title and salary details of administration officials who work at the Office of Policy Development, including the Domestic Policy Council and the National Economic Council -- along with White House Office employees. 
Note: Salaries listed do not reflect salary reductions staff have taken due to furloughs and commissioned officer salary reductions.", "url": "https://opendata.socrata.com/api/views/ib7r-prw9/rows.csv?accessType=DOWNLOAD", "filename": "White-House-Staff", "name": "Name, Salary and Position of the 2013 White House Staff ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Which countries have sent how many troups to Afghanistan.", "url": "https://opendata.socrata.com/api/views/xjiz-z25n/rows.csv?accessType=DOWNLOAD", "filename": "Troops-In-Afghanistan", "name": "Troops In Afghanistan By Country", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Administration" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Pennsylvania State System of Higher Education salaries for 2013.", "url": "https://opendata.socrata.com/api/views/26jq-uk2i/rows.csv?accessType=DOWNLOAD", "filename": "Salaries_", "name": "2013 Salaries: Pennsylvania State System of Higher Education", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "SBA Portfolio Performance by Franchise Code - Data as of 09/30/2011 \nCombined 7(a) & 504 loan performance data based on loans approved between 10/01/2001 and 09/30/2011, which were designated with a franchise code* and \nsubsequently disbursed. 
This list is redacted to 10 or more disbursements per franchising brand.", "url": "https://opendata.socrata.com/api/views/5qh7-7usu/rows.csv?accessType=DOWNLOAD", "filename": "Failureby", "name": "Franchise Failureby Brand2011", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "City Of Trenton - 2017 Certified Tax List", "url": "https://opendata.socrata.com/api/views/azkq-6qqc/rows.csv?accessType=DOWNLOAD", "filename": "Certified-Tax", "name": "City Of Trenton - 2017 Certified Tax List", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "City Of Trenton - 2015 Certified Tax List", "url": "https://opendata.socrata.com/api/views/f3qm-jqfs/rows.csv?accessType=DOWNLOAD", "filename": "Trenton-Tax", "name": "City Of Trenton - 2015 Certified Tax List", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data show salaries for public school teachers, administrators and other officials in Lancaster County for the 2012-2013 academic year. 
The data are published by the Pennsylvania Department of Education.", "url": "https://opendata.socrata.com/api/views/mpsw-g7js/rows.csv?accessType=DOWNLOAD", "filename": "School-Salaries", "name": "Lancaster County School Salaries ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "NSA Water and money", "url": "https://opendata.socrata.com/api/views/3rks-twmf/rows.csv?accessType=DOWNLOAD", "filename": "Water-money", "name": "NSA Water Jan 2015 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An export of all the employees of local governments in the Midlands area of South Carolina that make at least $50,000 per year. 
Scraped from the database made available thanks to The State newspaper.\n\nThe government agencies included:\n\nCounties: Kershaw, Lexington, Richland\n\nLibraries: Richland County\n\nMunicipalities: Batesburg-Leesville, Blythewood, Camden, Cayce, Columbia, Forest Acres, Irmo, Lexington, West Columbia\n\nSchool Districts: Kershaw, Lexington 1, Lexington 2, Lexington 3, Lexington 4, Lexington-Richland 5, Richland 1, Richland 2", "url": "https://opendata.socrata.com/api/views/weqv-6vsn/rows.csv?accessType=DOWNLOAD", "filename": "Salary-Database", "name": "South Carolina Midlands Employee Salary Database", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Personal financial disclosure statements for nearly 225 Texas elected officials, including Gov. Rick Perry, the Texas House and Senate, Supreme Court, Railroad Commission and State Board of Education. 
The financial statements cover calendar year 2012 and are being made available in this database because the Texas Legislature has snuffed out efforts to make them available online for the public to easily view.", "url": "https://opendata.socrata.com/api/views/k2dw-gwje/rows.csv?accessType=DOWNLOAD", "filename": "Financial-Statements", "name": "Texas Lawmaker Personal Financial Statements For CY 2012-2", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Car Sales from California and across the United States, 1996-Present.", "url": "https://opendata.socrata.com/api/views/da8m-smts/rows.csv?accessType=DOWNLOAD", "filename": "Car-Sales", "name": "Car Sales Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Asset-based lending deals", "url": "https://opendata.socrata.com/api/views/8it9-ht43/rows.csv?accessType=DOWNLOAD", "filename": "lending-deals", "name": "Deal Table Live", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "All the Target store locations in the US", "url": "https://opendata.socrata.com/api/views/4mte-zfws/rows.csv?accessType=DOWNLOAD", "filename": "Target", "name": "Target Stores in USA", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - 
"DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Asset-based lending deals", "url": "https://opendata.socrata.com/api/views/j4w7-jnxu/rows.csv?accessType=DOWNLOAD", "filename": "TLS", "name": "TSL-NewSiteDeals", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Utah - Statewide Occupational Wages Hourly Wage Information by Occupational Title Published May, 2008 Below is a summary of the wage estimates for the state of Utah, which are based on data gathered in May of 2007. Occupations are listed alphabetically along with the occupation code, which is a unique number assigned to each occupation. Wages for occupations in education are at the end of the wage table. Wages are presented as: the Inexperienced Wage (the average of the bottom-third of all wages collected), Average Wage, Median Wage (which is the wage of the middle worker in the survey where half of the workers earned wages below the median and half above the median), and the Middle Range (which represents the wages of the middle fifty percent of workers in each occupation). Questions may be addressed to Mark Knold (801) 526-9458, Chief Economist for the Utah Department of Workforce Services.", "url": "https://opendata.socrata.com/api/views/us9r-qb2p/rows.csv?accessType=DOWNLOAD", "filename": "Average-Wage", "name": "Utah Average Wage For Hourly Workers", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset provides summarized expenditure data for State of Iowa. 
The state fiscal year runs from July 1 to the following June 30 and is numbered for the calendar year in which it ends. The State of Iowa operates on a modified accrual basis which provides that encumbrances on June 30 must be paid within 60 days after year end. The expenditures are summarized by Fiscal Year, Month, Fund, Appropriation, Department, Unit, and Object Class.", "url": "https://opendata.socrata.com/api/views/dqqm-s2r5/rows.csv?accessType=DOWNLOAD", "filename": "Iowa-Expenditures", "name": "State of Iowa Expenditures", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An export of all Starbucks locations in the world. This dataset was scraped from the Starbucks website on Thursday, November 8th, 2013.", "url": "https://opendata.socrata.com/api/views/nt5z-pju4/rows.csv?accessType=DOWNLOAD", "filename": "Starbucks-Locations", "name": "All Starbucks Locations in the World - Heat Map", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Volkswagen US TDI Production Data for Non-Passat Common Rail TDIs through 11-30-2012 \nCollected from the NHTSA Defect Investigation EA11003 http://www-odi.nhtsa.dot.gov/cars/problems/defect/results.cfm?action_number=EA11003&SearchType=QuickSearch&summary=true", "url": "https://opendata.socrata.com/api/views/f2gv-p6q4/rows.csv?accessType=DOWNLOAD", "filename": "vijay", "name": "Volkswagen US TDI Production Data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, 
{ - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An export of all the employees of the City of Columbia, South Carolina, that make at least $50,000 per year. Scraped from the database made available thanks to The State newspaper.", "url": "https://opendata.socrata.com/api/views/86mz-659b/rows.csv?accessType=DOWNLOAD", "filename": "Employee-Salaries", "name": "City of Columbia Employee Salaries ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Michigan Oil and Gas Wells", "url": "https://opendata.socrata.com/api/views/ixt6-qcyf/rows.csv?accessType=DOWNLOAD", "filename": "Gas-Wells", "name": "MIOil And Gas Wells", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "NSA Water and money", "url": "https://opendata.socrata.com/api/views/k72b-hbra/rows.csv?accessType=DOWNLOAD", "filename": "NSAWaterBill", "name": "NSAWaterBill ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Links to sites that offer discounts to government employees.", "url": "https://opendata.socrata.com/api/views/3592-x988/rows.csv?accessType=DOWNLOAD", "filename": "Govenment-Employee-Discounts", "name": "Govenment Employee Discounts", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": 
true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Asset-based lending deals", "url": "https://opendata.socrata.com/api/views/cbha-gzag/rows.csv?accessType=DOWNLOAD", "filename": "Deal-Table-Full", "name": "Deal Table Full ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Economics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A list of the Guardian's \"Top 1,000 Songs to Hear Before You Die\"", "url": "https://opendata.socrata.com/api/views/ed74-c6ni/rows.csv?accessType=DOWNLOAD", "filename": "Top-1000", "name": "Top 1,000 Songs To Hear Before You Die", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "An export of all Starbucks locations in the world. This dataset was scraped from the Starbucks website. 
\n\nUnfortunately it is no longer updated, the API I was using has been discontinued.", "url": "https://opendata.socrata.com/api/views/xy4y-c4mk/rows.csv?accessType=DOWNLOAD", "filename": "All-Starbucks", "name": "All Starbucks Locations in the World", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Should polygamy be decriminalized in Utah?", "url": "https://opendata.socrata.com/api/views/yk6u-pg2n/rows.csv?accessType=DOWNLOAD", "filename": "TotalMaleFemale", "name": "Should polygamy be decriminalized in Utah?", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Should polygamy be decriminalized in Utah?", "url": "https://opendata.socrata.com/api/views/kxif-we5g/rows.csv?accessType=DOWNLOAD", "filename": "Agebreakdown", "name": "Should polygamy be decriminalized in Utah?", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "U.S. Dept. 
of Transportation", "url": "https://opendata.socrata.com/api/views/y3ea-t8vj/rows.csv?accessType=DOWNLOAD", "filename": "Bridges", "name": "Washington's structurally deficient bridges", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Sample - Superstore Subset ( Excel)", "url": "https://opendata.socrata.com/api/views/2dgv-cxpb/rows.csv?accessType=DOWNLOAD", "filename": "Superstore-Subset", "name": "Sample - Superstore Subset ( Excel)", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The Juvenile Justice Information Exchange has compiled five years of federal grants from the Office of Juvenile Justice and Delinquency Prevention. Search and filter the individual formula, block, and project awards awarded by the OJJDP over the last five years. \n\n(Note about the database: Because some data on specific grants was not made publicly available by OJJDP, the total amount of the grants in this database is about $113 million less than what OJJDP has reported. 
For example, the OJJDP website recorded giving $88 million in earmarks and $921,000 in recovery funds in 2010, but did not include the specific amounts given to each recipient of these grants).", "url": "https://opendata.socrata.com/api/views/43x8-khei/rows.csv?accessType=DOWNLOAD", "filename": "OJJDP-Awards", "name": "OJJDP Grant Awards 2009-2013", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset includes the most significant product actions over the last five years based on the extent of distribution and the degree of health risk. In this section, you will find a listing of FDA and industry press releases regarding the product recalls. \nThe recalls on the list are mainly Class I. A record of all recalls (Class I, II, and III) can be found in the FDA Enforcement Report10. 
See also Definitions of Class I, II, and III recalls11.", "url": "https://opendata.socrata.com/api/views/nfrv-axb5/rows.csv?accessType=DOWNLOAD", "filename": "Name", "name": "Name ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Location of graves in Hessenford Churchyard", "url": "https://opendata.socrata.com/api/views/7nsh-qace/rows.csv?accessType=DOWNLOAD", "filename": "Hessenford-Graveyard", "name": "Hessenford Graveyard", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A list of the Guardian's \"Top 1,000 Songs to Hear Before You Die\"", "url": "https://opendata.socrata.com/api/views/479k-3ibx/rows.csv?accessType=DOWNLOAD", "filename": "LoveSongs", "name": "LoveSongs", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Other" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Full history of airplane crashes throughout the world, from 1908-present.", "url": "https://opendata.socrata.com/api/views/q2te-8cvq/rows.csv?accessType=DOWNLOAD", "filename": "Airplane-Crashes", "name": "Airplane Crashes and Fatalities Since 1908", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Travel" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - 
"create_index_column": false, "description": "Births in Hildale, Utah, and Colorado City, Ariz., have declined over a decade, according to data from the respective state health departments.", "url": "https://opendata.socrata.com/api/views/k3x5-s922/rows.csv?accessType=DOWNLOAD", "filename": "Short-Creek", "name": "Short Creek births 2005-2013 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "2012 Workplace Fatalities by State", "url": "https://opendata.socrata.com/api/views/vcx3-xxtb/rows.csv?accessType=DOWNLOAD", "filename": "Workplace-Fatalities", "name": "2012 Workplace Fatalities by State", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Hours-based workplace fatality rates.", "url": "https://opendata.socrata.com/api/views/dq7n-btuf/rows.csv?accessType=DOWNLOAD", "filename": "Fatal-Work", "name": "Rate of Fatal Work Injuries per 100,000 Workers, 2006-2012 ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Latino Worker Fatalities, 1995-2012 ", "url": "https://opendata.socrata.com/api/views/sqfx-68bm/rows.csv?accessType=DOWNLOAD", "filename": "Latino-Work-Fat", "name": "Latino Worker Fatalities", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - 
"DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Population totals across towns and counties in Maine.", "url": "https://opendata.socrata.com/api/views/ndfi-nmyu/rows.csv?accessType=DOWNLOAD", "filename": "Maine-Population-By-Town", "name": "Maine Population By Town", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Estimated populations of cities for each year since the 2000 census", "url": "https://opendata.socrata.com/api/views/b6q7-qg3s/rows.csv?accessType=DOWNLOAD", "filename": "City-Populations", "name": "City Populations", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Demographics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The World Health Organization (WHO)'s breakdown of per capita alcohol consumption among adults over 15.", "url": "https://opendata.socrata.com/api/views/hj43-2bpj/rows.csv?accessType=DOWNLOAD", "filename": "Alcohol-Cons", "name": "Alcohol Consumption Per Country", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Population" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A table of data from Sandusky area schools, listing scores for 2012-2013 state report cards.", "url": "https://opendata.socrata.com/api/views/7w3m-4vn3/rows.csv?accessType=DOWNLOAD", "filename": "Report-Card", "name": "Local District Report Card Data Chart ", - 
"number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data is a 'mashup' of OSSE's DC CAS: http://osse.dc.gov/release/mayor-vincent-c-gray-announces-2012-dc-cas-results, 'Scrape' of DCPS for Title 1: http://dcps.dc.gov/portal/site/DCPS/menuitem.06de50edb2b17a932c69621014f62010/?vgnextoid=253bcdb2eaf52210VgnVCM100000416f0201RCRD&vgnextchannel=464afa5c266d1210VgnVCM100000b912010aRCRD, and a compilation of Librarian headcount for DCPS schools. It has not been verified by OSSE or DCPS officials for accuracy.", "url": "https://opendata.socrata.com/api/views/cugb-wvbg/rows.csv?accessType=DOWNLOAD", "filename": "Overall-Proficiency", "name": "By Overall Proficiency", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "DCPS with < 1% improvement", "url": "https://opendata.socrata.com/api/views/iyh7-tkme/rows.csv?accessType=DOWNLOAD", "filename": "DCPS-improvement", "name": "DCPS with < 1% improvement", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "DCPS by 2012 Reading ", "url": "https://opendata.socrata.com/api/views/a5ax-qw2c/rows.csv?accessType=DOWNLOAD", "filename": "DCPS-Reading", "name": "DCPS by 2012 Reading ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - 
"DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "DCPS by Proficienc", "url": "https://opendata.socrata.com/api/views/cvsh-34pj/rows.csv?accessType=DOWNLOAD", "filename": "DCPS-Proficienc", "name": " DCPS by Proficienc", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Staff and Faculty list for Oregon Coast Community College", "url": "https://opendata.socrata.com/api/views/h4zk-vnka/rows.csv?accessType=DOWNLOAD", "filename": "Faculty-Staff-List", "name": "Faculty Staff List", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "2014 Pennsylvania Public School Profile Scores ", "url": "https://opendata.socrata.com/api/views/69zb-px69/rows.csv?accessType=DOWNLOAD", "filename": "2014-Pennsylvania-Public-School-Profile-Scores", "name": "2014 Pennsylvania Public School Profile Scores ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Avg math and reading in 2012 with and without librarians", "url": "https://opendata.socrata.com/api/views/3zkh-39md/rows.csv?accessType=DOWNLOAD", "filename": "Avg-math-and-reading-in-2012-with-and-without-librarians", "name": "Avg math and reading in 2012 with and without librarians", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": 
true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Break out of how DCPS students spend their days, per DCPS Calendar http://dcps.dc.gov/DCPS/calendars + scheduled Paced Interim Assessment (PAIS) and DC CAS hours. Note, Lunch/Recess is counted for all days (instructional 145 + testing 37) and DCPS Budget Development Guide http://dcps.dc.gov/DCPS/Files/downloads/ABOUT%20DCPS/Budget%20-%20Finance/FINAL_DCPS%20Budget%20Development%20Guide_032813.pdf.", "url": "https://opendata.socrata.com/api/views/r2yv-d4i3/rows.csv?accessType=DOWNLOAD", "filename": "Percentage-of-time-DCPS-students-spend-by-activity", "name": "Percentage of time DCPS students spend by activity", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Education" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "US House of Representatives--Winners Raw Vote 2012", "url": "https://opendata.socrata.com/api/views/cgpm-2jt7/rows.csv?accessType=DOWNLOAD", "filename": "House-of-Representative", "name": "US House of Representatives--Winners Raw Vote 2012", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "9th Presidential Election in Iran (First Round)", "url": "https://opendata.socrata.com/api/views/f2me-8989/rows.csv?accessType=DOWNLOAD", "filename": "9th-Presidential-Election-in-Iran", "name": "9th Presidential Election in Iran (First Round)", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, 
"use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "8th Presidential Election in Iran ", "url": "https://opendata.socrata.com/api/views/b3wr-24m8/rows.csv?accessType=DOWNLOAD", "filename": "8th-Presidential-Election-in-Iran", "name": "8th Presidential Election in Iran ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "5th Presidential Election in Iran", "url": "https://opendata.socrata.com/api/views/aa6m-g5ha/rows.csv?accessType=DOWNLOAD", "filename": "5th-Presidential-Election-in-Iran", "name": "5th Presidential Election in Iran", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Politics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This is a database of EPA Enforcement and Compliance History Online facility violation records, as of July 10, 2014. 
It will be updated quarterly.", "url": "https://opendata.socrata.com/api/views/r65z-8y77/rows.csv?accessType=DOWNLOAD", "filename": "Mini-EPA-Violators", "name": "Mini-EPA Violators Database", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "BYUpolicegraphic ", "url": "https://opendata.socrata.com/api/views/g8fw-hj8p/rows.csv?accessType=DOWNLOAD", "filename": "BYUpolicegraphic", "name": "BYUpolicegraphic ", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Traffic violations from the State of New Jersey. \nThese were all scraped for analysis from the now defunct peopleviolations.com. 
\nThe fields are: \nRow ID \nViolator First Name \nViolator Last Name \nViolator's Home City \nOffense Committed \nCity Offense Committed In \nUnix Timestamp when record was added to scraped database \nUnique hash representing this record \nThe first 3 letters of the violator's last name that were used for display \nThe Source URL this record was scraped from", "url": "https://opendata.socrata.com/api/views/7f6e-xkd7/rows.csv?accessType=DOWNLOAD", "filename": "New-Jersey-Traffic-Violations", "name": "New Jersey Traffic Violations - Most Popular Violator Home Cities", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Crime" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Almost 2 million (1,840,647) user profiles extracted from the Last.fm API around Christmas, 2012.", "url": "https://opendata.socrata.com/api/views/5vvd-truf/rows.csv?accessType=DOWNLOAD", "filename": "LastFM-Users", "name": "Two Million LastFM User Profiles", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Technology" } ] }, { "name": "Medicine", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This represents the EPA's 2002 national-scale assessment estimates across the United States plus Puerto Rico, the Virgin Islands, and the District of Columbia using 2002 national air toxics emission inventory as input to an air dispersion model (ASPEN model), and an inhalation exposure model (HAPEM5). 
Exposure modeling is an important step in this assessment because it takes into account that people move from one location to another, (e.g., from outside environments to inside environments). Originally from http://www.epa.gov/ttn/atw/nata2002/tables.html. All risks are per million residents.", "url": "https://opendata.socrata.com/api/views/he9m-h7tf/rows.csv?accessType=DOWNLOAD", "filename": "EPA-Toxic-Air-Pollutant-Cancer-Risk-by-County", "name": "EPA Toxic Air Pollutant Cancer Risk by County", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "name": "Oncology" } ] } ] } diff --git a/data/datasets/StatLib.json b/data/datasets/StatLib.json index c9b9e1a6f..c8a0875f1 100644 --- a/data/datasets/StatLib.json +++ b/data/datasets/StatLib.json @@ -1,710 +1,415 @@ { - "name": "StatLib", + "name": "StatLib", "categories": [ { "name": "Medicine", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch14.dat contains the following 19 variables:\n\nPatient ID \nDate on study (MMDDYY)\nTreatment arm (D= daunorubicin, I= idarubicin)\nSex (M= male, F= female)\nAge (years)\nFAB classification (1 - 6)\nKarnofsky score (0 - 100) \nBaseline white blood cells (in thousands per cubic millimeter)\nBaseline platelets (in thousands per cubic millimeter)\nBaseline hemoglobin (g/dl)\nEvaluable (Y= yes, N= no)\nComplete remission (CR) (Y= yes, N= no)\nCourses of chemotherapy to CR\nDate of CR (MMDDYY)\nDate of last follow-up (MMDDYY)\nStatus at last follow-up (D= dead, A= alive)\nBone marrow transplant (Y= yes, N= no)\nDate of bone marrow transplant (MMDDYY)\nInclusion in June 30, 1988 analysis (Y= yes, N= no)", "url": "http://lib.stat.cmu.edu/datasets/csb/ch14.dat", "filename": "Leukemia-Trial", "name": "Interpretation of a Leukemia Trial Stopped Early", - 
"number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch15.dat contains the following variables:\n\n Patient ID : Integer\n \n Institution : 0 - Memorial Sloan-Kettering,\n 1 - Mayo Clinic,\n 2 - John Hopkins.\n Group : 1 - Study,\n 0 - Control.\n\n Means of Detection : 0 - Routine Cytology,\n 1 - Routine X-ray,\n 2 - Both X-ray and Cytology,\n 3 - Interval.\n\n Cell Type : 0 - Epidermoid,\n 1 - Adenocarcinoma,\n 2 - Large Cell,\n 3 - Oat Cell,\n 4 - Other.\n Stage : 4 digits, 1st digit (1,2,3) - overall stage,\n 2nd digit (1,2,3) - tumor,\n 3rd digit (0,1,2) - lymph nodes\n 4th digit (0,1) - distant metastases\n Operated : 1 - yes,\n 0 - no.\n Survival : Integer - Days from detection to last date known alive.\n Survival Category : 0 - Alive,\n 1 - Dead of lung cancer,\n 2 - Dead of other causes.\n\n Missing values - '-'.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch15.dat", "filename": "Lung-Cancer", "name": "Early Lung Cancer Detection Studies", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch16a.dat contains extent of scleral extension\n(extent to which the tumor has invaded the sclera or \"white of the eye\")\nas coded by two raters for each of 885 eyes. There is one record for each\neye; the first field of each record contains a patient identifier, the\nsecond field contains the code for scleral extension assigned by rater A,\nand the third field contains the code for scleral extension assigned by\nrater B. 
The coding scheme is:\n\n1=None or innermost layers\n2=Within sclera, but does not extend to scleral surface\n3=Extends to scleral surface\n4=Extrascleral extension without transection\n5=Extrascleral extension with presumed residual tumor in the orbit\n\nThe collaborative Ocular Melanoma Study (COMS) owns the\ncopyright to this dataset; these data are considered preliminary due\nto the ongoing nature of the COMS clinical trials.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch16a.dat", "filename": "Choroidal-Melanoma", "name": "Modeling Interrater Agreement for Pathological Features of Choroidal Melanoma", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch16b.dat contains the degree of necrosis (tissue\ndeath) data for 3 raters. The first field contains a patient identifier,\nand the second, third, and fourth fields contain the code for degree of\nnecrosis as assigned by raters A, B, and C, respectively. 
The coding\nscheme is:\n\n1=None\n2=Less than 10% of cells\n3=Greater than or equal to 10% of cells\n\n\nThe collaborative Ocular Melanoma Study (COMS) owns the\ncopyright to this dataset; these data are considered preliminary due\nto the ongoing nature of the COMS clinical trials.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch16b.dat", "filename": "Choroidal-Melanoma-2", "name": "Modeling Interrater Agreement for Pathological Features of Choroidal Melanoma", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data remains the copyright of the Harris Birthright Research Unit\nof the University of Aberdeen, UK. It may be used freely for\nnon-commercial purposes and can be freely distributed provided its\nsource is acknowledged.\n\nThe file ch18a.dat contains the following individual-specific variables:\n\nVariable Coding\nControl/patient code 0=control, 1=patient\nStudy number 1-500 for each group\nNumber of smears 1-15\nBiopsy result 0=negative, 1=positive \n 9=missing (no biopsy)\t\nNumber of days from 0-840 if biopsy done, \nlast smear to biopsy -1 if no biopsy", "url": "http://lib.stat.cmu.edu/datasets/csb/ch18a.dat", "filename": "Cervical-Cancer", "name": "Modeling the Precursors of Cervical Cancer", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "These data remains the copyright of the Harris Birthright Research Unit\nof the University of Aberdeen, UK. 
It may be used freely for\nnon-commercial purposes and can be freely distributed provided its\nsource is acknowledged.\n\nThe file ch18a.dat contains the following smear-specific variables:\n\nVariable Coding \nControl/patient code 0=control, 1=patient \nStudy number 1-500 for each group \nSmear number 1-15 \nSmear grade 0=negative, 1=positive \nInterval in days 0-3733, 0 if 1st smear \nsince last smear", "url": "http://lib.stat.cmu.edu/datasets/csb/ch18b.dat", "filename": "Cervical-Cancer\n", "name": "Modeling the Precursors of Cervical Cancer\n", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Oncology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch1b.dat is the waste site file, and contains the \nfollowing variables. There are NO missing values.\n\nx: Real, x-coordinate of location of an inactive hazardous waste\nsite containing trichloroethylene (TCE).\n\ny: Real, y-coordinate of location of an inactive hazardous waste\nsite containing trichloroethylene (TCE).\n\nsite: Integer, numerical label of waste site.\n Key: Site 1: Monarch Chemicals\n Site 2: IBM Endicott\n Site 3: Singer\n Site 4: Nesco\n Site 5: GE Auburn\n Site 6: Solvent Savers\n Site 7: Smith Corona\n Site 8: Victory Plaza\n Site 9: Hadco\n Site 10: Morse Chain\n Site 11: Groton", "url": "http://lib.stat.cmu.edu/datasets/csb/ch1b.dat", "filename": "Disease-Clusters", "name": "Spatial Pattern Analysis to Detect Rare Disease Clusters", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch17.dat contains the following 15 
variables:\n\nVariable Description\n\nOBS Observation number\nCN Center obtaining and reading the scan\nID Scan ID\nBA1 Bone area (sq cm) from centralized Reader 1\nBA2 Bone area (sq cm) from centralized Reader 2\nBA3 Bone area (sq cm) from centralized Reader 3\nBC1 Bone mineral content (gm) from centralized Reader 1\nBC2 Bone mineral content (gm) from centralized Reader 2\nBC3 Bone mineral content (gm) from centralized Reader 3\nBMD1 Bone mineral density (gm/sq cm) from centralized Reader 1\nBMD2 Bone mineral density (gm/sq cm) from centralized Reader 2\nBMD3 Bone mineral density (gm/sq cm) from centralized Reader 3\nBA Bone area (sq cm) from participating center\nBC Bone mineral content (gm) from participating center\nBMD Bone mineral density (gm/sq cm) from participating center\n", "url": "http://lib.stat.cmu.edu/datasets/csb/ch17.dat", "filename": "Bone-Mineral", "name": "Quality Control for Bone Mineral Density Scans", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch21a.dat contains the spontaneous activity and rectal\ntemperature data (416 observations of 6 variables) There are no missing values.\n\nVariable List:\n\nOBS:\t\tObservation identification number.\n\nMORPHINE:\tDose of morphine sulfate (mg/kg) injected into study mice. The \n\t\trange is 0 to 8.0.\n\nDEL9_THC:\tDose of Delta9-THC (mg/kg) injected into study mice. The \n\t\trange is from 0 to 15.0.\n\nREP:\t\tIdentification of study replication. 
The entire 5x7 factorial \n\t\tdesign was replicated.\n\nSPON_ACT:\tSpontaneous Activity as defined by the number of interruptions \n\t\tof a photocell beam in a clear plastic cage over a 10 minute \n\t\tperiod of time.\n\nTEMP_B:\t\tRectal Temperature at baseline (just prior to treatment).\n\nTEMP_60:\tRectal Temperature at 60 minutes post treatment injection.\n\n\n", "url": "http://lib.stat.cmu.edu/datasets/csb/ch21a.dat", "filename": "Drug-Interactions", "name": "Drug Interactions Between Morphine and Marijuana\n", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch21b.dat contains the tail-flick data (510 observations of \n6 variables) Missing data are encoded with a period. \n\nVariable List:\n\nOBS:\t\tObservation identification number.\n\nREP:\t\tIdentification of study. Two 5x7 factorial experiments and one \n\t\t5x5 factorial experiment are included.\n\nMORPHINE:\tDose of morphine sulfate (mg/kg) injected into study mice. The \n\t\trange is 0 to 8.0.\n\nDEL9_THC:\tDose of Delta9-THC (mg/kg) injected into study mice. The \n\t\trange is from 0 to 15.0.\n\nFLICK_C:\tControl Flick Time. The number of seconds required for the \n\t\tmouse to flick it tail from beneath a heat stimulus prior to \n\t\ttreatment.\n\nFLICK_T::\tTest Flick Time. The number of seconds required for the \n\t\tmouse to flick it tail from beneath a heat stimulus post \n\t\ttreatment. 
A 10 sec maximum latency was imposed.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch21b.dat", "filename": "Drug-Interactions-2", "name": "Drug Interactions Between Morphine and Marijuana\n", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Other" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch20.dat contains the following variables:\n\nid subject identifier\nclinical indicator for selection into clinical sample:\n 1=in clinical sample; 0=not in clinical sample\nstratum stratum membership:\n 1=high screen; 2=low screen blacks;\n 3=low screen whites\nrace subject's self-reported race:\n 1=white; 2=black\ngender subject's gender:\n 1=male; 2=female\nrparents subject's guardian status:\n 1=does not live with both natural parents;\n 0=lives with both natural parents\ncesdtot subject's total center for epidemiologic studies depression\n scale score (range 0-60)\ncohtot subject's total cohesion score, based on faces-ii\n (range 16-80)\nmdd clinical diagnosis of major depression:\n 1=positive diagnosis; 0=negative diagnosis\n 9=missing for subjects not in clinical sample\nweight sampling weights used in logistic regression; defined as\n number of subjects in screening sample in each stratum", "url": "http://lib.stat.cmu.edu/datasets/csb/ch20.dat", "filename": "Adolescent-Depression", "name": "Two-Stage Sampling Designs for Adolescent Depression Studies", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Psychology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 
1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch19a.dat", "filename": "never-smokers", "name": "never-smokers", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. 
\nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch19b.dat", "filename": "current-smokers-m", "name": "current smokers: male", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch19c.dat", "filename": "current-smokers-f", "name": "current smokers: female", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. 
\nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occurred.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch19d.dat", "filename": "former-smokers-mnc", "name": "former smokers: male, no college", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. 
\nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occurred.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch19e.dat", "filename": "former-smokers-msc", "name": "former smokers: male, some college ", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occurred.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch19f.dat", "filename": "former-smokers-fnc", "name": "former smokers: female, no college", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. 
\nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occurred.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch19g.dat", "filename": "former-smokers-fsc", "name": "former smokers: female, some college", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Smoking" } ] }, { "name": "Nature", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch3a.dat includes the validation data collected at the stationary\nambient monitoring site. The variables are:\n\n 1. Date, in MM/DD/YY format,\n DC\n 2. 12-hour average daytime continuous ozone concentration, X ,\n 1\n DP\n 3. 12-hour average daytime passive ozone concentration, X ,\n 1\n NC\n 4. 12-hour average nighttime continuous ozone concentration, X , and\n 1\n NP\n 5. 
12-hour average nighttime passive ozone concentration, X .\n 1", "url": "http://lib.stat.cmu.edu/datasets/csb/ch3a.dat", "filename": "Ozone_", "name": "Prediction Models for Personal Ozone Exposure Assessment", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch3b.dat includes the personal ozone exposure data. The \nvariables are:\n\n 1. Subject identification number, ranging from 1 to 23,\n\n 2. Date, in MM/DD/YY format,\n\n 3. Home region, ranging from 1 to 6,\n\n 4. 12-hour average daytime personal ozone concentration, Y,\n\n 5. 12-hour average daytime continuous ozone concentration at the\n DC\n stationary site, X ,\n 1\n\n 6. 12-hour average nighttime continuous ozone concentration at the\n NC\n stationary site, X ,\n 1\n O\n 7. 24-hour average home outdoor passive ozone concentration, X ,\n 1\n DI\n 8. 12-hour average home indoor daytime passive ozone concentration, X ,\n 1\n NI\n 9. 12-hour average home indoor nighttime passive ozone concentration, X ,\n 1\n\n 10. Prediction values for a 12-hour microenvironmental model based\n H\n on hourly ozone concentrations, X ,\n 2\n O\n 11. Fraction of time spent anywhere outdoors, X ,\n 3\n I\n 12. Fraction of time spent at home indoors, X , and\n 3\n\n 13. 
Indicator variable for whether the child stayed near the\n S\n home for the whole day, X , where 1 = yes, 0 = no.\n 3", "url": "http://lib.stat.cmu.edu/datasets/csb/ch3b.dat", "filename": "Ozone2", "name": "Prediction Models for Personal Ozone Exposure Assessment", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Weather" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch2.dat contains the following variables:\n\n animal - a unique identifier associated with each C. dubia tested\n conc - concentration (micro grams/L)\n brood1 - number of young produced in the first brood\n brood2 - number of young produced in the second brood\n brood3 - number of young produced in the third brood\n total - sum of young produced in the 3 broods (=brood1 + brood2 + brood3)", "url": "http://lib.stat.cmu.edu/datasets/csb/ch2.dat", "filename": "Pollutants", "name": "Assessing Toxicity of Pollutants in Aquatic Systems ", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "For a selection of months in the period 1970 to 1983, a measurement of the\nocean salinity at a depth of 100 meters off the Alaskan coast, given in parts\nper thousand. Columns are:\n\n 1. year\n 2. month\n 3. 
salinity", "url": "http://lib.stat.cmu.edu/crab/salinity", "filename": "salinity-2", "name": "ocean salinity", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "For a selection of months in the period 1970 to 1983, a measurement of the\nocean temperature at a depth of 100 meters off the Alaskan coast, given in\ndegrees Celsius. Columns are:\n\n 1. year\n 2. month\n 3. temperature", "url": "http://lib.stat.cmu.edu/crab/celsius", "filename": "celsius", "name": "ocean temperature", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Waters" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch4a.dat contains the burlap data, with the following variables:\n\n1. mburlap = mean burlap count value obtained over 12 subplot values.\n\n2. megg = mean egg mass density per acre obtained over 21 subplot values.\n\n3. seegg = estimated standard error of mean egg mass density obtained\nover 21 subplot values.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch4a.dat", "filename": "Gypsy-Moth", "name": "Measurement Error Models for Gypsy Moth Studies", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch4b.dat contains the defoliation data, with the following variables:\n\n1. mdef = mean defoliation value obtained from 20 subplot values.\n\n2. 
sedef = estimated standard error of mean defoliation\nobtained from 20 subplot values.\n\n3. megg = mean estimated egg mass density obtained over 20 subplots\n\n4. seegg = estimated standard error of mean egg \nmass density obtained from 20 subplot values.\n\n5. cdefegg = estimated covariance of mean defoliation and mean egg mass\ndensity obtained from 20 subplot values.\n", "url": "http://lib.stat.cmu.edu/datasets/csb/ch4b.dat", "filename": "Gypsy-Moth2", "name": "Measurement Error Models for Gypsy Moth Studies", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch7.dat contains the following variables:\n\nNo - observation number (1,...,294).\nTIME - survival time of halibut (time until death) in hours.\n (NOTE the Table 1 in the book claims survival time is in minutes,\n but HOURS is the correct unit)\nCENSOR - censoring indicator. 
1=uncensored observation;\n 0=censored observation.\nTOWD - duration (in minutes) of time trawl net was towed on the bottom.\nDELDEPTH - difference between maximum and minimum depth observed during tow\n (depth measured in meters).\nLENGTH - fork length of halibut in centimeters.\nHANDTIME - handling time (in minutes) between net coming on board vessel \n and fish being placed in holding tanks.\nLOGCAT - natural logarithm of total catch of fish in tow.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch7.dat", "filename": "Atlantic-Halibut", "name": "Survival Analysis for Size Regulation of Atlantic Halibut", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch9.dat contains the following variables:\n\nBIRD : Bird id. \nRX : 1=NT, 2=PT, 3=FT, standing for \"No Tape\" (NT), in which no visible\n guides connected light cues\n with the feeders below them; \"Partial Tape\" (PT), in which fluorescent\n orange Dymo tape provided a discontinuous (i.e., broken in two places) \n connection between each light cue and its feeder; and \"Full Tape\"\n (FT), in which the visible guide between each light cue and\n its feeder (fluorescent orange Dymo tape) was continuous.\n Feeding continued for 180 trials.\nGENDER : 0=male, 1=female. 
\nOUTCOME: 0=failure 1= success.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch9.dat", "filename": "Hummingbirds", "name": "Spatial Association Learning in Hummingbirds\n", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch10.dat contains eight variables, with 30 cases for each.\nEach case refers to a site in the forest. The first variable,\n'random', is a character variable indicating whether the site is a\nspotted owl nest site (=N) or a site selected at random\ncoordinates (=R). Variables 2-8 contain the percents of mature forest\n(>80 years of age). The variable names indicate the outer radii of the\nrings in which the percents were calculated. They are: 0.91km,\n1.18km, 1.40km, 1.60km, 1.77km, 2.41km, and 3.38km. So, for example,\nthe variable '1.18km' contains the percents of mature forest in\nrings with outer radius 1.18km and inner radius .91km centered at \nthe different sites.", "url": "http://lib.stat.cmu.edu/datasets/csb/ch10.dat", "filename": "Habitat-Association", "name": "Habitat Association Studies of the Northern Spotted\nOwl, Field Grouse, and Flammulated Owl\n", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch11a.dat contains a body temperature time series for an\nadult female beaver (Castor canadensis) obtained December 12-13, 1990 \nat Sandhill Wildlife Area, Wisconsin. Observations were made at 10\nminute intervals. These observations follow a random pattern of\nfluctuations, typically observed during freeze-up for all beaver in\nthis study. 
\n\nVariable List:\n\nObservation No.\nJulian day\nTime\nBody temperature (degrees C) \nActivity (0 = animal inside retreat; 1 = animal outside retreat) \n", "url": "http://lib.stat.cmu.edu/datasets/csb/ch11a.dat", "filename": "Beaver-Body-Temperatures", "name": "Time-Series Analyses of Beaver Body Temperatures", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch11b.dat contains a body temperature time series for\na subadult female beaver (Castor\ncanadensis). Observations were made at Sandhill Wildlife Area,\nWisconsin, November 3-4, 1990 (before freeze-up). Temperature\nobservations follow a plateau pattern, typically observed during\nthe entire ice-free period (late spring to late autumn). Only the\nfirst 100 observations are included in this data set.\n\nVariable list:\n\nObservation number\nJulian day\nTime\nBody temperature (degrees C)\nActivity (0 = animal inside retreat; 1 = animal outside retreat)", "url": "http://lib.stat.cmu.edu/datasets/csb/ch11b.dat", "filename": "Beaver-Body-Temperatures2", "name": "\nTime-Series Analyses of Beaver Body Temperatures\n", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The main data set consists of king crab pot survey data for the years 1973\nthrough 1986. The surveys were conducted in the waters around Kodiak Island,\nAlaska, using pots similar to the pots used by the commercial fishing fleet.\n(A crab pot is a trap that resembles a wooden crate.) 
A fixed sampling grid\nwas used to place strings of pots (stations) consisting usually of 10 pots in\nopen ocean, or of 2-5 pots in bays. The pots were left in the water for\nperiods of 16-24 hours, removed, and the crab counts recorded. The survey was\nconducted each summer, 2-4 weeks prior to start of the commercial fishing\nseason. The crab counts are classified by size (roughly representing age) and\nsex into six categories.\n\nThe basic survey data is a file \"survey\", containing a 3,450 by 14 matrix\nwith these columns:\n\n 1. Year (last two digits)\n 2. Fishing district (one of four)\n 3. Station identifier (alphabetic)\n 4. The number of pots fished\n 5-6. Latitude and longitude of the location halfway between\n the first and last pot of the station\n 7. Number of pre-recruit-4 crab\n 8. Number of pre-recruit-3 crab\n 9. Number of pre-recruit-2 crab\n 10. Number of pre-recruit-1 crab\n 11. Number of recruit males\n 12. Number of post-recruit males\n 13. Number of juvenile females\n 14. Number of adult females", "url": "http://lib.stat.cmu.edu/crab/survey", "filename": "survey_", "name": "Survey", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "==================== Contents of file \"dstns\" ============================\n \nFor each of the years in the survey (1973 to 1986), a frequency distribution\nof the crab by size (in 1 mm increments) that were surveyed. Separate\ndistributions are given for juvenile females, adult females, and all males.\nThe five columns are:\n\n 1. year\n 2. length in mm\n 3. count of juvenile females\n 4. count of adult females\n 5. 
count of all males", "url": "http://lib.stat.cmu.edu/crab/dstns", "filename": "dstns", "name": "dstns", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "For each of the 14 years in the survey (1973-86), an estimate of the number of\neggs per female. Columns are:\n\n 1. year\n 2. estimated eggs per adult female", "url": "http://lib.stat.cmu.edu/crab/eggs", "filename": "eggs", "name": "eggs per female", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "For each year in the survey, a frequency distribution of all females\ncross-classified by size (in 1 mm increments) and percent clutch fullness (5\ncategories). Clutch fullness is, roughly, the realized egg-bearing potential\nof a female crab. The seven columns are:\n\n 1. year\n 2. size, in mm\n 3. count of females with 0% fullness\n 4. count of females with 1-29% fullness\n 5. count of females with 30-59% fullness\n 6. count of females with 60-89% fullness\n 7. count of females with 90-100% fullness", "url": "http://lib.stat.cmu.edu/crab/fullness", "filename": "fullness", "name": "Clutch fullness", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Animals" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "John O. Rawlings and Susan E. Spruill\n\nThe data set ch5.dat contains the following variables:\n\n1. 
site: coded 1-6 corresponding to the location code used in Table 1.\n2. block: block within site coded 1, 2, ... within sites for the RCB designs;\n block=1 for all observations for the CRD designs, sites 5 and 6.\n3. rep: replication within site coded as missing in sites 1-4;\n coded as 1, 2, ... for replicates in the CRD design.\n4. ozone: target ozone treatment, coded 0.0=charcoal filtered air, \n 1.0=nonfiltered air, \"x.x\"=target level of ozone as multiple of \n ambient ozone level.\n5. rain: acidic rain treatment, coded as pH of rain solution.\n6. fam: genetic family, coded as 1, 2, ... within sites.\n7. ppmhrs: cumulative ozone exposure (ppm-h) during the two years of\n the trials.\n8. vwpH: cumulative exposure to acidic rain computed as vwpH \n = -log(sum(volume*hydrogen ion concentration)).\n9. biomass: total above ground biomass (g) after two growing seasons.\n10. diam: increment of diameter growth (mm) during the two growing seasons.\n11. DMA: whole-plot component of the covariate initial diameter (mm)\n expressed as the deviation of the whole-plot mean from the overall\n site mean.\n12. DMB: sub-plot component of the covariate initial diameter (mm)\n expressed as the deviation of the subplot mean from the whole-plot mean.\n13. D2HA: whole-plot component of the covariate initial volume, \n approximated as diameter squared times height, and expressed as\n the deviation of the whole-plot mean from the overall site mean.\n14. D2HB: sub-plot component of the covariate initial volume and\n expressed as the deviation of the subplot mean from the whole-plot mean.\n15. DMOT: depth to mottling (cm) of the clay soil; one measurement\n per whole-plot. 
\n\nMissing data are coded with '.'", "url": "http://lib.stat.cmu.edu/datasets/csb/ch5.dat", "filename": "Pine-Seedling", "name": "Estimating Pine Seedling Response to Ozone and Acid Rain", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch8.dat contains the following variables:\n\nPop - population code, 1034 or 1040\nADH - 1 (cepa), 2 (het) or 3 (fist)\nIDH - 1 (cepa), 2 (het) or 3 (fist)\nPGI - 1 (cepa), 2 (het) or 3 (fist)\nfreq - frequency", "url": "http://lib.stat.cmu.edu/datasets/csb/ch8.dat", "filename": "Hybrid-Onions", "name": "\nMixture Fraction and Linkage Analyses for Hybrid Onions", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Plants" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch6.dat contains the following variables. 
\n\n\nSTRATA : National Marine Fisheries Service (NMFS) 4 digit strata\n designator in which the sample was taken \n \nSAMPLE : Sample number per year ranging from 1 to approximately 450\n\nLAT : Location in terms of latitude of each sample in the Atlantic Ocean \n\nLONG : Location in terms of longitude of each sample in the Atlantic Ocean\n \nTCATCH : Total number of scallops caught at the ith sample location\n\nPREREC : Number of scallops whose shell length is smaller than 70 millimeters\n \nRECRUITS : Number of scallops whose shell length is 70 millimeters or larger", "url": "http://lib.stat.cmu.edu/datasets/csb/ch6.dat", "filename": "Scallop-Abundance", "name": "Geostatistical Estimates of Scallop Abundance", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Geology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Geographical coordinates of the shoreline of the 17 islands that form the\nKodiak Island group. The two columns are\n\n 1. latitude\n 2. longitude\n\nmeasured in degrees and fractions of a degree. Each of the 17 groups of\ncoordinates is terminated by a pair of \"NA\"s, and the end of each group loops\nback to the beginning. For drawing maps, bear in mind that longitude is\nmeasured East to West, which is right to left. This suggests plotting\nnegative longitude instead of longitude. 
Also, to draw maps that \"look right\"\nto an Alaskan, you must take into account that in this part of the world the\naspect ratio of one degree latitude (y-axis) to one degree longitude (x-axis)\nis 1:1.8 (in terms of actual ground distance).", "url": "http://lib.stat.cmu.edu/crab/kodiak", "filename": "kodiak", "name": "Geographical coordinates of the shoreline of Kodiak Island group", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Other" } ] }, { "name": "Statistics", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Some statistics on the fishing fleet and commercial catch, for each year\nbetween 1960 and 1982. The six columns are:\n\n 1. year\n 2. number of vessels registered for fishing\n 3. number of crab caught\n 4. total weight in kilograms of crab caught\n 5. total number of pot-lifts.\n 6. wholesale price of king crab in dollars per pound", "url": "http://lib.stat.cmu.edu/crab/fleet", "filename": "fleet", "name": "fishing fleet and commercial catch", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Commercial catch data for 1960-1982, broken out by district. The four columns\nare:\n\n 1. year\n 2. district number (1, 2, 3 or 4)\n 3. total catch as a count\n 4. 
total catch in kilograms", "url": "http://lib.stat.cmu.edu/crab/catch", "filename": "catch", "name": "Commercial catch data", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Economics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The file ch12.dat contains the following variables:\n\nlstay: Length of stay of a resident\nage: Age of a resident\ntrt: Nursing home assignment (1: receive treatment,0: control)\ngender: Gender (1:male,0:female)\nmarstat: Marital status (1: married,0: not married)\nhlstat: Health status (2: second best, 5: worst)\ncens: Censoring indicator (1:censored, 0: discharged)", "url": "http://lib.stat.cmu.edu/datasets/csb/ch12.dat", "filename": "Nursing-Home-Usage", "name": "\nParametric Duration Analysis of Nursing Home Usage", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set was derived from sample survey data collected in 1988\nin two surveys designed to evaluate the City of Toronto Workplace\nSmoking By-law (National Health Research and Development Program,\nCanada, Project Grant 6606-3346-46). The principal investigator\nwas Dr. L.L. Pederson, University of Western Ontario, Ontario,\nCanada. The surveys were conducted by the Institute for Social\nResearch at York University, Ontario, Canada in January-February\n1988 and in November-December 1988. By agreement with the\nInstitute for Social Research, York University, the survey data are\nin the public domain. 
This data set can be used freely for\nnoncommercial purposes and can be freely distributed.\n\nThere are 15 variables in the data set, with values separated by\nblanks. There are no missing values. The CSB variable names are as\nfollows: \n\nidno y w x1 x2 x3 z1 z2 z3 z4 z5 z6 z7 z8 z9\n\n\nSHORT DESCRIPTION NAME DEFINITION AND CODING\n\nUnique identifier idno (5 digits, beginning with 1 or 2)\n\nOutcome y Attitude toward smoking in the\n workplace. Smoking should be: \n (1 = prohibited, 2 = restricted,\n 0 = unrestricted)\n\nWeight w Sampling/post-stratification weight\n (ranges from 0.305 to 4.494)\n\nTime x1 Time of survey relative to\n implementation of the by-law \n on March 1, 1988\n (1 = post, 0 = pre)\n\nWork x2 Place of work indicator 1\n with City of Toronto as baseline\n (1 = outside City of Toronto,\n 0 = otherwise)\n\n x3 Place of work indicator 2\n with City of Toronto as baseline\n (1 = not outside the home, \n 0 = otherwise)\n\nResidence z1 Place of residence\n (1 = City of Toronto, \n 0 = other Metro Toronto)\n\nSmoking z2 Smoking status indicator 1\n with those who have never smoked \n as the baseline\n (1 = current smoker, \n 0 = otherwise)\n\n z3 Smoking status indicator 2\n with never as the baseline\n (1 = quit <=6 months ago, \n 0 = otherwise)\n\n z4 Smoking status indicator 3\n with never as the baseline\n (1 = quit >6 months ago, \n 0 = otherwise)\n\n z5 Smoking status indicator 4\n with quit >12 months as the baseline\n (1 = quit 6-12 months, \n 0 = otherwise)\n\nKnowledge z6 Knowledge of health effects of\n environmental tobacco smoke\n (score, ranges from 0 to 12)\n\nSex z7 Sex of respondent\n (1 = male, 0 = female)\nAge z8 Age of respondent\n ( (age in years - 50)/10 )\n\nEducation z9 Level of education\n (-2 = elementary, \n -1 = some high school, \n 0 = high or trade school, \n 1 = college or some university,\n 2 = university degree)\n ", "url": "http://lib.stat.cmu.edu/datasets/csb/ch13.dat", "filename": "Smoking-Restrictions", 
"name": "Analysis of Attitudes Towards Workplace Smoking Restrictions", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": false } ], "name": "Population" } ] } ] }