diff --git a/data/datasets/JSEDataArchive.json b/data/datasets/JSEDataArchive.json index f5a7b7fb0..560885555 100644 --- a/data/datasets/JSEDataArchive.json +++ b/data/datasets/JSEDataArchive.json @@ -1,980 +1,542 @@ { "categories": [ { "category_name": "Medicine", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Time of Birth, Sex, and Birth Weight of 44 Babies", "download": "http://jse.amstat.org/datasets/babyboom.dat.txt", "filename": "babyboom", "name": "babyboom", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "TAB" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset contains 21 body dimension measurements as well as age, \nweight, height, and gender on 507 individuals. The 247 men and 260 \nwomen were primarily individuals in their twenties and thirties, with a \nscattering of older men and women, all exercising several hours a week.", "download": "http://jse.amstat.org/datasets/body.dat.txt", "filename": "Body", "name": "Exploring Relationships in Body Dimensions", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Each record contains the results of a laboratory analysis of calcium, \ninorganic phosphorous, and alkaline phosphatase. The variable cammol \nis measured as millimoles per liter. Phosmol is inorganic phosphorous \nin millimoles per liter. Alkphos is meauring alkaline phosphatase in \ninternational units per liter. 
The purpose of the study was to \ndetermine if significant gender differences exist in the mean values \nof calcium, inorganic phosphorus, and alkaline phosphatase in \nsubjects over age 65. A second purpose was to determine if analytical \nvariation between laboratoreis would affect the mean values of the study variables. \nCalcium.dat contains incorrect records that have transcription errors. Calciumgood.dat \ncontains the corrected values. ", "download": "http://jse.amstat.org/datasets/calcium.dat.txt", "filename": "Calcium", "name": " Calcium, inorganic phosphorus and alkaline phosphatase levels in elderly patients ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Percentage of body fat, age, weight, height, and ten body circumference\nmeasurements (e.g., abdomen) are recorded for 252 men. Body fat, a\nmeasure of health, is estimated through an underwater weighing\ntechnique. Fitting body fat to the other measurements using multiple\nregression provides a convenient way of estimating body fat for men\nusing only a scale and a measuring tape.", "download": "http://jse.amstat.org/datasets/fat.dat.txt", "filename": "fat", "name": "Fitting Percentage of Body Fat to Simple Body Measurements", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Sample of 654 youths, aged 3 to 19, in the area of East Boston\nduring middle to late 1970's. Interest concerns the relationship\nbetween smoking and FEV. 
Since the study is necessarily\nobservational, statistical adjustment via regression models\nclarifies the relationship.", "download": "http://jse.amstat.org/datasets/fev.dat.txt", "filename": "fev_", "name": "Forced Expiratory Volume (FEV) Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The tab-delimited data set gives characteristics of young female patients between\nthe ages of 11 to 26 who came to clinics of Johns Hopkins Medical Institutions between\n2006 and 2008 to begin the three-shot regimen of vaccinations with the anti-human\npapillomavirus (HPV) medication Gardasil. ", "download": "http://jse.amstat.org/v19n1/gardasil.dat.txt", "filename": "gardasil", "name": "Retrospective Study (Potential Predictors for Completion or Non-Completion of ", - "number_format": 31, - "remove_quotes": true, "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "From a very young age, shoes for boys tend to be wider than shoes for \ngirls. Is this because boys have wider feet, or because it is assumed that \ngirls, even in elementary school, are willing to sacrifice comfort for fashion? \nTo assess the former, a statistician measures kids' feet. Methods for analysis include \nt-tests, ANCOVA, and least-squares model building. 
This data set is useful for \ndiscussion of covariates, confounding, and conclusions in the context of the problem.", "download": "http://jse.amstat.org/datasets/kidsfeet.dat.txt", "filename": "kidsfeet", "name": "Foot measurements for fourth grade children", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This article takes data from a paper in the _Journal of the American\nMedical Association_ that examined whether the true mean body\ntemperature is 98.6 degrees Fahrenheit. Because the dataset suggests\nthat the true mean is approximately 98.2, it helps students to grasp\nconcepts about true means, confidence intervals, and t-statistics.\nStudents can use a t-test to test for sex differences in body\ntemperature and regression to investigate the relationship between\ntemperature and heart rate.", "download": "http://jse.amstat.org/datasets/normtemp.dat.txt", "filename": "normtemp", "name": "Normal Body Temperature, Gender, and Heart Rate ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" } ], "subcategory_name": "Common" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Drug interaction study of a new and a standard oral contraceptive \ntherapy. 
See the \"STORY BEHIND THE DATA\" and \"PEDAGOGICAL NOTES\" \nsections below for details.", "download": "http://jse.amstat.org/datasets/ocdrug.dat.txt", "filename": "ocdrug", "name": "Drug Interaction", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Bacteria are cultured in medical laboratories to identify them so patients can be treated \ncorrectly. The tryptone dataset contains measurements of bacteria counts following the \nculturing of five strains of Staphylococcus aureus. There are many strains of \nStaphylococcus aureus; five were used by the experimenter. They are identified by numbers \nin the data because their names are too complicated to be useful as identifiers. The \ndataset also contains the time of incubation, temperature of incubation and concentration \nof tryptone, a nutrient. The protocols for culturing this bacteria, set the time at 24 \nhours, the temperature at 35 degrees and the tryptone concentration at 1.0%. The question \nis whether the conditions recommended in the protocols for the culturing of these strains \nare optimal. 
The task is to find the incubation time, temperature and tryptone concentration \nthat optimises the growth of this Bacterium.", "download": "http://jse.amstat.org/datasets/Tryptone.dat.txt", "filename": "Tryptone", "name": "The Tryptone Task ", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Other" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Measurements of weight and tar, nicotine, and carbon monoxide content\nare given for 25 brands of domestic cigarettes.", "download": "http://jse.amstat.org/datasets/cigarettes.dat.txt", "filename": "Cigarette_", "name": "Cigarette data for an introduction to multiple regression", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" } ], "subcategory_name": "Smoking" } ] }, { "category_name": "Nature", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset consists of a few variables that may influence the demand for Beef in the United States. 
It provides an example of the influence of inflation in monetary time series data as well as providing some interesting statistical features in building demand models in regression.", "download": "http://jse.amstat.org/v22n1/kopcso/BeefDemand.txt", "filename": "BeefDemand", "name": "Beef Demand", - "number_format": 31, - "remove_quotes": true, "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "159 fishes of 7 species are caught and measured. Altogether there are\n8 variables. All the fishes are caught from the same lake\n(Laengelmavesi) near Tampere in Finland.", "download": "http://jse.amstat.org/datasets/fishcatch.dat.txt", "filename": "fishcatch", "name": "fishcatch", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "A cost of increased reproduction in terms of reduced longevity has been\nshown for female fruitflies, but not for males. The flies used were an\noutbred stock. Sexual activity was manipulated by supplying individual\nmales with one or eight receptive virgin females per day. The\nlongevity of these males was compared with that of two control types.\nThe first control consisted of two sets of individual males kept with\none or eight newly inseminated females. Newly inseminated females will\nnot usually remate for at least two days, and thus served as a control\nfor any effect of competition with the male for food or space. 
The\nsecond control was a set of individual males kept with no females.\nThere were 25 males in each of the five groups, which were treated\nidentically in number of anaesthetizations (using CO2) and provision of\nfresh food medium.", "download": "http://jse.amstat.org/datasets/fruitfly.dat.txt", "filename": "fruitfly", "name": "Sexual activity and the lifespan of male fruitflies", - "number_format": 31, - "remove_quotes": true, "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Animals" } ] }, { "category_name": "Statistics", "subcategories": [ { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "http://jse.amstat.org/datasets/aaup.txt", + "description": "Data are from the American Association of University Professors (AAUP) annual faculty salary survey of American colleges and universities. They include average salary and overall compensation, broken down by full, associate, and assistant professor ranks. The dataset is used for the 1995 Data Analysis Exposition, sponsored by the Statistical Graphics Section of the American Statistical Association. 
See the file colleges.txt for more information on the Exposition.", + "description_url": "http://jse.amstat.org/datasets/aaup.txt", "download": "http://jse.amstat.org/datasets/aaup.dat.txt", "filename": "AAUP", "name": "AAUP Faculty Salary data", - "number_format": 31, - "remove_quotes": true, "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "columns": ["FICE (Federal ID number)", "College name", "State (postal code)", "Type (I, IIA, or IIB)", "Average salary - full professors", "Average salary - associate professors", "Average salary - assistant professors", "Average salary - all ranks", "Average compensation - full professors", "Average compensation - associate professors", "Average compensation - assistant professors", "Average compensation - all ranks", "Number of full professors", "Number of associate professors", "Number of assistant professors", "Number of instructors", "Number of faculty - all ranks"] }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset bestbuy.day contains monthly data on computer usage \n(MIPS) and total number of stores from August 1996 to July 2000. \nAdditionally, information on the planned number of stores through \nDecember 2001 is available. These data can be used to compare \ntime-series forecasting with trend and seasonality components and \ncausal forecasting based on simple linear regression. 
The simple \nlinear regression model exhibits unequal error variances, suggesting \na transformation of Y.", "download": "http://jse.amstat.org/datasets/bestbuy.dat.txt", "filename": "Best_Buy", "name": " BestBuy", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dollar amount for a monthly (January 1991 through December 2000) \nhousehold electric bill is presented as a time series. In addition, \npotential explanatory variables are included. Twelve representative \nmonthly values are provided for the average temperature, for \nheating degree days, and for cooling degree days (not for each \nmonth for each year). Additional variables give the family size \neach month and indicate when a new electric meter and new heating \nand cooling equipment was installed. To convert the billing amount \nto estimated power consumption, a tiered rate function (supplied \nin the accompanying Instructor's Manual) and the costs of \nassociated riders (provided here) must be used. Consumption \nestimates resulting from this information are supplied.\t", "download": "http://jse.amstat.org/datasets/electricbill.dat.txt", "filename": "electricbill", "name": " Electric Bill Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data file contains information on 76 single-family homes inEugene, Oregon during 2005. 
This dataset is suitable for a completemultiple linear regression analysis of home price data that coversmany of the usual regression topics, including interaction andpredictor transformations. Whereas realtors use experience and localknowledge to subjectively value a house based on its characteristics(size, amenities, location, etc.) and the prices of similar housesnearby, regression analysis can provide an alternative that moreobjectively models local house prices using these same data.SOURCES:The data were provided by Victoria Whitman, a realtor in Eugene, in2005. The data were used in a case study in Pardoe (2006).", "download": "http://jse.amstat.org/datasets/homes76.dat.txt", "filename": "homes76", "name": " Modeling home prices using realtor data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": " This file contains total US gross box office receipts for 49 movies. 
This data is to accompany the article entitled Movie Data.", "download": "http://jse.amstat.org/datasets/movietotal.dat.txt", "filename": "movietotal", "name": "movietotal", - "number_format": 31, - "remove_quotes": true, "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "For 97 countries in the world, data are given for birth rates, death\nrates, infant death rates, life expectancies for males and females, and\nGross National Product.\n\nVARIABLE DESCRIPTIONS:\nColumns\n 1 - 6 Live birth rate per 1,000 of population\n 7 - 14 Death rate per 1,000 of population\n15 - 22 Infant deaths per 1,000 of population under 1 year old\n23 - 30 Life expectancy at birth for males\n31 - 38 Life expectancy at birth for females\n39 - 46 Gross National Product per capita in U.S. dollars \n47 - 52 Country Group\n 1 = Eastern Europe\n 2 = South America and Mexico\n 3 = Western Europe, North America, Japan, Australia, New Zealand\n 4 = Middle East\n 5 = Asia\n 6 = Africa\n53 - 74 Country", "download": "http://jse.amstat.org/datasets/poverty.dat.txt", "filename": "poverty", "name": "The Statistics of Poverty and Inequality ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" } ], "subcategory_name": "Economics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "http://jse.amstat.org/datasets/ballbearings.txt", "download": "http://jse.amstat.org/datasets/ballbearings.dat.txt", "filename": "ballbearings", "name": "Ball Bearing Reliability Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - 
"use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "http://jse.amstat.org/datasets/baseball.txt", "download": "http://jse.amstat.org/datasets/baseball.dat.txt", "filename": "baseball_", "name": "baseball", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains every NCAA Basketball Tournament game \never played. The tournament has been held every year since 1939.", "download": "http://jse.amstat.org/datasets/basketball.dat.txt", "filename": "Basketball", "name": "NCAA Basketball Tournament Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Data are provided for Barry Bonds' plate appearances in the 2001\nbaseball season. 
Variables include characteristics of the innings\nbefore the first pitch to Bonds (e.g., the number of outs, the number\nof runners on each base, the score, the opposing pitcher's earned run\naverage) and after the first pitch to Bonds (e.g., the outcome of the\nappearance, how many runs scored in the inning after Bonds hits).", "download": "http://jse.amstat.org/datasets/bonds2001.dat.txt", "filename": "Bonds", "name": "Barry Bonds' 2001 Plate Appearances", "number_format": 31, "remove_quotes": true, "separator": "SPACE", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset contains the scores, opponents, and sites of the 18 Big Ten\nmen's basketball games that involved the University of Iowa in 1997.", "download": "http://jse.amstat.org/datasets/hawks.dat.txt", "filename": "hawks", "name": " 1997 University of Iowa Big Ten Basketball Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset consists of game-by-game information for the 1998 season\nfor Mark McGwire and the St. Louis Cardinals, and Sammy Sosa and the\nChicago Cubs. 
The dataset includes information on the home run hitting\nof these two players, as well as game results for the teams.", "download": "http://jse.amstat.org/datasets/homerun.dat.txt", "filename": "homerun", "name": "The 1998 Home Run Race Between Mark McGwire and Sammy Sosa", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are from The Baseball Encyclopedia (1993) and Total Baseball (2001). \nThey include the location, league affiliation (National or American), \ndivision affiliation (East, Central, or West), season of play, home game \nattendance, runs scored, runs allowed, wins, losses, and number of games \nbehind the division leader for each major league franchise for the 1969 \nthrough 2000 seasons. Other data (including opening dates for new stadia, \nand dates of work stoppages) were collected from Ballparks by Munsey and \nSuppes (2001) and InfoPlease (2001).", "download": "http://jse.amstat.org/datasets/MLBattend.dat.txt", "filename": "MLBattend", "name": "1969-2000 Major League Baseball Attendance data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Driver results for all NASCAR races between 1975 1nd 2003, inclusive. 
The dataset constitutes all \nparticipants in each of 898 races, and includes their start/finish postions, prize winnings, car \nmake and laps completed.", "download": "http://jse.amstat.org/datasets/nascard.dat.txt", "filename": "nascard", "name": "NASCAR Driver Results", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Race results for all NASCAR Winston Cup races between 1975 and 2003, inclusive. The dataset \nincludes the numbers of cars, total prize winnings, monthly consumer price index for the month \nof the race, track length, laps completed by the winner, spatial co-ordinates and name of track.", "download": "http://jse.amstat.org/datasets/nascarr.dat.txt", "filename": "nascarr", "name": "NASCAR Race Results", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This data set contains performance statistics for National \nFootball League (NFL) teams for their 2000 regular season.\n Columns Variable Description\n 1 - 3 initials team initials\n 5 - 26 team name and location of the team\n 28 - 29 wins wins\n 31 - 32 losses losses\n 34 - 35 homedrives50 drives begun in opponents' territory\n 37 - 38 homedrives20 drives begun within 20 yards of the goal\n 40 - 41 oppdrives50 opponents drives begun in team's territory\n 43 oppdrives20 opponents drives begun within 20 yards of goal\n 45 homepuntblock punts blocked by team\n 47 opppuntblock punts team had blocked\n 49 - 50 hometouch touchdowns scored by team\n 52 - 53 opptouch touchdowns scored against 
team\n 55 - 58 homeyards total yardage gained by offense\n 60 - 63 oppyards total yardage allowed by defense\n 65 - 68 hometop time of possession by offense (in minutes)\n 70 - 73 opptop time of possession by opponents' offense\n 75 - 76 homefgm field goals made\n 78 - 79 oppfgm field goals allowed to opponents\n 81 - 82 homefga field goals attempted\n 84 - 85 oppfga field goals attempted by opponents\n 87 - 89 opppuntno punts made by opponents\n 91 - 94 opppuntave average length of punts made by opponents\n 96 - 99 opppuntnet average change in field position \n during opponents' punts\n101 - 102 opppunttb opponents' punts taken for touchbacks\n104 - 105 opppunt20 opponents' punts that resulted in the team's\n offense beginning within 20 yards of their \n own (defensive) goal line\n107 - 108 opppuntlong longest opponents' punt\n110 - 112 homepuntno punts made by team\n114 - 117 homepuntave average length of punts made by team\n119 - 122 homepuntnet average change in field position \n during team's punts\n124 - 125 homepunttb team's punts taken for touchbacks\n127 - 128 homepunt20 team's punts that resulted in the opponents'\n offense beginning within 20 yards of their \n own (defensive) goal line\n130 - 131 homepuntlong longest team punt\n133 - 135 home1sts first downs obtained by offense\n137 - 139 opp1sts first downs allowed by defense\n141 - 142 homesacks sacks achieved by team's defense\n144 - 145 oppsacks sacks allowed by team's offense\n147 - 148 homekos kickoffs made by team\n150 - 151 oppkos kickoffs received by team\n153 - 156 homekoyds yards gained during kickoff returns\n158 - 161 oppkoyds yards allowed to opposition during kickoff returns\n163 - 166 homekoave average yards gained during kickoff returns\n168 - 171 oppkoave average yards allowed during kickoff returns\n173 - 175 homekolong longest kickoff return made by team\n177 - 179 oppkolong longest kickoff return allowed by team\n181 homekotds kickoffs returned for a touchdown by team\n183 oppkotds 
kickoffs returned for touchdown by opposition\n185 - 186 homerets punts returned by team\n188 - 189 opprets punts returned by opposition\n191 - 192 homefc punts \"fair caught\" by team\n194 - 195 oppfc punts \"fair caught\" by opposition\n197 - 199 homeretyds return yardage on punts by team\n201 - 203 oppretyds return yardage on punts by opposition\n205 - 208 homeretave average length of punt returns by team\n210 - 213 oppretave average length of punt returns by opposition\n215 homerettds punts returned by team for a touchdown\n217 opprettds punts returned by opponents for a touchdown\n219 - 220 homeint interceptions made by team's defense\n222 - 223 oppint interceptions made against team's offense\n225 - 226 homerecover fumbles recovered by team's defense\n228 - 229 opprecover fumbles recovered by opposing defenses\n231 - 232 numgames games played by team\n234 - 237 opprateyds average number of yards gained \n per minute of possession by opponents\n239 - 242 homerateyds average number of yards gained \n per minute of possession by team\n244 - 247 opppuntrate average number of punts \n per minute of possession by opponents\n249 - 252 homepuntrate average number of punts \n per minute of possession by team\n254 - 258 oppratetd average number of touchdowns \n per minute of possession by opponents\n260 - 264 homeratetd average number of touchdowns \n per minute of possession by team\n266 - 269 winpercent winning percentage\n271 - 275 hometorate turnovers obtained by team,\n per minute of possession by opponents\n277 - 281 opptorate turnovers allowed by team, \n per minute of possession\n283 - 286 home1rate first downs obtained by team, \n per minute of possession\n288 - 291 opp1rate first downs allowed by team's defense, \n per minute of possession by opposition\n293 - 295 homepoints points scored by team\n297 - 299 opppoints points scored against team\n301 - 303 conference conference to which the team belongs (AFC or NFC)", "download": 
"http://jse.amstat.org/datasets/nfl2000.dat.txt", "filename": "nfl2000", "name": "NFL Y2K PCA", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", "download": "http://jse.amstat.org/datasets/nfl93.dat.txt", "filename": "nfl93", "name": "NFL Scores and Pointspreads", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", "download": "http://jse.amstat.org/datasets/nfl94.dat.txt", "filename": "nfl94", "name": "NFL Scores and Pointspreads", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. 
In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", "download": "http://jse.amstat.org/datasets/nfl95.dat.txt", "filename": "nfl95", "name": "NFL Scores and Pointspreads", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", "download": "http://jse.amstat.org/datasets/nfl96.dat.txt", "filename": "nfl96", "name": " NFL Scores and Pointspreads", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset contains scores for all regular season National Football\nLeague games from the 1998, 1999 and 2000 seasons. 
In addition to \nthe points scored by the home and visiting teams in each game, the\ndataset contains a pointspread that handicaps each game.\n\nColumns \n 1 - 4 Year (1998, 1999, or 2000)\n 6 - 7 Week of the season (1 to 17)\n10 - 27 Home team name\n29 - 30 Home team score\n33 - 50 Visiting team name\n52 - 53 Visiting team score\n56 - 60 Pointspread ", "download": "http://jse.amstat.org/datasets/nfl98-00.dat.txt", "filename": "nfl98-00", "name": " NFL Scores for 1998-2000", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": " The data set provides the weights (in lbs)\nof the 26 men on the 1996 US Olympic Rowing Team in Atlanta. The\ndata includes the names of the participants and which event they\nrowed in. The US team participated in 7 of the 8 possible events.\nThis data set is useful for discussing outliers,\nexplanations for outliers, and comparing the robustness of the\nmean and the median.\n\n", "download": "http://jse.amstat.org/datasets/rowing.dat.txt", "filename": "rowing", "name": " Weights of 1996 US Olympic Rowing Team", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data consist of 500-yard freestyle swim times for male and female swimmers age 50-94 in a biennial national competition. 
Variables include year, gender, age, age group, swim time, seed time (qualifying time from state competition), and split times (in each 50-yard segment).", "download": "http://jse.amstat.org/v22n1/doane/SeniorSwimTimes-DataSet.txt", "filename": "SeniorSwimTimes", "name": "SeniorSwimTimes", - "number_format": 31, - "remove_quotes": true, "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are provided for the 56 Tour De France bicycle races since World \nWar II. The year and dates of the event, the total number of stages, \nthe total distance, the winning total time and average speed, the name \nand country of the winner, the birth date of the winner, and the \nwinner's age at the time of victory are the variables in the dataset.", "download": "http://jse.amstat.org/datasets/tdf.dat.txt", "filename": "tdf", "name": "Tour De France Winners (Can Lance Win Six?)", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Sport" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "http://jse.amstat.org/datasets/aptness.txt", + "description": "Full population of data (all software projects completed by the AT&T data center from 1986 through 1991).", + "description_url": "http://jse.amstat.org/datasets/aptness.txt", "download": "http://jse.amstat.org/datasets/aptness.dat.txt", - "filename": "Aptness", + "filename": "aptness", "name": "Evaluating Aptness of a Regression Model", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "columns": ["Function Point 
Count", "Work Hours", "Operating System: (0) Unix, (1) MVS", "Database Management System: (1) IDMS, (2) IMS, (3) INFORMIX, (4) INGRESS, (5) Other", "Language: (1) COBOL, (2) PLI, (3) C, (4) Other"] }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In many statistical models the normal distribution of the response is an essential assumption.\nThis paper uses a dataset of 2000 euro coins with information (up to the milligram) about\nthe weight of each coin. As the physical coin production process is subject to a multitude\nof (very small) variability sources, it seems reasonable to expect that the empirical\ndistribution of the weight of euro coins does agree with the normal distribution. Goodness\nof fit tests however show that this is not the case. Moreover, some outliers complicate\nthe analysis. Mixtures of normal distributions and skew normal distributions are fitted\nto the data, revealing that the normality assumption might not hold for those weights.", "download": "http://jse.amstat.org/datasets/euroweight.dat.txt", "filename": "euroweight", "name": "The Weight of Euro Coins ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Title, year of release, length in minutes, number of cast members listed, rating, and number of lines \nof description are recorded for a simple random sample of 100 movies. One can use the sample to obtain base-line information \non the movie guide from which the data were collected. The dataset also illustrates two paradoxes for associations between \nthree variables: non-transitivity of positive correlation and Simpson's paradox. 
SOURCE: The data were taken as a simple \nrandom sample of the approximately 19,000 movies (not including made-for-TV movies) in Leonard Maltin's Movie and Video \nGuide, 1996. ", "download": "http://jse.amstat.org/datasets/films.dat.txt", "filename": "films", "name": "films dataset", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset contains descriptive data of contestants on the game shoe \"Friend or Foe?\". Information on the contestant's \nrace, sex, age, prize money, and playing strategy are included. ", "download": "http://jse.amstat.org/datasets/friend_or_foe.dat.txt", "filename": "friend_or_foe", "name": "Data from the Television Game Show \"Friend or Foe?\"", - "number_format": 31, - "remove_quotes": true, "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset contains hat size as well as circumference, length of major axis \nand length of minor axis of the inner hat band for 26 hats. The manufacturer \nand the country of manufacture are also included.", "download": "http://jse.amstat.org/datasets/hats.dat.txt", "filename": "hats", "name": " Hat measurements, including hat size", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The dataset consists of samples of size six taken without replacement\nfrom the integers {1, 2, 3, ..., 42}. 
There are actually three\ndatasets from three different sources, and in each case the six-tuples\nare (in theory) random selections or samples. The observations in each\nsample are given in the order in which they were obtained or selected.", "download": "http://jse.amstat.org/datasets/lotto.dat.txt", "filename": "lotto", "name": "Lotto 6/42 Selections from Individuals, Irish National Lottery, and S-Plus Simulation", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "TAB" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This file contains daily per theater box office receipts for 49 \nmovies. This data is to accompany the article entitled Movie Data.", "download": "http://jse.amstat.org/datasets/moviedaily.dat.txt", "filename": "moviedaily", "name": "moviedaily", - "number_format": 31, - "remove_quotes": true, "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Every year actors and actresses are chosen to receive the Oscars awards for best actor and for best actress. 
This dataset \ncontains information about each of the winners for each of the 77 annual Oscar awards.\n\nAlthough there have been only 77 Oscars, there are 78 male winners and 78 female winners because ties happened on two \noccasions (1933 for the best actor and 1969 for the best actress).\n\nVARIABLE DESCRIPTIONS:\n\nColumns Variables\n 1 Gender (m=male f=female)\n 3-4 Oscar Year Number (1-77)\n 6-9 Year the Oscar Took Place\n 11-29 Winner’s first and last name\n 31-60 Name of the Movie in which the winner acted\n 62-63 Age of winner (at the beginning of the winning year)\n 65-77 Birth place (State if born in USA, else Country)\n 79-80 Month in which the winner was born\n 82-83 Day of month on which winner was born\n 85-88 Year the winner was born", "download": "http://jse.amstat.org/datasets/oscars.dat.txt", "filename": "oscars", "name": "Oscars: Best Actors and Actresses", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset contains information collected from rolling the pair of\npigs (found in the game \"Pass the Pigs\") 6000 times. A description of\nthe rules, scoring configurations, and data collection method are\nincluded in the accompanying paper.", "download": "http://jse.amstat.org/datasets/pig.dat.txt", "filename": "pig", "name": "Data from the game \"Pass the Pigs\"", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "In a residential home, energy consumption is closely related to the\noutdoor temperature and size of the house. 
In a home of a given size,\ntemperature fluctuations and energy consumption vary fairly predictably\nover time. When homeowners add a room, other things being equal,\nutility usage should increase. This dataset permits students to\nestimate the energy demand and make forecasts for future months, as\nwell as explore other relationships.\n\nThe dataset contains natural gas and electricity usage data for a\ngas-heated single-family residence in the Boston area from September\n1990 through May 1997, accompanied by monthly climatological data. \nThe dataset is useful for illustrating the concepts and techniques of\ncentral tendency, dispersion, elementary time series analysis,\ncorrelation, simple and multiple regression, and variable\ntransformations.", "download": "http://jse.amstat.org/datasets/utility.dat.txt", "filename": "utility", "name": "What Does It Take to Heat a New Room? ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" } ], "subcategory_name": "Other" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "http://jse.amstat.org/datasets/airport.txt", + "description": "This dataset consists of all 135 large and medium sized air hubs in the United States as defined by the Federal Aviation Administration.", + "description_url": "http://jse.amstat.org/datasets/airport.txt", "download": "http://jse.amstat.org/datasets/airport.dat.txt", - "filename": "US-Airport-Statistics", + "filename": "airport", "name": " US Airport Statistics", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "columns": ["Airport", "City", "Scheduled departures", "Performed departures", "Enplaned passengers", "Enplaned revenue tons of freight", 
"Enplaned revenue tons of mail"] }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data is a set of 50000 (1.3 MB ) observations containing roughly 2 minutes of traffic from the one hour, larger \ndec-pkt-1.tcp file used in the paper. The larger file can be accessed from the author's web page or from its source. With \nonly 50000 observations, the data set ", "download": "http://jse.amstat.org/datasets/packetdata.dat.txt", "filename": "packetdata", "name": "packetdata", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset consists of a listing of all US interstate\nhighways, treating the highway as the sampling unit.\n\nVARIABLE DESCRIPTIONS:\nInterstate # Columns 1-2\nNumber of states Columns 4-5\nApproximate miles Columns 7-10\nSouthern or Western end Columns 12-34\nNorthern or Eastern end Columns 36-57", "download": "http://jse.amstat.org/datasets/ushighway1.dat.txt", "filename": "ushighway1", "name": " US INTERSTATE SYSTEM I", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset consists of a listing of all US interstate\nhighways, treating the highway/state combination \nas the sampling unit. Three principal cities through which the\nhighway runs are given for each state. 
Therefore, a single highway \nmay have several observations, one observation for each state.", "download": "http://jse.amstat.org/datasets/ushighway2.dat.txt", "filename": "ushighway2", "name": "US INTERSTATE SYSTEM II", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset consists of a listing of all US 3-digit interstate\nhighways (spurs and connecters), treating the highway/state\ncombination as the sampling unit.", "download": "http://jse.amstat.org/datasets/ushighway3.dat.txt", "filename": "ushighway3", "name": "US INTERSTATE SYSTEM III", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" } ], "subcategory_name": "Travel" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "For each of the forty largest countries in the world (according to 1990\npopulation figures), data are given for the country's life expectancy\nat birth, number of people per television set, and number of people per\nphysician.", "download": "http://jse.amstat.org/datasets/televisions.dat.txt", "filename": "televisions", "name": "Televisions, Physicians, and Life Expectancy", - "number_format": 31, - "remove_quotes": true, "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Population" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, + "description": "Specifications are given for 428 new vehicles for the 2004 year. 
The variables recorded include price, measurements relating to the size of the vehicle, and fuel efficiency.", + "description_url": "http://jse.amstat.org/datasets/04cars.txt", + "download": "http://jse.amstat.org/datasets/04cars.dat.txt", + "filename": "04cars", + "name": "2004 New Car and Truck Data", + "separator": "SPACE", + "columns": ["Vehicle Name", "Sports Car? (1=yes, 0=no)", "Sport Utility Vehicle? (1=yes, 0=no)", "Wagon? (1=yes, 0=no)", "Minivan?(1=yes, 0=no)", "Pickup? (1=yes, 0=no)", "All-Wheel Drive? (1=yes, 0=no)", "Rear-Wheel Drive? (1=yes, 0=no)", "Suggested Retail Price (U.S. Dollars)", "Dealer Cost (or 'invoice price') (U.S. Dollars)", "Engine Size (liters)", "Number of Cylinders (=-1 if rotary engine)", "Horsepower", "City Miles Per Gallon", "Highway Miles Per Gallon", "Weight (Pounds)", "Wheel Base (inches)", "Length (inches)", "Width (inches)"] + }, + { + "description": "The data set contains the results of a calibration experiment designed to estimate volume of oysters and to compare two computer vision systems (2-D vs. 3-D) for classification of oysters based on their image size in number of pixels. ", + "description_url": "http://jse.amstat.org/datasets/30oysters.dat.txt", + "download": "http://jse.amstat.org/datasets/30oysters.dat.txt", + "filename": "30oysters", + "name": "Oyster Volume Estimation Data", + "separator": "SPACE", + "columns": ["Oyster ID", "Oyster weight (g)", "Oyster volume (cc)", "Oyster size information from the 3-D imaging system (in volume pixels)", "Oyster size information from the 2-D imaging system (in pixels)"] + }, + { "description": "The data set gives a random sample of the length of visits of users entering the msnbc.com web site during September 28, 1999.\nThe length of the visit is an estimate of the total number of clicks or pages seen by each user and is based on web server \nlogs, thus it counts only pages recorded by the server. Pages cached in the user's browser or in a cache proxy server are \nunknown. 
The data set used in the paper is much larger than the one made available here but that larger data set is also \navailable in a page cited in the references. ", "download": "http://jse.amstat.org/datasets/msnbclength.dat.txt", "filename": "msnbclength", "name": "Internet Data Analysis for Undergrad Curriculum", - "number_format": 31, - "remove_quotes": true, - "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "," }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The video lottery terminal dataset contains observations on the three\nwindows of an electronic slot machine for 345 plays together with the\nprize paid out for each play. The prize payout distribution is so\nbadly skewed that confidence intervals for expected payout based on the\ncentral limit theorem are not accurate. The dataset can be used at the\ngraduate or upper undergraduate level to illustrate parametric\nbootstrapping. The dataset can also be used in a graduate course to\nillustrate tests of independence for two and three-way contingency\ntables involving random zeroes, or these tables may be collapsed and\nused as examples in an introductory course.", "download": "http://jse.amstat.org/datasets/vlt.dat.txt", "filename": "vlt_", "name": "Video Lottery Terminal Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" } ], "subcategory_name": "Technology" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "For each U.S. 
Senator, his or her votes on whether to remove President\nClinton on each of the two articles of impeachment (plus a summary\nvariable representing each Senator's number of \"guilty\" votes) are\nprovided, as well as each Senator's values on several variables that\ncould be predictive of vote (e.g., Senator's degree of conservatism,\nhow well Clinton did in the Senator's state in the 1996 Presidential\nelection).", "download": "http://jse.amstat.org/datasets/impeach.dat.txt", "filename": "impeach", "name": " U.S. Senate Votes on Clinton Removal", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "The data consist of the numbers of days served in office for the 43 \nPresidents of the United States as of 4 February 2004. ", "download": "http://jse.amstat.org/datasets/outlier.dat.txt", "filename": "outlier", "name": "A Dataset That Is 44% Outliers", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" } ], "subcategory_name": "Politics" }, { "datasets": [ { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "This dataset contains variables that address the relationship between \npublic school expenditures and academic performance, as measured by the \nSAT. 
\n\nVARIABLE DESCRIPTIONS: \nColumns\n 1 - 16 Name of state (in quotation marks)\n18 - 22 Current expenditure per pupil in average daily attendance \n in public elementary and secondary schools, 1994-95 \n (in thousands of dollars)\n24 - 27 Average pupil/teacher ratio in public elementary and \n secondary schools, Fall 1994\n29 - 34 Estimated average annual salary of teachers in public \n elementary and secondary schools, 1994-95 (in thousands of \n dollars)\n36 - 37 Percentage of all eligible students taking the SAT, 1994-95\n39 - 41 Average verbal SAT score, 1994-95\n43 - 45 Average math SAT score, 1994-95\n47 - 50 Average total score on the SAT, 1994-95 ", "download": "http://jse.amstat.org/datasets/sat.dat.txt", "filename": "sat", "name": "Getting What You Pay For: The Debate Over Equity in Public School Expenditures ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "SPACE" }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, "description": "Data are from the 1995 U.S. News report on American colleges and\nuniversities. They include demographic information on tuition,\nroom & board costs, SAT or ACT scores, application/acceptance\nrates, student/faculty ratio, graduation rate, and more. The\ndataset is used for the 1995 Data Analysis Exposition, sponsored\nby the Statistical Graphics Section of the American Statistical\nAssociation. See the file colleges.txt for more information on \nthe Exposition.", "download": "http://jse.amstat.org/datasets/usnews.dat.txt", "filename": "usnews", "name": "U.S. 
News College data", - "number_format": 31, - "remove_quotes": true, - "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "separator": "," } ], "subcategory_name": "Education" } ] } ], "collection_name": "JSEDataArchive" } diff --git a/src/backend/datasources/DatasetHandler.cpp b/src/backend/datasources/DatasetHandler.cpp index c1b57127d..20a46dba0 100644 --- a/src/backend/datasources/DatasetHandler.cpp +++ b/src/backend/datasources/DatasetHandler.cpp @@ -1,313 +1,308 @@ /*************************************************************************** File : DatasetHandler.cpp Project : LabPlot Description : Processes a dataset's metadata file -------------------------------------------------------------------- Copyright : (C) 2019 Kovacs Ferencz (kferike98@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "backend/datasources/filters/AsciiFilter.h" #include "backend/datasources/DatasetHandler.h" #include #include #include #include #include #include #include #include #include #include #include /*! \class DatasetHandler \brief Provides functionality to process a metadata file of a dataset, configure a spreadsheet and filter based on it, download the dataset and load it into the spreadsheet. \ingroup datasources */ DatasetHandler::DatasetHandler(Spreadsheet* spreadsheet) : m_spreadsheet(spreadsheet), m_filter(new AsciiFilter), m_object(nullptr), m_downloadManager(new QNetworkAccessManager) { connect(m_downloadManager, &QNetworkAccessManager::finished, this, &DatasetHandler::downloadFinished); connect(this, &DatasetHandler::downloadCompleted, this, &DatasetHandler::processDataset); } DatasetHandler::~DatasetHandler() { delete m_downloadManager; delete m_filter; } /** * @brief Initiates processing the metadata file,, located at the given path, belonging to a dataset. * @param path the path to the metadata file */ void DatasetHandler::processMetadata(const QJsonObject& object) { m_object = new QJsonObject(object); qDebug("Start processing dataset..."); if(!m_object->isEmpty()) { configureFilter(); configureSpreadsheet(); prepareForDataset(); } } /** * @brief Marks the metadata file being invalid by setting the value of a flag, also pops up a messagebox. */ void DatasetHandler::markMetadataAsInvalid() { m_invalidMetadataFile = true; QMessageBox::critical(0, "Invalid metadata file", "The metadata file for the choosen dataset is invalid!"); } /** * @brief Configures the filter, that will be used later, based on the metadata file. 
*/ void DatasetHandler::configureFilter() { - qDebug("Configure filter"); + //set some default values common to many datasets + m_filter->setNumberFormat(QLocale::C); + m_filter->setSkipEmptyParts(true); + m_filter->setHeaderEnabled(false); + + //read properties specified in the dataset description if(!m_object->isEmpty()) { if(m_object->contains("separator")) m_filter->setSeparatingCharacter(m_object->value("separator").toString()); - else - markMetadataAsInvalid(); if(m_object->contains("comment_character")) m_filter->setCommentCharacter(m_object->value("comment_character").toString()); - else - markMetadataAsInvalid(); if(m_object->contains("create_index_column")) m_filter->setCreateIndexEnabled(m_object->value("create_index_column").toBool()); - else - markMetadataAsInvalid(); if(m_object->contains("skip_empty_parts")) m_filter->setSkipEmptyParts(m_object->value("skip_empty_parts").toBool()); - else - markMetadataAsInvalid(); if(m_object->contains("simplify_whitespaces")) m_filter->setSimplifyWhitespacesEnabled(m_object->value("simplify_whitespaces").toBool()); - else - markMetadataAsInvalid(); if(m_object->contains("remove_quotes")) m_filter->setRemoveQuotesEnabled(m_object->value("remove_quotes").toBool()); - else - markMetadataAsInvalid(); if(m_object->contains("use_first_row_for_vectorname")) m_filter->setHeaderEnabled(m_object->value("use_first_row_for_vectorname").toBool()); - else - markMetadataAsInvalid(); if(m_object->contains("number_format")) m_filter->setNumberFormat(QLocale::Language(m_object->value("number_format").toInt())); - else - markMetadataAsInvalid(); if(m_object->contains("DateTime_format")) m_filter->setDateTimeFormat(m_object->value("DateTime_format").toString()); - else - markMetadataAsInvalid(); + if(m_object->contains("columns")) { + const QJsonArray& columnsArray = m_object->value("columns").toArray(); + QStringList columnNames; + for (int i = 0; i < columnsArray.size(); ++i) + columnNames << columnsArray[i].toString(); + + 
m_filter->setVectorNames(columnNames); + } } else { qDebug() << "Empty object"; markMetadataAsInvalid(); } } /** * @brief Configures the spreadsheet based on the metadata file. */ void DatasetHandler::configureSpreadsheet() { qDebug("Conf spreadsheet"); if(!m_object->isEmpty()) { if(m_object->contains("name")) m_spreadsheet->setName( m_object->value("name").toString()); else markMetadataAsInvalid(); if(m_object->contains("description")) m_spreadsheet->setComment(m_object->value("description").toString()); } else { markMetadataAsInvalid(); } } /** * @brief Extracts the download URL of the dataset and initiates the process of download. */ void DatasetHandler::prepareForDataset() { qDebug("Start downloading dataset"); if(!m_object->isEmpty()) { if(m_object->contains("download")) { const QString& url = m_object->value("download").toString(); const QUrl downloadUrl = QUrl::fromEncoded(url.toLocal8Bit()); doDownload(url); } else { QMessageBox::critical(0, i18n("Invalid metadata file"), i18n("There is no download URL present in the metadata file!")); } } else { markMetadataAsInvalid(); } } /** * @brief Starts the download of the dataset. * @param url the download URL of the dataset */ void DatasetHandler::doDownload(const QUrl& url) { qDebug("Download request"); QNetworkRequest request(url); m_currentDownload = m_downloadManager->get(request); connect(m_currentDownload, &QNetworkReply::downloadProgress, [this] (qint64 bytesReceived, qint64 bytesTotal) { double progress; if (bytesTotal == -1) progress = 0; else progress = 100 * (static_cast(bytesReceived) / static_cast(bytesTotal)); qDebug() << "Progress: " << progress; emit downloadProgress(progress); }); } /** * @brief Called when the download of the dataset is finished. 
 */
void DatasetHandler::downloadFinished(QNetworkReply* reply) {
	qDebug("Download finished");
	const QUrl& url = reply->url();
	if (reply->error()) {
		qDebug("Download of %s failed: %s\n", url.toEncoded().constData(), qPrintable(reply->errorString()));
	} else {
		if (isHttpRedirect(reply)) {
			// redirects are only logged, not followed - the download silently yields no file
			qDebug("Request was redirected.\n");
		} else {
			QString filename = saveFileName(url);
			if (saveToDisk(filename, reply)) {
				qDebug("Download of %s succeeded (saved to %s)\n", url.toEncoded().constData(), qPrintable(filename));
				m_fileName = filename;
				emit downloadCompleted();
			}
		}
	}

	m_currentDownload = nullptr;
	reply->deleteLater();
}

/**
 * @brief Checks whether the GET request was redirected or not.
 */
bool DatasetHandler::isHttpRedirect(QNetworkReply* reply) {
	const int statusCode = reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt();
	// TODO enum/defines for status codes ?
	return statusCode == 301 || statusCode == 302 || statusCode == 303 || statusCode == 305 || statusCode == 307 || statusCode == 308;
}

/**
 * @brief Returns the name and path of the file that will contain the content of the reply (based on the URL).
 * @param url
 */
QString DatasetHandler::saveFileName(const QUrl& url) {
	const QString path = url.path();

	//get the extension of the downloaded file
	const QString downloadFileName = QFileInfo(path).fileName();
	int lastIndex = downloadFileName.lastIndexOf(".");
	// the extension includes the leading dot (right() from lastIndex keeps the '.')
	const QString fileExtension = lastIndex >= 0 ? downloadFileName.right(downloadFileName.length() - lastIndex) : "";

	QString basename = m_object->value("filename").toString() + fileExtension;

	if (basename.isEmpty())
		basename = "download";

	QDir downloadDir(QStandardPaths::writableLocation(QStandardPaths::AppDataLocation) + QLatin1String("/datasets_local/"));
	if (!downloadDir.exists())
		downloadDir.mkdir(downloadDir.path());

	QString fileName = downloadDir.path() + QLatin1Char('/') + basename;
	QFileInfo fileInfo (fileName);
	if (QFile::exists(fileName)) {
		// locally cached copies older than one day are removed so they get re-downloaded
		if(fileInfo.lastModified().addDays(1) < QDateTime::currentDateTime()){
			QFile removeFile (fileName);
			removeFile.remove();
		} else {
			qDebug() << "Dataset file already exists, no need to download it again";
		}
	}
	return fileName;
}

/**
 * @brief Saves the content of the network reply to the given path under the given name.
 * @return true on success, false if the target file could not be opened for writing
 */
bool DatasetHandler::saveToDisk(const QString& filename, QIODevice* data) {
	QFile file(filename);
	if (!file.open(QIODevice::WriteOnly)) {
		qDebug("Could not open %s for writing: %s\n", qPrintable(filename), qPrintable(file.errorString()));
		return false;
	}

	file.write(data->readAll());
	file.close();

	return true;
}

/**
 * @brief Processes the downloaded dataset with the help of the already configured filter.
 */
void DatasetHandler::processDataset() {
	m_filter->readDataFromFile(m_fileName, m_spreadsheet);
	configureColumns();
}

/**
 * @brief Configures the columns of the spreadsheet, based on the metadata file.
*/ void DatasetHandler::configureColumns() { if(!m_object->isEmpty()) { int index = 0; const int columnsCount = m_spreadsheet->columnCount(); while(m_object->contains(i18n("column_description_%1", index)) && (index < columnsCount)) { m_spreadsheet->column(index)->setComment(m_object->value(i18n("column_description_%1", index)).toString()); ++index; } } else { qDebug("Invalid Json document"); } } diff --git a/src/backend/datasources/filters/AsciiFilter.cpp b/src/backend/datasources/filters/AsciiFilter.cpp index 56cbd2f92..506e67c7c 100644 --- a/src/backend/datasources/filters/AsciiFilter.cpp +++ b/src/backend/datasources/filters/AsciiFilter.cpp @@ -1,2664 +1,2667 @@ /*************************************************************************** File : AsciiFilter.cpp Project : LabPlot Description : ASCII I/O-filter -------------------------------------------------------------------- Copyright : (C) 2009-2018 Stefan Gerlach (stefan.gerlach@uni.kn) Copyright : (C) 2009-2019 Alexander Semke (alexander.semke@web.de) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
 *                                                                         *
 *  You should have received a copy of the GNU General Public License      *
 *  along with this program; if not, write to the Free Software            *
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor,                     *
 *  Boston, MA  02110-1301  USA                                            *
 *                                                                         *
 ***************************************************************************/
#include "backend/datasources/LiveDataSource.h"
#include "backend/core/column/Column.h"
#include "backend/core/Project.h"
#include "backend/datasources/filters/AsciiFilter.h"
#include "backend/datasources/filters/AsciiFilterPrivate.h"
#include "backend/worksheet/plots/cartesian/CartesianPlot.h"
#include "backend/worksheet/plots/cartesian/XYCurve.h"
#include "backend/lib/macros.h"
#include "backend/lib/trace.h"

#ifdef HAVE_MQTT
#include "backend/datasources/MQTTClient.h"
#include "backend/datasources/MQTTTopic.h"
#endif

// NOTE(review): the targets of the following four #include directives were lost
// (angle-bracket contents stripped during extraction) - restore them from VCS
#include
#include

#if defined(Q_OS_LINUX) || defined(Q_OS_BSD4)
#include
#include
#endif

/*!
\class AsciiFilter
\brief Manages the import/export of data organized as columns (vectors) from/to an ASCII-file.

\ingroup datasources
*/
AsciiFilter::AsciiFilter() : AbstractFileFilter(Ascii), d(new AsciiFilterPrivate(this)) {}

AsciiFilter::~AsciiFilter() = default;

/*!
  reads the content of the device \c device.
*/
void AsciiFilter::readDataFromDevice(QIODevice& device, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode, int lines) {
	d->readDataFromDevice(device, dataSource, importMode, lines);
}

// delegates a live-device read to the private implementation, discarding the byte count
void AsciiFilter::readFromLiveDeviceNotFile(QIODevice &device, AbstractDataSource* dataSource) {
	d->readFromLiveDevice(device, dataSource);
}

// returns the number of bytes consumed from the live device, starting at offset \c from
qint64 AsciiFilter::readFromLiveDevice(QIODevice& device, AbstractDataSource* dataSource, qint64 from) {
	return d->readFromLiveDevice(device, dataSource, from);
}

#ifdef HAVE_MQTT
// NOTE(review): the return type lost its template argument in extraction
// (presumably QVector<QStringList>) - confirm against AsciiFilter.h
QVector AsciiFilter::preview(const QString& message) {
	return d->preview(message);
}

/*!
  reads the content of a message received by the topic.
*/
void AsciiFilter::readMQTTTopic(const QString& message, AbstractDataSource* dataSource) {
	d->readMQTTTopic(message, dataSource);
}

/*!
  Returns the statistical data, that the MQTTTopic needs for the will message.
*/
QString AsciiFilter::MQTTColumnStatistics(const MQTTTopic* topic) const {
	return d->MQTTColumnStatistics(topic);
}

/*!
  Returns the column mode of the last column (the value column of the MQTTTopic).
*/
AbstractColumn::ColumnMode AsciiFilter::MQTTColumnMode() const {
	return d->MQTTColumnMode();
}

/*!
  After the MQTTTopic is loaded, prepares the filter for reading.
*/
void AsciiFilter::setPreparedForMQTT(bool prepared, MQTTTopic* topic, const QString& separator) {
	d->setPreparedForMQTT(prepared, topic, separator);
}
#endif

/*!
  returns the separator used by the filter.
*/
QString AsciiFilter::separator() const {
	return d->separator();
}

/*!
  returns whether the filter has already been prepared for reading.
*/
int AsciiFilter::isPrepared() {
	return d->isPrepared();
}

/*!
  reads the content of the file \c fileName.
*/
void AsciiFilter::readDataFromFile(const QString& fileName, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode) {
	d->readDataFromFile(fileName, dataSource, importMode);
}

// NOTE(review): both preview() return types lost their template argument in
// extraction (presumably QVector<QStringList>) - confirm against AsciiFilter.h
QVector AsciiFilter::preview(const QString& fileName, int lines) {
	return d->preview(fileName, lines);
}

QVector AsciiFilter::preview(QIODevice& device) {
	return d->preview(device);
}

/*!
  reads the content of the file \c fileName to the data source \c dataSource.
*/
//void AsciiFilter::read(const QString& fileName, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode) {
//	d->read(fileName, dataSource, importMode);
//}

/*!
  writes the content of the data source \c dataSource to the file \c fileName.
*/
void AsciiFilter::write(const QString& fileName, AbstractDataSource* dataSource) {
	d->write(fileName, dataSource);
// 	emit()
}

/*!
loads the predefined filter settings for \c filterName */ void AsciiFilter::loadFilterSettings(const QString& filterName) { Q_UNUSED(filterName); } /*! saves the current settings as a new filter with the name \c filterName */ void AsciiFilter::saveFilterSettings(const QString& filterName) const { Q_UNUSED(filterName); } /*! returns the list with the names of all saved (system wide or user defined) filter settings. */ QStringList AsciiFilter::predefinedFilters() { return QStringList(); } /*! returns the list of all predefined separator characters. */ QStringList AsciiFilter::separatorCharacters() { return (QStringList() << "auto" << "TAB" << "SPACE" << "," << ";" << ":" << ",TAB" << ";TAB" << ":TAB" << ",SPACE" << ";SPACE" << ":SPACE" << "2xSPACE" << "3xSPACE" << "4xSPACE" << "2xTAB"); } /*! returns the list of all predefined comment characters. */ QStringList AsciiFilter::commentCharacters() { return (QStringList() << "#" << "!" << "//" << "+" << "c" << ":" << ";"); } /*! returns the list of all predefined data types. */ QStringList AsciiFilter::dataTypes() { const QMetaObject& mo = AbstractColumn::staticMetaObject; const QMetaEnum& me = mo.enumerator(mo.indexOfEnumerator("ColumnMode")); QStringList list; for (int i = 0; i <= 100; ++i) // me.keyCount() does not work because we have holes in enum if (me.valueToKey(i)) list << me.valueToKey(i); return list; } QString AsciiFilter::fileInfoString(const QString& fileName) { QString info(i18n("Number of columns: %1", AsciiFilter::columnNumber(fileName))); info += QLatin1String("
"); info += i18n("Number of lines: %1", AsciiFilter::lineNumber(fileName)); return info; } /*! returns the number of columns in the file \c fileName. */ int AsciiFilter::columnNumber(const QString& fileName, const QString& separator) { KFilterDev device(fileName); if (!device.open(QIODevice::ReadOnly)) { DEBUG("Could not open file " << fileName.toStdString() << " for determining number of columns"); return -1; } QString line = device.readLine(); line.remove(QRegExp("[\\n\\r]")); QStringList lineStringList; if (separator.length() > 0) lineStringList = line.split(separator); else lineStringList = line.split(QRegExp("\\s+")); DEBUG("number of columns : " << lineStringList.size()); return lineStringList.size(); } size_t AsciiFilter::lineNumber(const QString& fileName) { KFilterDev device(fileName); if (!device.open(QIODevice::ReadOnly)) { DEBUG("Could not open file " << fileName.toStdString() << " to determine number of lines"); return 0; } // if (!device.canReadLine()) // return -1; size_t lineCount = 0; #if defined(Q_OS_LINUX) || defined(Q_OS_BSD4) //on linux and BSD use wc, if available, which is much faster than counting lines in the file if (device.compressionType() == KCompressionDevice::None && !QStandardPaths::findExecutable(QLatin1String("wc")).isEmpty()) { QProcess wc; wc.start(QLatin1String("wc"), QStringList() << QLatin1String("-l") << fileName); size_t lineCount = 0; while (wc.waitForReadyRead()) { QString line(wc.readLine()); // wc on macOS has leading spaces: use SkipEmptyParts lineCount = line.split(' ', QString::SkipEmptyParts)[0].toInt(); } return lineCount; } #endif while (!device.atEnd()) { device.readLine(); lineCount++; } return lineCount; } /*! returns the number of lines in the device \c device and 0 if sequential. resets the position to 0! 
*/ size_t AsciiFilter::lineNumber(QIODevice& device) const { if (device.isSequential()) return 0; // if (!device.canReadLine()) // DEBUG("WARNING in AsciiFilter::lineNumber(): device cannot 'readLine()' but using it anyway."); size_t lineCount = 0; device.seek(0); if (d->readingFile) lineCount = lineNumber(d->readingFileName); else { while (!device.atEnd()) { device.readLine(); lineCount++; } } device.seek(0); return lineCount; } void AsciiFilter::setCommentCharacter(const QString& s) { d->commentCharacter = s; } QString AsciiFilter::commentCharacter() const { return d->commentCharacter; } void AsciiFilter::setSeparatingCharacter(const QString& s) { d->separatingCharacter = s; } QString AsciiFilter::separatingCharacter() const { return d->separatingCharacter; } void AsciiFilter::setDateTimeFormat(const QString &f) { d->dateTimeFormat = f; } QString AsciiFilter::dateTimeFormat() const { return d->dateTimeFormat; } void AsciiFilter::setNumberFormat(QLocale::Language lang) { d->numberFormat = lang; } QLocale::Language AsciiFilter::numberFormat() const { return d->numberFormat; } void AsciiFilter::setAutoModeEnabled(const bool b) { d->autoModeEnabled = b; } bool AsciiFilter::isAutoModeEnabled() const { return d->autoModeEnabled; } void AsciiFilter::setHeaderEnabled(const bool b) { d->headerEnabled = b; } bool AsciiFilter::isHeaderEnabled() const { return d->headerEnabled; } void AsciiFilter::setSkipEmptyParts(const bool b) { d->skipEmptyParts = b; } bool AsciiFilter::skipEmptyParts() const { return d->skipEmptyParts; } void AsciiFilter::setCreateIndexEnabled(bool b) { d->createIndexEnabled = b; } bool AsciiFilter::createIndexEnabled() const { return d->createIndexEnabled; } void AsciiFilter::setCreateTimestampEnabled(bool b) { d->createTimestampEnabled = b; } bool AsciiFilter::createTimestampEnabled() const { return d->createTimestampEnabled; } void AsciiFilter::setSimplifyWhitespacesEnabled(bool b) { d->simplifyWhitespacesEnabled = b; } bool 
AsciiFilter::simplifyWhitespacesEnabled() const { return d->simplifyWhitespacesEnabled; } void AsciiFilter::setNaNValueToZero(bool b) { if (b) d->nanValue = 0; else d->nanValue = std::numeric_limits::quiet_NaN(); } bool AsciiFilter::NaNValueToZeroEnabled() const { return (d->nanValue == 0); } void AsciiFilter::setRemoveQuotesEnabled(bool b) { d->removeQuotesEnabled = b; } bool AsciiFilter::removeQuotesEnabled() const { return d->removeQuotesEnabled; } void AsciiFilter::setVectorNames(const QString& s) { d->vectorNames.clear(); if (!s.simplified().isEmpty()) d->vectorNames = s.simplified().split(' '); } +void AsciiFilter::setVectorNames(QStringList list) { + d->vectorNames = list; +} QStringList AsciiFilter::vectorNames() const { return d->vectorNames; } QVector AsciiFilter::columnModes() { return d->columnModes; } void AsciiFilter::setStartRow(const int r) { d->startRow = r; } int AsciiFilter::startRow() const { return d->startRow; } void AsciiFilter::setEndRow(const int r) { d->endRow = r; } int AsciiFilter::endRow() const { return d->endRow; } void AsciiFilter::setStartColumn(const int c) { d->startColumn = c; } int AsciiFilter::startColumn() const { return d->startColumn; } void AsciiFilter::setEndColumn(const int c) { d->endColumn = c; } int AsciiFilter::endColumn() const { return d->endColumn; } //##################################################################### //################### Private implementation ########################## //##################################################################### AsciiFilterPrivate::AsciiFilterPrivate(AsciiFilter* owner) : q(owner) { } /*! 
* get a single line from device */ QStringList AsciiFilterPrivate::getLineString(QIODevice& device) { QString line; do { // skip comment lines in data lines if (!device.canReadLine()) DEBUG("WARNING in AsciiFilterPrivate::getLineString(): device cannot 'readLine()' but using it anyway."); // line = device.readAll(); line = device.readLine(); } while (!commentCharacter.isEmpty() && line.startsWith(commentCharacter)); line.remove(QRegExp("[\\n\\r]")); // remove any newline DEBUG("data line : \'" << line.toStdString() << '\''); QStringList lineStringList = line.split(m_separator, (QString::SplitBehavior)skipEmptyParts); //TODO: remove quotes here? if (simplifyWhitespacesEnabled) { for (int i = 0; i < lineStringList.size(); ++i) lineStringList[i] = lineStringList[i].simplified(); } QDEBUG("data line, parsed: " << lineStringList); return lineStringList; } /*! * returns -1 if the device couldn't be opened, 1 if the current read position in the device is at the end and 0 otherwise. */ int AsciiFilterPrivate::prepareDeviceToRead(QIODevice& device) { DEBUG("AsciiFilterPrivate::prepareDeviceToRead(): is sequential = " << device.isSequential() << ", can readLine = " << device.canReadLine()); if (!device.open(QIODevice::ReadOnly)) return -1; if (device.atEnd() && !device.isSequential()) // empty file return 1; ///////////////////////////////////////////////////////////////// // Parse the first line: // Determine the number of columns, create the columns and use (if selected) the first row to name them QString firstLine; // skip the comment lines first if (!commentCharacter.isEmpty()) { do { if (!device.canReadLine()) DEBUG("WARNING in AsciiFilterPrivate::prepareDeviceToRead(): device cannot 'readLine()' but using it anyway."); if (device.atEnd()) { DEBUG("device at end! 
Giving up."); if (device.isSequential()) break; else return 1; } firstLine = device.readLine(); } while (firstLine.startsWith(commentCharacter) || firstLine.simplified().isEmpty()); } // navigate to the line where we asked to start reading from DEBUG(" Skipping " << startRow - 1 << " lines"); for (int i = 0; i < startRow - 1; ++i) { if (!device.canReadLine()) DEBUG("WARNING in AsciiFilterPrivate::prepareDeviceToRead(): device cannot 'readLine()' but using it anyway."); if (device.atEnd()) { DEBUG("device at end! Giving up."); if (device.isSequential()) break; else return 1; } firstLine = device.readLine(); DEBUG(" line = " << firstLine.toStdString()); } DEBUG(" device position after first line and comments = " << device.pos()); firstLine.remove(QRegExp("[\\n\\r]")); // remove any newline if (removeQuotesEnabled) firstLine = firstLine.remove(QLatin1Char('"')); //TODO: this doesn't work, the split below introduces whitespaces again // if (simplifyWhitespacesEnabled) // firstLine = firstLine.simplified(); DEBUG("First line: \'" << firstLine.toStdString() << '\''); // determine separator and split first line QStringList firstLineStringList; if (separatingCharacter == "auto") { DEBUG("automatic separator"); QRegExp regExp("(\\s+)|(,\\s+)|(;\\s+)|(:\\s+)"); firstLineStringList = firstLine.split(regExp, (QString::SplitBehavior)skipEmptyParts); if (!firstLineStringList.isEmpty()) { int length1 = firstLineStringList.at(0).length(); if (firstLineStringList.size() > 1) m_separator = firstLine.mid(length1, 1); else m_separator = ' '; } } else { // use given separator // replace symbolic "TAB" with '\t' m_separator = separatingCharacter.replace(QLatin1String("2xTAB"), "\t\t", Qt::CaseInsensitive); m_separator = separatingCharacter.replace(QLatin1String("TAB"), "\t", Qt::CaseInsensitive); // replace symbolic "SPACE" with ' ' m_separator = m_separator.replace(QLatin1String("2xSPACE"), QLatin1String(" "), Qt::CaseInsensitive); m_separator = 
m_separator.replace(QLatin1String("3xSPACE"), QLatin1String(" "), Qt::CaseInsensitive); m_separator = m_separator.replace(QLatin1String("4xSPACE"), QLatin1String(" "), Qt::CaseInsensitive); m_separator = m_separator.replace(QLatin1String("SPACE"), QLatin1String(" "), Qt::CaseInsensitive); firstLineStringList = firstLine.split(m_separator, (QString::SplitBehavior)skipEmptyParts); } DEBUG("separator: \'" << m_separator.toStdString() << '\''); DEBUG("number of columns: " << firstLineStringList.size()); QDEBUG("first line: " << firstLineStringList); DEBUG("headerEnabled: " << headerEnabled); //optionally, remove potential spaces in the first line //TODO: this part should be obsolete actually if we do firstLine = firstLine.simplified(); above... if (simplifyWhitespacesEnabled) { for (int i = 0; i < firstLineStringList.size(); ++i) firstLineStringList[i] = firstLineStringList[i].simplified(); } //in GUI in AsciiOptionsWidget we start counting from 1, subtract 1 here to start from zero m_actualStartRow = startRow - 1; if (headerEnabled) { // use first line to name vectors vectorNames = firstLineStringList; QDEBUG("vector names =" << vectorNames); ++m_actualStartRow; } // set range to read if (endColumn == -1) { if (headerEnabled || vectorNames.size() == 0) endColumn = firstLineStringList.size(); // last column else //number of vector names provided in the import dialog (not more than the maximal number of columns in the file) endColumn = qMin(vectorNames.size(), firstLineStringList.size()); } if (endColumn < startColumn) m_actualCols = 0; else m_actualCols = endColumn - startColumn + 1; if (createIndexEnabled) { vectorNames.prepend(i18n("Index")); m_actualCols++; } //TEST: readline-seek-readline fails /* qint64 testpos = device.pos(); DEBUG("read data line @ pos " << testpos << " : " << device.readLine().toStdString()); device.seek(testpos); testpos = device.pos(); DEBUG("read data line again @ pos " << testpos << " : " << device.readLine().toStdString()); */ 
///////////////////////////////////////////////////////////////// // parse first data line to determine data type for each column // if the first line was already parsed as the header, read the next line if (headerEnabled && !device.isSequential()) firstLineStringList = getLineString(device); columnModes.resize(m_actualCols); int col = 0; if (createIndexEnabled) { columnModes[0] = AbstractColumn::Integer; col = 1; } for (auto& valueString : firstLineStringList) { // parse columns available in first data line if (simplifyWhitespacesEnabled) valueString = valueString.simplified(); if (removeQuotesEnabled) valueString.remove(QLatin1Char('"')); if (col == m_actualCols) break; columnModes[col++] = AbstractFileFilter::columnMode(valueString, dateTimeFormat, numberFormat); } // parsing more lines to better determine data types for (unsigned int i = 0; i < m_dataTypeLines; ++i) { if (device.atEnd()) // EOF reached break; firstLineStringList = getLineString(device); createIndexEnabled ? col = 1 : col = 0; for (auto& valueString : firstLineStringList) { if (simplifyWhitespacesEnabled) valueString = valueString.simplified(); if (removeQuotesEnabled) valueString.remove(QLatin1Char('"')); if (col == m_actualCols) break; AbstractColumn::ColumnMode mode = AbstractFileFilter::columnMode(valueString, dateTimeFormat, numberFormat); // numeric: integer -> numeric if (mode == AbstractColumn::Numeric && columnModes[col] == AbstractColumn::Integer) columnModes[col] = mode; // text: non text -> text if (mode == AbstractColumn::Text && columnModes[col] != AbstractColumn::Text) columnModes[col] = mode; col++; } } QDEBUG("column modes = " << columnModes); // ATTENTION: This resets the position in the device to 0 m_actualRows = (int)q->lineNumber(device); const int actualEndRow = (endRow == -1 || endRow > m_actualRows) ? 
m_actualRows : endRow; if (actualEndRow > m_actualStartRow) m_actualRows = actualEndRow - m_actualStartRow; else m_actualRows = 0; DEBUG("start/end column: " << startColumn << ' ' << endColumn); DEBUG("start/end row: " << m_actualStartRow << ' ' << actualEndRow); DEBUG("actual cols/rows (w/o header): " << m_actualCols << ' ' << m_actualRows); if (m_actualRows == 0 && !device.isSequential()) return 1; return 0; } /*! reads the content of the file \c fileName to the data source \c dataSource. Uses the settings defined in the data source. */ void AsciiFilterPrivate::readDataFromFile(const QString& fileName, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode) { DEBUG("AsciiFilterPrivate::readDataFromFile(): fileName = \'" << fileName.toStdString() << "\', dataSource = " << dataSource << ", mode = " << ENUM_TO_STRING(AbstractFileFilter, ImportMode, importMode)); //dirty hack: set readingFile and readingFileName in order to know in lineNumber(QIODevice) //that we're reading from a file and to benefit from much faster wc on linux //TODO: redesign the APIs and remove this later readingFile = true; readingFileName = fileName; KFilterDev device(fileName); readDataFromDevice(device, dataSource, importMode); readingFile = false; } qint64 AsciiFilterPrivate::readFromLiveDevice(QIODevice& device, AbstractDataSource* dataSource, qint64 from) { DEBUG("AsciiFilterPrivate::readFromLiveDevice(): bytes available = " << device.bytesAvailable() << ", from = " << from); if (device.bytesAvailable() <= 0) { DEBUG(" No new data available"); return 0; } //TODO: may be also a matrix? 
auto* spreadsheet = dynamic_cast(dataSource); if (spreadsheet->sourceType() != LiveDataSource::SourceType::FileOrPipe) if (device.isSequential() && device.bytesAvailable() < (int)sizeof(quint16)) return 0; if (!m_prepared) { DEBUG(" Preparing .."); switch (spreadsheet->sourceType()) { case LiveDataSource::SourceType::FileOrPipe: { const int deviceError = prepareDeviceToRead(device); if (deviceError != 0) { DEBUG(" Device error = " << deviceError); return 0; } break; } case LiveDataSource::SourceType::NetworkTcpSocket: case LiveDataSource::SourceType::NetworkUdpSocket: case LiveDataSource::SourceType::LocalSocket: case LiveDataSource::SourceType::SerialPort: m_actualRows = 1; if (createIndexEnabled) { m_actualCols = 2; columnModes << AbstractColumn::Integer << AbstractColumn::Numeric; vectorNames << i18n("Index") << i18n("Value"); } else { m_actualCols = 1; columnModes << AbstractColumn::Numeric; vectorNames << i18n("Value"); } QDEBUG(" vector names = " << vectorNames); break; case LiveDataSource::SourceType::MQTT: break; } // prepare import for spreadsheet spreadsheet->setUndoAware(false); spreadsheet->resize(AbstractFileFilter::Replace, vectorNames, m_actualCols); //columns in a file data source don't have any manual changes. //make the available columns undo unaware and suppress the "data changed" signal. //data changes will be propagated via an explicit Column::setChanged() call once new data was read. for (int i = 0; i < spreadsheet->childCount(); i++) { spreadsheet->child(i)->setUndoAware(false); spreadsheet->child(i)->setSuppressDataChangedSignal(true); } int keepNValues = spreadsheet->keepNValues(); if (keepNValues == 0) spreadsheet->setRowCount(m_actualRows > 1 ? 
m_actualRows : 1); else { spreadsheet->setRowCount(keepNValues); m_actualRows = keepNValues; } m_dataContainer.resize(m_actualCols); DEBUG(" data source resized to col: " << m_actualCols); DEBUG(" data source rowCount: " << spreadsheet->rowCount()); DEBUG(" Setting data .."); for (int n = 0; n < m_actualCols; ++n) { // data() returns a void* which is a pointer to any data type (see ColumnPrivate.cpp) spreadsheet->child(n)->setColumnMode(columnModes[n]); switch (columnModes[n]) { case AbstractColumn::Numeric: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::Integer: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::Text: { QVector* vector = static_cast*>(spreadsheet->child(n)->data()); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::DateTime: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } //TODO case AbstractColumn::Month: case AbstractColumn::Day: break; } } DEBUG(" Prepared!"); } qint64 bytesread = 0; #ifdef PERFTRACE_LIVE_IMPORT PERFTRACE("AsciiLiveDataImportTotal: "); #endif LiveDataSource::ReadingType readingType; if (!m_prepared) { readingType = LiveDataSource::ReadingType::TillEnd; } else { //we have to read all the data when reading from end //so we set readingType to TillEnd if (spreadsheet->readingType() == LiveDataSource::ReadingType::FromEnd) readingType = LiveDataSource::ReadingType::TillEnd; //if we read the whole file we just start from the beginning of it //and read till end else if (spreadsheet->readingType() == LiveDataSource::ReadingType::WholeFile) readingType = LiveDataSource::ReadingType::TillEnd; else readingType = spreadsheet->readingType(); } DEBUG(" 
Reading type = " << ENUM_TO_STRING(LiveDataSource, ReadingType, readingType)); //move to the last read position, from == total bytes read //since the other source types are sequential we cannot seek on them if (spreadsheet->sourceType() == LiveDataSource::SourceType::FileOrPipe) device.seek(from); //count the new lines, increase actualrows on each //now we read all the new lines, if we want to use sample rate //then here we can do it, if we have actually sample rate number of lines :-? int newLinesForSampleSizeNotTillEnd = 0; int newLinesTillEnd = 0; QVector newData; if (readingType != LiveDataSource::ReadingType::TillEnd) newData.resize(spreadsheet->sampleSize()); int newDataIdx = 0; { #ifdef PERFTRACE_LIVE_IMPORT PERFTRACE("AsciiLiveDataImportReadingFromFile: "); #endif DEBUG(" source type = " << ENUM_TO_STRING(LiveDataSource, SourceType, spreadsheet->sourceType())); while (!device.atEnd()) { if (readingType != LiveDataSource::ReadingType::TillEnd) { switch (spreadsheet->sourceType()) { // different sources need different read methods case LiveDataSource::SourceType::LocalSocket: newData[newDataIdx++] = device.readAll(); break; case LiveDataSource::SourceType::NetworkUdpSocket: newData[newDataIdx++] = device.read(device.bytesAvailable()); break; case LiveDataSource::SourceType::FileOrPipe: newData.push_back(device.readLine()); break; case LiveDataSource::SourceType::NetworkTcpSocket: //TODO: check serial port case LiveDataSource::SourceType::SerialPort: newData[newDataIdx++] = device.read(device.bytesAvailable()); break; case LiveDataSource::SourceType::MQTT: break; } } else { // ReadingType::TillEnd switch (spreadsheet->sourceType()) { // different sources need different read methods case LiveDataSource::SourceType::LocalSocket: newData.push_back(device.readAll()); break; case LiveDataSource::SourceType::NetworkUdpSocket: newData.push_back(device.read(device.bytesAvailable())); break; case LiveDataSource::SourceType::FileOrPipe: 
newData.push_back(device.readLine()); break; case LiveDataSource::SourceType::NetworkTcpSocket: //TODO: check serial port case LiveDataSource::SourceType::SerialPort: newData.push_back(device.read(device.bytesAvailable())); break; case LiveDataSource::SourceType::MQTT: break; } } newLinesTillEnd++; if (readingType != LiveDataSource::ReadingType::TillEnd) { newLinesForSampleSizeNotTillEnd++; //for Continuous reading and FromEnd we read sample rate number of lines if possible //here TillEnd and Whole file behave the same if (newLinesForSampleSizeNotTillEnd == spreadsheet->sampleSize()) break; } } QDEBUG(" data read: " << newData); } //now we reset the readingType if (spreadsheet->readingType() == LiveDataSource::ReadingType::FromEnd) readingType = spreadsheet->readingType(); //we had less new lines than the sample size specified if (readingType != LiveDataSource::ReadingType::TillEnd) QDEBUG(" Removed empty lines: " << newData.removeAll(QString())); //back to the last read position before counting when reading from files if (spreadsheet->sourceType() == LiveDataSource::SourceType::FileOrPipe) device.seek(from); const int spreadsheetRowCountBeforeResize = spreadsheet->rowCount(); int currentRow = 0; // indexes the position in the vector(column) int linesToRead = 0; int keepNValues = spreadsheet->keepNValues(); DEBUG(" Increase row count. 
keepNValues = " << keepNValues); if (m_prepared) { //increase row count if we don't have a fixed size //but only after the preparation step if (keepNValues == 0) { DEBUG(" keep All values"); if (readingType != LiveDataSource::ReadingType::TillEnd) m_actualRows += qMin(newData.size(), spreadsheet->sampleSize()); else { //we don't increase it if we reread the whole file, we reset it if (!(spreadsheet->readingType() == LiveDataSource::ReadingType::WholeFile)) m_actualRows += newData.size(); else m_actualRows = newData.size(); } //appending if (spreadsheet->readingType() == LiveDataSource::ReadingType::WholeFile) linesToRead = m_actualRows; else linesToRead = m_actualRows - spreadsheetRowCountBeforeResize; } else { // fixed size DEBUG(" keep " << keepNValues << " values"); if (readingType == LiveDataSource::ReadingType::TillEnd) { //we had more lines than the fixed size, so we read m_actualRows number of lines if (newLinesTillEnd > m_actualRows) { linesToRead = m_actualRows; //TODO after reading we should skip the next data lines //because it's TillEnd actually } else linesToRead = newLinesTillEnd; } else { //we read max sample size number of lines when the reading mode //is ContinuouslyFixed or FromEnd, WholeFile is disabled linesToRead = qMin(spreadsheet->sampleSize(), newLinesTillEnd); } } if (linesToRead == 0) return 0; } else // not prepared linesToRead = newLinesTillEnd; DEBUG(" lines to read = " << linesToRead); DEBUG(" actual rows (w/o header) = " << m_actualRows); //TODO // if (spreadsheet->sourceType() == LiveDataSource::SourceType::FileOrPipe || spreadsheet->sourceType() == LiveDataSource::SourceType::NetworkUdpSocket) { // if (m_actualRows < linesToRead) { // DEBUG(" SET lines to read to " << m_actualRows); // linesToRead = m_actualRows; // } // } //new rows/resize columns if we don't have a fixed size //TODO if the user changes this value..m_resizedToFixedSize..setResizedToFixedSize if (keepNValues == 0) { #ifdef PERFTRACE_LIVE_IMPORT 
PERFTRACE("AsciiLiveDataImportResizing: "); #endif if (spreadsheet->rowCount() < m_actualRows) spreadsheet->setRowCount(m_actualRows); if (!m_prepared) currentRow = 0; else { // indexes the position in the vector(column) if (spreadsheet->readingType() == LiveDataSource::ReadingType::WholeFile) currentRow = 0; else currentRow = spreadsheetRowCountBeforeResize; } // if we have fixed size, we do this only once in preparation, here we can use // m_prepared and we need something to decide whether it has a fixed size or increasing for (int n = 0; n < m_actualCols; ++n) { // data() returns a void* which is a pointer to any data type (see ColumnPrivate.cpp) switch (columnModes[n]) { case AbstractColumn::Numeric: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::Integer: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::Text: { QVector* vector = static_cast*>(spreadsheet->child(n)->data()); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::DateTime: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } //TODO case AbstractColumn::Month: case AbstractColumn::Day: break; } } } else { // fixed size //when we have a fixed size we have to pop sampleSize number of lines if specified //here popping, setting currentRow if (!m_prepared) { if (spreadsheet->readingType() == LiveDataSource::ReadingType::WholeFile) currentRow = 0; else currentRow = m_actualRows - qMin(newLinesTillEnd, m_actualRows); } else { if (readingType == LiveDataSource::ReadingType::TillEnd) { if (newLinesTillEnd > m_actualRows) { currentRow = 0; } else { if (spreadsheet->readingType() == LiveDataSource::ReadingType::WholeFile) 
currentRow = 0; else currentRow = m_actualRows - newLinesTillEnd; } } else { //we read max sample size number of lines when the reading mode //is ContinuouslyFixed or FromEnd currentRow = m_actualRows - qMin(spreadsheet->sampleSize(), newLinesTillEnd); } } if (m_prepared) { #ifdef PERFTRACE_LIVE_IMPORT PERFTRACE("AsciiLiveDataImportPopping: "); #endif // enable data change signal for (int col = 0; col < m_actualCols; ++col) spreadsheet->child(col)->setSuppressDataChangedSignal(false); for (int row = 0; row < linesToRead; ++row) { for (int col = 0; col < m_actualCols; ++col) { switch (columnModes[col]) { case AbstractColumn::Numeric: { QVector* vector = static_cast* >(spreadsheet->child(col)->data()); vector->pop_front(); vector->resize(m_actualRows); m_dataContainer[col] = static_cast(vector); break; } case AbstractColumn::Integer: { QVector* vector = static_cast* >(spreadsheet->child(col)->data()); vector->pop_front(); vector->resize(m_actualRows); m_dataContainer[col] = static_cast(vector); break; } case AbstractColumn::Text: { QVector* vector = static_cast*>(spreadsheet->child(col)->data()); vector->pop_front(); vector->resize(m_actualRows); m_dataContainer[col] = static_cast(vector); break; } case AbstractColumn::DateTime: { QVector* vector = static_cast* >(spreadsheet->child(col)->data()); vector->pop_front(); vector->resize(m_actualRows); m_dataContainer[col] = static_cast(vector); break; } //TODO case AbstractColumn::Month: case AbstractColumn::Day: break; } } } } } // from the last row we read the new data in the spreadsheet DEBUG(" Reading from line " << currentRow << " till end line " << newLinesTillEnd); DEBUG(" Lines to read:" << linesToRead <<", actual rows:" << m_actualRows << ", actual cols:" << m_actualCols); newDataIdx = 0; if (readingType == LiveDataSource::ReadingType::FromEnd) { if (m_prepared) { if (newData.size() > spreadsheet->sampleSize()) newDataIdx = newData.size() - spreadsheet->sampleSize(); //since we skip a couple of lines, we need to 
count those bytes too for (int i = 0; i < newDataIdx; ++i) bytesread += newData.at(i).size(); } } DEBUG(" newDataIdx: " << newDataIdx); static int indexColumnIdx = 1; { #ifdef PERFTRACE_LIVE_IMPORT PERFTRACE("AsciiLiveDataImportFillingContainers: "); #endif int row = 0; if (readingType == LiveDataSource::ReadingType::TillEnd || (readingType == LiveDataSource::ReadingType::ContinuousFixed)) { if (headerEnabled) { if (!m_prepared) { row = 1; bytesread += newData.at(0).size(); } } } if (spreadsheet->sourceType() == LiveDataSource::SourceType::FileOrPipe) { if (readingType == LiveDataSource::ReadingType::WholeFile) { if (headerEnabled) { row = 1; bytesread += newData.at(0).size(); } } } QLocale locale(numberFormat); for (; row < linesToRead; ++row) { DEBUG("\n Reading row " << row + 1 << " of " << linesToRead); QString line; if (readingType == LiveDataSource::ReadingType::FromEnd) line = newData.at(newDataIdx++); else line = newData.at(row); //when we read the whole file we don't care about the previous position //so we don't have to count those bytes if (readingType != LiveDataSource::ReadingType::WholeFile) { if (spreadsheet->sourceType() == LiveDataSource::SourceType::FileOrPipe) { bytesread += line.size(); } } if (line.isEmpty() || (!commentCharacter.isEmpty() && line.startsWith(commentCharacter))) // skip empty or commented lines continue; QStringList lineStringList; // only FileOrPipe support multiple columns if (spreadsheet->sourceType() == LiveDataSource::SourceType::FileOrPipe) lineStringList = line.split(m_separator, (QString::SplitBehavior)skipEmptyParts); else lineStringList << line; QDEBUG(" line = " << lineStringList << ", separator = \'" << m_separator << "\'"); DEBUG(" Line bytes: " << line.size() << " line: " << line.toStdString()); if (simplifyWhitespacesEnabled) { for (int i = 0; i < lineStringList.size(); ++i) lineStringList[i] = lineStringList[i].simplified(); } if (createIndexEnabled) { if (spreadsheet->keepNValues() == 0) 
lineStringList.prepend(QString::number(currentRow + 1)); else lineStringList.prepend(QString::number(indexColumnIdx++)); } QDEBUG(" column modes = " << columnModes); for (int n = 0; n < m_actualCols; ++n) { DEBUG(" actual col = " << n); if (n < lineStringList.size()) { QString valueString = lineStringList.at(n); if (removeQuotesEnabled) valueString.remove(QLatin1Char('"')); DEBUG(" value string = " << valueString.toStdString()); // set value depending on data type switch (columnModes[n]) { case AbstractColumn::Numeric: { DEBUG(" Numeric"); bool isNumber; const double value = locale.toDouble(valueString, &isNumber); static_cast*>(m_dataContainer[n])->operator[](currentRow) = (isNumber ? value : nanValue); // qDebug() << "dataContainer[" << n << "] size:" << static_cast*>(m_dataContainer[n])->size(); break; } case AbstractColumn::Integer: { DEBUG(" Integer"); bool isNumber; const int value = locale.toInt(valueString, &isNumber); static_cast*>(m_dataContainer[n])->operator[](currentRow) = (isNumber ? value : 0); // qDebug() << "dataContainer[" << n << "] size:" << static_cast*>(m_dataContainer[n])->size(); break; } case AbstractColumn::DateTime: { const QDateTime valueDateTime = QDateTime::fromString(valueString, dateTimeFormat); static_cast*>(m_dataContainer[n])->operator[](currentRow) = valueDateTime.isValid() ? 
valueDateTime : QDateTime(); break; } case AbstractColumn::Text: static_cast*>(m_dataContainer[n])->operator[](currentRow) = valueString; break; case AbstractColumn::Month: //TODO break; case AbstractColumn::Day: //TODO break; } } else { DEBUG(" missing columns in this line"); switch (columnModes[n]) { case AbstractColumn::Numeric: static_cast*>(m_dataContainer[n])->operator[](currentRow) = nanValue; break; case AbstractColumn::Integer: static_cast*>(m_dataContainer[n])->operator[](currentRow) = 0; break; case AbstractColumn::DateTime: static_cast*>(m_dataContainer[n])->operator[](currentRow) = QDateTime(); break; case AbstractColumn::Text: static_cast*>(m_dataContainer[n])->operator[](currentRow).clear(); break; case AbstractColumn::Month: //TODO break; case AbstractColumn::Day: //TODO break; } } } currentRow++; } } if (m_prepared) { //notify all affected columns and plots about the changes PERFTRACE("AsciiLiveDataImport, notify affected columns and plots"); //determine the dependent plots QVector plots; for (int n = 0; n < m_actualCols; ++n) spreadsheet->column(n)->addUsedInPlots(plots); //suppress retransform in the dependent plots for (auto* plot : plots) plot->setSuppressDataChangedSignal(true); for (int n = 0; n < m_actualCols; ++n) spreadsheet->column(n)->setChanged(); //retransform the dependent plots for (auto* plot : plots) { plot->setSuppressDataChangedSignal(false); plot->dataChanged(); } } else m_prepared = true; DEBUG("AsciiFilterPrivate::readFromLiveDevice() DONE"); return bytesread; } /*! reads the content of device \c device to the data source \c dataSource. Uses the settings defined in the data source. 
*/ void AsciiFilterPrivate::readDataFromDevice(QIODevice& device, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode, int lines) { DEBUG("AsciiFilterPrivate::readDataFromDevice(): dataSource = " << dataSource << ", mode = " << ENUM_TO_STRING(AbstractFileFilter, ImportMode, importMode) << ", lines = " << lines); if (!m_prepared) { const int deviceError = prepareDeviceToRead(device); if (deviceError != 0) { DEBUG("Device error = " << deviceError); return; } // matrix data has only one column mode if (dynamic_cast(dataSource)) { auto mode = columnModes[0]; //TODO: remove this when Matrix supports text type if (mode == AbstractColumn::Text) mode = AbstractColumn::Numeric; for (auto& c : columnModes) if (c != mode) c = mode; } m_columnOffset = dataSource->prepareImport(m_dataContainer, importMode, m_actualRows, m_actualCols, vectorNames, columnModes); m_prepared = true; } DEBUG("locale = " << QLocale::languageToString(numberFormat).toStdString()); QLocale locale(numberFormat); // Read the data int currentRow = 0; // indexes the position in the vector(column) if (lines == -1) lines = m_actualRows; //skip data lines, if required DEBUG(" Skipping " << m_actualStartRow << " lines"); for (int i = 0; i < m_actualStartRow; ++i) device.readLine(); DEBUG(" Reading " << qMin(lines, m_actualRows) << " lines, " << m_actualCols << " columns"); if (qMin(lines, m_actualRows) == 0 || m_actualCols == 0) return; QString line; QString valueString; //Don't put the definition QStringList lineStringList outside of the for-loop, //the compiler doesn't seem to optimize the destructor of QList well enough in this case. 
lines = qMin(lines, m_actualRows); int progressIndex = 0; const float progressInterval = 0.01*lines; //update on every 1% only for (int i = 0; i < lines; ++i) { line = device.readLine(); // remove any newline line.remove(QLatin1Char('\n')); line.remove(QLatin1Char('\r')); if (removeQuotesEnabled) line.remove(QLatin1Char('"')); if (line.isEmpty() || (!commentCharacter.isEmpty() && line.startsWith(commentCharacter))) // skip empty or commented lines continue; QStringList lineStringList = line.split(m_separator, (QString::SplitBehavior)skipEmptyParts); // DEBUG(" Line bytes: " << line.size() << " line: " << line.toStdString()); if (simplifyWhitespacesEnabled) { for (int i = 0; i < lineStringList.size(); ++i) lineStringList[i] = lineStringList[i].simplified(); } // remove left white spaces if (skipEmptyParts) { for (int n = 0; n < lineStringList.size(); ++n) { valueString = lineStringList.at(n); if (!QString::compare(valueString, " ")) { lineStringList.removeAt(n); n--; } } } for (int n = 0; n < m_actualCols; ++n) { // index column if required if (n == 0 && createIndexEnabled) { static_cast*>(m_dataContainer[0])->operator[](currentRow) = i + 1; continue; } //column counting starts with 1, subtract 1 as well as another 1 for the index column if required int col = createIndexEnabled ? n + startColumn - 2: n + startColumn - 1; if (col < lineStringList.size()) { valueString = lineStringList.at(col); // set value depending on data type switch (columnModes.at(n)) { case AbstractColumn::Numeric: { bool isNumber; const double value = locale.toDouble(valueString, &isNumber); static_cast*>(m_dataContainer[n])->operator[](currentRow) = (isNumber ? value : nanValue); break; } case AbstractColumn::Integer: { bool isNumber; const int value = locale.toInt(valueString, &isNumber); static_cast*>(m_dataContainer[n])->operator[](currentRow) = (isNumber ? 
value : 0); break; } case AbstractColumn::DateTime: { const QDateTime valueDateTime = QDateTime::fromString(valueString, dateTimeFormat); static_cast*>(m_dataContainer[n])->operator[](currentRow) = valueDateTime.isValid() ? valueDateTime : QDateTime(); break; } case AbstractColumn::Text: { auto* colData = static_cast*>(m_dataContainer[n]); colData->operator[](currentRow) = valueString; break; } case AbstractColumn::Month: // never happens case AbstractColumn::Day: break; } } else { // missing columns in this line switch (columnModes.at(n)) { case AbstractColumn::Numeric: static_cast*>(m_dataContainer[n])->operator[](currentRow) = nanValue; break; case AbstractColumn::Integer: static_cast*>(m_dataContainer[n])->operator[](currentRow) = 0; break; case AbstractColumn::DateTime: static_cast*>(m_dataContainer[n])->operator[](currentRow) = QDateTime(); break; case AbstractColumn::Text: static_cast*>(m_dataContainer[n])->operator[](currentRow).clear(); break; case AbstractColumn::Month: // never happens case AbstractColumn::Day: break; } } } currentRow++; //ask to update the progress bar only if we have more than 1000 lines //only in 1% steps progressIndex++; if (lines > 1000 && progressIndex > progressInterval) { emit q->completed(100 * currentRow/lines); progressIndex = 0; QApplication::processEvents(QEventLoop::AllEvents, 0); } } DEBUG(" Read " << currentRow << " lines"); //we might have skipped empty lines above. shrink the spreadsheet if the number of read lines (=currentRow) //is smaller than the initial size of the spreadsheet (=m_actualRows). //TODO: should also be relevant for Matrix auto* s = dynamic_cast(dataSource); if (s && currentRow != m_actualRows && importMode == AbstractFileFilter::Replace) s->setRowCount(currentRow); dataSource->finalizeImport(m_columnOffset, startColumn, startColumn + m_actualCols - 1, dateTimeFormat, importMode); } /*! 
* preview for special devices (local/UDP/TCP socket or serial port) */ QVector AsciiFilterPrivate::preview(QIODevice &device) { DEBUG("AsciiFilterPrivate::preview(): bytesAvailable = " << device.bytesAvailable() << ", isSequential = " << device.isSequential()); QVector dataStrings; if (!(device.bytesAvailable() > 0)) { DEBUG("No new data available"); return dataStrings; } if (device.isSequential() && device.bytesAvailable() < (int)sizeof(quint16)) return dataStrings; #ifdef PERFTRACE_LIVE_IMPORT PERFTRACE("AsciiLiveDataImportTotal: "); #endif int linesToRead = 0; QVector newData; //TODO: serial port "read(nBytes)"? while (!device.atEnd()) { if (device.canReadLine()) newData.push_back(device.readLine()); else // UDP fails otherwise newData.push_back(device.readAll()); linesToRead++; } QDEBUG(" data = " << newData); if (linesToRead == 0) return dataStrings; int col = 0; int colMax = newData.at(0).size(); if (createIndexEnabled) colMax++; columnModes.resize(colMax); if (createIndexEnabled) { columnModes[0] = AbstractColumn::ColumnMode::Integer; col = 1; vectorNames.prepend(i18n("Index")); } vectorNames.append(i18n("Value")); QDEBUG(" vector names = " << vectorNames); for (const auto& valueString : newData.at(0).split(' ', QString::SkipEmptyParts)) { if (col == colMax) break; columnModes[col++] = AbstractFileFilter::columnMode(valueString, dateTimeFormat, numberFormat); } QString line; QLocale locale(numberFormat); QStringList lineString; for (int i = 0; i < linesToRead; ++i) { line = newData.at(i); // remove any newline line = line.remove('\n'); line = line.remove('\r'); if (simplifyWhitespacesEnabled) line = line.simplified(); if (line.isEmpty() || (!commentCharacter.isEmpty() && line.startsWith(commentCharacter))) // skip empty or commented lines continue; QStringList lineStringList = line.split(' ', QString::SkipEmptyParts); if (createIndexEnabled) lineStringList.prepend(QString::number(i + 1)); for (int n = 0; n < lineStringList.size(); ++n) { if (n < 
lineStringList.size()) { QString valueString = lineStringList.at(n); if (removeQuotesEnabled) valueString.remove(QLatin1Char('"')); switch (columnModes[n]) { case AbstractColumn::Numeric: { bool isNumber; const double value = locale.toDouble(valueString, &isNumber); lineString += QString::number(isNumber ? value : nanValue, 'g', 16); break; } case AbstractColumn::Integer: { bool isNumber; const int value = locale.toInt(valueString, &isNumber); lineString += QString::number(isNumber ? value : 0); break; } case AbstractColumn::DateTime: { const QDateTime valueDateTime = QDateTime::fromString(valueString, dateTimeFormat); lineString += valueDateTime.isValid() ? valueDateTime.toString(dateTimeFormat) : QLatin1String(" "); break; } case AbstractColumn::Text: lineString += valueString; break; case AbstractColumn::Month: // never happens case AbstractColumn::Day: break; } } else // missing columns in this line lineString += QString(); } dataStrings << lineString; } return dataStrings; } /*! * generates the preview for the file \c fileName reading the provided number of \c lines. 
*/ QVector AsciiFilterPrivate::preview(const QString& fileName, int lines) { QVector dataStrings; //dirty hack: set readingFile and readingFileName in order to know in lineNumber(QIODevice) //that we're reading from a file and to benefit from much faster wc on linux //TODO: redesign the APIs and remove this later readingFile = true; readingFileName = fileName; KFilterDev device(fileName); const int deviceError = prepareDeviceToRead(device); readingFile = false; if (deviceError != 0) { DEBUG("Device error = " << deviceError); return dataStrings; } //number formatting DEBUG("locale = " << QLocale::languageToString(numberFormat).toStdString()); QLocale locale(numberFormat); // Read the data if (lines == -1) lines = m_actualRows; // set column names for preview if (!headerEnabled) { int start = 0; if (createIndexEnabled) start = 1; for (int i = start; i < m_actualCols; i++) vectorNames << "Column " + QString::number(i + 1); } QDEBUG(" column names = " << vectorNames); //skip data lines, if required DEBUG(" Skipping " << m_actualStartRow << " lines"); for (int i = 0; i < m_actualStartRow; ++i) device.readLine(); DEBUG(" Generating preview for " << qMin(lines, m_actualRows) << " lines"); QString line; for (int i = 0; i < qMin(lines, m_actualRows); ++i) { line = device.readLine(); // remove any newline line = line.remove('\n'); line = line.remove('\r'); if (line.isEmpty() || (!commentCharacter.isEmpty() && line.startsWith(commentCharacter))) // skip empty or commented lines continue; QStringList lineStringList = line.split(m_separator, (QString::SplitBehavior)skipEmptyParts); QDEBUG(" line = " << lineStringList); DEBUG(" Line bytes: " << line.size() << " line: " << line.toStdString()); if (simplifyWhitespacesEnabled) { for (int i = 0; i < lineStringList.size(); ++i) lineStringList[i] = lineStringList[i].simplified(); } QStringList lineString; for (int n = 0; n < m_actualCols; ++n) { // index column if required if (n == 0 && createIndexEnabled) { lineString += 
QString::number(i + 1); continue; } //column counting starts with 1, subtract 1 as well as another 1 for the index column if required int col = createIndexEnabled ? n + startColumn - 2: n + startColumn - 1; if (col < lineStringList.size()) { QString valueString = lineStringList.at(col); if (removeQuotesEnabled) valueString.remove(QLatin1Char('"')); //DEBUG(" valueString = " << valueString.toStdString()); if (skipEmptyParts && !QString::compare(valueString, " ")) // handle left white spaces continue; // set value depending on data type switch (columnModes[n]) { case AbstractColumn::Numeric: { bool isNumber; const double value = locale.toDouble(valueString, &isNumber); lineString += QString::number(isNumber ? value : nanValue, 'g', 15); break; } case AbstractColumn::Integer: { bool isNumber; const int value = locale.toInt(valueString, &isNumber); lineString += QString::number(isNumber ? value : 0); break; } case AbstractColumn::DateTime: { const QDateTime valueDateTime = QDateTime::fromString(valueString, dateTimeFormat); lineString += valueDateTime.isValid() ? valueDateTime.toString(dateTimeFormat) : QLatin1String(" "); break; } case AbstractColumn::Text: lineString += valueString; break; case AbstractColumn::Month: // never happens case AbstractColumn::Day: break; } } else // missing columns in this line lineString += QString(); } dataStrings << lineString; } return dataStrings; } /*! writes the content of \c dataSource to the file \c fileName. */ void AsciiFilterPrivate::write(const QString & fileName, AbstractDataSource* dataSource) { Q_UNUSED(fileName); Q_UNUSED(dataSource); //TODO: save data to ascii file } //############################################################################## //################## Serialization/Deserialization ########################### //############################################################################## /*! Saves as XML. 
*/
//Serializes the filter settings as one <asciiFilter> XML element.
//Booleans and numbers are written via QString::number (booleans as "0"/"1"),
//matching how AsciiFilter::load() reads them back with READ_INT_VALUE/READ_DOUBLE_VALUE.
void AsciiFilter::save(QXmlStreamWriter* writer) const {
	writer->writeStartElement( "asciiFilter");
	writer->writeAttribute( "commentCharacter", d->commentCharacter);
	writer->writeAttribute( "separatingCharacter", d->separatingCharacter);
	writer->writeAttribute( "autoMode", QString::number(d->autoModeEnabled));
	writer->writeAttribute( "createIndex", QString::number(d->createIndexEnabled));
	writer->writeAttribute( "createTimestamp", QString::number(d->createTimestampEnabled));
	writer->writeAttribute( "header", QString::number(d->headerEnabled));
	//vector names are joined with a single space - load() splits on ' ' again
	writer->writeAttribute( "vectorNames", d->vectorNames.join(' '));
	writer->writeAttribute( "skipEmptyParts", QString::number(d->skipEmptyParts));
	writer->writeAttribute( "simplifyWhitespaces", QString::number(d->simplifyWhitespacesEnabled));
	writer->writeAttribute( "nanValue", QString::number(d->nanValue));
	writer->writeAttribute( "removeQuotes", QString::number(d->removeQuotesEnabled));
	writer->writeAttribute( "startRow", QString::number(d->startRow));
	writer->writeAttribute( "endRow", QString::number(d->endRow));
	writer->writeAttribute( "startColumn", QString::number(d->startColumn));
	writer->writeAttribute( "endColumn", QString::number(d->endColumn));
	writer->writeEndElement();
}

/*!
  Loads from XML.
*/ bool AsciiFilter::load(XmlStreamReader* reader) { KLocalizedString attributeWarning = ki18n("Attribute '%1' missing or empty, default value is used"); QXmlStreamAttributes attribs = reader->attributes(); QString str; READ_STRING_VALUE("commentCharacter", commentCharacter); READ_STRING_VALUE("separatingCharacter", separatingCharacter); READ_INT_VALUE("createIndex", createIndexEnabled, bool); READ_INT_VALUE("createTimestamp", createTimestampEnabled, bool); READ_INT_VALUE("autoMode", autoModeEnabled, bool); READ_INT_VALUE("header", headerEnabled, bool); str = attribs.value("vectorNames").toString(); d->vectorNames = str.split(' '); //may be empty READ_INT_VALUE("simplifyWhitespaces", simplifyWhitespacesEnabled, bool); READ_DOUBLE_VALUE("nanValue", nanValue); READ_INT_VALUE("removeQuotes", removeQuotesEnabled, bool); READ_INT_VALUE("skipEmptyParts", skipEmptyParts, bool); READ_INT_VALUE("startRow", startRow, int); READ_INT_VALUE("endRow", endRow, int); READ_INT_VALUE("startColumn", startColumn, int); READ_INT_VALUE("endColumn", endColumn, int); return true; } int AsciiFilterPrivate::isPrepared() { return m_prepared; } #ifdef HAVE_MQTT int AsciiFilterPrivate::prepareToRead(const QString& message) { QStringList lines = message.split('\n'); if (lines.isEmpty()) return 1; // Parse the first line: // Determine the number of columns, create the columns and use (if selected) the first row to name them QString firstLine = lines.at(0); if (simplifyWhitespacesEnabled) firstLine = firstLine.simplified(); DEBUG("First line: \'" << firstLine.toStdString() << '\''); // determine separator and split first line QStringList firstLineStringList; if (separatingCharacter == "auto") { DEBUG("automatic separator"); QRegExp regExp("(\\s+)|(,\\s+)|(;\\s+)|(:\\s+)"); firstLineStringList = firstLine.split(regExp, (QString::SplitBehavior)skipEmptyParts); } else { // use given separator // replace symbolic "TAB" with '\t' m_separator = separatingCharacter.replace(QLatin1String("2xTAB"), 
"\t\t", Qt::CaseInsensitive); m_separator = separatingCharacter.replace(QLatin1String("TAB"), "\t", Qt::CaseInsensitive); // replace symbolic "SPACE" with ' ' m_separator = m_separator.replace(QLatin1String("2xSPACE"), QLatin1String(" "), Qt::CaseInsensitive); m_separator = m_separator.replace(QLatin1String("3xSPACE"), QLatin1String(" "), Qt::CaseInsensitive); m_separator = m_separator.replace(QLatin1String("4xSPACE"), QLatin1String(" "), Qt::CaseInsensitive); m_separator = m_separator.replace(QLatin1String("SPACE"), QLatin1String(" "), Qt::CaseInsensitive); firstLineStringList = firstLine.split(m_separator, (QString::SplitBehavior)skipEmptyParts); } DEBUG("separator: \'" << m_separator.toStdString() << '\''); DEBUG("number of columns: " << firstLineStringList.size()); QDEBUG("first line: " << firstLineStringList); //all columns are read plus the optional column for the index and for the timestamp m_actualCols = firstLineStringList.size() + int(createIndexEnabled) + int(createTimestampEnabled); //column names: //when reading the message strings for different topics, it's not possible to specify vector names //since the different topics can have different content and different number of columns/vectors //->we always set the vector names here to fixed values vectorNames.clear(); columnModes.clear(); //add index column if (createIndexEnabled) { vectorNames << i18n("index"); columnModes << AbstractColumn::Integer; } //add timestamp column if (createTimestampEnabled) { vectorNames << i18n("timestamp"); columnModes << AbstractColumn::DateTime; } //parse the first data line to determine data type for each column int i = 1; for (auto& valueString : firstLineStringList) { if (simplifyWhitespacesEnabled) valueString = valueString.simplified(); if (removeQuotesEnabled) valueString.remove(QLatin1Char('"')); vectorNames << i18n("value %1", i); columnModes << AbstractFileFilter::columnMode(valueString, dateTimeFormat, numberFormat); ++i; } m_actualStartRow = startRow; 
m_actualRows = lines.size(); QDEBUG("column modes = " << columnModes); DEBUG("actual cols/rows (w/o header): " << m_actualCols << ' ' << m_actualRows); return 0; } /*! * generates the preview for the string \s message. */ QVector AsciiFilterPrivate::preview(const QString& message) { QVector dataStrings; prepareToRead(message); //number formatting DEBUG("locale = " << QLocale::languageToString(numberFormat).toStdString()); QLocale locale(numberFormat); // Read the data QStringList lines = message.split('\n'); int i = 0; for (auto line : lines) { if (simplifyWhitespacesEnabled) line = line.simplified(); if (line.isEmpty() || (!commentCharacter.isEmpty() && line.startsWith(commentCharacter))) // skip empty or commented lines continue; const QStringList& lineStringList = line.split(m_separator, (QString::SplitBehavior)skipEmptyParts); QDEBUG(" line = " << lineStringList); QStringList lineString; // index column if required if (createIndexEnabled) lineString += QString::number(i + 1); // timestamp column if required if (createTimestampEnabled) lineString += QDateTime::currentDateTime().toString(); int offset = int(createIndexEnabled) + int(createTimestampEnabled); for (int n = 0; n < m_actualCols - offset; ++n) { if (n < lineStringList.size()) { QString valueString = lineStringList.at(n); //DEBUG(" valueString = " << valueString.toStdString()); if (skipEmptyParts && !QString::compare(valueString, " ")) // handle left white spaces continue; // set value depending on data type switch (columnModes[n+offset]) { case AbstractColumn::Numeric: { bool isNumber; const double value = locale.toDouble(valueString, &isNumber); lineString += QString::number(isNumber ? value : nanValue, 'g', 15); break; } case AbstractColumn::Integer: { bool isNumber; const int value = locale.toInt(valueString, &isNumber); lineString += QString::number(isNumber ? 
value : 0); break; } case AbstractColumn::DateTime: { const QDateTime valueDateTime = QDateTime::fromString(valueString, dateTimeFormat); lineString += valueDateTime.isValid() ? valueDateTime.toString(dateTimeFormat) : QLatin1String(" "); break; } case AbstractColumn::Text: if (removeQuotesEnabled) valueString.remove(QLatin1Char('"')); lineString += valueString; break; case AbstractColumn::Month: // never happens case AbstractColumn::Day: break; } } else // missing columns in this line lineString += QString(); } ++i; dataStrings << lineString; } return dataStrings; } /*! * \brief Returns the statistical data that is needed by the topic for its MQTTClient's will message * \param topic */ QString AsciiFilterPrivate::MQTTColumnStatistics(const MQTTTopic* topic) const { Column* const tempColumn = topic->child(m_actualCols - 1); QString statistics; QVector willStatistics = topic->mqttClient()->willStatistics(); //Add every statistical data to the string, the flag of which is set true for (int i = 0; i <= willStatistics.size(); i++) { if (willStatistics[i]) { switch (static_cast(i) ) { case MQTTClient::WillStatisticsType::ArithmeticMean: statistics += QLatin1String("Arithmetic mean: ") + QString::number(tempColumn->statistics().arithmeticMean) + "\n"; break; case MQTTClient::WillStatisticsType::ContraharmonicMean: statistics += QLatin1String("Contraharmonic mean: ") + QString::number(tempColumn->statistics().contraharmonicMean) + "\n"; break; case MQTTClient::WillStatisticsType::Entropy: statistics += QLatin1String("Entropy: ") + QString::number(tempColumn->statistics().entropy) + "\n"; break; case MQTTClient::WillStatisticsType::GeometricMean: statistics += QLatin1String("Geometric mean: ") + QString::number(tempColumn->statistics().geometricMean) + "\n"; break; case MQTTClient::WillStatisticsType::HarmonicMean: statistics += QLatin1String("Harmonic mean: ") + QString::number(tempColumn->statistics().harmonicMean) + "\n"; break; case 
MQTTClient::WillStatisticsType::Kurtosis: statistics += QLatin1String("Kurtosis: ") + QString::number(tempColumn->statistics().kurtosis) + "\n"; break; case MQTTClient::WillStatisticsType::Maximum: statistics += QLatin1String("Maximum: ") + QString::number(tempColumn->statistics().maximum) + "\n"; break; case MQTTClient::WillStatisticsType::MeanDeviation: statistics += QLatin1String("Mean deviation: ") + QString::number(tempColumn->statistics().meanDeviation) + "\n"; break; case MQTTClient::WillStatisticsType::MeanDeviationAroundMedian: statistics += QLatin1String("Mean deviation around median: ") + QString::number(tempColumn->statistics().meanDeviationAroundMedian) + "\n"; break; case MQTTClient::WillStatisticsType::Median: statistics += QLatin1String("Median: ") + QString::number(tempColumn->statistics().median) + "\n"; break; case MQTTClient::WillStatisticsType::MedianDeviation: statistics += QLatin1String("Median deviation: ") + QString::number(tempColumn->statistics().medianDeviation) + "\n"; break; case MQTTClient::WillStatisticsType::Minimum: statistics += QLatin1String("Minimum: ") + QString::number(tempColumn->statistics().minimum) + "\n"; break; case MQTTClient::WillStatisticsType::Skewness: statistics += QLatin1String("Skewness: ") + QString::number(tempColumn->statistics().skewness) + "\n"; break; case MQTTClient::WillStatisticsType::StandardDeviation: statistics += QLatin1String("Standard deviation: ") + QString::number(tempColumn->statistics().standardDeviation) + "\n"; break; case MQTTClient::WillStatisticsType::Variance: statistics += QLatin1String("Variance: ") + QString::number(tempColumn->statistics().variance) + "\n"; break; case MQTTClient::WillStatisticsType::NoStatistics: default: break; } } } return statistics; } AbstractColumn::ColumnMode AsciiFilterPrivate::MQTTColumnMode() const { return columnModes[m_actualCols - 1]; } /*! * \brief reads the content of a message received by the topic. 
* Uses the settings defined in the MQTTTopic's MQTTClient * \param message * \param topic * \param dataSource */ void AsciiFilterPrivate::readMQTTTopic(const QString& message, AbstractDataSource* dataSource) { //If the message is empty, there is nothing to do if (message.isEmpty()) { DEBUG("No new data available"); return; } MQTTTopic* spreadsheet = dynamic_cast(dataSource); const int keepNValues = spreadsheet->mqttClient()->keepNValues(); if (!m_prepared) { qDebug()<<"Start preparing filter for: " << spreadsheet->topicName(); //Prepare the filter const int mqttPrepareError = prepareToRead(message); if (mqttPrepareError != 0) { DEBUG("Mqtt Prepare Error = " << mqttPrepareError); qDebug()<setUndoAware(false); spreadsheet->resize(AbstractFileFilter::Replace, vectorNames, m_actualCols); qDebug() << "fds resized to col: " << m_actualCols; qDebug() << "fds rowCount: " << spreadsheet->rowCount(); //columns in a MQTTTopic don't have any manual changes. //make the available columns undo unaware and suppress the "data changed" signal. //data changes will be propagated via an explicit Column::setChanged() call once new data was read. for (int i = 0; i < spreadsheet->childCount(); i++) { spreadsheet->child(i)->setUndoAware(false); spreadsheet->child(i)->setSuppressDataChangedSignal(true); } if (keepNValues == 0) spreadsheet->setRowCount(m_actualRows > 1 ? 
m_actualRows : 1); else { spreadsheet->setRowCount(spreadsheet->mqttClient()->keepNValues()); m_actualRows = spreadsheet->mqttClient()->keepNValues(); } m_dataContainer.resize(m_actualCols); for (int n = 0; n < m_actualCols; ++n) { // data() returns a void* which is a pointer to any data type (see ColumnPrivate.cpp) spreadsheet->child(n)->setColumnMode(columnModes[n]); switch (columnModes[n]) { case AbstractColumn::Numeric: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::Integer: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::Text: { QVector* vector = static_cast*>(spreadsheet->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::DateTime: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } //TODO case AbstractColumn::Month: case AbstractColumn::Day: break; } } } #ifdef PERFTRACE_LIVE_IMPORT PERFTRACE("AsciiLiveDataImportTotal: "); #endif MQTTClient::ReadingType readingType; if (!m_prepared) { //if filter is not prepared we read till the end readingType = MQTTClient::ReadingType::TillEnd; } else { //we have to read all the data when reading from end //so we set readingType to TillEnd if (static_cast (spreadsheet->mqttClient()->readingType()) == MQTTClient::ReadingType::FromEnd) readingType = MQTTClient::ReadingType::TillEnd; else readingType = spreadsheet->mqttClient()->readingType(); } //count the new lines, increase actualrows on each //now we read all the new lines, if we want to use sample rate //then here we can do it, if we have actually 
sample rate number of lines :-? int newLinesForSampleSizeNotTillEnd = 0; int newLinesTillEnd = 0; QVector newData; if (readingType != MQTTClient::ReadingType::TillEnd) { newData.reserve(spreadsheet->mqttClient()->sampleSize()); newData.resize(spreadsheet->mqttClient()->sampleSize()); } int newDataIdx = 0; //TODO: bool sampleSizeReached = false; { #ifdef PERFTRACE_LIVE_IMPORT PERFTRACE("AsciiLiveDataImportReadingFromFile: "); #endif QStringList newDataList = message.split(QRegExp("\n|\r\n|\r"), QString::SkipEmptyParts); for (auto& line : newDataList) { newData.push_back(line); newLinesTillEnd++; if (readingType != MQTTClient::ReadingType::TillEnd) { newLinesForSampleSizeNotTillEnd++; //for Continuous reading and FromEnd we read sample rate number of lines if possible if (newLinesForSampleSizeNotTillEnd == spreadsheet->mqttClient()->sampleSize()) { //TODO: sampleSizeReached = true; break; } } } } qDebug()<<"Processing message done"; //now we reset the readingType if (spreadsheet->mqttClient()->readingType() == MQTTClient::ReadingType::FromEnd) readingType = static_cast(spreadsheet->mqttClient()->readingType()); //we had less new lines than the sample rate specified if (readingType != MQTTClient::ReadingType::TillEnd) qDebug() << "Removed empty lines: " << newData.removeAll(QString()); const int spreadsheetRowCountBeforeResize = spreadsheet->rowCount(); if (m_prepared ) { if (keepNValues == 0) m_actualRows = spreadsheetRowCountBeforeResize; else { //if the keepNValues changed since the last read we have to manage the columns accordingly if (m_actualRows != spreadsheet->mqttClient()->keepNValues()) { if (m_actualRows < spreadsheet->mqttClient()->keepNValues()) { spreadsheet->setRowCount(spreadsheet->mqttClient()->keepNValues()); qDebug()<<"rowcount set to: " << spreadsheet->mqttClient()->keepNValues(); } //Calculate the difference between the old and new keepNValues int rowDiff = 0; if (m_actualRows > spreadsheet->mqttClient()->keepNValues()) rowDiff = m_actualRows - 
spreadsheet->mqttClient()->keepNValues(); if (m_actualRows < spreadsheet->mqttClient()->keepNValues()) rowDiff = spreadsheet->mqttClient()->keepNValues() - m_actualRows; for (int n = 0; n < columnModes.size(); ++n) { // data() returns a void* which is a pointer to any data type (see ColumnPrivate.cpp) switch (columnModes[n]) { case AbstractColumn::Numeric: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); m_dataContainer[n] = static_cast(vector); //if the keepNValues got smaller then we move the last keepNValues count of data //in the first keepNValues places if (m_actualRows > spreadsheet->mqttClient()->keepNValues()) { for (int i = 0; i < spreadsheet->mqttClient()->keepNValues(); i++) { static_cast*>(m_dataContainer[n])->operator[] (i) = static_cast*>(m_dataContainer[n])->operator[](m_actualRows - spreadsheet->mqttClient()->keepNValues() + i); } } //if the keepNValues got bigger we move the existing values to the last m_actualRows positions //then fill the remaining lines with NaN if (m_actualRows < spreadsheet->mqttClient()->keepNValues()) { vector->reserve( spreadsheet->mqttClient()->keepNValues()); vector->resize( spreadsheet->mqttClient()->keepNValues()); for (int i = 1; i <= m_actualRows; i++) { static_cast*>(m_dataContainer[n])->operator[] (spreadsheet->mqttClient()->keepNValues() - i) = static_cast*>(m_dataContainer[n])->operator[](spreadsheet->mqttClient()->keepNValues() - i - rowDiff); } for (int i = 0; i < rowDiff; i++) static_cast*>(m_dataContainer[n])->operator[](i) = nanValue; } break; } case AbstractColumn::Integer: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); m_dataContainer[n] = static_cast(vector); //if the keepNValues got smaller then we move the last keepNValues count of data //in the first keepNValues places if (m_actualRows > spreadsheet->mqttClient()->keepNValues()) { for (int i = 0; i < spreadsheet->mqttClient()->keepNValues(); i++) { static_cast*>(m_dataContainer[n])->operator[] (i) = 
static_cast*>(m_dataContainer[n])->operator[](m_actualRows - spreadsheet->mqttClient()->keepNValues() + i); } } //if the keepNValues got bigger we move the existing values to the last m_actualRows positions //then fill the remaining lines with 0 if (m_actualRows < spreadsheet->mqttClient()->keepNValues()) { vector->reserve( spreadsheet->mqttClient()->keepNValues()); vector->resize( spreadsheet->mqttClient()->keepNValues()); for (int i = 1; i <= m_actualRows; i++) { static_cast*>(m_dataContainer[n])->operator[] (spreadsheet->mqttClient()->keepNValues() - i) = static_cast*>(m_dataContainer[n])->operator[](spreadsheet->mqttClient()->keepNValues() - i - rowDiff); } for (int i = 0; i < rowDiff; i++) static_cast*>(m_dataContainer[n])->operator[](i) = 0; } break; } case AbstractColumn::Text: { QVector* vector = static_cast*>(spreadsheet->child(n)->data()); m_dataContainer[n] = static_cast(vector); //if the keepNValues got smaller then we move the last keepNValues count of data //in the first keepNValues places if (m_actualRows > spreadsheet->mqttClient()->keepNValues()) { for (int i = 0; i < spreadsheet->mqttClient()->keepNValues(); i++) { static_cast*>(m_dataContainer[n])->operator[] (i) = static_cast*>(m_dataContainer[n])->operator[](m_actualRows - spreadsheet->mqttClient()->keepNValues() + i); } } //if the keepNValues got bigger we move the existing values to the last m_actualRows positions //then fill the remaining lines with empty lines if (m_actualRows < spreadsheet->mqttClient()->keepNValues()) { vector->reserve( spreadsheet->mqttClient()->keepNValues()); vector->resize( spreadsheet->mqttClient()->keepNValues()); for (int i = 1; i <= m_actualRows; i++) { static_cast*>(m_dataContainer[n])->operator[] (spreadsheet->mqttClient()->keepNValues() - i) = static_cast*>(m_dataContainer[n])->operator[](spreadsheet->mqttClient()->keepNValues() - i - rowDiff); } for (int i = 0; i < rowDiff; i++) static_cast*>(m_dataContainer[n])->operator[](i).clear(); } break; } case 
AbstractColumn::DateTime: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); m_dataContainer[n] = static_cast(vector); //if the keepNValues got smaller then we move the last keepNValues count of data //in the first keepNValues places if (m_actualRows > spreadsheet->mqttClient()->keepNValues()) { for (int i = 0; i < spreadsheet->mqttClient()->keepNValues(); i++) { static_cast*>(m_dataContainer[n])->operator[] (i) = static_cast*>(m_dataContainer[n])->operator[](m_actualRows - spreadsheet->mqttClient()->keepNValues() + i); } } //if the keepNValues got bigger we move the existing values to the last m_actualRows positions //then fill the remaining lines with null datetime if (m_actualRows < spreadsheet->mqttClient()->keepNValues()) { vector->reserve( spreadsheet->mqttClient()->keepNValues()); vector->resize( spreadsheet->mqttClient()->keepNValues()); for (int i = 1; i <= m_actualRows; i++) { static_cast*>(m_dataContainer[n])->operator[] (spreadsheet->mqttClient()->keepNValues() - i) = static_cast*>(m_dataContainer[n])->operator[](spreadsheet->mqttClient()->keepNValues() - i - rowDiff); } for (int i = 0; i < rowDiff; i++) static_cast*>(m_dataContainer[n])->operator[](i) = QDateTime(); } break; } //TODO case AbstractColumn::Month: case AbstractColumn::Day: break; } } //if the keepNValues got smaller resize the spreadsheet if (m_actualRows > spreadsheet->mqttClient()->keepNValues()) spreadsheet->setRowCount(spreadsheet->mqttClient()->keepNValues()); //set the new row count m_actualRows = spreadsheet->mqttClient()->keepNValues(); qDebug()<<"actual rows: "<mqttClient()->sampleSize()); else { m_actualRows += newData.size(); } } //fixed size if (keepNValues != 0) { if (readingType == MQTTClient::ReadingType::TillEnd) { //we had more lines than the fixed size, so we read m_actualRows number of lines if (newLinesTillEnd > m_actualRows) { linesToRead = m_actualRows; } else linesToRead = newLinesTillEnd; } else { //we read max sample size number of lines when the 
reading mode //is ContinuouslyFixed or FromEnd if (spreadsheet->mqttClient()->sampleSize() <= spreadsheet->mqttClient()->keepNValues()) linesToRead = qMin(spreadsheet->mqttClient()->sampleSize(), newLinesTillEnd); else linesToRead = qMin(spreadsheet->mqttClient()->keepNValues(), newLinesTillEnd); } } else linesToRead = m_actualRows - spreadsheetRowCountBeforeResize; if (linesToRead == 0) return; } else { if (keepNValues != 0) linesToRead = newLinesTillEnd > m_actualRows ? m_actualRows : newLinesTillEnd; else linesToRead = newLinesTillEnd; } qDebug()<<"linestoread = " << linesToRead; //new rows/resize columns if we don't have a fixed size if (keepNValues == 0) { #ifdef PERFTRACE_LIVE_IMPORT PERFTRACE("AsciiLiveDataImportResizing: "); #endif if (spreadsheet->rowCount() < m_actualRows) spreadsheet->setRowCount(m_actualRows); if (!m_prepared) currentRow = 0; else { // indexes the position in the vector(column) currentRow = spreadsheetRowCountBeforeResize; } // if we have fixed size, we do this only once in preparation, here we can use // m_prepared and we need something to decide whether it has a fixed size or increasing for (int n = 0; n < m_actualCols; ++n) { // data() returns a void* which is a pointer to any data type (see ColumnPrivate.cpp) switch (columnModes[n]) { case AbstractColumn::Numeric: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::Integer: { QVector* vector = static_cast* >(spreadsheet->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::Text: { QVector* vector = static_cast*>(spreadsheet->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::DateTime: { QVector* vector = static_cast* 
>(spreadsheet->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } //TODO case AbstractColumn::Month: case AbstractColumn::Day: break; } } } else { //when we have a fixed size we have to pop sampleSize number of lines if specified //here popping, setting currentRow if (!m_prepared) currentRow = m_actualRows - qMin(newLinesTillEnd, m_actualRows); else { if (readingType == MQTTClient::ReadingType::TillEnd) { if (newLinesTillEnd > m_actualRows) currentRow = 0; else currentRow = m_actualRows - newLinesTillEnd; } else { //we read max sample rate number of lines when the reading mode //is ContinuouslyFixed or FromEnd currentRow = m_actualRows - linesToRead; } } if (m_prepared) { #ifdef PERFTRACE_LIVE_IMPORT PERFTRACE("AsciiLiveDataImportPopping: "); #endif for (int row = 0; row < linesToRead; ++row) { for (int col = 0; col < m_actualCols; ++col) { switch (columnModes[col]) { case AbstractColumn::Numeric: { QVector* vector = static_cast* >(spreadsheet->child(col)->data()); vector->pop_front(); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[col] = static_cast(vector); break; } case AbstractColumn::Integer: { QVector* vector = static_cast* >(spreadsheet->child(col)->data()); vector->pop_front(); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[col] = static_cast(vector); break; } case AbstractColumn::Text: { QVector* vector = static_cast*>(spreadsheet->child(col)->data()); vector->pop_front(); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[col] = static_cast(vector); break; } case AbstractColumn::DateTime: { QVector* vector = static_cast* >(spreadsheet->child(col)->data()); vector->pop_front(); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[col] = static_cast(vector); break; } //TODO case AbstractColumn::Month: case AbstractColumn::Day: break; } } } } } // from the last row we 
read the new data in the spreadsheet qDebug() << "reading from line: " << currentRow << " lines till end: " << newLinesTillEnd; qDebug() << "Lines to read: " << linesToRead <<" actual rows: " << m_actualRows; newDataIdx = 0; //From end means that we read the last sample size amount of data if (readingType == MQTTClient::ReadingType::FromEnd) { if (m_prepared) { if (newData.size() > spreadsheet->mqttClient()->sampleSize()) newDataIdx = newData.size() - spreadsheet->mqttClient()->sampleSize(); } } qDebug() << "newDataIdx: " << newDataIdx; //read the data static int indexColumnIdx = 0; { #ifdef PERFTRACE_LIVE_IMPORT PERFTRACE("AsciiLiveDataImportFillingContainers: "); #endif int row = 0; QLocale locale(numberFormat); for (; row < linesToRead; ++row) { QString line; if (readingType == MQTTClient::ReadingType::FromEnd) line = newData.at(newDataIdx++); else line = newData.at(row); if (simplifyWhitespacesEnabled) line = line.simplified(); if (line.isEmpty() || (!commentCharacter.isEmpty() && line.startsWith(commentCharacter))) continue; //add index if required int offset = 0; if (createIndexEnabled) { int index = (keepNValues != 0) ? indexColumnIdx++ : currentRow; static_cast*>(m_dataContainer[0])->operator[](currentRow) = index; ++offset; } //add current timestamp if required if (createTimestampEnabled) { static_cast*>(m_dataContainer[offset])->operator[](currentRow) = QDateTime::currentDateTime(); ++offset; } //parse the columns QStringList lineStringList = line.split(m_separator, (QString::SplitBehavior)skipEmptyParts); qDebug()<<"########################################################################"; qDebug()<*>(m_dataContainer[col])->operator[](currentRow) = (isNumber ? value : nanValue); break; } case AbstractColumn::Integer: { bool isNumber; const int value = locale.toInt(valueString, &isNumber); static_cast*>(m_dataContainer[col])->operator[](currentRow) = (isNumber ? 
value : 0); break; } case AbstractColumn::DateTime: { const QDateTime valueDateTime = QDateTime::fromString(valueString, dateTimeFormat); static_cast*>(m_dataContainer[col])->operator[](currentRow) = valueDateTime.isValid() ? valueDateTime : QDateTime(); break; } case AbstractColumn::Text: if (removeQuotesEnabled) valueString.remove(QLatin1Char('"')); static_cast*>(m_dataContainer[col])->operator[](currentRow) = valueString; break; case AbstractColumn::Month: //TODO break; case AbstractColumn::Day: //TODO break; } } else { DEBUG(" missing columns in this line"); switch (columnModes[n]) { case AbstractColumn::Numeric: static_cast*>(m_dataContainer[col])->operator[](currentRow) = nanValue; break; case AbstractColumn::Integer: static_cast*>(m_dataContainer[col])->operator[](currentRow) = 0; break; case AbstractColumn::DateTime: static_cast*>(m_dataContainer[col])->operator[](currentRow) = QDateTime(); break; case AbstractColumn::Text: static_cast*>(m_dataContainer[col])->operator[](currentRow).clear(); break; case AbstractColumn::Month: //TODO break; case AbstractColumn::Day: //TODO break; } } } currentRow++; } } if (m_prepared) { //notify all affected columns and plots about the changes PERFTRACE("AsciiLiveDataImport, notify affected columns and plots"); const Project* project = spreadsheet->project(); QVector curves = project->children(AbstractAspect::Recursive); QVector plots; for (int n = 0; n < m_actualCols; ++n) { Column* column = spreadsheet->column(n); //determine the plots where the column is consumed for (const auto* curve : curves) { if (curve->xColumn() == column || curve->yColumn() == column) { CartesianPlot* plot = dynamic_cast(curve->parentAspect()); if (plots.indexOf(plot) == -1) { plots << plot; plot->setSuppressDataChangedSignal(true); } } } column->setChanged(); } //loop over all affected plots and retransform them for (auto* const plot : plots) { //TODO setting this back to true triggers again a lot of retransforms in the plot (one for each curve). 
// plot->setSuppressDataChangedSignal(false); plot->dataChanged(); } } else m_prepared = true; DEBUG("AsciiFilterPrivate::readFromMQTTTopic() DONE"); } /*! * \brief After the MQTTTopic was loaded, the filter is prepared for reading * \param prepared * \param topic * \param separator */ void AsciiFilterPrivate::setPreparedForMQTT(bool prepared, MQTTTopic* topic, const QString& separator) { m_prepared = prepared; //If originally it was prepared we have to restore the settings if (prepared) { m_separator = separator; m_actualCols = endColumn - startColumn + 1; m_actualRows = topic->rowCount(); //set the column modes columnModes.resize(topic->columnCount()); for (int i = 0; i < topic->columnCount(); ++i) { columnModes[i] = topic->column(i)->columnMode(); } //set the data containers m_dataContainer.resize(m_actualCols); for (int n = 0; n < m_actualCols; ++n) { // data() returns a void* which is a pointer to any data type (see ColumnPrivate.cpp) topic->child(n)->setColumnMode(columnModes[n]); switch (columnModes[n]) { case AbstractColumn::Numeric: { QVector* vector = static_cast* >(topic->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::Integer: { QVector* vector = static_cast* >(topic->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::Text: { QVector* vector = static_cast*>(topic->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } case AbstractColumn::DateTime: { QVector* vector = static_cast* >(topic->child(n)->data()); vector->reserve(m_actualRows); vector->resize(m_actualRows); m_dataContainer[n] = static_cast(vector); break; } //TODO case AbstractColumn::Month: case AbstractColumn::Day: break; } } } } #endif /*! 
* \brief Returns the separator used by the filter * \return */ QString AsciiFilterPrivate::separator() const { return m_separator; } diff --git a/src/backend/datasources/filters/AsciiFilter.h b/src/backend/datasources/filters/AsciiFilter.h index f5ffb9abf..ac3337522 100644 --- a/src/backend/datasources/filters/AsciiFilter.h +++ b/src/backend/datasources/filters/AsciiFilter.h @@ -1,133 +1,134 @@ /*************************************************************************** File : AsciiFilter.h Project : LabPlot Description : ASCII I/O-filter -------------------------------------------------------------------- Copyright : (C) 2009-2019 Alexander Semke (alexander.semke@web.de) Copyright : (C) 2017 Stefan Gerlach (stefan.gerlach@uni.kn) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef ASCIIFILTER_H #define ASCIIFILTER_H #include "backend/datasources/filters/AbstractFileFilter.h" #include "backend/core/AbstractColumn.h" class Spreadsheet; class QStringList; class QIODevice; class AsciiFilterPrivate; class QAbstractSocket; class MQTTTopic; class MQTTClient; class AsciiFilter : public AbstractFileFilter { Q_OBJECT public: AsciiFilter(); ~AsciiFilter() override; static QStringList separatorCharacters(); static QStringList commentCharacters(); static QStringList dataTypes(); static QStringList predefinedFilters(); static QString fileInfoString(const QString&); static int columnNumber(const QString& fileName, const QString& separator = QString()); static size_t lineNumber(const QString& fileName); size_t lineNumber(QIODevice&) const; // calculate number of lines if device supports it // read data from any device void readDataFromDevice(QIODevice& device, AbstractDataSource*, AbstractFileFilter::ImportMode = AbstractFileFilter::Replace, int lines = -1); void readFromLiveDeviceNotFile(QIODevice& device, AbstractDataSource*dataSource); qint64 readFromLiveDevice(QIODevice& device, AbstractDataSource*, qint64 from = -1); // overloaded function to read from file void readDataFromFile(const QString& fileName, AbstractDataSource* = nullptr, AbstractFileFilter::ImportMode = AbstractFileFilter::Replace) override; void write(const QString& fileName, AbstractDataSource*) override; QVector preview(const QString& fileName, int lines); QVector preview(QIODevice& device); void loadFilterSettings(const QString&) override; void saveFilterSettings(const QString&) const override; #ifdef HAVE_MQTT QVector preview(const QString& message); QString 
MQTTColumnStatistics(const MQTTTopic*) const; AbstractColumn::ColumnMode MQTTColumnMode() const; void readMQTTTopic(const QString& message, AbstractDataSource*); void setPreparedForMQTT(bool, MQTTTopic*, const QString&); #endif QString separator() const; void setCommentCharacter(const QString&); QString commentCharacter() const; void setSeparatingCharacter(const QString&); QString separatingCharacter() const; void setDateTimeFormat(const QString&); QString dateTimeFormat() const; void setNumberFormat(QLocale::Language); QLocale::Language numberFormat() const; void setAutoModeEnabled(const bool); bool isAutoModeEnabled() const; void setHeaderEnabled(const bool); bool isHeaderEnabled() const; void setSkipEmptyParts(const bool); bool skipEmptyParts() const; void setSimplifyWhitespacesEnabled(const bool); bool simplifyWhitespacesEnabled() const; void setNaNValueToZero(const bool); bool NaNValueToZeroEnabled() const; void setRemoveQuotesEnabled(const bool); bool removeQuotesEnabled() const; void setCreateIndexEnabled(const bool); bool createIndexEnabled() const; void setCreateTimestampEnabled(const bool); bool createTimestampEnabled() const; void setVectorNames(const QString&); + void setVectorNames(QStringList); QStringList vectorNames() const; QVector columnModes(); void setStartRow(const int); int startRow() const; void setEndRow(const int); int endRow() const; void setStartColumn(const int); int startColumn() const; void setEndColumn(const int); int endColumn() const; void save(QXmlStreamWriter*) const override; bool load(XmlStreamReader*) override; int isPrepared(); private: std::unique_ptr const d; friend class AsciiFilterPrivate; }; #endif diff --git a/src/kdefrontend/datasources/ImportDatasetWidget.cpp b/src/kdefrontend/datasources/ImportDatasetWidget.cpp index 3af34d165..4134b0eba 100644 --- a/src/kdefrontend/datasources/ImportDatasetWidget.cpp +++ b/src/kdefrontend/datasources/ImportDatasetWidget.cpp @@ -1,836 +1,851 @@ 
/*************************************************************************** File : ImportDatasetWidget.cpp Project : LabPlot Description : import online dataset widget -------------------------------------------------------------------- Copyright : (C) 2019 Kovacs Ferencz (kferike98@gmail.com) Copyright : (C) 2019 by Alexander Semke (alexander.semke@web.de) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "backend/datasources/DatasetHandler.h" #include "kdefrontend/datasources/ImportDatasetWidget.h" #include "kdefrontend/datasources/DatasetMetadataManagerDialog.h" #include "kdefrontend/DatasetModel.h" #include +#include +#include +#include #include #include +#include +#include +#include #include -#include -#include #include -#include -#include #include // #include // #include // #include // #include /*! \class ImportDatasetWidget \brief Widget for importing data from a dataset. 
\ingroup kdefrontend */ ImportDatasetWidget::ImportDatasetWidget(QWidget* parent) : QWidget(parent), m_categoryCompleter(new QCompleter), m_datasetCompleter(new QCompleter) { ui.setupUi(this); m_jsonDir = QStandardPaths::locate(QStandardPaths::AppDataLocation, QLatin1String("datasets"), QStandardPaths::LocateDirectory); loadCategories(); ui.lwDatasets->setSelectionMode(QAbstractItemView::SingleSelection); ui.twCategories->setSelectionMode(QAbstractItemView::SingleSelection); const int size = ui.leSearchCategories->height(); ui.lSearchCategories->setPixmap( QIcon::fromTheme(QLatin1String("go-next")).pixmap(size, size) ); ui.lSearchDatasets->setPixmap( QIcon::fromTheme(QLatin1String("go-next")).pixmap(size, size) ); QString info = i18n("Enter the name of the category to navigate to it."); ui.lSearchCategories->setToolTip(info); ui.leSearchCategories->setToolTip(info); ui.leSearchCategories->setPlaceholderText(i18n("Category...")); info = i18n("Enter the name of the dataset to navigate to it."); ui.lSearchDatasets->setToolTip(info); ui.leSearchDatasets->setToolTip(info); ui.leSearchDatasets->setPlaceholderText(i18n("Dataset...")); connect(ui.cbCollections, static_cast(&QComboBox::currentIndexChanged), this, &ImportDatasetWidget::collectionChanged); connect(ui.twCategories, &QTreeWidget::itemDoubleClicked, this, &ImportDatasetWidget::listDatasetsForSubcategory); connect(ui.twCategories, &QTreeWidget::itemSelectionChanged, [this] { if(!m_loadingCategories) listDatasetsForSubcategory(ui.twCategories->selectedItems().first()); }); connect(ui.leSearchDatasets, &QLineEdit::textChanged, this, &ImportDatasetWidget::scrollToDatasetListItem); // connect(ui.bClearCache, &QPushButton::clicked, this, &ImportDatasetWidget::clearCache); connect(ui.leSearchCategories, &QLineEdit::textChanged, this, &ImportDatasetWidget::scrollToCategoryTreeItem); // connect(ui.bRefresh, &QPushButton::clicked, this, &ImportDatasetWidget::refreshCategories); // connect(ui.bRestore, 
&QPushButton::clicked, this, &ImportDatasetWidget::restoreBackup); connect(ui.bNewDataset, &QPushButton::clicked, this, &ImportDatasetWidget::showDatasetMetadataManager); connect(ui.lwDatasets, &QListWidget::itemSelectionChanged, [this]() { datasetChanged(); }); connect(ui.lwDatasets, &QListWidget::doubleClicked, [this]() {emit datasetDoubleClicked(); }); ui.bRefresh->hide(); ui.bClearCache->hide(); ui.bRestore->hide(); ui.bNewDataset->hide(); } ImportDatasetWidget::~ImportDatasetWidget() { if(m_categoryCompleter != nullptr) delete m_categoryCompleter; if(m_datasetCompleter != nullptr) delete m_datasetCompleter; } /** * @brief Processes the json metadata file that contains the list of categories and subcategories and their datasets. */ void ImportDatasetWidget::loadCategories() { m_datasetsMap.clear(); ui.cbCollections->clear(); const QString collectionsFileName = m_jsonDir + QLatin1String("/DatasetCollections.json"); QFile file(collectionsFileName); if (file.open(QIODevice::ReadOnly)) { QJsonDocument document = QJsonDocument::fromJson(file.readAll()); file.close(); if (!document.isArray()) { QDEBUG("Invalid definition of " + collectionsFileName); return; } m_collections = document.array(); for (int col = 0; col < m_collections.size(); col++) { const QJsonObject& collection = m_collections[col].toObject(); const QString& collectionName = collection["name"].toString(); QString path = m_jsonDir + QLatin1Char('/') + collectionName + ".json"; QFile collectionFile(path); if (collectionFile.open(QIODevice::ReadOnly)) { QJsonDocument collectionDocument = QJsonDocument::fromJson(collectionFile.readAll()); if (!collectionDocument.isObject()) { QDEBUG("Invalid definition of " + path); continue; } QJsonObject collectionObject = collectionDocument.object(); QJsonArray categoryArray = collectionObject.value("categories").toArray(); //processing categories for(int i = 0 ; i < categoryArray.size(); ++i) { const QJsonObject& currentCategory = categoryArray[i].toObject(); const 
QString& categoryName = currentCategory.value("category_name").toString(); const QJsonArray& subcategories = currentCategory.value("subcategories").toArray(); //processing subcategories for(int j = 0; j < subcategories.size(); ++j) { QJsonObject currentSubCategory = subcategories[j].toObject(); QString subcategoryName = currentSubCategory.value("subcategory_name").toString(); const QJsonArray& datasetArray = currentSubCategory.value("datasets").toArray(); //processing the datasets of the actual subcategory for (const auto& dataset : datasetArray) m_datasetsMap[collectionName][categoryName][subcategoryName].push_back(dataset.toObject().value("filename").toString()); } } } } if(m_datasetModel) delete m_datasetModel; m_datasetModel = new DatasetModel(m_datasetsMap); //Fill up collections combo box ui.cbCollections->addItem(i18n("All") + QString(" (" + QString::number(m_datasetModel->allDatasetsList().toStringList().size()) + ")"), QLatin1String("All")); for (QString collection : m_datasetModel->collections()) ui.cbCollections->addItem(collection + " (" + QString::number(m_datasetModel->datasetCount(collection)) + ")", collection); collectionChanged(ui.cbCollections->currentIndex()); } else QMessageBox::critical(this, i18n("File not found"), i18n("Couldn't open the dataset collections file %1. Please check your installation.", collectionsFileName)); } /** * Shows all categories and sub-categories for the currently selected collection */ void ImportDatasetWidget::collectionChanged(int index) { bool allCollections = (index == 0); QString collectionName = ui.cbCollections->itemData(index).toString(); //update the info field QString info; if (!allCollections) { for (int i = 0; i < m_collections.size(); ++i) { const QJsonObject& collection = m_collections[i].toObject(); if ( collectionName == collection["name"].toString() ) { info += collection["description"].toString(); info += "



"; break; } } } ui.lInfo->setText(info); m_loadingCategories = true; ui.lwDatasets->clear(); ui.twCategories->clear(); QStringList categories = allCollections ? m_datasetModel->allCategories().toStringList() : m_datasetModel->categories(collectionName); //Go through every category that was previously processed. for(auto category : categories) { QStringList categoryList(category); categoryList.append(QString::number(m_datasetModel->datasetCount(collectionName, category))); QTreeWidgetItem* const currentCategoryItem = new QTreeWidgetItem(categoryList); ui.twCategories->addTopLevelItem(currentCategoryItem); QStringList subcategories = allCollections ? m_datasetModel->allSubcategories(category).toStringList() : m_datasetModel->subcategories(collectionName, category); //Go through every subcategory of the current category, that was previously processed. for(auto subcategory : subcategories) { QStringList subcategoryList(subcategory); subcategoryList.append(QString::number(m_datasetModel->datasetCount(collectionName, category, subcategory))); currentCategoryItem->addChild(new QTreeWidgetItem(QStringList(subcategoryList))); } } if(m_selectedCollection == collectionName) { restoreSelectedSubcategory(collectionName); } else { m_selectedCollection = collectionName; m_selectedCategory = ""; m_selectedSubcategory = ""; } m_loadingCategories = false; updateCategoryCompleter(); } /** * @brief Restores the lastly selected collection, category and subcategory making it the selected QTreeWidgetItem and also lists the datasets belonigng to it */ void ImportDatasetWidget::restoreSelectedSubcategory(const QString& collectionName) { if(m_datasetModel->categories(collectionName).contains(m_selectedCategory)) { const QTreeWidgetItem* const categoryItem = ui.twCategories->findItems(m_selectedCategory, Qt::MatchExactly).first(); if(m_datasetModel->subcategories(collectionName, m_selectedCategory).contains(m_selectedSubcategory)) { for(int i = 0; i < categoryItem->childCount(); ++i) { 
if(categoryItem->child(i)->text(0).compare(m_selectedSubcategory) == 0) { QTreeWidgetItem* const subcategoryItem = categoryItem->child(i); ui.twCategories->setCurrentItem(subcategoryItem); subcategoryItem->setSelected(true); m_selectedSubcategory.clear(); listDatasetsForSubcategory(subcategoryItem); break; } } } } } /** * @brief Populates lwDatasets with the datasets of the selected subcategory. * @param item the selected subcategory */ void ImportDatasetWidget::listDatasetsForSubcategory(QTreeWidgetItem* item) { if(item->childCount() == 0) { if(m_selectedSubcategory.compare(item->text(0)) != 0) { m_selectedSubcategory = item->text(0); m_selectedCategory = item->parent()->text(0); QString categoryName = item->parent()->text(0); ui.lwDatasets->clear(); for(QString dataset : m_datasetModel->datasets(m_selectedCollection, categoryName, m_selectedSubcategory)) ui.lwDatasets->addItem(new QListWidgetItem(dataset)); updateDatasetCompleter(); } } else { if(item->text(0).compare(m_selectedCategory) != 0) { m_selectedCategory = item->text(0); m_selectedSubcategory = ""; ui.lwDatasets->clear(); item->setExpanded(true); } } } /** * @brief Updates the completer used for searching among datasets. */ void ImportDatasetWidget::updateDatasetCompleter() { QStringList datasetList; for(int i = 0; i count(); ++i) { datasetList.append(ui.lwDatasets->item(i)->text()); } if(!datasetList.isEmpty()) { if(m_datasetCompleter != nullptr) delete m_datasetCompleter; m_datasetCompleter = new QCompleter(datasetList); m_datasetCompleter->setCompletionMode(QCompleter::PopupCompletion); m_datasetCompleter->setCaseSensitivity(Qt::CaseSensitive); ui.leSearchDatasets->setCompleter(m_datasetCompleter); } else ui.leSearchDatasets->setCompleter(nullptr); } /** * @brief Updates the completer used for searching among categories and subcategories. 
 */
void ImportDatasetWidget::updateCategoryCompleter() {
	//collect "Category" and "Category:Subcategory" entries from the tree widget
	QStringList categoryList;
	for (int i = 0; i < ui.twCategories->topLevelItemCount(); ++i) {
		categoryList.append(ui.twCategories->topLevelItem(i)->text(0));
		for(int j = 0; j < ui.twCategories->topLevelItem(i)->childCount(); ++j) {
			//completer entries for subcategories use the "category:subcategory" format,
			//parsed again in scrollToCategoryTreeItem()
			QString text = ui.twCategories->topLevelItem(i)->text(0) + QLatin1Char(':') + ui.twCategories->topLevelItem(i)->child(j)->text(0);
			categoryList.append(text);
		}
	}

	if(!categoryList.isEmpty()) {
		//replace the old completer with a fresh one built from the current tree content
		if(m_categoryCompleter != nullptr)
			delete m_categoryCompleter;
		m_categoryCompleter = new QCompleter(categoryList);
		m_categoryCompleter->setCompletionMode(QCompleter::PopupCompletion);
		m_categoryCompleter->setCaseSensitivity(Qt::CaseSensitive);
		ui.leSearchCategories->setCompleter(m_categoryCompleter);
	} else
		ui.leSearchCategories->setCompleter(nullptr);
}

/**
 * @brief Scrolls the twCategories to the given category or subcategory
 * @param rootName the name of the category or "category:subcategory"
 */
void ImportDatasetWidget::scrollToCategoryTreeItem(const QString& rootName) {
	//find the top-level (category) item first
	int topItemIdx = -1;
	for (int i = 0; i < ui.twCategories->topLevelItemCount(); ++i)
		if (rootName.startsWith(ui.twCategories->topLevelItem(i)->text(0))) {
			topItemIdx = i;
			break;
		}

	if (topItemIdx >= 0) {
		if (!rootName.contains(QLatin1Char(':'))) {
			//a plain category name - scroll to the category item
			ui.twCategories->scrollToItem(ui.twCategories->topLevelItem(topItemIdx),
										  QAbstractItemView::ScrollHint::PositionAtTop);
		} else {
			//"category:subcategory" - try to locate the subcategory child
			int childIdx = -1;
			for (int j = 0; j < ui.twCategories->topLevelItem(topItemIdx)->childCount(); ++j) {
				if(rootName.endsWith(ui.twCategories->topLevelItem(topItemIdx)->child(j)->text(0))) {
					childIdx = j;
					break;
				}
			}

			if (childIdx >= 0)
				ui.twCategories->scrollToItem(ui.twCategories->topLevelItem(topItemIdx)->child(childIdx),
											  QAbstractItemView::ScrollHint::PositionAtTop);
			else
				//subcategory not found - fall back to the category item
				ui.twCategories->scrollToItem(ui.twCategories->topLevelItem(topItemIdx),
											  QAbstractItemView::ScrollHint::PositionAtTop);
		}
	}
}

/**
 * @brief Scrolls the lwDatasets to the given dataset name.
 * @param rootName the name of the dataset
 */
void ImportDatasetWidget::scrollToDatasetListItem(const QString& rootName) {
	int itemIdx = -1;
	for (int i = 0; i < ui.lwDatasets->count(); ++i)
		if (ui.lwDatasets->item(i)->text() == rootName) {
			itemIdx = i;
			break;
		}

	if (itemIdx >= 0)
		ui.lwDatasets->scrollToItem(ui.lwDatasets->item(itemIdx),
									QAbstractItemView::ScrollHint::PositionAtTop);
}

/**
 * @brief Returns the name of the selected dataset, or an empty string if nothing is selected.
 */
QString ImportDatasetWidget::getSelectedDataset() const {
	if (ui.lwDatasets->selectedItems().count() > 0)
		return ui.lwDatasets->selectedItems().at(0)->text();
	else
		return QString();
}

/**
 * @brief Initiates the processing of the dataset's metadata file and of the dataset itself.
 * @param datasetHandler the DatasetHandler that downloads and processes the dataset
 */
void ImportDatasetWidget::loadDatasetToProcess(DatasetHandler* datasetHandler) {
	datasetHandler->processMetadata(m_datasetObject);
}

/**
 * @brief Returns the QJsonObject associated with the currently selected dataset.
 * Re-reads the collection metadata file(s) and searches for the entry whose "filename"
 * matches the selected dataset; returns a default-constructed QJsonObject if not found
 * or if a metadata file is invalid.
 */
QJsonObject ImportDatasetWidget::loadDatasetObject() {
	//index 0 in the combo box means "All" collections
	bool allCollections = (ui.cbCollections->currentIndex() == 0);

	for (int i = 0; i < m_collections.size(); ++i) {
		const QJsonObject& collection = m_collections[i].toObject();
		const QString& collectionName = collection["name"].toString();

		//we have to find the selected collection in the metadata file.
		if(allCollections || collectionName == m_selectedCollection) {
			QFile file(m_jsonDir + QLatin1Char('/') + collectionName + ".json");

			//open the metadata file of the current collection
			if (file.open(QIODevice::ReadOnly)) {
				QJsonDocument collectionDocument = QJsonDocument::fromJson(file.readAll());
				file.close();
				if(!collectionDocument.isObject()) {
					qDebug()<< "The " + collectionName + ".json file is invalid";
					return QJsonObject();
				}

				QJsonObject collectionObject = collectionDocument.object();
				QJsonArray categoryArray = collectionObject.value("categories").toArray();

				//processing categories
				//NOTE(review): this loop variable shadows the outer "i" - harmless here,
				//but worth renaming on a future cleanup
				for(int i = 0 ; i < categoryArray.size(); ++i) {
					const QJsonObject currentCategory = categoryArray[i].toObject();
					const QString categoryName = currentCategory.value("category_name").toString();
					if(categoryName.compare(m_selectedCategory) == 0) {
						const QJsonArray subcategories = currentCategory.value("subcategories").toArray();

						//processing subcategories
						for(int j = 0; j < subcategories.size(); ++j) {
							QJsonObject currentSubCategory = subcategories[j].toObject();
							QString subcategoryName = currentSubCategory.value("subcategory_name").toString();
							if(subcategoryName.compare(m_selectedSubcategory) == 0) {
								const QJsonArray datasetArray = currentSubCategory.value("datasets").toArray();

								//processing the datasets of the actual subcategory
								for (const auto& dataset : datasetArray) {
									if(getSelectedDataset().compare(dataset.toObject().value("filename").toString()) == 0)
										return dataset.toObject();
								}
							}
						}
					}
				}
			}
			//a specific collection was requested and searched - no need to look further
			if (!allCollections)
				break;
		}
	}
	return QJsonObject();
}

/**
 * @brief Opens the DatasetMetadataManagerDialog when the user wants to add a new dataset.
 */
void ImportDatasetWidget::showDatasetMetadataManager() {
	DatasetMetadataManagerDialog* dlg = new DatasetMetadataManagerDialog(this, m_datasetsMap);

	if (dlg->exec() == QDialog::Accepted) {
		//NOTE(review): pathToJson/dirPath are computed but never used below - presumably
		//leftovers of the disabled KNS3 upload path; candidates for removal
		const QString pathToJson = m_jsonDir + QLatin1String("DatasetCategories.json");
		const QString dirPath = QFileInfo(pathToJson).dir().absolutePath();

		//update the metadata document
		dlg->updateDocument(m_jsonDir);

		//Not working due to problems with KNS3 library
		/*uploadCategoryFile();
		uploadDatasetFile(dlg->getMetadataFilePath());*/

		//process the changes made in the metadata files
		loadCategories();
	}
	delete dlg;
}

/**
 * @brief Places the metadata file containing the list of collections into a specific directory.
 */
/*
void ImportDatasetWidget::downloadCollectionsFile() {
	const QString fileNameOld = QStandardPaths::locate(QStandardPaths::AppDataLocation, "datasets/DatasetCollections.json");
	const QString fileNameNew =m_jsonDir + QLatin1String("DatasetCollections.json");
	const QString parentDir = m_jsonDir.left(m_jsonDir.left(m_jsonDir.length() - 1).lastIndexOf(QDir::separator()));

	if(!QDir(m_jsonDir).exists()) {
		qDebug() << parentDir;
		QDir(parentDir).mkdir(QLatin1String("labplot_data"));
	}

	QFile::copy(fileNameOld, fileNameNew);
}
*/

/**
 * @brief Places the metadata file of the given dataset into a specific directory.
 * @param datasetName the name of the dataset
 */
/*
void ImportDatasetWidget::downloadCollectionFile(const QString& collectionName) {
	const QString fileNameOld = QStandardPaths::locate(QStandardPaths::AppDataLocation, QLatin1String("datasets") + QDir::separator() + collectionName);
	const QString fileNameNew =m_jsonDir + collectionName;

	QFile::copy(fileNameOld, fileNameNew);
}
*/

/**
 * @brief Refreshes the categories, subcategories and datasets.
 */
/*
void ImportDatasetWidget::refreshCategories() {
	QMessageBox::StandardButton reply;
	reply = QMessageBox::question(this, "Refresh metadata files", "Are you sure to refresh all of the metadata files? (every change will be removed, but a backup will be created)", QMessageBox::Yes|QMessageBox::No);

	if(reply == QMessageBox::Yes) {
		QString fileNameNew = m_jsonDir + QLatin1String("DatasetCollections.json");

		QFile existingCategoriesFile(fileNameNew);
		if(existingCategoriesFile.exists()) {
			//Delete old backup
			QFile oldBackup(m_jsonDir + QLatin1String("DatasetCollections_backup.json"));
			if(oldBackup.exists()) {
				oldBackup.remove();
			}
			oldBackup.close();

			//Create new backup
			if(!existingCategoriesFile.rename(m_jsonDir + QLatin1String("DatasetCollections_backup.json")))
				qDebug() << " Couldn't create backup because " << existingCategoriesFile.errorString();
		}

		//Obtain the new file
		downloadCollectionsFile();

		QString filePath = m_jsonDir + "DatasetCollections.json";
		QFile file(filePath);
		if (file.open(QIODevice::ReadOnly)) {
			m_datasetsMap.clear();
			QJsonDocument document = QJsonDocument::fromJson(file.readAll());
			QJsonArray collections;
			if(document.isArray())
				collections = document.array();
			else {
				qDebug()<< "The DatasetCollections.json file is invalid";
				return;
			}

			//Go trough every collection's metadata file
			for (int collectionIndex = 0; collectionIndex < collections.size(); collectionIndex++) {
				const QString currentCollection = collections[collectionIndex].toString();

				QFile existingCollectionFile(m_jsonDir + currentCollection + ".json");
				//we copy the file to the data location if it doesn't exist
				if(!existingCollectionFile.exists()) {
					downloadCollectionFile(currentCollection + ".json");
				}
				//otherwise we have to create a backup first
				else {
					QFile oldBackupCollection(m_jsonDir + currentCollection + "_backup.json");
					if(oldBackupCollection.exists()) {
						oldBackupCollection.remove();
					}
					oldBackupCollection.close();

					if(!existingCollectionFile.rename(m_jsonDir + currentCollection + "_backup.json"))
						qDebug() << " Couldn't create backup because " << existingCollectionFile.errorString();
				}
			}
		}

		//process the "refreshed" files and update the widget accordingly
		loadCategories();
	}
}
*/

/**
 * @brief Restores the saved metadata files. Revokes the effect of refreshCategories().
 */
/*
void ImportDatasetWidget::restoreBackup() {
	QMessageBox::StandardButton reply;
	reply = QMessageBox::question(this, "Restore backup", "Are you sure to restore the backup metadata files?", QMessageBox::Yes|QMessageBox::No);

	if(reply == QMessageBox::Yes) {
		//Restore the collection list first
		QFile backup(m_jsonDir + QLatin1String("DatasetCollections_backup.json"));
		if(backup.exists()) {
			QFile deleteFile(m_jsonDir + QLatin1String("DatasetCollections.json"));
			deleteFile.remove();

			if(!backup.rename(m_jsonDir + QLatin1String("DatasetCollections.json"))) {
				qDebug() << " Couldn't create backup because " << backup.errorString();
				downloadCollectionsFile();
			}
		}

		QString filePath = m_jsonDir + "DatasetCollections.json";
		QFile file(filePath);
		if (file.open(QIODevice::ReadOnly)) {
			m_datasetsMap.clear();
			QJsonDocument document = QJsonDocument::fromJson(file.readAll());
			QJsonArray collections;
			if(document.isArray())
				collections = document.array();
			else {
				qDebug()<< "The DatasetCollections.json file is invalid";
				return;
			}

			//Restore every collection's metadata file
			for (int collectionIndex = 0; collectionIndex < collections.size(); collectionIndex++) {
				const QString currentCollection = collections[collectionIndex].toString();
				QFile backupCollection(m_jsonDir + currentCollection + "_backup.json");
				if(backupCollection.exists()) {
					QFile collectionFile(m_jsonDir + currentCollection + ".json");
					collectionFile.remove();

					if(!backupCollection.rename(m_jsonDir + currentCollection + ".json")) {
						qDebug() << " Couldn't create backup because " << backupCollection.errorString();
						downloadCollectionFile(currentCollection + ".json");
					}
				}
			}
		}

		//process the restored files and update the widget accordingly
		loadCategories();
	}
}
*/

/**
 * @brief Clears the content of the directory in which the download of metadata files was done.
 */
/*
void ImportDatasetWidget::clearCache() {
	QMessageBox::StandardButton reply = QMessageBox::question(this, "Clear cache", "Are you sure to remove every downloaded dataset?", QMessageBox::Yes|QMessageBox::No);

	if(reply == QMessageBox::Yes) {
		QDir dir(QStandardPaths::writableLocation(QStandardPaths::AppDataLocation) + QLatin1String("/datasets_local/"));
		if(dir.exists()) {
			for(const auto& entry : dir.entryList()) {
				//delete every file that isn't potentially a metadata file
				if(!(entry.endsWith(QLatin1String(".json")) || entry.startsWith(QLatin1Char('.')))) {
					QFile deleteFile (dir.path() + QLatin1Char('/') + entry);
					if(deleteFile.exists()) {
						deleteFile.remove();
					}
				}
			}
		} else {
			qDebug("Couldn't clear cache, containing folder doesn't exist!");
		}
	}
}
*/

/**
 * @brief TODO: uploads the metadata file that contains the categories to store.kde.org -- Library doesn't work for indefinite time.
 */
/*
void ImportDatasetWidget::uploadCategoryFile() {
	KNS3::UploadDialog dialog("labplot2_datasets.knsrc", this);

	QFile file(m_jsonDir + "DatasetCategories.json");
	qDebug() << "file " << m_jsonDir + "DatasetCategories.json "<< file.exists();
	qDebug() << "file can be opened: " << file.open(QIODevice::ReadOnly) << " " << file.errorString();
	file.close();

	QUrl payloadFile ="file:" + m_jsonDir + "DatasetCategories.json";
	QFile file2(payloadFile.toLocalFile());
	qDebug() << "Local file: " << payloadFile.toLocalFile();
	if (!file2.open(QIODevice::ReadOnly)) {
		qDebug() << i18n("File not found: %1 ", payloadFile.url());
	} else {
		qDebug() << i18n("File found: %1 ", payloadFile.url());
	}
	file2.close();

	dialog.setUploadFile("file:" + m_jsonDir + "DatasetCategories.json");
	qDebug("Upload file set!");
	dialog.setUploadName("Dataset Categories");
	qDebug() << "Upload name set: ";
	dialog.exec();
}
*/

/**
 * @brief TODO: uploads the metadata file of a dataset to store.kde.org -- Library doesn't work for indefinite time.
*/ /* void ImportDatasetWidget::uploadDatasetFile(const QString& filePath) { KNS3::UploadDialog dialog("labplot2_datasets.knsrc", this); QFile file(filePath); qDebug() << filePath + " " << file.exists(); qDebug() << "file can be opened: " << file.open(QIODevice::ReadOnly) << " " << file.errorString(); file.close(); QUrl payloadFile ="file:" + filePath; QFile file2(payloadFile.toLocalFile()); qDebug() << "Local file: " << payloadFile.toLocalFile(); if (!file2.open(QIODevice::ReadOnly)) { qDebug() << i18n("File not found: %1 ", payloadFile.url()); } else { qDebug() << i18n("File found: %1 ", payloadFile.url()); } file2.close(); dialog.setUploadFile("file:" + filePath); qDebug("Upload file set!"); dialog.setUploadName("Dataset Categories"); qDebug() << "Upload name set: "; dialog.exec(); } */ /** * @brief Returns the structure containing the categories, subcategories and datasets. * @return the structure containing the categories, subcategories and datasets */ const DatasetsMap& ImportDatasetWidget::getDatasetsMap() { return m_datasetsMap; } /** * @brief Sets the currently selected collection * @param category the name of the collection */ void ImportDatasetWidget::setCollection(const QString& collection) { ui.cbCollections->setCurrentText(collection + " (" + QString(m_datasetModel->datasetCount(collection)) + ")"); } /** * @brief Sets the currently selected category * @param category the name of the category */ void ImportDatasetWidget::setCategory(const QString &category) { for(int i = 0; i < ui.twCategories->topLevelItemCount(); i++) { if (ui.twCategories->topLevelItem(i)->text(0).compare(category) == 0) { listDatasetsForSubcategory(ui.twCategories->topLevelItem(i)); break; } } } /** * @brief Sets the currently selected subcategory * @param subcategory the name of the subcategory */ void ImportDatasetWidget::setSubcategory(const QString &subcategory) { for(int i = 0; i < ui.twCategories->topLevelItemCount(); i++) { if 
(ui.twCategories->topLevelItem(i)->text(0).compare(m_selectedCategory) == 0) { QTreeWidgetItem* categoryItem = ui.twCategories->topLevelItem(i); for(int j = 0; j childCount(); j++) { if(categoryItem->child(j)->text(0).compare(subcategory) == 0) { listDatasetsForSubcategory(categoryItem->child(j)); break; } } break; } } } /** * @brief Sets the currently selected dataset * @param the currently selected dataset */ void ImportDatasetWidget::setDataset(const QString &datasetName) { for(int i = 0; i < ui.lwDatasets->count() ; i++) { if(ui.lwDatasets->item(i)->text().compare(datasetName) == 0) { ui.lwDatasets->item(i)->setSelected(true); break; } } } /** * @brief Updates the details of the currently selected dataset */ void ImportDatasetWidget::datasetChanged() { QString info; if (ui.cbCollections->currentIndex() != 0) { const QString& collectionName = ui.cbCollections->itemData(ui.cbCollections->currentIndex()).toString(); for (int i = 0; i < m_collections.size(); ++i) { const QJsonObject& collection = m_collections[i].toObject(); if ( collectionName.startsWith(collection["name"].toString()) ) { info += collection["description"].toString(); info += "

"; break; } } } if(!getSelectedDataset().isEmpty()) { m_datasetObject = loadDatasetObject(); info += "" + i18n("Dataset") + ":
"; info += m_datasetObject["name"].toString(); info += "

"; info += "" + i18n("Description") + ":
"; - info += m_datasetObject["description"].toString(); + + if (m_datasetObject.contains("description_url")) { + QNetworkAccessManager* manager = new QNetworkAccessManager(this); + connect(manager, &QNetworkAccessManager::finished, [this] (QNetworkReply* reply) { + QByteArray ba = reply->readAll(); + QString info(ba); + info = info.replace(QLatin1Char('\n'), QLatin1String("
")); + ui.lInfo->setText(ui.lInfo->text() + info); + } + ); + manager->get(QNetworkRequest(QUrl(m_datasetObject["description_url"].toString()))); + + } else + info += m_datasetObject["description"].toString(); } else m_datasetObject = QJsonObject(); ui.lInfo->setText(info); emit datasetSelected(); }