DEVELOPMENT... { "data_id": "41263", "name": "rainfall_bangladesh", "exact_name": "rainfall_bangladesh", "version": 2, "version_label": null, "description": "Mankind have been attempting to predict the weather from prehistory. For good reason for knowing when to plant crops, when to build and when to prepare for drought and flood. In a nation such as Bangladesh being able to predict the weather, especially rainfall has never been so vitally important. The proposed research work pursues to produce prediction model on rainfall using the machine learning algorithms. The base data for this work has been collected from Bangladesh Meteorological Department. It is mainly focused on the development of models for long-term rainfall prediction of Bangladesh divisions and districts (Weather Stations). Rainfall prediction is very important for the Bangladesh economy and day-to-day life. Whether scarce or heavy, rainfall affects rural and urban life to a great extent with the changing pattern of the climate. Unusual rainfall and a long-lasting rainy season are major factors to take into account. We want to see whether so much unusual behavior is taking place that another pattern is emerging, resulting in a new climatic description. Agriculture is dependent on rain, and heavy rainfall frequently causes floods leading to great loss of crops. Rainfall is a very complex phenomenon which is dependent on various atmospheric, oceanic and geographical parameters. The relationship between these parameters and rainfall is unstable. Besides this, the changing behavior of climatic factors is making the existing meteorological forecasting less usable to its users.\n\nInitially, linear regression models were developed for monthly rainfall prediction at station and national level by day, month and year. Here humidity, temperature and wind parameters are used as predictors. The study is further extended by applying another popular regression analysis algorithm, Random Forest Regression. 
After that, a few other classification algorithms have been used for model building, training and prediction. Those are Naive Bayes Classification, Decision Tree Classification (Entropy and Gini) and Random Forest Classification. In all model building and training, the predictor parameters were Station, Year, Month and Day. As the effect of rainfall-affecting parameters is embedded in rainfall, rainfall was the label or dependent variable in these models. The developed and trained model is capable of predicting rainfall in advance for a month of a given year for a given area (the areas used here are the stations; weather parameter values are measured by the Bangladesh Meteorological Department). The accuracy of rainfall estimation is above 65%. Accuracy percentage varies from algorithm to algorithm. \nTwo regression analysis and three classification analysis models have been developed for rainfall prediction at 33 Bangladeshi weather stations. The Apache Spark library has been used for machine learning in the Scala programming language. The main idea behind the use of classification and regression analysis is to see the comparative difference between the types of algorithms in prediction output and predictability, along with usability. \nThis thesis is a contribution to the effort of rainfall prediction within Bangladesh. It takes the strategy of applying machine learning models to historical weather data gathered in Bangladesh. As part of this work, a web-based software application was written using Apache Spark, Scala and HighCharts to demonstrate rainfall prediction using multiple machine learning models. 
Models are successively improved with the rainfall prediction accuracy.", "format": "ARFF", "uploader": " ", "uploader_id": 5243, "visibility": "public", "creator": null, "contributor": null, "date": "2018-11-09 18:00:58", "update_comment": null, "last_update": "2018-11-09 18:00:58", "licence": "Public", "status": "in_preparation", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/20649203\/file277c9c35fc8.arff", "default_target_attribute": "Rainfall", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "rainfall_bangladesh", "Mankind have been attempting to predict the weather from prehistory. For good reason for knowing when to plant crops, when to build and when to prepare for drought and flood. In a nation such as Bangladesh being able to predict the weather, especially rainfall has never been so vitally important. The proposed research work pursues to produce prediction model on rainfall using the machine learning algorithms. 
The base data for this work has been collected from Bangladesh Meteorological Department " ], "weight": 5 }, "qualities": { "NumberOfInstances": 16755, "NumberOfFeatures": 4, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 2, "NumberOfSymbolicFeatures": 1, "Quartile1MutualInformation": null, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "PercentageOfMissingValues": 0, "PercentageOfNumericFeatures": 50, "PercentageOfSymbolicFeatures": 25, "Quartile1AttributeEntropy": null, "Quartile1KurtosisOfNumericAtts": -1.1261372723532137, "Quartile1MeansOfNumericAtts": 202.09722470904296, "StdvNominalAttDistinctValues": 0, "Quartile1SkewnessOfNumericAtts": -0.11283915939224061, "Quartile1StdDevOfNumericAtts": 13.162071023827039, "Quartile2AttributeEntropy": null, "Quartile2KurtosisOfNumericAtts": 1.9660740531751966, "Quartile2MeansOfNumericAtts": 1098.287764846316, "Quartile2MutualInformation": null, "Quartile2SkewnessOfNumericAtts": 0.880987098900844, "Quartile2StdDevOfNumericAtts": 131.5332736487337, "Quartile3AttributeEntropy": null, "Quartile3KurtosisOfNumericAtts": 5.058285378703607, "Quartile3MeansOfNumericAtts": 1994.478304983589, "Quartile3MutualInformation": null, "Quartile3SkewnessOfNumericAtts": 1.8748133571939287, "Quartile3StdDevOfNumericAtts": 249.9044762736404, "AutoCorrelation": -136.5060284111257, "MeanMeansOfNumericAtts": 1098.287764846316, "ClassEntropy": null, "Dimensionality": 0.00023873470605789316, "EquivalentNumberOfAtts": null, "MajorityClassPercentage": null, "MajorityClassSize": null, "MaxAttributeEntropy": null, "MaxKurtosisOfNumericAtts": 5.058285378703607, "MaxMeansOfNumericAtts": 1994.478304983589, "MaxMutualInformation": null, "MaxNominalAttDistinctValues": 33, "MaxSkewnessOfNumericAtts": 1.8748133571939287, "MaxStdDevOfNumericAtts": 249.9044762736404, "MeanAttributeEntropy": null, "MeanKurtosisOfNumericAtts": 1.9660740531751966, 
"NumberOfBinaryFeatures": 0, "MeanMutualInformation": null, "MeanNoiseToSignalRatio": null, "MeanNominalAttDistinctValues": 33, "MeanSkewnessOfNumericAtts": 0.880987098900844, "MeanStdDevOfNumericAtts": 131.5332736487337, "MinAttributeEntropy": null, "MinKurtosisOfNumericAtts": -1.1261372723532137, "MinMeansOfNumericAtts": 202.09722470904296, "MinMutualInformation": null, "MinNominalAttDistinctValues": 33, "MinSkewnessOfNumericAtts": -0.11283915939224061, "MinStdDevOfNumericAtts": 13.162071023827039, "MinorityClassPercentage": null, "MinorityClassSize": null }, "tags": [], "features": [ { "name": "Rainfall", "index": "3", "type": "numeric", "distinct": "1128", "missing": "0", "target": "1", "min": "0", "max": "3001", "mean": "202", "stdev": "250" }, { "name": "Year", "index": "0", "type": "numeric", "distinct": "47", "missing": "0", "min": "1970", "max": "2016", "mean": "1994", "stdev": "13" }, { "name": "Station", "index": "1", "type": "nominal", "distinct": "33", "missing": "0", "distr": [] }, { "name": "Month", "index": "2", "type": "string", "distinct": "12", "missing": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }