diff --git a/Code/Day 3_Multiple_Linear_Regression.py b/Code/Day 3_Multiple_Linear_Regression.py
index 291598f..5527270 100644
--- a/Code/Day 3_Multiple_Linear_Regression.py
+++ b/Code/Day 3_Multiple_Linear_Regression.py
@@ -31,4 +31,4 @@ y_pred = regressor.predict(X_test)
 
 # regression evaluation
 from sklearn.metrics import r2_score
-print(r2_score(Y_test,y_pred))
+print(r2_score(Y_test, y_pred))
diff --git a/Code/KafkaProducer.py b/Code/KafkaProducer.py
new file mode 100644
index 0000000..4bc99d4
--- /dev/null
+++ b/Code/KafkaProducer.py
@@ -0,0 +1,20 @@
+#!/usr/bin/python
+"""Send a handful of test messages to a Kafka topic."""
+
+from kafka import KafkaProducer
+
+kafkaHosts = [
+    "kafka01.paas.longfor.sit:9092",
+    "kafka02.paas.longfor.sit:9092",
+    "kafka03.paas.longfor.sit:9092",
+]
+
+producer = KafkaProducer(bootstrap_servers=kafkaHosts)
+try:
+    for _ in range(20):
+        producer.send("testapplog_plm-prototype", b"Hello....")
+    # send() only queues records; flush() blocks until the queued sends complete.
+    producer.flush()
+finally:
+    # Release the producer's connections even if sending fails.
+    producer.close()
diff --git a/Code/TestKafka.py b/Code/TestKafka.py
new file mode 100644
index 0000000..6bd1dbe
--- /dev/null
+++ b/Code/TestKafka.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+"""Print every message received from a Kafka topic."""
+
+from kafka import KafkaConsumer
+
+kafkaHosts = [
+    "kafka01.paas.longfor.sit:9092",
+    "kafka02.paas.longfor.sit:9092",
+    "kafka03.paas.longfor.sit:9092",
+]
+
+# auto_offset_reset policies:
+#   earliest - if a partition has a committed offset, resume from it;
+#              otherwise consume from the beginning.
+#   latest   - if a partition has a committed offset, resume from it;
+#              otherwise consume only newly produced data.
+#   none     - if every partition has a committed offset, resume after it;
+#              if any partition lacks one, raise an exception.
+# NOTE(review): kafka-python documents only 'earliest' and 'latest' - confirm before using 'none'.
+consumer = KafkaConsumer(
+    bootstrap_servers=kafkaHosts,
+    group_id="mdf_group",
+    auto_offset_reset="latest",
+)
+try:
+    # subscribe() is documented to take a list of topic names.
+    consumer.subscribe(["testapplog_plm-prototype"])
+    for msg in consumer:
+        print(msg.value)
+finally:
+    # Close the consumer cleanly when the loop is interrupted (e.g. Ctrl-C).
+    consumer.close()
diff --git a/datasets/studentscores.csv b/datasets/studentscores.csv
index 2de5f0e..865bd49 100644
--- a/datasets/studentscores.csv
+++ b/datasets/studentscores.csv
@@ -24,3 +24,6 @@ Hours,Scores
 3.8,35
 6.9,76
 7.8,86
+2.1,93
+2.2,93
+2.5,93
\ No newline at end of file