Example 1
# Using Social_Network_Ads.csv: predict whether a user clicked on the ad (0 or 1)

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Get data
dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Logistic Regression
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

# Predictions and Evaluations
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

Output:
[[65  3]
 [ 8 24]]
0.89

# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_train), y_train
# A coarser step (100) on the salary axis keeps the meshgrid small enough to
# fit in memory; with step 0.25 over a range of tens of thousands of dollars,
# the grid would hold hundreds of millions of points.
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 10, stop=X_set[:, 0].max() + 10, step=0.25),
                     np.arange(start=X_set[:, 1].min() - 1000, stop=X_set[:, 1].max() + 1000, step=100))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c=('red', 'green')[i], label=j)
plt.title('Logistic Regression (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

# Visualising the Test set results
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_test), y_test
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 10, stop=X_set[:, 0].max() + 10, step=0.25),
                     np.arange(start=X_set[:, 1].min() - 1000, stop=X_set[:, 1].max() + 1000, step=100))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c=('red', 'green')[i], label=j)
plt.title('Logistic Regression (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
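With the scaler and classifier fitted, the same steps can score a single new user. A minimal sketch (the age and salary values below are made-up for illustration, not taken from the dataset):

# Predict for one hypothetical user: 30 years old, $87,000 estimated salary.
# Raw inputs must pass through the same fitted StandardScaler as the training data.
new_user = sc.transform([[30, 87000]])
print(classifier.predict(new_user))        # predicted class: 0 or 1
print(classifier.predict_proba(new_user))  # probabilities for class 0 and class 1

Predicting on unscaled values would silently give wrong answers, since the model was trained in the scaled feature space.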


Example 2
# Import pandas
import pandas as pd

col_names = ['pregnant', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age', 'label']

# Load dataset
pima = pd.read_csv("pima-indians-diabetes.csv", header=None, names=col_names)
pima.head()

# Split dataset into features and target variable
feature_cols = ['pregnant', 'insulin', 'bmi', 'age', 'glucose', 'bp', 'pedigree']
X = pima[feature_cols]  # Features
y = pima.label          # Target variable

# Split X and y into training and testing sets.
# The dataset is broken into two parts in a 75:25 ratio: 75% of the data is
# used for model training and 25% for model testing.
# Note: train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Model Development and Prediction
# Import the class
from sklearn.linear_model import LogisticRegression

# Instantiate the model (using the default parameters). On recent scikit-learn
# versions the default lbfgs solver may warn about convergence on these
# unscaled features; raising max_iter or standardizing the features fixes that.
logreg = LogisticRegression()

# Fit the model with data
logreg.fit(X_train, y_train)

# Predict
y_pred = logreg.predict(X_test)

# Model Evaluation using Confusion Matrix
# Import the metrics class
from sklearn import metrics
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
cnf_matrix

Output:
array([[119,  11],
       [ 26,  36]])

# Visualizing Confusion Matrix using Heatmap
# Import required modules
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

class_names = [0, 1]  # names of the classes
fig, ax = plt.subplots()
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)

# Create heatmap
sns.heatmap(pd.DataFrame(cnf_matrix), annot=True, cmap="YlGnBu", fmt='g')
ax.xaxis.set_label_position("top")
plt.tight_layout()
plt.title('Confusion matrix', y=1.1)
plt.ylabel('Actual label')
plt.xlabel('Predicted label')

# Confusion Matrix Evaluation Metrics
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))

Output:
Accuracy: 0.8072916666666666
Precision: 0.7659574468085106
Recall: 0.5806451612903226

# ROC Curve
y_pred_proba = logreg.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)
plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
plt.legend(loc=4)
plt.show()
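The recall above (about 0.58) is noticeably lower than the precision: the model misses many actual diabetes cases. One common follow-up, sketched below, is to lower the decision threshold from the default 0.5. The 0.3 value is purely illustrative; the right threshold depends on the relative cost of false negatives versus false positives.

# Re-score the test set with a lower decision threshold to trade
# precision for recall. 0.3 is an illustrative value, not a recommendation.
threshold = 0.3
y_pred_low = (logreg.predict_proba(X_test)[:, 1] >= threshold).astype(int)
print("Recall:", metrics.recall_score(y_test, y_pred_low))
print("Precision:", metrics.precision_score(y_test, y_pred_low))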

Hyperparameter Tuning Using Grid Search
Here's how you can set up a grid search to tune a logistic regression model. Scikit-learn only supports certain penalty/solver combinations, so the parameter grid below is written as a list of dictionaries, each pairing a penalty with the solvers that actually support it:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Load dataset (example with a generic dataset)
data = pd.read_csv('your_dataset.csv')

# Split data into features and target
X = data.drop('target', axis=1)
y = data['target']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define the parameter grid as a list of compatible combinations:
#   - l2 is supported by all solvers
#   - l1 requires liblinear or saga
#   - elasticnet requires saga and an l1_ratio
param_grid = [
    {'penalty': ['l2'],
     'C': [0.1, 1.0, 10.0, 100.0],
     'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
     'max_iter': [100, 200, 300]},
    {'penalty': ['l1'],
     'C': [0.1, 1.0, 10.0, 100.0],
     'solver': ['liblinear', 'saga'],
     'max_iter': [100, 200, 300]},
    {'penalty': ['elasticnet'],
     'C': [0.1, 1.0, 10.0, 100.0],
     'solver': ['saga'],
     'l1_ratio': [0, 0.5, 1],
     'max_iter': [100, 200, 300]},
]

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=LogisticRegression(),
                           param_grid=param_grid,
                           cv=5,
                           n_jobs=-1,
                           scoring='accuracy')

# Fit GridSearchCV
grid_search.fit(X_train, y_train)

# Print the best parameters and score
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Score:", grid_search.best_score_)

# Best model
best_model = grid_search.best_estimator_

# Predict on the test set
y_pred = best_model.predict(X_test)

# Print classification report and confusion matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
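The grid above fits LogisticRegression on raw features. If the features need scaling (as in the earlier examples), the scaler should be fitted inside each cross-validation fold rather than once up front, so that no statistics from a validation fold leak into preprocessing. A minimal sketch using a Pipeline, assuming the same X_train and y_train as above:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# The scaler is refit on each CV training split, so validation-fold
# statistics never leak into preprocessing.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression()),
])

# Pipeline hyperparameters are addressed with the step name as a prefix.
pipe_param_grid = {
    'logreg__C': [0.1, 1.0, 10.0, 100.0],
    'logreg__penalty': ['l2'],
    'logreg__solver': ['lbfgs', 'liblinear'],
}

pipe_search = GridSearchCV(pipe, pipe_param_grid, cv=5, n_jobs=-1, scoring='accuracy')
pipe_search.fit(X_train, y_train)
print("Best Parameters:", pipe_search.best_params_)
print("Best Cross-Validation Score:", pipe_search.best_score_)

As a bonus, scaling usually lets solvers such as sag and saga converge in far fewer iterations.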
