Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 115 additions & 37 deletions code/Functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,51 +2,62 @@
import math
import re
import json
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import roc_curve, auc, precision_score, recall_score, f1_score
from sklearn.metrics import roc_curve, auc, f1_score, classification_report, precision_recall_curve
from sentence_transformers import SentenceTransformer
from StoreDataset import FileLoader, retrieve_to

class LinearConfig:
    """Hyper-parameters for the linear (cos-sim + entropy) classifier."""

    # Rebalances BCEWithLogitsLoss for the positive class.
    pos_weight = torch.tensor(0.65)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    # Dataset split fractions; the remainder is the test split.
    train_p = 0.75
    val_p = 0.15

    # Hidden-size grid explored during hyper-parameter search.
    d_model_choices = [32, 64, 128, 256]

    num_epochs = 50
    batch_num = 32
    learning_rate_choices = [0.1, 0.05, 0.01, 0.005, 0.001]
    stop_patience = 10  # early-stopping patience, in epochs

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Values selected after tuning.
    learning_rate = 0.1
    d_model = 32

class LogitConfig:
    """Hyper-parameters for the logprob (attention / conv) classifier."""

    # Rebalances BCEWithLogitsLoss for the positive class.
    pos_weight = torch.tensor(0.7)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    add_conv_choices = [False, True]

    # Dataset split fractions; the remainder is the test split.
    train_p = 0.75
    val_p = 0.15

    # Search grids for hyper-parameter tuning.
    d_model_choices = [32, 64, 128, 256]
    conv_ch_choices = [32, 64]
    drop_out = 0.1

    num_epochs = 100
    batch_num = 32
    learning_rate_choices = [0.1, 0.05, 0.01, 0.005, 0.001]
    patience = 10  # early-stopping patience, in epochs

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Values selected after tuning.
    add_conv = False
    d_model = 64
    conv_ch = 64
    learning_rate = 0.05

def count_files(dir):
    """Return the number of entries (files and subdirectories) in *dir*.

    NOTE(review): the parameter shadows the builtin ``dir``; kept for
    backward compatibility with existing callers.
    """
    # os.listdir already returns a list; no need to copy it element-wise.
    return len(os.listdir(dir))
Expand Down Expand Up @@ -201,7 +212,12 @@ def run_batch(config,

return total_loss

def plot_loss(history, is_linear):

# ————————————————————————————————
# plot graphs and calculate scores
# ————————————————————————————————

def plot_loss(history, is_linear, show, length=None):
plt.figure(figsize=(8, 5))
plt.plot(history["running_loss"], label='running_loss', color='blue')
plt.plot(history["val_loss"], label='val_loss', color='red', linestyle="dashed")
Expand All @@ -211,52 +227,114 @@ def plot_loss(history, is_linear):
plt.legend()

if is_linear:
length = count_files(r"test_results\loss\linear")
plt.savefig(fr"test_results\loss\linear\linear_{length + 1}.pdf")
if not length:
length = count_files(r"test_results\loss\linear") + 1
plt.savefig(fr"test_results\loss\linear\linear_{length}.pdf")
else:
length = count_files(r"test_results\loss\logprob")
plt.savefig(fr"test_results\loss\logprob\logprob_{length + 1}.pdf")
plt.show()
if not length:
length = count_files(r"test_results\loss\logprob") + 1
plt.savefig(fr"test_results\loss\logprob\logprob_{length}.pdf")

if show:
plt.show()

def calculate_threshold(model, config, val_loader):
    """Pick the decision threshold that maximises F1 on the validation set.

    Runs *model* (eval mode, no grad) over *val_loader*, collects sigmoid
    scores and labels, and returns the threshold from the precision/recall
    curve with the highest F1 score.
    """
    model.eval()
    results = []
    targets = []

    with torch.no_grad():
        for batch in val_loader:
            batch = {k: v.to(config.device) for k, v in batch.items()}

            # The two model families take different inputs.
            if model.is_linear:
                y_pred = model(batch["cos_sim"], batch["entropy"])
            else:
                y_pred = model(
                    batch["logprobs1"],
                    batch["logprobs2"],
                    batch["cos_sim"],
                    batch["entropy"]
                )
            y_pred = torch.sigmoid(y_pred.squeeze(1))
            results.append(y_pred.tolist())
            targets.append(batch["labels"].tolist())

    # Flatten per-batch lists into flat score/label lists.
    results = [item for res in results for item in res]
    targets = [item for tar in targets for item in tar]

    precision, recall, thresholds = precision_recall_curve(targets, results)

    # Epsilon guards against division by zero when precision == recall == 0.
    f1_scores = 2 * (precision * recall) / (precision + recall + 1e-8)

    # precision/recall have one more entry than thresholds; drop the last.
    f1_scores = f1_scores[:-1]
    best_index = np.argmax(f1_scores)
    best_threshold = thresholds[best_index]

    return best_threshold

def test_model(config, test_loader, threshold, model, parameter_path, plot_distribution: bool = False):
    """Evaluate a trained model on the test split.

    Loads weights from ``models/<parameter_path>``, scores *test_loader* via
    ``run_batch``, and returns ``(auroc, f1, fpr, tpr, report, report_dict)``.
    Pass ``threshold=-1`` to fall back to a fixed 0.5 decision threshold.
    When *plot_distribution* is True, interactively plots the raw score
    histogram (uses ``input()`` prompts, so only suitable for manual runs).
    """
    model.load_state_dict(torch.load(os.path.join("models", parameter_path)))

    y_pred, y_true = run_batch(
        config,
        test_loader,
        model,
        None,
        "test",
    )

    if plot_distribution:
        # Interactive inspection of the raw sigmoid-score distribution.
        plt.close(1)
        plt.figure(2)
        sns.histplot(data=y_pred, bins=5, kde=True, color="teal", label="model output")
        plt.xlim(0, 1)

        if input("Print 0.5 threshold? [y/n] ") == "y":
            plt.axvline(0.5, color="b", linestyle="dashed", label="0.5 threshold")
        if input("Print calculated threshold? [y/n] ") == "y":
            plt.axvline(threshold, color="r", linestyle="dashed", label="calculated threshold")
        plt.title("distribution of model output")
        plt.legend()

        save = input("save? [y/n] ") == "y"

        if save:
            name = input("input name: ")
            plt.savefig(fr"test_results\{name}.png")

        plt.show()

    # AUROC is threshold-free, computed on the raw scores.
    fpr, tpr, _ = roc_curve(y_true, y_pred)
    auroc = auc(fpr, tpr)

    # -1 is a sentinel meaning "no tuned threshold"; fall back to 0.5.
    y_pred = [0 if result < (threshold if threshold != -1 else 0.5) else 1 for result in y_pred]
    f1 = f1_score(y_true, y_pred)
    report = classification_report(y_true, y_pred, zero_division=0)
    report_dict = classification_report(y_true, y_pred, output_dict=True, zero_division=0)

    return auroc, f1, fpr, tpr, report, report_dict


def graph_roc_curve(auroc, fpr, tpr, is_linear, length=None):
    """Plot a ROC curve and save it as a PDF under ``test_results\\ROC``.

    *length* names the output file; when None, the next free index in the
    target directory is used (via ``count_files``).
    """
    plt.figure()
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auroc)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curve for {"linear model" if is_linear else "logprob model"}')
    plt.legend()

    if is_linear:
        # `is None` (not truthiness) so an explicit length of 0 is honoured.
        if length is None:
            length = count_files(r"test_results\ROC\linear") + 1
        plt.savefig(fr"test_results\ROC\linear\{length}.pdf")
    else:
        if length is None:
            length = count_files(r"test_results\ROC\logprob") + 1
        plt.savefig(fr"test_results\ROC\logprob\{length}.pdf")

# Manual entry point: rebuilds the cached datasets from the raw JSONL files.
# NOTE(review): `store_data` is presumably defined elsewhere in this module —
# it is not among the visible imports; confirm before running standalone.
if __name__ == "__main__":
    store_data(ans_dataset="TruthfulDataset.jsonl", cal_dataset="CalDataset.jsonl")
57 changes: 39 additions & 18 deletions code/Layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def __init__(self):
self.register_buffer("pe", pe)

def forward(self, x):
    """Add positional encodings to *x*.

    Slices the precomputed ``pe`` buffer to the sequence length of *x*
    (assumed shape (batch, seq, d_model) — TODO confirm) and broadcasts it
    across the batch dimension.
    """
    res = x + self.pe[:x.size(1), :].unsqueeze(0)
    return res

class Attention(nn.Module):
def __init__(self, inner_feature):
Expand Down Expand Up @@ -54,20 +55,26 @@ class LogitModel(nn.Module):
def __init__(self, config):
    """Build the logprob model from *config*.

    When ``config.add_conv`` is set (and ``conv_ch`` is truthy) a Conv2d +
    max-pool head replaces the default average-pool head over the
    self-attention output.
    """
    super().__init__()

    self.con = config
    self.is_linear = False

    self.pe = PositionalEncoding()
    self.self_attention = Attention(inner_feature=config.d_model)

    if config.add_conv and config.conv_ch:
        # Convolutional head: conv over the attention map, global max-pool.
        self.conv = nn.Conv2d(1, config.conv_ch, kernel_size=(3, 3))
        self.pooling_layer = nn.AdaptiveMaxPool2d((1, 1))
        self.dropout = nn.Dropout(p=config.drop_out)
        self.fc_attention = nn.Linear(in_features=config.conv_ch, out_features=1)
    elif not config.add_conv:
        # Plain head: global average-pool over the attention output.
        self.pooling_layer = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(p=config.drop_out)
        self.fc_attention = nn.Linear(in_features=config.d_model, out_features=1)
    else:
        # add_conv is truthy but conv_ch is falsy — misconfigured.
        raise RuntimeError("add_conv not properly structured")

    # Combines attention score, cosine similarity and entropy (3 features).
    self.fc_final = nn.Linear(in_features=3, out_features=1)

def forward(self, resp1_logits, resp2_logits, cosine_sim, entropy):
position1 = self.pe(resp1_logits)
Expand All @@ -78,12 +85,15 @@ def forward(self, resp1_logits, resp2_logits, cosine_sim, entropy):
position2,
position2,
)

pooled = self.pooling_layer(self_attention_values)
pooled = pooled.view(pooled.size(0), -1)

# conv_output = self.conv(self_attention_values.unsqueeze(1))
# pooled = self.pooling_layer(conv_output).view(conv_output.size(0), conv_output.size(1))
if self.con.add_conv:
conv_output = self.conv(self_attention_values.unsqueeze(1))
pooled = self.pooling_layer(conv_output).view(conv_output.size(0), conv_output.size(1))
elif not self.con.add_conv:
pooled = self.pooling_layer(self_attention_values)
pooled = pooled.view(pooled.size(0), -1)
else:
raise RuntimeError("add_conv not properly structured")

dropped = self.dropout(pooled)
attention_score = self.fc_attention(dropped)
Expand All @@ -102,15 +112,26 @@ def __init__(self, config):
self.is_linear = True

self.fc1 = nn.Linear(in_features=2, out_features=config.d_model)
self.batch_norm = nn.BatchNorm1d(num_features=config.d_model)
self.fc2 = nn.Linear(in_features=config.d_model, out_features=1)
self.batch_norm1 = nn.BatchNorm1d(num_features=config.d_model)
self.activation1 = nn.ReLU()

self.fc2 = nn.Linear(in_features=config.d_model, out_features=config.d_model)
self.batch_norm2 = nn.BatchNorm1d(num_features=config.d_model)
self.activation2 = nn.ReLU()

self.fc3 = nn.Linear(in_features=config.d_model, out_features=1)

def forward(self, cosine_sim, entropy):
    """Score a (cosine_sim, entropy) feature pair.

    Stacks the two scalar features into a (batch, 2) tensor and passes them
    through two Linear→BatchNorm→ReLU stages followed by a final Linear,
    returning raw logits of shape (batch, 1).
    """
    combined = torch.cat([cosine_sim.unsqueeze(1), entropy.unsqueeze(1)], dim=1)

    x = self.fc1(combined)
    x = self.batch_norm1(x)
    x = self.activation1(x)

    x = self.fc2(x)
    x = self.batch_norm2(x)
    x = self.activation2(x)

    res = self.fc3(x)

    return res
Loading