import { GetObjectCommand, NoSuchKey, S3Client } from '@aws-sdk/client-s3';
import { logger } from '../analytics/KatalLogger';
import { getS3Client } from './aws-s3-services';

/**
 * Standardized outcome of fetching a single S3 object, so callers can treat
 * successes and failures uniformly.
 */
interface S3FetchResult {
  /** Key of the S3 object this result describes. */
  key: string;
  /** Parsed object contents, or null when nothing usable was retrieved. */
  data: any[] | null;
  /** Optional human-readable reason describing why the fetch failed. */
  error?: string;
}

/**
 * Fetches a single object from S3 and parses its body as JSON.
 *
 * Errors raised while sending the request, reading the body, or parsing the
 * JSON are caught and reported through the `error` field of the result rather
 * than being rethrown, so one bad key cannot reject a whole batch.
 *
 * @param client - The S3 client to use for the request
 * @param bucket - The name of the S3 bucket
 * @param key - The key of the object to fetch
 * @returns A promise that resolves to an S3FetchResult; `data` is null and
 *   `error` describes the reason whenever the object could not be used
 */
const getObjectFromS3 = async (client: S3Client, bucket: string, key: string): Promise<S3FetchResult> => {
  try {
    const command = new GetObjectCommand({ Bucket: bucket, Key: key });
    const response = await client.send(command);
    const str = await response.Body?.transformToString();

    // Check for empty response to avoid parsing errors
    if (!str) {
      return { key, data: null, error: 'Empty response body' };
    }

    const parsed = JSON.parse(str);
    // A body of literal `null` parses successfully but carries no data; attach
    // a reason so the result is not an unexplained failure.
    if (parsed === null) {
      return { key, data: null, error: 'Response body is JSON null' };
    }
    // NOTE(review): the payload is not validated to be an array - confirm the
    // stored objects always hold JSON arrays.
    return { key, data: parsed };
  } catch (error: unknown) {
    // Specifically handle case where the key doesn't exist. The name check
    // covers errors created by a different copy of the SDK, for which
    // `instanceof` would be false.
    const isMissingKey = error instanceof NoSuchKey || (error instanceof Error && error.name === 'NoSuchKey');
    if (isMissingKey) {
      logger.warn(`Key does not exist in S3: ${key}`);
      return { key, data: null, error: 'Key does not exist' };
    }
    // Log other errors for debugging
    logger.error(`Error fetching object from S3: ${key}`, error);
    // Thrown values are not guaranteed to be Error instances, so fall back to
    // a string form instead of reading `.message` blindly.
    const message = error instanceof Error ? error.message : String(error);
    return { key, data: null, error: message };
  }
};

/**
 * Retrieves a group of S3 objects concurrently.
 *
 * One request is started per key, and the returned promise settles once all
 * of them have produced a result. Results are in the same order as `keys`.
 *
 * @param client - The S3 client to use for the requests
 * @param bucket - The name of the S3 bucket
 * @param keys - An array of S3 object keys to fetch
 * @returns A promise that resolves to an array of S3FetchResults
 */
const batchFetchFromS3 = async (client: S3Client, bucket: string, keys: string[]): Promise<S3FetchResult[]> => {
  // Bind the shared client and bucket once; each key then needs only itself
  const fetchOne = (objectKey: string): Promise<S3FetchResult> => getObjectFromS3(client, bucket, objectKey);

  // Start every request up front, then wait for the whole group together
  const settled = await Promise.all(keys.map((objectKey) => fetchOne(objectKey)));
  return settled;
};

/**
 * Fetches and combines data from multiple S3 objects, processing in batches to manage load.
 *
 * Every requested key is represented in `results`: if a whole batch fails
 * unexpectedly, each of its keys is recorded as a failed fetch instead of
 * being dropped.
 *
 * @param bucket - The name of the S3 bucket
 * @param s3Keys - An array of S3 object keys to fetch
 * @param batchSize - The number of keys to process in each batch (default: 100).
 *   Fractional values are rounded down, with a minimum of 1.
 * @returns A promise that resolves to an object containing the combined data and individual results
 * @throws RangeError if `batchSize` is zero, negative, or NaN, since such a
 *   value can never advance through the key list
 */
export const fetchAndCombineS3Data = async (
  bucket: string,
  s3Keys: string[],
  batchSize: number = 100
): Promise<{ combinedData: any[]; results: S3FetchResult[] }> => {
  // A non-positive step would loop forever and NaN would silently skip every
  // key, so reject those values up front.
  if (!(batchSize > 0)) {
    throw new RangeError(`batchSize must be a positive number, received ${batchSize}`);
  }

  // Whole-number step of at least 1, capped at the key count so that an
  // infinite batch size still gives a finite step and a sensible batch total.
  const step = Math.min(Math.max(1, Math.floor(batchSize)), Math.max(1, s3Keys.length));
  const totalBatches = Math.ceil(s3Keys.length / step);

  const client = await getS3Client();

  const combinedData: any[] = [];
  const allResults: S3FetchResult[] = [];

  // Process keys in batches to manage memory usage and avoid overwhelming S3
  for (let start = 0, batchNumber = 1; start < s3Keys.length; start += step, batchNumber += 1) {
    const batchKeys = s3Keys.slice(start, start + step);

    let batchResults: S3FetchResult[];
    let batchFailed = false;
    try {
      batchResults = await batchFetchFromS3(client, bucket, batchKeys);
    } catch (error: unknown) {
      // Log batch errors but continue processing other batches
      logger.error(`Error processing batch ${batchNumber}:`, error);
      batchFailed = true;
      // Keep the failed keys visible to the caller and in the failure count
      const message = error instanceof Error ? error.message : String(error);
      batchResults = batchKeys.map((key) => ({ key, data: null, error: message }));
    }

    // Append in place; re-creating the accumulators with concat on every
    // batch would copy all previously collected items each time.
    for (const result of batchResults) {
      allResults.push(result);
    }

    // Failed fetches (null data) contribute nothing; array payloads are
    // flattened one level into the combined list.
    const batchData = batchResults.flatMap((result) => result.data ?? []);
    for (const item of batchData) {
      combinedData.push(item);
    }

    if (!batchFailed) {
      // Log progress for visibility in long-running operations
      logger.info(`Processed batch ${batchNumber} of ${totalBatches}`);
    }
  }

  // Calculate and log summary statistics
  const successCount = allResults.filter((result) => result.data !== null).length;
  const failCount = allResults.length - successCount;

  logger.info(`Fetched data from ${successCount} S3 objects. ${failCount} objects failed or were not found.`);

  return { combinedData, results: allResults };
};
