From 25948c9232156e553374ccaa8917c004e1f35fdd Mon Sep 17 00:00:00 2001 From: dakkar Date: Sat, 3 Feb 2024 11:29:46 +0000 Subject: [PATCH 1/7] simpler json-isation thanks Alina for the suggestion --- .../src/queue/processors/ImportNotesProcessorService.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts index e5177d28e5..0554566748 100644 --- a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts +++ b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts @@ -130,11 +130,11 @@ export class ImportNotesProcessorService { return typeof obj[Symbol.iterator] === 'function'; } - private parseTwitterFile(str : string) : null | [{ tweet: any }] { - const removed = str.replace(new RegExp('window\\.YTD\\.tweets\\.part0 = ', 'g'), ''); - + private parseTwitterFile(str : string) : null | { tweet: object }[] { + const jsonStr = str.replace(/^\s*window\.YTD\.tweets\.part0\s*=\s*/, ''); + try { - return JSON.parse(removed); + return JSON.parse(jsonStr); } catch (error) { //The format is not what we expected. Either this file was tampered with or twitters exports changed return null; From 7684f45a5e0394ec1b9d72a07eada96c9f801c4b Mon Sep 17 00:00:00 2001 From: dakkar Date: Sat, 3 Feb 2024 11:30:39 +0000 Subject: [PATCH 2/7] simpler mapping thanks Alina --- .../src/queue/processors/ImportNotesProcessorService.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts index 0554566748..6e15edf970 100644 --- a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts +++ b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts @@ -187,14 +187,11 @@ export class ImportNotesProcessorService { this.logger.succ(`Unzipping to ${outputPath}`); ZipReader.withDestinationPath(outputPath).viaBuffer(await fs.promises.readFile(destPath)); - const unprocessedTweetJson = this.parseTwitterFile(fs.readFileSync(outputPath + '/data/tweets.js', 'utf-8')); + const unprocessedTweets = this.parseTwitterFile(fs.readFileSync(outputPath + '/data/tweets.js', 'utf-8')); //Make sure that it isnt null (because if something went wrong in parseTwitterFile it returns null) if (unprocessedTweetJson) { - const tweets = Object.keys(unprocessedTweetJson).reduce((m, key, i, obj) => { - return m.concat(unprocessedTweetJson[i].tweet); - }, []); - + const tweets = unprocessedTweets.map(e=>e.tweet); const processedTweets = await this.recreateChain(['id_str'], ['in_reply_to_status_id_str'], tweets, false); this.queueService.createImportTweetsToDbJob(job.data.user, processedTweets, null); } else { From 3a3a051bb5de1c375b6b4b7acdfda1bc7b899679 Mon Sep 17 00:00:00 2001 From: dakkar Date: Sat, 3 Feb 2024 11:33:42 +0000 Subject: [PATCH 3/7] make almost all fs ops async there's no `fs.promises.exists` --- .../processors/ImportNotesProcessorService.ts | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts index 6e15edf970..7664c282df 100644 --- a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts +++ b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts @@ -51,7 +51,7 @@ export class ImportNotesProcessorService { @bindThis private async uploadFiles(dir: string, user: MiUser, folder?: MiDriveFolder['id']) { - const fileList = fs.readdirSync(dir); + const fileList = await fs.promises.readdir(dir); for await (const file of fileList) { const name = `${dir}/${file}`; if (fs.statSync(name).isDirectory()) { @@ -173,7 +173,7 @@ export class ImportNotesProcessorService { const destPath = path + '/twitter.zip'; try { - fs.writeFileSync(destPath, '', 'binary'); + await fs.promises.writeFile(destPath, '', 'binary'); await this.downloadService.downloadUrl(file.url, destPath); } catch (e) { // TODO: 何度か再試行 if (e instanceof Error || typeof e === 'string') { @@ -187,7 +187,7 @@ export class ImportNotesProcessorService { this.logger.succ(`Unzipping to ${outputPath}`); ZipReader.withDestinationPath(outputPath).viaBuffer(await fs.promises.readFile(destPath)); - const unprocessedTweets = this.parseTwitterFile(fs.readFileSync(outputPath + '/data/tweets.js', 'utf-8')); + const unprocessedTweets = this.parseTwitterFile(await fs.promises.readFile(outputPath + '/data/tweets.js', 'utf-8')); //Make sure that it isnt null (because if something went wrong in parseTwitterFile it returns null) if (unprocessedTweetJson) { @@ -208,7 +208,7 @@ export class ImportNotesProcessorService { const destPath = path + '/facebook.zip'; try { - fs.writeFileSync(destPath, '', 'binary'); + await fs.promises.writeFile(destPath, '', 'binary'); await this.downloadService.downloadUrl(file.url, destPath); } catch (e) { // TODO: 何度か再試行 if (e instanceof Error || typeof e === 'string') { @@ -221,7 +221,7 @@ export class ImportNotesProcessorService { try { this.logger.succ(`Unzipping to ${outputPath}`); ZipReader.withDestinationPath(outputPath).viaBuffer(await fs.promises.readFile(destPath)); - const postsJson = fs.readFileSync(outputPath + '/your_activity_across_facebook/posts/your_posts__check_ins__photos_and_videos_1.json', 'utf-8'); + const postsJson = await fs.promises.readFile(outputPath + '/your_activity_across_facebook/posts/your_posts__check_ins__photos_and_videos_1.json', 'utf-8'); const posts = JSON.parse(postsJson); const facebookFolder = await this.driveFoldersRepository.findOneBy({ name: 'Facebook', userId: job.data.user.id, parentId: folder?.id }); if (facebookFolder == null && folder) { @@ -241,7 +241,7 @@ export class ImportNotesProcessorService { const destPath = path + '/unknown.zip'; try { - fs.writeFileSync(destPath, '', 'binary'); + await fs.promises.writeFile(destPath, '', 'binary'); await this.downloadService.downloadUrl(file.url, destPath); } catch (e) { // TODO: 何度か再試行 if (e instanceof Error || typeof e === 'string') { @@ -257,7 +257,7 @@ export class ImportNotesProcessorService { const isInstagram = type === 'Instagram' || fs.existsSync(outputPath + '/instagram_live') || fs.existsSync(outputPath + '/instagram_ads_and_businesses'); const isOutbox = type === 'Mastodon' || fs.existsSync(outputPath + '/outbox.json'); if (isInstagram) { - const postsJson = fs.readFileSync(outputPath + '/content/posts_1.json', 'utf-8'); + const postsJson = await fs.promises.readFile(outputPath + '/content/posts_1.json', 'utf-8'); const posts = JSON.parse(postsJson); const igFolder = await this.driveFoldersRepository.findOneBy({ name: 'Instagram', userId: job.data.user.id, parentId: folder?.id }); if (igFolder == null && folder) { @@ -267,16 +267,16 @@ export class ImportNotesProcessorService { } this.queueService.createImportIGToDbJob(job.data.user, posts); } else if (isOutbox) { - const actorJson = fs.readFileSync(outputPath + '/actor.json', 'utf-8'); + const actorJson = await fs.promises.readFile(outputPath + '/actor.json', 'utf-8'); const actor = JSON.parse(actorJson); const isPleroma = actor['@context'].some((v: any) => typeof v === 'string' && v.match(/litepub(.*)/)); if (isPleroma) { - const outboxJson = fs.readFileSync(outputPath + '/outbox.json', 'utf-8'); + const outboxJson = await fs.promises.readFile(outputPath + '/outbox.json', 'utf-8'); const outbox = JSON.parse(outboxJson); const processedToots = await this.recreateChain(['object', 'id'], ['object', 'inReplyTo'], outbox.orderedItems.filter((x: any) => x.type === 'Create' && x.object.type === 'Note'), true); this.queueService.createImportPleroToDbJob(job.data.user, processedToots, null); } else { - const outboxJson = fs.readFileSync(outputPath + '/outbox.json', 'utf-8'); + const outboxJson = await fs.promises.readFile(outputPath + '/outbox.json', 'utf-8'); const outbox = JSON.parse(outboxJson); let mastoFolder = await this.driveFoldersRepository.findOneBy({ name: 'Mastodon', userId: job.data.user.id, parentId: folder?.id }); if (mastoFolder == null && folder) { @@ -299,7 +299,7 @@ export class ImportNotesProcessorService { this.logger.info(`Temp dir is ${path}`); try { - fs.writeFileSync(path, '', 'utf-8'); + await fs.promises.writeFile(path, '', 'utf-8'); await this.downloadService.downloadUrl(file.url, path); } catch (e) { // TODO: 何度か再試行 if (e instanceof Error || typeof e === 'string') { @@ -308,7 +308,7 @@ export class ImportNotesProcessorService { throw e; } - const notesJson = fs.readFileSync(path, 'utf-8'); + const notesJson = await fs.promises.readFile(path, 'utf-8'); const notes = JSON.parse(notesJson); const processedNotes = await this.recreateChain(['id'], ['replyId'], notes, false); this.queueService.createImportKeyNotesToDbJob(job.data.user, processedNotes, null); From a981bca7a3d8c5f44c24728f8c8b20134d311869 Mon Sep 17 00:00:00 2001 From: dakkar Date: Sat, 3 Feb 2024 11:37:20 +0000 Subject: [PATCH 4/7] simpler logic thanks Alina --- .../processors/ImportNotesProcessorService.ts | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts index 7664c282df..2476f94359 100644 --- a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts +++ b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts @@ -130,6 +130,7 @@ export class ImportNotesProcessorService { return typeof obj[Symbol.iterator] === 'function'; } + @bindThis private parseTwitterFile(str : string) : null | { tweet: object }[] { const jsonStr = str.replace(/^\s*window\.YTD\.tweets\.part0\s*=\s*/, ''); @@ -137,7 +138,8 @@ export class ImportNotesProcessorService { return JSON.parse(jsonStr); } catch (error) { //The format is not what we expected. Either this file was tampered with or twitters exports changed - return null; + this.logger.warn('Failed to import twitter notes due to malformed file'); + throw error; } } @@ -189,14 +191,9 @@ export class ImportNotesProcessorService { const unprocessedTweets = this.parseTwitterFile(await fs.promises.readFile(outputPath + '/data/tweets.js', 'utf-8')); - //Make sure that it isnt null (because if something went wrong in parseTwitterFile it returns null) - if (unprocessedTweetJson) { - const tweets = unprocessedTweets.map(e=>e.tweet); - const processedTweets = await this.recreateChain(['id_str'], ['in_reply_to_status_id_str'], tweets, false); - this.queueService.createImportTweetsToDbJob(job.data.user, processedTweets, null); - } else { - this.logger.warn('Failed to import twitter notes due to malformed file'); - } + const tweets = unprocessedTweets.map(e=>e.tweet); + const processedTweets = await this.recreateChain(['id_str'], ['in_reply_to_status_id_str'], tweets, false); + this.queueService.createImportTweetsToDbJob(job.data.user, processedTweets, null); } finally { cleanup(); } From 1bb5021c5479ad4a6bfad2714a4a4f2b6ba02493 Mon Sep 17 00:00:00 2001 From: dakkar Date: Sat, 3 Feb 2024 12:05:08 +0000 Subject: [PATCH 5/7] decode entity references from tweets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit apparently *some* tweets have those ☹ --- .../src/queue/processors/ImportNotesProcessorService.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts index 2476f94359..44c92fabb6 100644 --- a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts +++ b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts @@ -584,7 +584,8 @@ export class ImportNotesProcessorService { try { const date = new Date(tweet.created_at); - const textReplaceURLs = tweet.entities.urls && tweet.entities.urls.length > 0 ? await replaceTwitterUrls(tweet.full_text, tweet.entities.urls) : tweet.full_text; + const decodedText = tweet.full_text.replaceAll('>','>').replaceAll('<','<').replaceAll('&','&'); + const textReplaceURLs = tweet.entities.urls && tweet.entities.urls.length > 0 ? await replaceTwitterUrls(decodedText, tweet.entities.urls) : decodedText; const text = tweet.entities.user_mentions && tweet.entities.user_mentions.length > 0 ? await replaceTwitterMentions(textReplaceURLs, tweet.entities.user_mentions) : textReplaceURLs; const files: MiDriveFile[] = []; From bb3694bfed3ac86ed9d4975800d76baf272c9817 Mon Sep 17 00:00:00 2001 From: dakkar Date: Sat, 3 Feb 2024 12:55:46 +0000 Subject: [PATCH 6/7] lint --- .../src/queue/processors/ImportNotesProcessorService.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts index 44c92fabb6..5fb68f0a1a 100644 --- a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts +++ b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts @@ -131,7 +131,7 @@ export class ImportNotesProcessorService { } @bindThis - private parseTwitterFile(str : string) : null | { tweet: object }[] { + private parseTwitterFile(str : string) : { tweet: object }[] { const jsonStr = str.replace(/^\s*window\.YTD\.tweets\.part0\s*=\s*/, ''); try { @@ -191,7 +191,7 @@ export class ImportNotesProcessorService { const unprocessedTweets = this.parseTwitterFile(await fs.promises.readFile(outputPath + '/data/tweets.js', 'utf-8')); - const tweets = unprocessedTweets.map(e=>e.tweet); + const tweets = unprocessedTweets.map(e => e.tweet); const processedTweets = await this.recreateChain(['id_str'], ['in_reply_to_status_id_str'], tweets, false); this.queueService.createImportTweetsToDbJob(job.data.user, processedTweets, null); } finally { @@ -584,7 +584,7 @@ export class ImportNotesProcessorService { try { const date = new Date(tweet.created_at); - const decodedText = tweet.full_text.replaceAll('>','>').replaceAll('<','<').replaceAll('&','&'); + const decodedText = tweet.full_text.replaceAll('>', '>').replaceAll('<', '<').replaceAll('&', '&'); const textReplaceURLs = tweet.entities.urls && tweet.entities.urls.length > 0 ? await replaceTwitterUrls(decodedText, tweet.entities.urls) : decodedText; const text = tweet.entities.user_mentions && tweet.entities.user_mentions.length > 0 ? await replaceTwitterMentions(textReplaceURLs, tweet.entities.user_mentions) : textReplaceURLs; const files: MiDriveFile[] = []; From 4bc517ca89a137f81d94fa8d89a97b62afdbb4be Mon Sep 17 00:00:00 2001 From: dakkar Date: Sat, 3 Feb 2024 12:55:56 +0000 Subject: [PATCH 7/7] import fs/promises the right way thanks Marie --- .../processors/ImportNotesProcessorService.ts | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts index 5fb68f0a1a..b772578b33 100644 --- a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts +++ b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts @@ -1,4 +1,5 @@ import * as fs from 'node:fs'; +import * as fsp from 'node:fs/promises'; import * as vm from 'node:vm'; import * as crypto from 'node:crypto'; import { Inject, Injectable } from '@nestjs/common'; @@ -51,7 +52,7 @@ export class ImportNotesProcessorService { @bindThis private async uploadFiles(dir: string, user: MiUser, folder?: MiDriveFolder['id']) { - const fileList = await fs.promises.readdir(dir); + const fileList = await fsp.readdir(dir); for await (const file of fileList) { const name = `${dir}/${file}`; if (fs.statSync(name).isDirectory()) { @@ -175,7 +176,7 @@ export class ImportNotesProcessorService { const destPath = path + '/twitter.zip'; try { - await fs.promises.writeFile(destPath, '', 'binary'); + await fsp.writeFile(destPath, '', 'binary'); await this.downloadService.downloadUrl(file.url, destPath); } catch (e) { // TODO: 何度か再試行 if (e instanceof Error || typeof e === 'string') { @@ -187,9 +188,9 @@ export class ImportNotesProcessorService { const outputPath = path + '/twitter'; try { this.logger.succ(`Unzipping to ${outputPath}`); - ZipReader.withDestinationPath(outputPath).viaBuffer(await fs.promises.readFile(destPath)); + ZipReader.withDestinationPath(outputPath).viaBuffer(await fsp.readFile(destPath)); - const unprocessedTweets = this.parseTwitterFile(await fs.promises.readFile(outputPath + '/data/tweets.js', 'utf-8')); + const unprocessedTweets = this.parseTwitterFile(await fsp.readFile(outputPath + '/data/tweets.js', 'utf-8')); const tweets = unprocessedTweets.map(e => e.tweet); const processedTweets = await this.recreateChain(['id_str'], ['in_reply_to_status_id_str'], tweets, false); @@ -205,7 +206,7 @@ export class ImportNotesProcessorService { const destPath = path + '/facebook.zip'; try { - await fs.promises.writeFile(destPath, '', 'binary'); + await fsp.writeFile(destPath, '', 'binary'); await this.downloadService.downloadUrl(file.url, destPath); } catch (e) { // TODO: 何度か再試行 if (e instanceof Error || typeof e === 'string') { @@ -217,8 +218,8 @@ export class ImportNotesProcessorService { const outputPath = path + '/facebook'; try { this.logger.succ(`Unzipping to ${outputPath}`); - ZipReader.withDestinationPath(outputPath).viaBuffer(await fs.promises.readFile(destPath)); - const postsJson = await fs.promises.readFile(outputPath + '/your_activity_across_facebook/posts/your_posts__check_ins__photos_and_videos_1.json', 'utf-8'); + ZipReader.withDestinationPath(outputPath).viaBuffer(await fsp.readFile(destPath)); + const postsJson = await fsp.readFile(outputPath + '/your_activity_across_facebook/posts/your_posts__check_ins__photos_and_videos_1.json', 'utf-8'); const posts = JSON.parse(postsJson); const facebookFolder = await this.driveFoldersRepository.findOneBy({ name: 'Facebook', userId: job.data.user.id, parentId: folder?.id }); if (facebookFolder == null && folder) { @@ -238,7 +239,7 @@ export class ImportNotesProcessorService { const destPath = path + '/unknown.zip'; try { - await fs.promises.writeFile(destPath, '', 'binary'); + await fsp.writeFile(destPath, '', 'binary'); await this.downloadService.downloadUrl(file.url, destPath); } catch (e) { // TODO: 何度か再試行 if (e instanceof Error || typeof e === 'string') { @@ -250,11 +251,11 @@ export class ImportNotesProcessorService { const outputPath = path + '/unknown'; try { this.logger.succ(`Unzipping to ${outputPath}`); - ZipReader.withDestinationPath(outputPath).viaBuffer(await fs.promises.readFile(destPath)); + ZipReader.withDestinationPath(outputPath).viaBuffer(await fsp.readFile(destPath)); const isInstagram = type === 'Instagram' || fs.existsSync(outputPath + '/instagram_live') || fs.existsSync(outputPath + '/instagram_ads_and_businesses'); const isOutbox = type === 'Mastodon' || fs.existsSync(outputPath + '/outbox.json'); if (isInstagram) { - const postsJson = await fs.promises.readFile(outputPath + '/content/posts_1.json', 'utf-8'); + const postsJson = await fsp.readFile(outputPath + '/content/posts_1.json', 'utf-8'); const posts = JSON.parse(postsJson); const igFolder = await this.driveFoldersRepository.findOneBy({ name: 'Instagram', userId: job.data.user.id, parentId: folder?.id }); if (igFolder == null && folder) { @@ -264,16 +265,16 @@ export class ImportNotesProcessorService { } this.queueService.createImportIGToDbJob(job.data.user, posts); } else if (isOutbox) { - const actorJson = await fs.promises.readFile(outputPath + '/actor.json', 'utf-8'); + const actorJson = await fsp.readFile(outputPath + '/actor.json', 'utf-8'); const actor = JSON.parse(actorJson); const isPleroma = actor['@context'].some((v: any) => typeof v === 'string' && v.match(/litepub(.*)/)); if (isPleroma) { - const outboxJson = await fs.promises.readFile(outputPath + '/outbox.json', 'utf-8'); + const outboxJson = await fsp.readFile(outputPath + '/outbox.json', 'utf-8'); const outbox = JSON.parse(outboxJson); const processedToots = await this.recreateChain(['object', 'id'], ['object', 'inReplyTo'], outbox.orderedItems.filter((x: any) => x.type === 'Create' && x.object.type === 'Note'), true); this.queueService.createImportPleroToDbJob(job.data.user, processedToots, null); } else { - const outboxJson = await fs.promises.readFile(outputPath + '/outbox.json', 'utf-8'); + const outboxJson = await fsp.readFile(outputPath + '/outbox.json', 'utf-8'); const outbox = JSON.parse(outboxJson); let mastoFolder = await this.driveFoldersRepository.findOneBy({ name: 'Mastodon', userId: job.data.user.id, parentId: folder?.id }); if (mastoFolder == null && folder) { @@ -296,7 +297,7 @@ export class ImportNotesProcessorService { this.logger.info(`Temp dir is ${path}`); try { - await fs.promises.writeFile(path, '', 'utf-8'); + await fsp.writeFile(path, '', 'utf-8'); await this.downloadService.downloadUrl(file.url, path); } catch (e) { // TODO: 何度か再試行 if (e instanceof Error || typeof e === 'string') { @@ -305,7 +306,7 @@ export class ImportNotesProcessorService { throw e; } - const notesJson = await fs.promises.readFile(path, 'utf-8'); + const notesJson = await fsp.readFile(path, 'utf-8'); const notes = JSON.parse(notesJson); const processedNotes = await this.recreateChain(['id'], ['replyId'], notes, false); this.queueService.createImportKeyNotesToDbJob(job.data.user, processedNotes, null);